geooptparser.py 16.7 KB
Newer Older
1
2
from __future__ import print_function
from __future__ import absolute_import
3
4
from builtins import next
from builtins import range
5
from nomadcore.simple_parser import SimpleMatcher as SM
6
from nomadcore.baseclasses import MainHierarchicalParser
7
8
import nomadcore.configurationreading
import nomadcore.csvparsing
9
from .commonparser import CP2KCommonParser
10
from nomadcore.caching_backend import CachingLevel
11
12
13
14
15
16
17
18
import logging
logger = logging.getLogger("nomad")


class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
19
    def __init__(self, parser_context):
20
21
        """
        """
22
        super(CP2KGeoOptParser, self).__init__(parser_context)
23
        self.setup_common_matcher(CP2KCommonParser(parser_context))
24
        self.traj_iterator = None
25
        self.energy_reeval_quickstep = None
26

27
        #=======================================================================
28
        # Globally cached values
29
30
31
32
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
33
34
35

        #=======================================================================
        # Cache levels
36
        self.caching_levels.update({
37
38
39
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
40
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
41
42
43
44
        })

        #=======================================================================
        # SimpleMatchers
45
        self.geo_opt = SM(
46
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
47
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
48
            subMatchers=[
49
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
50
                    adHoc=self.adHoc_conjugate_gradient(),
51
                    otherMetaInfo=["geometry_optimization_method"],
52
                ),
53
54
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
55
                    otherMetaInfo=["geometry_optimization_method"],
56
57
58
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
59
                    otherMetaInfo=["geometry_optimization_method"],
60
                ),
61
62
63
64
65
66
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
67
68
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
69
70
71
72
73
74
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
75
                SM( " OPTIMIZATION STEP:",
76
                    endReStr="  Conv. in RMS gradients     =",
77
78
                    name="geooptstep",
                    repeats=True,
79
                    subMatchers=[
80
81
                        SM( "",
                            forwardMatch=True,
82
                            sections=["x_cp2k_section_geometry_optimization_step"],
83
84
85
                            otherMetaInfo=[
                                "atom_positions",
                            ],
86
                            subMatchers=[
87
88
                                # SM( "",
                                    # forwardMatch=True,
89
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
90
91
92
93
94
95
96
97
98
99
100
101
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
102
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
103
104
105
106
107
108
109
110
111
112
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
113
                                SM( " --------  Informations at step"),
114
115
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
116
117
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
118
119
120
121
122
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.float)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.float)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.float),
123
124
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
125
126
127
128
129
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.float),
130
131
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
132
133
134
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.word)),
135
                            ],
136
                            # adHoc=self.adHoc_step()
137
138
                        ),
                    ]
139
140
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
141
142
143
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
144
                SM( "                    Reevaluating energy at the minimum",
145
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
146
147
                    subMatchers=[
                        self.cm.quickstep_calculation(),
148
149
150
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
151
                    ],
152
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
153
                ),
154
155
156
157
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
158
            ],
159
160
161
162
163
164
165
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
166
            sections=["section_run", "section_sampling_method"],
167
            subMatchers=[
168
169
170
171
172
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
173
                        self.cm.quickstep_header(),
174
                    ],
175
                ),
176
177
                self.geo_opt,
                self.cm.footer(),
178
179
180
181
            ]
        )

    #===========================================================================
182
    # onClose triggers
183
184
185
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
186
187
188
189
190
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
191

192
        # Push values from cache
193
194
        self.cache_service.addArrayValues("frame_sequence_potential_energy")
        self.cache_service.addValue("geometry_optimization_method")
195
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
196

197
198
199
200
201
202
203
204
205
206
207
208
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
209

210
211
212
213
214
215
216
217
218
219
220
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

        # Push the trajectory
        n_steps = len(steps) + 1
        last_step = n_steps - 1
221
222
223

        # First push the original system geometry
        # print(self.cache_service["atom_positions"])
224
225
226
227
228
229
230
231
232
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
233
234
235
236
237
238
239
                        logger.error(
                            "Could not get next geometry from an external"
                            " file. It seems that the number of optimization "
                            "steps in the CP2K output file doesn't match the "
                            "number of steps found in the external trajectory "
                            "file."
                        )
240
241
242
243
244
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

245
        self.cache_service.addArrayValues("frame_sequence_local_frames_ref")
246
247
        backend.addValue("number_of_frames_in_sequence", n_steps)

248
249
250
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

251
252
253
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

254
255
256
257
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
258

259
    def onClose_section_system(self, backend, gIndex, section):
260
        self.cache_service.addArrayValues("simulation_cell", unit="angstrom")
261
        self.cache_service.addArrayValues("lattice_vectors", unit="angstrom")
262

263
264
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
265
266
267
268
269
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
270
                self.traj_iterator = nomadcore.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
271
272
            else:
                try:
273
                    self.traj_iterator = nomadcore.configurationreading.iread(traj_file)
274
275
                except ValueError:
                    pass
276

277
278
279
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

280
    #===========================================================================
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

296
297
298
299
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
300
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
301
302
        return wrapper

303
304
305
306
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
307
            self.cache_service["geometry_optimization_method"] = "bfgs"
308
309
        return wrapper

310
    # def adHoc_save_energy_reeval_quickstep(self):
311
        # def wrapper(parser):
312
313
314
315
316
317
318
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
319
        # return wrapper
320
321
322

    def debug(self):
        def wrapper(parser):
323
            print("DEBUG")
324
        return wrapper