geooptparser.py 16.3 KB
Newer Older
1
2
from __future__ import print_function
from __future__ import absolute_import
3
4
from builtins import next
from builtins import range
5
from nomadcore.simple_parser import SimpleMatcher as SM
6
from nomadcore.baseclasses import MainHierarchicalParser
7
8
import nomadcore.configurationreading
import nomadcore.csvparsing
9
from .commonparser import CP2KCommonParser
10
from nomadcore.caching_backend import CachingLevel
11
12
13
14
15
16
17
18
19
20
21
22
import logging
logger = logging.getLogger("nomad")


class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
    def __init__(self, file_path, parser_context):
        """
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
23
        self.setup_common_matcher(CP2KCommonParser(parser_context))
24
        self.traj_iterator = None
25
        self.energy_reeval_quickstep = None
26

27
        #=======================================================================
28
        # Globally cached values
29
30
31
32
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
33
34
35

        #=======================================================================
        # Cache levels
36
        self.caching_levels.update({
37
38
39
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
40
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
41
42
43
44
        })

        #=======================================================================
        # SimpleMatchers
45
        self.geo_opt = SM(
46
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
47
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
48
            subMatchers=[
49
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
50
                    adHoc=self.adHoc_conjugate_gradient(),
51
                    otherMetaInfo=["geometry_optimization_method"],
52
                ),
53
54
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
55
                    otherMetaInfo=["geometry_optimization_method"],
56
57
58
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
59
                    otherMetaInfo=["geometry_optimization_method"],
60
                ),
61
62
63
64
65
66
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
67
68
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
69
70
71
72
73
74
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
75
                SM( " OPTIMIZATION STEP:",
76
                    endReStr="  Conv. in RMS gradients     =",
77
78
                    name="geooptstep",
                    repeats=True,
79
                    subMatchers=[
80
81
                        SM( "",
                            forwardMatch=True,
82
                            sections=["x_cp2k_section_geometry_optimization_step"],
83
84
85
                            otherMetaInfo=[
                                "atom_positions",
                            ],
86
                            subMatchers=[
87
88
                                # SM( "",
                                    # forwardMatch=True,
89
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
90
91
92
93
94
95
96
97
98
99
100
101
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
102
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
103
104
105
106
107
108
109
110
111
112
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
113
                                SM( " --------  Informations at step"),
114
115
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
116
117
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
118
119
120
121
122
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.float)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.float)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.float),
123
124
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
125
126
127
128
129
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.float),
130
131
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
132
133
134
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.word)),
135
                            ],
136
                            # adHoc=self.adHoc_step()
137
138
                        ),
                    ]
139
140
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
141
142
143
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
144
                SM( "                    Reevaluating energy at the minimum",
145
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
146
147
                    subMatchers=[
                        self.cm.quickstep_calculation(),
148
149
150
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
151
                    ],
152
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
153
                ),
154
155
156
157
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
158
            ],
159
160
161
162
163
164
165
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
166
            sections=["section_run", "section_sampling_method"],
167
            subMatchers=[
168
169
170
171
172
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
173
                        self.cm.quickstep_header(),
174
                    ],
175
                ),
176
177
                self.geo_opt,
                self.cm.footer(),
178
179
180
181
            ]
        )

    #===========================================================================
182
    # onClose triggers
183
184
185
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
186
187
188
189
190
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
191

192
        # Push values from cache
193
194
        self.cache_service.addArrayValues("frame_sequence_potential_energy")
        self.cache_service.addValue("geometry_optimization_method")
195
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
196

197
198
199
200
201
202
203
204
205
206
207
208
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

        # Push the trajectory
        n_steps = len(steps) + 1
        last_step = n_steps - 1
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
                        logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

236
        self.cache_service.addArrayValues("frame_sequence_local_frames_ref")
237
238
        backend.addValue("number_of_frames_in_sequence", n_steps)

239
240
241
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

242
243
244
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

245
246
247
248
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
249

250
    def onClose_section_system(self, backend, gIndex, section):
251
        self.cache_service.addArrayValues("simulation_cell", unit="angstrom")
252

253
254
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
255
256
257
258
259
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
260
                self.traj_iterator = nomadcore.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
261
262
            else:
                try:
263
                    self.traj_iterator = nomadcore.configurationreading.iread(traj_file)
264
265
                except ValueError:
                    pass
266

267
268
269
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

270
    #===========================================================================
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

286
287
288
289
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
290
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
291
292
        return wrapper

293
294
295
296
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
297
            self.cache_service["geometry_optimization_method"] = "bfgs"
298
299
        return wrapper

300
    # def adHoc_save_energy_reeval_quickstep(self):
301
        # def wrapper(parser):
302
303
304
305
306
307
308
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
309
        # return wrapper
310
311
312

    def debug(self):
        def wrapper(parser):
313
            print("DEBUG")
314
        return wrapper