geooptparser.py 16.5 KB
Newer Older
1
2
from __future__ import print_function
from __future__ import absolute_import
3
4
from builtins import next
from builtins import range
5
from nomadcore.simple_parser import SimpleMatcher as SM
6
from nomadcore.baseclasses import MainHierarchicalParser
7
from .commonmatcher import CP2KCommonMatcher
8
9
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
10
from nomadcore.caching_backend import CachingLevel
11
12
13
14
15
16
17
18
19
20
21
22
23
import logging
logger = logging.getLogger("nomad")


#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
    def __init__(self, file_path, parser_context):
        """
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
24
        self.setup_common_matcher(CP2KCommonMatcher(parser_context))
25
        self.traj_iterator = None
26
        self.energy_reeval_quickstep = None
27

28
        #=======================================================================
29
        # Globally cached values
30
31
32
33
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
34
35
36

        #=======================================================================
        # Cache levels
37
        self.caching_levels.update({
38
39
40
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
41
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
42
43
44
45
        })

        #=======================================================================
        # SimpleMatchers
46
        self.geo_opt = SM(
47
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
48
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
49
            subMatchers=[
50
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
51
                    adHoc=self.adHoc_conjugate_gradient(),
52
                    otherMetaInfo=["geometry_optimization_method"],
53
                ),
54
55
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
56
                    otherMetaInfo=["geometry_optimization_method"],
57
58
59
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
60
                    otherMetaInfo=["geometry_optimization_method"],
61
                ),
62
63
64
65
66
67
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
68
69
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.regex_word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.regex_f),
70
71
72
73
74
75
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
76
                SM( " OPTIMIZATION STEP:",
77
                    endReStr="  Conv. in RMS gradients     =",
78
79
                    name="geooptstep",
                    repeats=True,
80
                    subMatchers=[
81
82
                        SM( "",
                            forwardMatch=True,
83
                            sections=["x_cp2k_section_geometry_optimization_step"],
84
85
86
                            otherMetaInfo=[
                                "atom_positions",
                            ],
87
                            subMatchers=[
88
89
                                # SM( "",
                                    # forwardMatch=True,
90
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.regex_i),
91
92
93
94
95
96
97
98
99
100
101
102
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
103
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.regex_i),
104
105
106
107
108
109
110
111
112
113
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
114
                                SM( " --------  Informations at step"),
115
116
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.regex_word)),
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.regex_f),
117
118
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
119
120
121
122
123
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.regex_f)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.regex_word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.regex_f)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.regex_f)),
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.regex_f),
124
125
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
126
127
128
129
130
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.regex_word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.regex_f)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.regex_word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.regex_f)),
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.regex_f),
131
132
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
133
134
135
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.regex_word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.regex_f)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.regex_word)),
136
                            ],
137
                            # adHoc=self.adHoc_step()
138
139
                        ),
                    ]
140
141
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
142
143
144
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
145
                SM( "                    Reevaluating energy at the minimum",
146
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
147
148
                    subMatchers=[
                        self.cm.quickstep_calculation(),
149
150
151
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
152
                    ],
153
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
154
                ),
155
156
157
158
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
159
            ],
160
161
162
163
164
165
166
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
167
            sections=["section_run", "section_sampling_method"],
168
            subMatchers=[
169
170
171
172
173
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
174
                        self.cm.quickstep_header(),
175
                    ],
176
                ),
177
178
179
180
181
                self.geo_opt
            ]
        )

    #===========================================================================
182
    # onClose triggers
183
184
185
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
186
187
188
189
190
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
191

192
        # Push values from cache
193
        self.cache_service.push_array_values("frame_sequence_potential_energy")
194
195
        self.cache_service.push_value("geometry_optimization_method")
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
196

197
198
199
200
201
202
203
204
205
206
207
208
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

        # Push the trajectory
        n_steps = len(steps) + 1
        last_step = n_steps - 1
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
                        logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

        self.cache_service.push_array_values("frame_sequence_local_frames_ref")
        backend.addValue("number_of_frames_in_sequence", n_steps)

239
240
241
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

242
243
244
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

245
246
247
248
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
249

250
251
252
    def onClose_section_system(self, backend, gIndex, section):
        self.cache_service.push_array_values("simulation_cell", unit="angstrom")

253
254
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
255
256
257
258
259
260
261
262
263
264
265
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
                self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
            else:
                try:
                    self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
                except ValueError:
                    pass
266

267
268
269
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

270
    #===========================================================================
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

286
287
288
289
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
290
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
291
292
        return wrapper

293
294
295
296
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
297
            self.cache_service["geometry_optimization_method"] = "bfgs"
298
299
        return wrapper

300
    # def adHoc_save_energy_reeval_quickstep(self):
301
        # def wrapper(parser):
302
303
304
305
306
307
308
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
309
        # return wrapper
310
311
312

    def debug(self):
        def wrapper(parser):
313
            print("DEBUG")
314
        return wrapper