geooptparser.py 16.5 KB
Newer Older
1
2
from __future__ import print_function
from __future__ import absolute_import
3
4
from builtins import next
from builtins import range
5
from nomadcore.simple_parser import SimpleMatcher as SM
6
from nomadcore.baseclasses import MainHierarchicalParser
7
from .commonparser import CP2KCommonParser
8
9
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
10
from nomadcore.caching_backend import CachingLevel
11
12
13
14
15
16
17
18
19
20
21
22
23
import logging
logger = logging.getLogger("nomad")


#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
    def __init__(self, file_path, parser_context):
        """
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
24
        self.setup_common_matcher(CP2KCommonParser(parser_context))
25
        self.traj_iterator = None
26
        self.energy_reeval_quickstep = None
27

28
        #=======================================================================
29
        # Globally cached values
30
31
32
33
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
34
35
36

        #=======================================================================
        # Cache levels
37
        self.caching_levels.update({
38
39
40
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
41
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
42
43
44
45
        })

        #=======================================================================
        # SimpleMatchers
46
        self.geo_opt = SM(
47
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
48
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
49
            subMatchers=[
50
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
51
                    adHoc=self.adHoc_conjugate_gradient(),
52
                    otherMetaInfo=["geometry_optimization_method"],
53
                ),
54
55
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
56
                    otherMetaInfo=["geometry_optimization_method"],
57
58
59
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
60
                    otherMetaInfo=["geometry_optimization_method"],
61
                ),
62
63
64
65
66
67
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
68
69
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.regex_word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.regex_f),
70
71
72
73
74
75
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
76
                SM( " OPTIMIZATION STEP:",
77
                    endReStr="  Conv. in RMS gradients     =",
78
79
                    name="geooptstep",
                    repeats=True,
80
                    subMatchers=[
81
82
                        SM( "",
                            forwardMatch=True,
83
                            sections=["x_cp2k_section_geometry_optimization_step"],
84
85
86
                            otherMetaInfo=[
                                "atom_positions",
                            ],
87
                            subMatchers=[
88
89
                                # SM( "",
                                    # forwardMatch=True,
90
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.regex_i),
91
92
93
94
95
96
97
98
99
100
101
102
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
103
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.regex_i),
104
105
106
107
108
109
110
111
112
113
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
114
                                SM( " --------  Informations at step"),
115
116
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.regex_word)),
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.regex_f),
117
118
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
119
120
121
122
123
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.regex_f)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.regex_word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.regex_f)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.regex_f)),
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.regex_f),
124
125
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
126
127
128
129
130
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.regex_word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.regex_f)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.regex_word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.regex_f)),
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.regex_f),
131
132
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
133
134
135
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.regex_word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.regex_f)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.regex_word)),
136
                            ],
137
                            # adHoc=self.adHoc_step()
138
139
                        ),
                    ]
140
141
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
142
143
144
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
145
                SM( "                    Reevaluating energy at the minimum",
146
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
147
148
                    subMatchers=[
                        self.cm.quickstep_calculation(),
149
150
151
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
152
                    ],
153
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
154
                ),
155
156
157
158
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
159
            ],
160
161
162
163
164
165
166
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
167
            sections=["section_run", "section_sampling_method"],
168
            subMatchers=[
169
170
171
172
173
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
174
                        self.cm.quickstep_header(),
175
                    ],
176
                ),
177
178
                self.geo_opt,
                self.cm.footer(),
179
180
181
182
            ]
        )

    #===========================================================================
183
    # onClose triggers
184
185
186
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
187
188
189
190
191
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
192

193
        # Push values from cache
194
        self.cache_service.push_array_values("frame_sequence_potential_energy")
195
196
        self.cache_service.push_value("geometry_optimization_method")
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
197

198
199
200
201
202
203
204
205
206
207
208
209
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
210

211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

        # Push the trajectory
        n_steps = len(steps) + 1
        last_step = n_steps - 1
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
                        logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

        self.cache_service.push_array_values("frame_sequence_local_frames_ref")
        backend.addValue("number_of_frames_in_sequence", n_steps)

240
241
242
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

243
244
245
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

246
247
248
249
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
250

251
252
253
    def onClose_section_system(self, backend, gIndex, section):
        self.cache_service.push_array_values("simulation_cell", unit="angstrom")

254
255
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
256
257
258
259
260
261
262
263
264
265
266
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
                self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
            else:
                try:
                    self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
                except ValueError:
                    pass
267

268
269
270
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

271
    #===========================================================================
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

287
288
289
290
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
291
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
292
293
        return wrapper

294
295
296
297
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
298
            self.cache_service["geometry_optimization_method"] = "bfgs"
299
300
        return wrapper

301
    # def adHoc_save_energy_reeval_quickstep(self):
302
        # def wrapper(parser):
303
304
305
306
307
308
309
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
310
        # return wrapper
311
312
313

    def debug(self):
        def wrapper(parser):
314
            print("DEBUG")
315
        return wrapper