from __future__ import print_function
from __future__ import absolute_import
from nomadcore.simple_parser import SimpleMatcher as SM
from nomadcore.baseclasses import MainHierarchicalParser
from .commonmatcher import CommonMatcher
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
from nomadcore.caching_backend import CachingLevel
import logging
logger = logging.getLogger("nomad")


#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
    def __init__(self, file_path, parser_context):
        """Set up the cached values, caching levels and the matcher hierarchy.

        Args:
            file_path: Forwarded unchanged to MainHierarchicalParser.
            parser_context: Forwarded to MainHierarchicalParser and used to
                construct the CommonMatcher.
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
        self.setup_common_matcher(CommonMatcher(parser_context))

        # Iterator over the geometries of the external trajectory file. It is
        # assigned in onClose_section_method and stays None when no
        # trajectory could be opened.
        self.traj_iterator = None

        # The most recently closed x_cp2k_section_quickstep_calculation.
        self.energy_reeval_quickstep = None

        #=======================================================================
        # Globally cached values
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")

        #=======================================================================
        # Cache levels
        self.caching_level_for_metaname.update({
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
        })

        #=======================================================================
        # SimpleMatchers
        #
        # The patterns are raw strings so that "\*" and "\s" reach the regex
        # engine verbatim instead of relying on Python passing unknown escape
        # sequences through unchanged.
        self.geo_opt = SM(
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", r"\*"),
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
            subMatchers=[
                SM(" ***                           CONJUGATE GRADIENTS                           ***".replace("*", r"\*"),
                    adHoc=self.adHoc_conjugate_gradient(),
                    otherMetaInfo=["geometry_optimization_method"],
                ),
                SM(" ***                                   BFGS                                  ***".replace("*", r"\*"),
                    adHoc=self.adHoc_bfgs(),
                    otherMetaInfo=["geometry_optimization_method"],
                ),
                SM(" ***                                 L-BFGS                                  ***".replace("*", r"\*"),
                    adHoc=self.adHoc_bfgs(),
                    otherMetaInfo=["geometry_optimization_method"],
                ),
                # NOTE(review): only the per-step summary table is matched
                # here; it looks like the SCF cycles printed during the
                # conjugate-gradient line search (MNBRACK/BRENT) are
                # intentionally not parsed -- confirm.
                SM(" OPTIMIZATION STEP:",
                    endReStr="  Conv. in RMS gradients     =",
                    name="geooptstep",
                    repeats=True,
                    subMatchers=[
                        SM("",
                            forwardMatch=True,
                            sections=["x_cp2k_section_geometry_optimization_step"],
                            otherMetaInfo=[
                                "atom_positions",
                            ],
                            subMatchers=[
                                SM(" --------  Informations at step"),
                                SM(r"  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
                                SM(r"  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
                                SM(r"  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.cm.regex_f)),
                                SM(r"  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.cm.regex_word)),
                                SM(r"  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.cm.regex_f)),
                                SM(r"  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.cm.regex_f)),
                                SM(r"  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
                                SM(r"  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM(r"  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.cm.regex_f)),
                                SM(r"  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM(r"  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
                                SM(r"  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
                                SM(r"  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.cm.regex_word)),
                                SM(r"  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
                                SM(r"  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.cm.regex_word)),
                            ],
                        ),
                    ]
                ),
                SM(" ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", r"\*"),
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
                SM("                    Reevaluating energy at the minimum",
                    subMatchers=[
                        self.cm.quickstep_calculation(),
                    ],
                ),
            ],
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
            sections=["section_run", "section_sampling_method"],
            subMatchers=[
                SM("",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
                        self.cm.quickstep_header(),
                    ],
                ),
                self.geo_opt
            ]
        )

    #===========================================================================
    # onClose triggers
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):
        """Push the cached frame-sequence data and emit one frame per step.

        Appends the energy of the last closed quickstep calculation (if any)
        to the cached potential energies, pushes the cached values, copies
        the convergence limits of the latest optimization step into the
        section and opens/closes one section_single_configuration_calculation
        and one section_system per frame.
        """
        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)

        # Push values from cache
        self.cache_service.push_array_values("frame_sequence_potential_energy")
        self.cache_service.push_value("geometry_optimization_method")
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)

        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )

        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = self.cache_service["traj_add_last"] in ("NUMERIC", "SYMBOLIC")

        # Push the trajectory. One frame more than the number of parsed
        # optimization steps is emitted. The lookup is guarded the same way as
        # in onClose_x_cp2k_section_geometry_optimization_step; presumably it
        # yields None when no step was parsed -- TODO confirm.
        n_steps = (len(steps) if steps is not None else 0) + 1
        last_step = n_steps - 1
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
                        logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

        self.cache_service.push_array_values("frame_sequence_local_frames_ref")
        backend.addValue("number_of_frames_in_sequence", n_steps)

    def onClose_section_sampling_method(self, backend, gIndex, section):
        """Record that the sampling method is a geometry optimization."""
        self.backend.addValue("sampling_method", "geometry_optimization")

    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        """Remember the closed quickstep section for later energy lookup."""
        self.energy_reeval_quickstep = section

    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        """Cache the energy of the closed optimization step, if present."""
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])

    def onClose_section_system(self, backend, gIndex, section):
        """Push the cached simulation cell into the closing system section."""
        self.cache_service.push_array_values("simulation_cell", unit="angstrom")

    def onClose_section_method(self, backend, gIndex, section):
        """Open the external trajectory file, if one is available.

        On success self.traj_iterator is set; otherwise it keeps its previous
        value and no atom positions are read from an external file.
        """
        traj_file = self.file_service.get_file_by_id("trajectory")
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is None or traj_file is None:
            return

        # Use special parsing for CP2K pdb files because they don't follow the proper syntax
        if traj_format == "PDB":
            self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
        else:
            try:
                self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
            except ValueError as error:
                # Best effort: parsing continues without the external
                # geometries, but the failure is no longer silent.
                logger.warning("Could not read the external trajectory file: %s", error)

    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        """Cache the index of the closed frame for the frame sequence."""
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

    #===========================================================================
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
        return wrapper

    def adHoc_bfgs(self):
        """Called when the BFGS or L-BFGS method is used.
        """
        def wrapper(parser):
            self.cache_service["geometry_optimization_method"] = "bfgs"
        return wrapper

    def debug(self):
        """Return an adHoc callback that prints "DEBUG" when triggered."""
        def wrapper(parser):
            print("DEBUG")
        return wrapper