geooptparser.py 16.4 KB
Newer Older
1
2
from __future__ import print_function
from __future__ import absolute_import
3
4
from builtins import next
from builtins import range
5
from nomadcore.simple_parser import SimpleMatcher as SM
6
from nomadcore.baseclasses import MainHierarchicalParser
7
8
import nomadcore.configurationreading
import nomadcore.csvparsing
9
from .commonparser import CP2KCommonParser
10
from nomadcore.caching_backend import CachingLevel
11
12
13
14
15
16
17
18
import logging
logger = logging.getLogger("nomad")


class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
19
    def __init__(self, parser_context):
20
21
        """
        """
22
        super(CP2KGeoOptParser, self).__init__(parser_context)
23
        self.setup_common_matcher(CP2KCommonParser(parser_context))
24
        self.traj_iterator = None
25
        self.energy_reeval_quickstep = None
26

27
        #=======================================================================
28
        # Globally cached values
29
30
31
32
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
33
34
35

        #=======================================================================
        # Cache levels
36
        self.caching_levels.update({
37
38
39
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
40
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
41
42
43
44
        })

        #=======================================================================
        # SimpleMatchers
45
        self.geo_opt = SM(
46
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
47
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
48
            subMatchers=[
49
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
50
                    adHoc=self.adHoc_conjugate_gradient(),
51
                    otherMetaInfo=["geometry_optimization_method"],
52
                ),
53
54
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
55
                    otherMetaInfo=["geometry_optimization_method"],
56
57
58
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
59
                    otherMetaInfo=["geometry_optimization_method"],
60
                ),
61
62
63
64
65
66
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
67
68
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
69
70
71
72
73
74
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
75
                SM( " OPTIMIZATION STEP:",
76
                    endReStr="  Conv. in RMS gradients     =",
77
78
                    name="geooptstep",
                    repeats=True,
79
                    subMatchers=[
80
81
                        SM( "",
                            forwardMatch=True,
82
                            sections=["x_cp2k_section_geometry_optimization_step"],
83
84
85
                            otherMetaInfo=[
                                "atom_positions",
                            ],
86
                            subMatchers=[
87
88
                                # SM( "",
                                    # forwardMatch=True,
89
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
90
91
92
93
94
95
96
97
98
99
100
101
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
102
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
103
104
105
106
107
108
109
110
111
112
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
113
                                SM( " --------  Informations at step"),
114
115
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
116
117
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
118
119
120
121
122
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.float)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.float)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.float),
123
124
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
125
126
127
128
129
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.float),
130
131
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
132
133
134
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.word)),
135
                            ],
136
                            # adHoc=self.adHoc_step()
137
138
                        ),
                    ]
139
140
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
141
142
143
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
144
                SM( "                    Reevaluating energy at the minimum",
145
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
146
147
                    subMatchers=[
                        self.cm.quickstep_calculation(),
148
149
150
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
151
                    ],
152
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
153
                ),
154
155
156
157
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
158
            ],
159
160
161
162
163
164
165
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
166
            sections=["section_run", "section_sampling_method"],
167
            subMatchers=[
168
169
170
171
172
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
173
                        self.cm.quickstep_header(),
174
                    ],
175
                ),
176
177
                self.geo_opt,
                self.cm.footer(),
178
179
180
181
            ]
        )

    #===========================================================================
182
    # onClose triggers
183
184
185
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
186
187
188
189
190
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
191

192
        # Push values from cache
193
194
        self.cache_service.addArrayValues("frame_sequence_potential_energy")
        self.cache_service.addValue("geometry_optimization_method")
195
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
196

197
198
199
200
201
202
203
204
205
206
207
208
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

        # Push the trajectory
        n_steps = len(steps) + 1
        last_step = n_steps - 1
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
                        logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

236
        self.cache_service.addArrayValues("frame_sequence_local_frames_ref")
237
238
        backend.addValue("number_of_frames_in_sequence", n_steps)

239
240
241
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

242
243
244
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

245
246
247
248
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
249

250
    def onClose_section_system(self, backend, gIndex, section):
251
        self.cache_service.addArrayValues("simulation_cell", unit="angstrom")
252
        self.cache_service.addArrayValues("lattice_vectors", unit="angstrom")
253

254
255
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
256
257
258
259
260
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
261
                self.traj_iterator = nomadcore.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
262
263
            else:
                try:
264
                    self.traj_iterator = nomadcore.configurationreading.iread(traj_file)
265
266
                except ValueError:
                    pass
267

268
269
270
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

271
    #===========================================================================
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

287
288
289
290
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
291
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
292
293
        return wrapper

294
295
296
297
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
298
            self.cache_service["geometry_optimization_method"] = "bfgs"
299
300
        return wrapper

301
    # def adHoc_save_energy_reeval_quickstep(self):
302
        # def wrapper(parser):
303
304
305
306
307
308
309
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
310
        # return wrapper
311
312
313

    def debug(self):
        def wrapper(parser):
314
            print("DEBUG")
315
        return wrapper