geooptparser.py 16.1 KB
Newer Older
1
from nomadcore.simple_parser import SimpleMatcher as SM
2
from nomadcore.baseclasses import MainHierarchicalParser
3
from commonmatcher import CommonMatcher
4
5
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
6
from nomadcore.caching_backend import CachingLevel
7
8
9
10
11
12
13
14
15
16
17
18
19
import logging
logger = logging.getLogger("nomad")


#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
    def __init__(self, file_path, parser_context):
        """
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
20
        self.setup_common_matcher(CommonMatcher(parser_context))
21
        self.traj_iterator = None
22
        self.energy_reeval_quickstep = None
23

24
        #=======================================================================
25
        # Globally cached values
26
27
28
29
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
30
31
32
33

        #=======================================================================
        # Cache levels
        self.caching_level_for_metaname.update({
34
35
36
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
37
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
38
39
40
41
        })

        #=======================================================================
        # SimpleMatchers
42
        self.geo_opt = SM(
43
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
44
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
45
            subMatchers=[
46
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
47
                    adHoc=self.adHoc_conjugate_gradient(),
48
                    otherMetaInfo=["geometry_optimization_method"],
49
                ),
50
51
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
52
                    otherMetaInfo=["geometry_optimization_method"],
53
54
55
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
56
                    otherMetaInfo=["geometry_optimization_method"],
57
                ),
58
59
60
61
62
63
64
65
66
67
68
69
70
71
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
72
                SM( " OPTIMIZATION STEP:",
73
                    endReStr="  Conv. in RMS gradients     =",
74
75
                    name="geooptstep",
                    repeats=True,
76
                    subMatchers=[
77
78
                        SM( "",
                            forwardMatch=True,
79
                            sections=["x_cp2k_section_geometry_optimization_step"],
80
81
82
                            otherMetaInfo=[
                                "atom_positions",
                            ],
83
                            subMatchers=[
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
                                # SM( "",
                                    # forwardMatch=True,
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
110
                                SM( " --------  Informations at step"),
111
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
112
113
114
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
115
116
117
118
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.cm.regex_f)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.cm.regex_word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.cm.regex_f)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.cm.regex_f)),
119
120
121
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
122
123
124
125
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.cm.regex_f)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
126
127
128
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
129
130
131
132
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.cm.regex_word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.cm.regex_word)),
                            ],
133
                            # adHoc=self.adHoc_step()
134
135
                        ),
                    ]
136
137
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
138
139
140
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
141
                SM( "                    Reevaluating energy at the minimum",
142
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
143
144
                    subMatchers=[
                        self.cm.quickstep_calculation(),
145
146
147
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
148
                    ],
149
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
150
                ),
151
152
153
154
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
155
            ],
156
157
158
159
160
161
162
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
163
            sections=["section_run", "section_sampling_method"],
164
            subMatchers=[
165
166
167
168
169
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
170
                        self.cm.quickstep_header(),
171
                    ],
172
                ),
173
174
175
176
177
                self.geo_opt
            ]
        )

    #===========================================================================
178
    # onClose triggers
179
180
181
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
182
183
184
185
186
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
187

188
        # Push values from cache
189
        self.cache_service.push_array_values("frame_sequence_potential_energy")
190
191
        self.cache_service.push_value("geometry_optimization_method")
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
192

193
194
195
196
197
198
199
200
201
202
203
204
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
205

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

        # Push the trajectory
        n_steps = len(steps) + 1
        last_step = n_steps - 1
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
                        logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

        self.cache_service.push_array_values("frame_sequence_local_frames_ref")
        backend.addValue("number_of_frames_in_sequence", n_steps)

235
236
237
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

238
239
240
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

241
242
243
244
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
245

246
247
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
248
249
250
251
252
253
254
255
256
257
258
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
                self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
            else:
                try:
                    self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
                except ValueError:
                    pass
259

260
261
262
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

263
    #===========================================================================
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

279
280
281
282
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
283
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
284
285
        return wrapper

286
287
288
289
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
290
            self.cache_service["geometry_optimization_method"] = "bfgs"
291
292
        return wrapper

293
    # def adHoc_save_energy_reeval_quickstep(self):
294
        # def wrapper(parser):
295
296
297
298
299
300
301
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
302
        # return wrapper
303
304
305
306
307

    def debug(self):
        def wrapper(parser):
            print "DEBUG"
        return wrapper