geooptparser.py 14.7 KB
Newer Older
1
from nomadcore.simple_parser import SimpleMatcher as SM
2
from nomadcore.baseclasses import MainHierarchicalParser
3
from commonmatcher import CommonMatcher
4
5
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
6
from nomadcore.caching_backend import CachingLevel
7
8
9
10
11
12
13
14
15
16
17
18
19
import logging
logger = logging.getLogger("nomad")


#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
    def __init__(self, file_path, parser_context):
        """
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
20
        self.setup_common_matcher(CommonMatcher(parser_context))
21
        self.traj_iterator = None
22

23
        #=======================================================================
24
        # Globally cached values
25
26
        self.cache_service.add_cache_object("number_of_frames_in_sequence", 0)
        self.cache_service.add_cache_object("frame_sequence_potential_energy", [])
27
28
        self.cache_service.add_cache_object("frame_sequence_local_frames_ref", [])
        self.cache_service.add_cache_object("geometry_optimization_method")
29
30
31
32
33

        #=======================================================================
        # Cache levels
        self.caching_level_for_metaname.update({
            'x_cp2k_optimization_energy': CachingLevel.ForwardAndCache,
34
35
36
37
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
38
39
40
41
        })

        #=======================================================================
        # SimpleMatchers
42
        self.geo_opt = SM(
43
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
44
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
45
            subMatchers=[
46
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
47
                    adHoc=self.adHoc_conjugate_gradient(),
48
                    otherMetaInfo=["geometry_optimization_method"],
49
                ),
50
51
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
52
                    otherMetaInfo=["geometry_optimization_method"],
53
54
55
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
56
                    otherMetaInfo=["geometry_optimization_method"],
57
                ),
58
59
60
61
62
63
64
65
66
67
68
69
70
71
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
72
                SM( " OPTIMIZATION STEP:",
73
                    endReStr="  Conv. in RMS gradients     =",
74
75
                    name="geooptstep",
                    repeats=True,
76
                    sections=["section_single_configuration_calculation", "section_system"],
77
                    subMatchers=[
78
79
                        SM( "",
                            forwardMatch=True,
80
                            sections=["x_cp2k_section_geometry_optimization_step"],
81
82
83
                            otherMetaInfo=[
                                "atom_positions",
                            ],
84
                            subMatchers=[
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
                                # SM( "",
                                    # forwardMatch=True,
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
111
                                SM( " --------  Informations at step"),
112
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
113
114
115
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
116
117
118
119
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.cm.regex_f)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.cm.regex_word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.cm.regex_f)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.cm.regex_f)),
120
121
122
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
123
124
125
126
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.cm.regex_f)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
127
128
129
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
130
131
132
133
134
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.cm.regex_word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.cm.regex_word)),
                            ],
                            adHoc=self.adHoc_step()
135
136
                        ),
                    ]
137
138
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
139
140
141
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
142
143
144
145
146
147
148
                SM( "                    Reevaluating energy at the minimum",
                    sections=["x_cp2k_section_geometry_optimization_energy_reevaluation", "section_system"],
                    subMatchers=[
                        self.cm.quickstep_calculation(),
                    ],
                    adHoc=self.adHoc_step()
                ),
149
            ],
150
151
152
153
154
155
156
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
157
            sections=["section_run", "section_sampling_method"],
158
            subMatchers=[
159
160
161
162
163
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
164
                        self.cm.quickstep_header(),
165
                    ],
166
                ),
167
168
169
170
171
                self.geo_opt
            ]
        )

    #===========================================================================
172
    # onClose triggers
173
174
175
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
176
177
178
179
180
181
182
        energy = section.get_latest_value([
            "x_cp2k_section_geometry_optimization_energy_reevaluation",
            "x_cp2k_section_quickstep_calculation",
            "x_cp2k_energy_total"]
        )
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy)
183

184
        # Push values from cache
185
186
        self.cache_service.push_value("number_of_frames_in_sequence")
        self.cache_service.push_array_values("frame_sequence_potential_energy")
187
188
189
        self.cache_service.push_array_values("frame_sequence_local_frames_ref")
        self.cache_service.push_value("geometry_optimization_method")
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
190

191
192
193
194
195
196
197
198
199
200
201
202
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
203
204
205
206

    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

207
208
209
210
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
211

212
213
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
214
215
216
217
218
219
220
221
222
223
224
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
                self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
            else:
                try:
                    self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
                except ValueError:
                    pass
225

226
227
228
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

229
    #===========================================================================
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

245
246
247
248
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
249
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
250
251
        return wrapper

252
253
254
255
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
256
            self.cache_service["geometry_optimization_method"] = "bfgs"
257
258
        return wrapper

259
    def adHoc_step(self):
260
261
        """Called when all the step information has been retrieved from the
        output file. Here further information is gathered from external files.
262
263
264
        """
        def wrapper(parser):
            self.cache_service["number_of_frames_in_sequence"] += 1
265
266
267

            # Get the next position from the trajectory file
            if self.traj_iterator is not None:
268
269
270
271
272
273
                try:
                    pos = next(self.traj_iterator)
                except StopIteration:
                    logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                else:
                    self.cache_service["atom_positions"] = pos
274
275

        return wrapper