geooptparser.py 14.5 KB
Newer Older
1
from nomadcore.simple_parser import SimpleMatcher as SM
2
from nomadcore.baseclasses import MainHierarchicalParser
3
from commonmatcher import CommonMatcher
4
5
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
6
from nomadcore.caching_backend import CachingLevel
7
import logging
8
import ase.io
9
10
import numpy as np
import math
11
12
13
14
15
16
17
18
19
20
21
22
logger = logging.getLogger("nomad")


#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
    def __init__(self, file_path, parser_context):
        """
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
23
        self.setup_common_matcher(CommonMatcher(parser_context))
24
        self.traj_iterator = None
25

26
27
        #=======================================================================
        # Cached values
28
29
        self.cache_service.add_cache_object("number_of_frames_in_sequence", 0)
        self.cache_service.add_cache_object("frame_sequence_potential_energy", [])
30
31
32
33
34

        #=======================================================================
        # Cache levels
        self.caching_level_for_metaname.update({
            'x_cp2k_optimization_energy': CachingLevel.ForwardAndCache,
35
36
37
38
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
39
40
41
42
        })

        #=======================================================================
        # SimpleMatchers
43
        self.geo_opt = SM(
44
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
45
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
46
            subMatchers=[
47
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
48
                    adHoc=self.adHoc_conjugate_gradient(),
49
                    otherMetaInfo=["geometry_optimization_method"],
50
                ),
51
52
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
53
                    otherMetaInfo=["geometry_optimization_method"],
54
55
56
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
57
                    otherMetaInfo=["geometry_optimization_method"],
58
                ),
59
60
61
62
63
64
65
66
67
68
69
70
71
72
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
73
                SM( " OPTIMIZATION STEP:",
74
                    endReStr="  Conv. in RMS gradients     =",
75
76
                    name="geooptstep",
                    repeats=True,
77
                    sections=["section_system"],
78
                    subMatchers=[
79
80
                        SM( "",
                            forwardMatch=True,
81
                            sections=["x_cp2k_section_geometry_optimization_step"],
82
83
84
                            otherMetaInfo=[
                                "atom_positions",
                            ],
85
                            subMatchers=[
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
                                # SM( "",
                                    # forwardMatch=True,
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
112
                                SM( " --------  Informations at step"),
113
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
114
115
116
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
117
118
119
120
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.cm.regex_f)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.cm.regex_word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.cm.regex_f)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.cm.regex_f)),
121
122
123
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
124
125
126
127
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.cm.regex_f)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
128
129
130
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
131
132
133
134
135
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.cm.regex_word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.cm.regex_word)),
                            ],
                            adHoc=self.adHoc_step()
136
137
                        ),
                    ]
138
139
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
140
141
142
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
143
144
145
146
147
148
149
                SM( "                    Reevaluating energy at the minimum",
                    sections=["x_cp2k_section_geometry_optimization_energy_reevaluation", "section_system"],
                    subMatchers=[
                        self.cm.quickstep_calculation(),
                    ],
                    adHoc=self.adHoc_step()
                ),
150
            ],
151
152
153
154
155
156
157
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
158
            sections=["section_run", "section_sampling_method"],
159
            subMatchers=[
160
161
162
163
164
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
165
                        self.cm.quickstep_header(),
166
                    ],
167
                ),
168
169
170
171
172
                self.geo_opt
            ]
        )

    #===========================================================================
173
    # onClose triggers
174
175
176
177
178
179
180
181
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
        reeval = section["x_cp2k_section_geometry_optimization_energy_reevaluation"][0]
        quickstep = reeval["x_cp2k_section_quickstep_calculation"][0]
        energy = quickstep["x_cp2k_energy_total"]
        self.cache_service["frame_sequence_potential_energy"].append(energy[0])

182
183
184
        self.cache_service.push_value("number_of_frames_in_sequence")
        self.cache_service.push_array_values("frame_sequence_potential_energy")

185
        opt_section = section["x_cp2k_section_geometry_optimization_step"]
186
187
188
189
190
191
192
193
194
195
196
197
        if opt_section is not None:
            opt_section = opt_section[-1]
            geo_limit = opt_section["x_cp2k_optimization_step_size_convergence_limit"]
            if geo_limit is not None:
                self.backend.addValue("geometry_optimization_geometry_change", geo_limit[0])
            force_limit = opt_section["x_cp2k_optimization_gradient_convergence_limit"]
            if force_limit is not None:
                self.backend.addValue("geometry_optimization_threshold_force", force_limit[0])

    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

198
199
200
201
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
202

203
204
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
205
206
207
208
209
210
211
212
213
214
215
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
                self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
            else:
                try:
                    self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
                except ValueError:
                    pass
216

217
    #===========================================================================
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

233
234
235
236
237
238
239
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_method", "conjugate_gradient")
        return wrapper

240
241
242
243
244
245
246
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_method", "bfgs")
        return wrapper

247
    def adHoc_step(self):
248
249
        """Called when all the step information has been retrieved from the
        output file. Here further information is gathered from external files.
250
251
252
        """
        def wrapper(parser):
            self.cache_service["number_of_frames_in_sequence"] += 1
253
254
255

            # Get the next position from the trajectory file
            if self.traj_iterator is not None:
256
257
258
259
260
261
262
263
                # pos = next(self.traj_iterator)
                # self.cache_service["atom_positions"] = pos
                try:
                    pos = next(self.traj_iterator)
                except StopIteration:
                    logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                else:
                    self.cache_service["atom_positions"] = pos
264
265
266
267
268

        return wrapper

    def adHoc_setup_traj_file(self):
        def wrapper(parser):
269
            pass
270
271
272
273
274
        return wrapper

    def debug(self):
        def wrapper(parser):
            print "FOUND"
275
        return wrapper