# geooptparser.py (13.5 KB)
1
from nomadcore.simple_parser import SimpleMatcher as SM
2
from nomadcore.baseclasses import MainHierarchicalParser
3
from commonmatcher import CommonMatcher
4
5
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
6
from nomadcore.caching_backend import CachingLevel
7
import logging
8
import ase.io
9
10
11
12
13
14
15
16
17
18
19
20
logger = logging.getLogger("nomad")


#===============================================================================
class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION

    The constructor builds the SimpleMatcher tree (``self.root_matcher``)
    that recognises the geometry optimization part of the CP2K output. The
    ``onClose_*`` methods push cached values to the backend when a section
    closes, and the ``adHoc_*`` methods are factories returning the callbacks
    that are attached to individual matchers.
    """
    def __init__(self, file_path, parser_context):
        """Set up the cache objects, caching levels and the matcher tree.

        Args:
            file_path: Passed on unchanged to MainHierarchicalParser.
            parser_context: Passed on to MainHierarchicalParser and used to
                construct the CommonMatcher.
        """
        super(CP2KGeoOptParser, self).__init__(file_path, parser_context)
        # NOTE(review): self.cm, used below, is presumably made available by
        # setup_common_matcher() -- confirm against the base class.
        self.setup_common_matcher(CommonMatcher(parser_context))

        # Iterator over the frames of the external trajectory file. It is
        # assigned in onClose_section_method() and consumed in adHoc_step().
        self.traj_iterator = None

        #=======================================================================
        # Cached values
        self.cache_service.add_cache_object("number_of_frames_in_sequence", 0)
        self.cache_service.add_cache_object("frame_sequence_potential_energy", [])

        #=======================================================================
        # Cache levels
        self.caching_level_for_metaname.update({
            'x_cp2k_optimization_energy': CachingLevel.ForwardAndCache,
            'x_cp2k_optimization_step_size_convergence_limit': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization_information': CachingLevel.ForwardAndCache,
        })

        #=======================================================================
        # SimpleMatchers
        #
        # All patterns are raw strings so that the regex escapes (\*, \s) are
        # not interpreted as (invalid) string escape sequences. The banner
        # lines are written as they appear in the output and the literal
        # asterisks are escaped afterwards with replace().
        self.geo_opt = SM(
            r" ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", r"\*"),
            sections=["section_frame_sequence"],
            subMatchers=[
                # Banner of the optimizer in use
                SM( r" ***                           CONJUGATE GRADIENTS                           ***".replace("*", r"\*"),
                    adHoc=self.adHoc_conjugate_gradient(),
                    otherMetaInfo=["geometry_optimization_method"]
                ),
                SM( r" ***                                   BFGS                                  ***".replace("*", r"\*"),
                    adHoc=self.adHoc_bfgs(),
                    otherMetaInfo=["geometry_optimization_method"]
                ),
                SM( r" ***                                 L-BFGS                                  ***".replace("*", r"\*"),
                    adHoc=self.adHoc_bfgs(),
                    otherMetaInfo=["geometry_optimization_method"]
                ),
                # Output that precedes the first " OPTIMIZATION STEP:" line
                SM( "",
                    forwardMatch=True,
                    sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_information"],
                    subMatchers=[
                        self.cm.scf(),
                        SM( r" --------  Informations at step"),
                        SM( r"  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
                        SM( r"  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                            otherMetaInfo=["frame_sequence_potential_energy"]
                        ),
                    ],
                    adHoc=self.adHoc_step()
                ),
                # The repeated optimization steps
                SM( r" OPTIMIZATION STEP:",
                    name="geooptstep",
                    repeats=True,
                    sections=["section_single_configuration_calculation", "section_system"],
                    subMatchers=[
                        SM( "",
                            forwardMatch=True,
                            sections=["x_cp2k_section_geometry_optimization_information"],
                            otherMetaInfo=[
                                "atom_positions",
                            ],
                            subMatchers=[
                                # SCF cycles up to the MNBRACK summary line
                                SM( "",
                                    forwardMatch=True,
                                    endReStr=r" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", r"\*").format(self.cm.regex_i),
                                    subMatchers=[
                                        SM(r" SCF WAVEFUNCTION OPTIMIZATION",
                                            forwardMatch=True,
                                            adHoc=self.debug(),
                                            repeats=True,
                                            subMatchers=[
                                                self.cm.scf(),
                                            ]
                                        )
                                    ]
                                ),
                                # SCF cycles up to the BRENT summary line
                                SM( "",
                                    forwardMatch=True,
                                    endReStr=r" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", r"\*").format(self.cm.regex_i),
                                    subMatchers=[
                                        SM(r" SCF WAVEFUNCTION OPTIMIZATION",
                                            forwardMatch=True,
                                            repeats=True,
                                            subMatchers=[
                                                self.cm.scf(),
                                            ]
                                        )
                                    ]
                                ),
                                # The "Informations at step" summary table
                                SM( r" --------  Informations at step"),
                                SM( r"  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
                                SM( r"  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
                                SM( r"  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.cm.regex_f)),
                                SM( r"  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.cm.regex_word)),
                                SM( r"  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.cm.regex_f)),
                                SM( r"  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.cm.regex_f)),
                                SM( r"  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
                                SM( r"  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( r"  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.cm.regex_f)),
                                SM( r"  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.cm.regex_word)),
                                SM( r"  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
                                SM( r"  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.cm.regex_f),
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
                                SM( r"  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.cm.regex_word)),
                                SM( r"  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
                                SM( r"  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.cm.regex_word)),
                            ],
                            adHoc=self.adHoc_step()
                        ),
                    ]
                ),
                SM( r" ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", r"\*"),
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
            ],
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
            sections=["section_run", "section_sampling_method"],
            subMatchers=[
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
                        self.cm.quickstep(),
                    ],
                ),
                self.geo_opt
            ]
        )

    #===========================================================================
    # onClose triggers
    def onClose_section_frame_sequence(self, backend, gIndex, section):
        """Push the cached frame count and energies and the convergence
        limits of the last optimization step to the backend.
        """
        self.cache_service.push_value("number_of_frames_in_sequence")
        self.cache_service.push_array_values("frame_sequence_potential_energy")

        opt_section = section["x_cp2k_section_geometry_optimization_information"]
        if opt_section is not None:
            # Only the most recently stored step is consulted
            opt_section = opt_section[-1]
            geo_limit = opt_section["x_cp2k_optimization_step_size_convergence_limit"]
            if geo_limit is not None:
                self.backend.addValue("geometry_optimization_geometry_change", geo_limit[0])
            force_limit = opt_section["x_cp2k_optimization_gradient_convergence_limit"]
            if force_limit is not None:
                self.backend.addValue("geometry_optimization_threshold_force", force_limit[0])

    def onClose_section_sampling_method(self, backend, gIndex, section):
        """Mark the sampling method as a geometry optimization.
        """
        self.backend.addValue("sampling_method", "geometry_optimization")

    def onClose_x_cp2k_section_geometry_optimization_information(self, backend, gIndex, section):
        """Append the energy of the closed optimization step to the cached
        list of frame sequence potential energies.
        """
        energy = section["x_cp2k_optimization_energy"]
        # The section is opened by a forward-matching matcher, so it can close
        # without a "Total Energy" line having been matched. The lookup then
        # gives None (same check as in onClose_section_frame_sequence) and
        # indexing it would raise a TypeError.
        if energy is None:
            logger.warning("No optimization energy was found for an optimization step; it is left out of frame_sequence_potential_energy.")
            return
        self.cache_service["frame_sequence_potential_energy"].append(energy[0])

    def onClose_section_method(self, backend, gIndex, section):
        """Create the iterator over the external trajectory file, if both the
        file and its format are known.
        """
        traj_file = self.file_service.get_file_by_id("trajectory")
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is None or traj_file is None:
            return

        # Use special parsing for CP2K pdb files because they don't follow the proper syntax
        if traj_format == "PDB":
            self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
        else:
            try:
                self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
            except ValueError as e:
                # Parsing continues without the external positions, but the
                # reason is no longer dropped silently.
                logger.warning("Could not create an iterator for the trajectory file '%s': %s", traj_file, e)

    #===========================================================================
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_method", "conjugate_gradient")
        return wrapper

    def adHoc_bfgs(self):
        """Called when the BFGS or the L-BFGS method is used. Both banners are
        reported with the same value "bfgs".
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_method", "bfgs")
        return wrapper

    def adHoc_step(self):
        """Called when all the step information has been retrieved from the
        output file. Here further information is gathered from external files.
        """
        def wrapper(parser):
            self.cache_service["number_of_frames_in_sequence"] += 1

            # Get the next position from the trajectory file
            if self.traj_iterator is not None:
                try:
                    pos = next(self.traj_iterator)
                except StopIteration:
                    logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
                else:
                    self.cache_service["atom_positions"] = pos

        return wrapper

    def adHoc_setup_traj_file(self):
        """Return a callback that does nothing.
        """
        def wrapper(parser):
            pass
        return wrapper

    def debug(self):
        """Return a callback that reports that its matcher was hit.

        The message goes through the module logger instead of a Python 2 only
        print statement, so the module also compiles on Python 3.
        """
        def wrapper(parser):
            logger.debug("FOUND")
        return wrapper