geooptparser.py 17.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2015-2018 Lauri Himanen, Fawzi Mohamed, Ankit Kariryaa
# 
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

15
16
from __future__ import print_function
from __future__ import absolute_import
17
18
from builtins import next
from builtins import range
19
from nomadcore.simple_parser import SimpleMatcher as SM
20
from nomadcore.baseclasses import MainHierarchicalParser
21
22
import nomadcore.configurationreading
import nomadcore.csvparsing
23
from .commonparser import CP2KCommonParser
24
from nomadcore.caching_backend import CachingLevel
25
26
27
28
29
30
31
32
import logging
logger = logging.getLogger("nomad")


class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
33
    def __init__(self, parser_context):
34
35
        """
        """
36
        super(CP2KGeoOptParser, self).__init__(parser_context)
37
        self.setup_common_matcher(CP2KCommonParser(parser_context))
38
        self.traj_iterator = None
39
        self.energy_reeval_quickstep = None
40

41
        #=======================================================================
42
        # Globally cached values
43
44
45
46
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
47
48
49

        #=======================================================================
        # Cache levels
50
        self.caching_levels.update({
51
52
53
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
54
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
55
56
57
58
        })

        #=======================================================================
        # SimpleMatchers
59
        self.geo_opt = SM(
60
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
61
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
62
            subMatchers=[
63
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
64
                    adHoc=self.adHoc_conjugate_gradient(),
65
                    otherMetaInfo=["geometry_optimization_method"],
66
                ),
67
68
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
69
                    otherMetaInfo=["geometry_optimization_method"],
70
71
72
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
73
                    otherMetaInfo=["geometry_optimization_method"],
74
                ),
75
76
77
78
79
80
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
81
82
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
83
84
85
86
87
88
                            # otherMetaInfo=["frame_sequence_potential_energy"]
                        # ),
                    # ],
                    # otherMetaInfo=["atom_positions"],
                    # adHoc=self.adHoc_step(),
                # ),
89
                SM( " OPTIMIZATION STEP:",
90
                    endReStr="  Conv. in RMS gradients     =",
91
92
                    name="geooptstep",
                    repeats=True,
93
                    subMatchers=[
94
95
                        SM( "",
                            forwardMatch=True,
96
                            sections=["x_cp2k_section_geometry_optimization_step"],
97
98
99
                            otherMetaInfo=[
                                "atom_positions",
                            ],
100
                            subMatchers=[
101
102
                                # SM( "",
                                    # forwardMatch=True,
103
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
104
105
106
107
108
109
110
111
112
113
114
115
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
116
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
117
118
119
120
121
122
123
124
125
126
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
127
                                SM( " --------  Informations at step"),
128
129
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float),
130
131
                                    otherMetaInfo=["frame_sequence_potential_energy"]
                                ),
132
133
134
135
136
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.float)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.float)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.float),
137
138
                                    otherMetaInfo=["geometry_optimization_geometry_change"]
                                ),
139
140
141
142
143
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.float),
144
145
                                    otherMetaInfo=["geometry_optimization_threshold_force"]
                                ),
146
147
148
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.word)),
149
                            ],
150
                            # adHoc=self.adHoc_step()
151
152
                        ),
                    ]
153
154
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
155
156
157
                    adHoc=self.adHoc_geo_opt_converged(),
                    otherMetaInfo=["geometry_optimization_converged"]
                ),
158
                SM( "                    Reevaluating energy at the minimum",
159
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
160
161
                    subMatchers=[
                        self.cm.quickstep_calculation(),
162
163
164
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
165
                    ],
166
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
167
                ),
168
169
170
171
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
172
            ],
173
174
175
176
177
178
179
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
180
            sections=["section_run", "section_sampling_method"],
181
            subMatchers=[
182
183
184
185
186
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
187
                        self.cm.quickstep_header(),
188
                    ],
189
                ),
190
191
                self.geo_opt,
                self.cm.footer(),
192
193
194
195
            ]
        )

    #===========================================================================
196
    # onClose triggers
197
198
199
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
200
201
202
203
204
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
205

206
        # Push values from cache
207
208
        self.cache_service.addArrayValues("frame_sequence_potential_energy")
        self.cache_service.addValue("geometry_optimization_method")
209
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
210

211
212
213
214
215
216
217
218
219
220
221
222
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
223

224
225
226
227
228
229
230
231
232
233
234
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

        # Push the trajectory
        n_steps = len(steps) + 1
        last_step = n_steps - 1
235
236
237

        # First push the original system geometry
        # print(self.cache_service["atom_positions"])
238
239
240
241
242
243
244
245
246
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
247
248
249
250
251
252
253
                        logger.error(
                            "Could not get next geometry from an external"
                            " file. It seems that the number of optimization "
                            "steps in the CP2K output file doesn't match the "
                            "number of steps found in the external trajectory "
                            "file."
                        )
254
255
256
257
258
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

259
        self.cache_service.addArrayValues("frame_sequence_local_frames_ref")
260
261
        backend.addValue("number_of_frames_in_sequence", n_steps)

262
263
264
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

265
266
267
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

268
269
270
271
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
272

273
    def onClose_section_system(self, backend, gIndex, section):
274
        self.cache_service.addArrayValues("simulation_cell", unit="angstrom")
275
        self.cache_service.addArrayValues("lattice_vectors", unit="angstrom")
276

277
278
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
279
280
281
282
283
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
284
                self.traj_iterator = nomadcore.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
285
286
            else:
                try:
287
                    self.traj_iterator = nomadcore.configurationreading.iread(traj_file)
288
289
                except ValueError:
                    pass
290

291
292
293
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

294
    #===========================================================================
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", True)
        return wrapper

    def adHoc_geo_opt_not_converged(self):
        """Called when the geometry optimization did not converge.
        """
        def wrapper(parser):
            parser.backend.addValue("geometry_optimization_converged", False)
        return wrapper

310
311
312
313
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
314
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
315
316
        return wrapper

317
318
319
320
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
321
            self.cache_service["geometry_optimization_method"] = "bfgs"
322
323
        return wrapper

324
    # def adHoc_save_energy_reeval_quickstep(self):
325
        # def wrapper(parser):
326
327
328
329
330
331
332
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
333
        # return wrapper
334
335
336

    def debug(self):
        def wrapper(parser):
337
            print("DEBUG")
338
        return wrapper