geooptparser.py 17.2 KB
Newer Older
1
# Copyright 2015-2018 Lauri Himanen, Fawzi Mohamed, Ankit Kariryaa
2
#
3
4
5
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
10
11
12
13
14
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

15
16
from __future__ import print_function
from __future__ import absolute_import
17
18
from builtins import next
from builtins import range
19
from nomadcore.simple_parser import SimpleMatcher as SM
20
from nomadcore.baseclasses import MainHierarchicalParser
21
22
import nomadcore.configurationreading
import nomadcore.csvparsing
23
from .commonparser import CP2KCommonParser
24
from nomadcore.caching_backend import CachingLevel
25
26
27
28
29
30
31
32
import logging
logger = logging.getLogger("nomad")


class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
33
    def __init__(self, parser_context):
34
35
        """
        """
36
        super(CP2KGeoOptParser, self).__init__(parser_context)
37
        self.setup_common_matcher(CP2KCommonParser(parser_context))
38
        self.traj_iterator = None
39
        self.energy_reeval_quickstep = None
40

41
        #=======================================================================
42
        # Globally cached values
43
        self.cache_service.add("number_of_frames_in_sequence", 0)
44
        self.cache_service.add("frame_sequence_to_frames_ref", [])
45
        self.cache_service.add("geometry_optimization_method")
46
47
        self.cache_service.add("geometry_optimization_converged")
        self.cache_service.add("geometry_opt_max_reached")
48
49
50

        #=======================================================================
        # Cache levels
51
        self.caching_levels.update({
52
53
54
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
55
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
56
57
58
59
        })

        #=======================================================================
        # SimpleMatchers
60
        self.geo_opt = SM(
61
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
62
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
63
            subMatchers=[
64
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
65
                    adHoc=self.adHoc_conjugate_gradient(),
66
                ),
67
68
69
70
71
72
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
                ),
73
74
75
76
77
78
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
79
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
80
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float)),
81
82
83
                    # ],
                    # adHoc=self.adHoc_step(),
                # ),
84
                SM( " OPTIMIZATION STEP:",
85
                    endReStr="  Conv. in RMS gradients     =",
86
87
                    name="geooptstep",
                    repeats=True,
88
                    subMatchers=[
89
90
                        SM( "",
                            forwardMatch=True,
91
                            sections=["x_cp2k_section_geometry_optimization_step"],
92
                            subMatchers=[
93
94
                                # SM( "",
                                    # forwardMatch=True,
95
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
96
97
98
99
100
101
102
103
104
105
106
107
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
108
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
109
110
111
112
113
114
115
116
117
118
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
119
                                SM( " --------  Informations at step"),
120
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
121
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float)),
122
123
124
125
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.float)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.float)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.float)),
126
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.float)),
127
128
129
130
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.float)),
131
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.float)),
132
133
134
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.word)),
135
                            ],
136
                            # adHoc=self.adHoc_step()
137
138
                        ),
                    ]
139
140
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
141
                    adHoc=self.adHoc_geo_opt_converged(),
142
143
144
                ),
                SM( " *** MAXIMUM NUMBER OF OPTIMIZATION STEPS REACHED ***".replace("*", "\*"),
                    adHoc=self.adHoc_geo_opt_max_steps_reached(),
145
                ),
146
                SM( "                    Reevaluating energy at the minimum",
147
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
148
149
                    subMatchers=[
                        self.cm.quickstep_calculation(),
150
151
152
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
153
                    ],
154
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
155
                ),
156
157
158
159
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
160
            ],
161
162
163
164
165
166
167
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
168
            sections=["section_run", "section_sampling_method"],
169
            subMatchers=[
170
171
172
173
174
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
175
                        self.cm.quickstep_header(),
176
                    ],
177
                ),
178
179
                self.geo_opt,
                self.cm.footer(),
180
181
182
            ]
        )

183
184
185
186
187
188
    #===========================================================================
    # onOpen triggers
    def onOpen_section_frame_sequence(self, backend, gIndex, section):
        self.cache_service["geometry_optimization_converged"] = False
        self.cache_service["geometry_opt_max_reached"] = False

189
    #===========================================================================
190
    # onClose triggers
191
192
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

193
        # Get the re-evaluated energy and add it to potential_energy
194
195
196
197
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
198
                self.backend.addValue("potential_energy", energy)
199

200
        # Push values from cache
201
        self.cache_service.addValue("geometry_optimization_method")
202
        self.backend.addValue("frame_sequence_to_sampling_method_ref", 0)
203

204
205
206
207
208
209
210
211
212
213
214
215
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
216

217
218
219
220
221
222
223
224
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

225
226
227
228
        # Determine number of steps. If the optimization was quit due to
        # maximum number of steps reached, the energy for the last step is not
        # calculated, and thus the final geometry is not reported. This is
        # detected and then the number of frames is adjusted.
229
        n_steps = len(steps) + 1
230
231
        if self.cache_service["geometry_opt_max_reached"]:
            n_steps -= 1
232
        last_step = n_steps - 1
233
234
235

        # First push the original system geometry
        # print(self.cache_service["atom_positions"])
236
237
238
239
240
241
242
243
244
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
245
246
247
248
249
250
251
                        logger.error(
                            "Could not get next geometry from an external"
                            " file. It seems that the number of optimization "
                            "steps in the CP2K output file doesn't match the "
                            "number of steps found in the external trajectory "
                            "file."
                        )
252
253
254
255
256
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

257
        self.cache_service.addArrayValues("frame_sequence_to_frames_ref")
258
259
        backend.addValue("number_of_frames_in_sequence", n_steps)

260
261
262
    def onClose_section_frame_sequence(self, backend, gIndex, section):
        self.cache_service.addValue("geometry_optimization_converged")

263
264
265
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

266
267
268
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

269
270
271
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
272
            self.backend.addValue("potential_energy",energy[0])
273

274
    def onClose_section_system(self, backend, gIndex, section):
275
        self.cache_service.addArrayValues("simulation_cell", unit="angstrom")
276
        self.cache_service.addArrayValues("lattice_vectors", unit="angstrom")
277

278
279
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
280
281
282
283
284
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
285
                self.traj_iterator = nomadcore.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
286
287
            else:
                try:
288
                    self.traj_iterator = nomadcore.configurationreading.iread(traj_file)
289
290
                except ValueError:
                    pass
291

292
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
293
        self.cache_service["frame_sequence_to_frames_ref"].append(gIndex)
294

295
    #===========================================================================
296
297
298
299
300
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
301
            self.cache_service["geometry_optimization_converged"] = True
302
303
        return wrapper

304
305
    def adHoc_geo_opt_max_steps_reached(self):
        """Called when the geometry optimization converged.
306
307
        """
        def wrapper(parser):
308
            self.cache_service["geometry_opt_max_reached"] = True
309
310
        return wrapper

311
312
313
314
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
315
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
316
317
        return wrapper

318
319
320
321
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
322
            self.cache_service["geometry_optimization_method"] = "bfgs"
323
324
        return wrapper

325
    # def adHoc_save_energy_reeval_quickstep(self):
326
        # def wrapper(parser):
327
328
329
330
331
332
333
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
334
        # return wrapper
335
336
337

    def debug(self):
        def wrapper(parser):
338
            print("DEBUG")
339
        return wrapper