geooptparser.py 17.4 KB
Newer Older
1
# Copyright 2015-2018 Lauri Himanen, Fawzi Mohamed, Ankit Kariryaa
2
#
3
4
5
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
10
11
12
13
14
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

15
16
from __future__ import print_function
from __future__ import absolute_import
17
18
from builtins import next
from builtins import range
19
from nomadcore.simple_parser import SimpleMatcher as SM
20
from nomadcore.baseclasses import MainHierarchicalParser
21
22
import nomadcore.configurationreading
import nomadcore.csvparsing
23
from .commonparser import CP2KCommonParser
24
from nomadcore.caching_backend import CachingLevel
25
26
27
28
29
30
31
32
import logging
logger = logging.getLogger("nomad")


class CP2KGeoOptParser(MainHierarchicalParser):
    """Used to parse the CP2K calculation with run types:
        -GEO_OPT/GEOMETRY_OPTIMIZATION
    """
33
    def __init__(self, parser_context):
34
35
        """
        """
36
        super(CP2KGeoOptParser, self).__init__(parser_context)
37
        self.setup_common_matcher(CP2KCommonParser(parser_context))
38
        self.traj_iterator = None
39
        self.energy_reeval_quickstep = None
40

41
        #=======================================================================
42
        # Globally cached values
43
44
45
46
        self.cache_service.add("number_of_frames_in_sequence", 0)
        self.cache_service.add("frame_sequence_potential_energy", [])
        self.cache_service.add("frame_sequence_local_frames_ref", [])
        self.cache_service.add("geometry_optimization_method")
47
48
        self.cache_service.add("geometry_optimization_converged")
        self.cache_service.add("geometry_opt_max_reached")
49
50
51

        #=======================================================================
        # Cache levels
52
        self.caching_levels.update({
53
54
55
            'x_cp2k_section_geometry_optimization_step': CachingLevel.ForwardAndCache,
            'x_cp2k_section_quickstep_calculation': CachingLevel.ForwardAndCache,
            'x_cp2k_section_geometry_optimization': CachingLevel.ForwardAndCache,
56
            # 'x_cp2k_section_geometry_optimization_energy_reevaluation': CachingLevel.ForwardAndCache,
57
58
59
60
        })

        #=======================================================================
        # SimpleMatchers
61
        self.geo_opt = SM(
62
            " ***                     STARTING GEOMETRY OPTIMIZATION                      ***".replace("*", "\*"),
63
            sections=["section_frame_sequence", "x_cp2k_section_geometry_optimization"],
64
            subMatchers=[
65
                SM( " ***                           CONJUGATE GRADIENTS                           ***".replace("*", "\*"),
66
                    adHoc=self.adHoc_conjugate_gradient(),
67
                ),
68
69
70
71
72
73
                SM( " ***                                   BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
                ),
                SM( " ***                                 L-BFGS                                  ***".replace("*", "\*"),
                    adHoc=self.adHoc_bfgs(),
                ),
74
75
76
77
78
79
                # SM( "",
                    # forwardMatch=True,
                    # sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_step"],
                    # subMatchers=[
                        # self.cm.quickstep_calculation(),
                        # SM( " --------  Informations at step"),
80
                        # SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
81
                        # SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float)),
82
83
84
                    # ],
                    # adHoc=self.adHoc_step(),
                # ),
85
                SM( " OPTIMIZATION STEP:",
86
                    endReStr="  Conv. in RMS gradients     =",
87
88
                    name="geooptstep",
                    repeats=True,
89
                    subMatchers=[
90
91
                        SM( "",
                            forwardMatch=True,
92
                            sections=["x_cp2k_section_geometry_optimization_step"],
93
                            subMatchers=[
94
95
                                # SM( "",
                                    # forwardMatch=True,
96
                                    # endReStr=" ***                 MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
97
98
99
100
101
102
103
104
105
106
107
108
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
                                # SM( "",
                                    # forwardMatch=True,
109
                                    # endReStr=" ***                 BRENT   - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.regexs.int),
110
111
112
113
114
115
116
117
118
119
                                    # subMatchers=[
                                        # SM(" SCF WAVEFUNCTION OPTIMIZATION",
                                            # forwardMatch=True,
                                            # repeats=True,
                                            # subMatchers=[
                                                # self.cm.quickstep_calculation(),
                                            # ]
                                        # )
                                    # ]
                                # ),
120
                                SM( " --------  Informations at step"),
121
                                SM( "  Optimization Method        =\s+(?P<x_cp2k_optimization_method>{})".format(self.regexs.word)),
122
                                SM( "  Total Energy               =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.regexs.float)),
123
124
125
126
                                SM( "  Real energy change         =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.regexs.float)),
                                SM( "  Decrease in energy         =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.regexs.word)),
                                SM( "  Used time                  =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.regexs.float)),
                                SM( "  Max. step size             =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.regexs.float)),
127
                                SM( "  Conv. limit for step size  =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.regexs.float)),
128
129
130
131
                                SM( "  Convergence in step size   =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS step size              =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.regexs.float)),
                                SM( "  Convergence in RMS step    =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.regexs.word)),
                                SM( "  Max. gradient              =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.regexs.float)),
132
                                SM( "  Conv. limit for gradients  =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.regexs.float)),
133
134
135
                                SM( "  Conv. for gradients        =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.regexs.word)),
                                SM( "  RMS gradient               =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.regexs.float)),
                                SM( "  Conv. in RMS gradients     =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.regexs.word)),
136
                            ],
137
                            # adHoc=self.adHoc_step()
138
139
                        ),
                    ]
140
141
                ),
                SM( " ***                    GEOMETRY OPTIMIZATION COMPLETED                      ***".replace("*", "\*"),
142
                    adHoc=self.adHoc_geo_opt_converged(),
143
144
145
                ),
                SM( " *** MAXIMUM NUMBER OF OPTIMIZATION STEPS REACHED ***".replace("*", "\*"),
                    adHoc=self.adHoc_geo_opt_max_steps_reached(),
146
                ),
147
                SM( "                    Reevaluating energy at the minimum",
148
                    # sections=["x_cp2k_section_geometry_optimization_energy_reevaluation"],
149
150
                    subMatchers=[
                        self.cm.quickstep_calculation(),
151
152
153
                        # SM("",
                            # adHoc=self.adHoc_save_energy_reeval_quickstep()
                        # )
154
                    ],
155
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
156
                ),
157
158
159
160
                # SM( "",
                    # forwardMatch=True,
                    # adHoc=self.adHoc_save_energy_reeval_quickstep()
                # )
161
            ],
162
163
164
165
166
167
168
        )

        # Compose root matcher according to the run type. This way the
        # unnecessary regex parsers will not be compiled and searched. Saves
        # computational time.
        self.root_matcher = SM("",
            forwardMatch=True,
169
            sections=["section_run", "section_sampling_method"],
170
            subMatchers=[
171
172
173
174
175
                SM( "",
                    forwardMatch=True,
                    sections=["section_method"],
                    subMatchers=[
                        self.cm.header(),
176
                        self.cm.quickstep_header(),
177
                    ],
178
                ),
179
180
                self.geo_opt,
                self.cm.footer(),
181
182
183
            ]
        )

184
185
186
187
188
189
    #===========================================================================
    # onOpen triggers
    def onOpen_section_frame_sequence(self, backend, gIndex, section):
        self.cache_service["geometry_optimization_converged"] = False
        self.cache_service["geometry_opt_max_reached"] = False

190
    #===========================================================================
191
    # onClose triggers
192
193
194
    def onClose_x_cp2k_section_geometry_optimization(self, backend, gIndex, section):

        # Get the re-evaluated energy and add it to frame_sequence_potential_energy
195
196
197
198
199
        reeval_quickstep = self.energy_reeval_quickstep
        if reeval_quickstep is not None:
            energy = reeval_quickstep.get_latest_value("x_cp2k_energy_total")
            if energy is not None:
                self.cache_service["frame_sequence_potential_energy"].append(energy)
200

201
        # Push values from cache
202
203
        self.cache_service.addArrayValues("frame_sequence_potential_energy")
        self.cache_service.addValue("geometry_optimization_method")
204
        self.backend.addValue("frame_sequence_to_sampling_ref", 0)
205

206
207
208
209
210
211
212
213
214
215
216
217
        # Get the optimization convergence criteria from the last optimization
        # step
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_step_size_convergence_limit"],
            "geometry_optimization_geometry_change",
        )
        section.add_latest_value([
            "x_cp2k_section_geometry_optimization_step",
            "x_cp2k_optimization_gradient_convergence_limit"],
            "geometry_optimization_threshold_force",
        )
218

219
220
221
222
223
224
225
226
        # Push the information into single configuration and system
        steps = section["x_cp2k_section_geometry_optimization_step"]
        each = self.cache_service["each_geo_opt"]
        add_last = False
        add_last_setting = self.cache_service["traj_add_last"]
        if add_last_setting == "NUMERIC" or add_last_setting == "SYMBOLIC":
            add_last = True

227
228
229
230
        # Determine number of steps. If the optimization was quit due to
        # maximum number of steps reached, the energy for the last step is not
        # calculated, and thus the final geometry is not reported. This is
        # detected and then the number of frames is adjusted.
231
        n_steps = len(steps) + 1
232
233
        if self.cache_service["geometry_opt_max_reached"]:
            n_steps -= 1
234
        last_step = n_steps - 1
235
236
237

        # First push the original system geometry
        # print(self.cache_service["atom_positions"])
238
239
240
241
242
243
244
245
246
        for i_step in range(n_steps):
            singleId = backend.openSection("section_single_configuration_calculation")
            systemId = backend.openSection("section_system")

            if self.traj_iterator is not None:
                if (i_step + 1) % each == 0 or (i_step == last_step and add_last):
                    try:
                        pos = next(self.traj_iterator)
                    except StopIteration:
247
248
249
250
251
252
253
                        logger.error(
                            "Could not get next geometry from an external"
                            " file. It seems that the number of optimization "
                            "steps in the CP2K output file doesn't match the "
                            "number of steps found in the external trajectory "
                            "file."
                        )
254
255
256
257
258
                    else:
                        backend.addArrayValues("atom_positions", pos, unit="angstrom")
            backend.closeSection("section_system", systemId)
            backend.closeSection("section_single_configuration_calculation", singleId)

259
        self.cache_service.addArrayValues("frame_sequence_local_frames_ref")
260
261
        backend.addValue("number_of_frames_in_sequence", n_steps)

262
263
264
    def onClose_section_frame_sequence(self, backend, gIndex, section):
        self.cache_service.addValue("geometry_optimization_converged")

265
266
267
    def onClose_section_sampling_method(self, backend, gIndex, section):
        self.backend.addValue("sampling_method", "geometry_optimization")

268
269
270
    def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        self.energy_reeval_quickstep = section

271
272
273
274
    def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        energy = section["x_cp2k_optimization_energy"]
        if energy is not None:
            self.cache_service["frame_sequence_potential_energy"].append(energy[0])
275

276
    def onClose_section_system(self, backend, gIndex, section):
277
        self.cache_service.addArrayValues("simulation_cell", unit="angstrom")
278
        self.cache_service.addArrayValues("lattice_vectors", unit="angstrom")
279

280
281
    def onClose_section_method(self, backend, gIndex, section):
        traj_file = self.file_service.get_file_by_id("trajectory")
282
283
284
285
286
        traj_format = self.cache_service["trajectory_format"]
        if traj_format is not None and traj_file is not None:

            # Use special parsing for CP2K pdb files because they don't follow the proper syntax
            if traj_format == "PDB":
287
                self.traj_iterator = nomadcore.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
288
289
            else:
                try:
290
                    self.traj_iterator = nomadcore.configurationreading.iread(traj_file)
291
292
                except ValueError:
                    pass
293

294
295
296
    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        self.cache_service["frame_sequence_local_frames_ref"].append(gIndex)

297
    #===========================================================================
298
299
300
301
302
    # adHoc functions
    def adHoc_geo_opt_converged(self):
        """Called when the geometry optimization converged.
        """
        def wrapper(parser):
303
            self.cache_service["geometry_optimization_converged"] = True
304
305
        return wrapper

306
307
    def adHoc_geo_opt_max_steps_reached(self):
        """Called when the geometry optimization converged.
308
309
        """
        def wrapper(parser):
310
            self.cache_service["geometry_opt_max_reached"] = True
311
312
        return wrapper

313
314
315
316
    def adHoc_conjugate_gradient(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
317
            self.cache_service["geometry_optimization_method"] = "conjugate_gradient"
318
319
        return wrapper

320
321
322
323
    def adHoc_bfgs(self):
        """Called when conjugate gradient method is used.
        """
        def wrapper(parser):
324
            self.cache_service["geometry_optimization_method"] = "bfgs"
325
326
        return wrapper

327
    # def adHoc_save_energy_reeval_quickstep(self):
328
        # def wrapper(parser):
329
330
331
332
333
334
335
            # section_managers = parser.backend.sectionManagers
            # section_run_manager = section_managers["section_run"]
            # section_run = section_run_manager.openSections[0]
            # print section_run.subSectionValues
            # # quickstep = section_run.get_latest_value("x_cp2k_section_quickstep_calculation")
            # # print quickstep
            # # self.energy_reeval_quickstep = quickstep
336
        # return wrapper
337
338
339

    def debug(self):
        def wrapper(parser):
340
            print("DEBUG")
341
        return wrapper