commonparser.py 33.4 KB
Newer Older
1
from __future__ import absolute_import
2
from builtins import str
3
4
5
import re
import numpy as np
import logging
6
7
from nomadcore.simple_parser import SimpleMatcher as SM
from nomadcore.caching_backend import CachingLevel
Lauri Himanen's avatar
Lauri Himanen committed
8
from nomadcore.unit_conversion.unit_conversion import convert_unit
9
from nomadcore.baseclasses import CommonParser
10
from .inputparser import CP2KInputParser
11
12
13
14
logger = logging.getLogger("nomad")


#===============================================================================
15
class CP2KCommonParser(CommonParser):
16
17
18
19
20
21
    """
    This class is used to store and instantiate common parts of the
    hierarchical SimpleMatcher structure used in the parsing of a CP2K
    output file.
    """
    def __init__(self, parser_context):
        """Initialise the state shared by the common CP2K matchers.

        Args:
            parser_context: Context object handed unchanged to the
                CommonParser constructor.
        """
        super(CP2KCommonParser, self).__init__(parser_context)

        # Indices of the latest section_method / section_system; they are
        # assigned in the matching onClose triggers of this class.
        self.section_method_index = None
        self.section_system_index = None
        self.test_electronic_structure_method = "DFT"
        # Collects [basis set section id, atom kind section id] pairs
        self.basis_to_kind_mapping = []

        #=======================================================================
        # Cache levels
        forward_and_cache = CachingLevel.ForwardAndCache
        self.caching_levels = {
            'x_cp2k_atoms': forward_and_cache,
            'section_XC_functionals': forward_and_cache,
            'self_interaction_correction_method': CachingLevel.Cache,
            'x_cp2k_section_program_information': forward_and_cache,
            'x_cp2k_section_quickstep_settings': forward_and_cache,
            'x_cp2k_section_atomic_kind': forward_and_cache,
            'x_cp2k_section_kind_basis_set': forward_and_cache,
        }

        #=======================================================================
        # Globally cached values
        register = self.cache_service.add
        register("simulation_cell", single=False, update=False)
        register("number_of_scf_iterations", 0)
        register("atom_positions", single=False, update=True)
        register("atom_labels", single=False, update=False)
        register("number_of_atoms", single=False, update=False)
47

48
    #===========================================================================
49
    # SimpleMatchers
50
51
52

    # SimpleMatcher for the header that is common to all run types
    def header(self):
        """Build the matcher tree for the header common to all run types.

        The tree covers, in order, the DBCSR settings, the program start
        banner, the CP2K program information, the GLOBAL settings and the
        CELL block.

        Returns:
            SimpleMatcher: Root matcher of the header hierarchy.
        """
        # All patterns are raw string literals so that regex escapes such as
        # \| and \s are not interpreted as (invalid) Python string escapes.
        word = self.regexs.regex_word
        integer = self.regexs.regex_i
        letter = self.regexs.regex_letter
        eol = self.regexs.regex_eol

        return SM( r" DBCSR\| Multiplication driver",
            forwardMatch=True,
            subMatchers=[
                SM( r" DBCSR\| Multiplication driver",
                    forwardMatch=True,
                    sections=['x_cp2k_section_dbcsr'],
                    subMatchers=[
                        SM( r" DBCSR\| Multiplication driver\s+(?P<x_cp2k_dbcsr_multiplication_driver>{})".format(word)),
                        SM( r" DBCSR\| Multrec recursion limit\s+(?P<x_cp2k_dbcsr_multrec_recursion_limit>{})".format(integer)),
                        SM( r" DBCSR\| Multiplication stack size\s+(?P<x_cp2k_dbcsr_multiplication_stack_size>{})".format(integer)),
                        SM( r" DBCSR\| Multiplication size stacks\s+(?P<x_cp2k_dbcsr_multiplication_size_stacks>{})".format(integer)),
                        SM( r" DBCSR\| Use subcommunicators\s+(?P<x_cp2k_dbcsr_use_subcommunicators>{})".format(letter)),
                        SM( r" DBCSR\| Use MPI combined types\s+(?P<x_cp2k_dbcsr_use_mpi_combined_types>{})".format(letter)),
                        SM( r" DBCSR\| Use MPI memory allocation\s+(?P<x_cp2k_dbcsr_use_mpi_memory_allocation>{})".format(letter)),
                        SM( r" DBCSR\| Use Communication thread\s+(?P<x_cp2k_dbcsr_use_communication_thread>{})".format(letter)),
                        SM( r" DBCSR\| Communication thread load\s+(?P<x_cp2k_dbcsr_communication_thread_load>{})".format(integer)),
                    ]
                ),
                # The banner lines are written verbatim; their asterisks are
                # escaped before the capture-group regex is formatted in.
                SM( r"  **** **** ******  **  PROGRAM STARTED AT".replace("*", r"\*"),
                    forwardMatch=True,
                    sections=['x_cp2k_section_startinformation'],
                    subMatchers=[
                        SM( r"  **** **** ******  **  PROGRAM STARTED AT\s+(?P<x_cp2k_start_time>{})".replace("*", r"\*").format(eol)),
                        SM( r" ***** ** ***  *** **   PROGRAM STARTED ON\s+(?P<x_cp2k_start_host>{})".replace("*", r"\*").format(word)),
                        SM( r" **    ****   ******    PROGRAM STARTED BY\s+(?P<x_cp2k_start_user>{})".replace("*", r"\*").format(word)),
                        SM( r" ***** **    ** ** **   PROGRAM PROCESS ID\s+(?P<x_cp2k_start_id>{})".replace("*", r"\*").format(integer)),
                        SM( r"  **** **  *******  **  PROGRAM STARTED IN".replace("*", r"\*"),
                            forwardMatch=True,
                            adHoc=self.adHoc_run_dir(),
                        )
                    ]
                ),
                SM( r" CP2K\| version string:",
                    sections=['x_cp2k_section_program_information'],
                    forwardMatch=True,
                    subMatchers=[
                        SM( r" CP2K\| version string:\s+(?P<program_version>{})".format(eol)),
                        SM( r" CP2K\| source code revision number:\s+svn:(?P<x_cp2k_svn_revision>\d+)"),
                        SM( r" CP2K\| is freely available from{}".format(eol)),
                        SM( r" CP2K\| Program compiled at\s+(?P<x_cp2k_program_compilation_datetime>{})".format(eol)),
                        SM( r" CP2K\| Program compiled on\s+(?P<program_compilation_host>{})".format(eol)),
                        SM( r" CP2K\| Program compiled for{}".format(eol)),
                        SM( r" CP2K\| Input file name\s+(?P<x_cp2k_input_filename>{})".format(eol)),
                    ]
                ),
                SM( r" GLOBAL\|",
                    sections=['x_cp2k_section_global_settings'],
                    subMatchers=[
                        SM( r" GLOBAL\| Force Environment number"),
                        SM( r" GLOBAL\| Basis set file name\s+(?P<x_cp2k_basis_set_filename>{})".format(eol)),
                        SM( r" GLOBAL\| Geminal file name\s+(?P<x_cp2k_geminal_filename>{})".format(eol)),
                        SM( r" GLOBAL\| Potential file name\s+(?P<x_cp2k_potential_filename>{})".format(eol)),
                        SM( r" GLOBAL\| MM Potential file name\s+(?P<x_cp2k_mm_potential_filename>{})".format(eol)),
                        SM( r" GLOBAL\| Coordinate file name\s+(?P<x_cp2k_coordinate_filename>{})".format(eol)),
                        SM( r" GLOBAL\| Method name\s+(?P<x_cp2k_method_name>{})".format(eol)),
                        SM( r" GLOBAL\| Project name"),
                        SM( r" GLOBAL\| Preferred FFT library\s+(?P<x_cp2k_preferred_fft_library>{})".format(eol)),
                        SM( r" GLOBAL\| Preferred diagonalization lib.\s+(?P<x_cp2k_preferred_diagonalization_library>{})".format(eol)),
                        SM( r" GLOBAL\| Run type\s+(?P<x_cp2k_run_type>{})".format(eol)),
                        SM( r" GLOBAL\| All-to-all communication in single precision"),
                        SM( r" GLOBAL\| FFTs using library dependent lengths"),
                        SM( r" GLOBAL\| Global print level"),
                        SM( r" GLOBAL\| Total number of message passing processes"),
                        SM( r" GLOBAL\| Number of threads for this process"),
                        SM( r" GLOBAL\| This output is from process"),
                    ],
                    otherMetaInfo=[
                        "section_XC_functionals",
                        'XC_functional_name',
                        'XC_functional_weight',
                        'XC_functional',
                        'configuration_periodic_dimensions',
                        "stress_tensor_method",
                        "atom_positions",
                    ],
                ),
                SM( r" CELL\|",
                    adHoc=self.adHoc_x_cp2k_section_cell(),
                    otherMetaInfo=["simulation_cell"]
                ),
            ]
        )

    # SimpleMatcher for an SCF wavefunction optimization
137
    def quickstep_calculation(self):
        """Build the matcher tree for an SCF wavefunction optimization.

        The tree handles the SCF iterations, the convergence messages, the
        numerical stress block, the total energy, the atomic forces and the
        stress tensor output.

        Returns:
            SimpleMatcher: Root matcher of the Quickstep calculation.
        """
        # Every pattern is a raw string literal so that regex escapes are not
        # interpreted as (invalid) Python string escapes.
        flt = self.regexs.regex_f

        return SM( r" SCF WAVEFUNCTION OPTIMIZATION",
            sections=["x_cp2k_section_quickstep_calculation"],
            subMatchers=[
                SM( r"  Trace\(PS\):",
                    sections=["x_cp2k_section_scf_iteration"],
                    repeats=True,
                    subMatchers=[
                        SM( r"  Exchange-correlation energy:\s+(?P<x_cp2k_energy_XC_scf_iteration__hartree>{})".format(flt)),
                        SM( r"\s+\d+\s+\S+\s+{0}\s+{0}\s+{0}\s+(?P<x_cp2k_energy_total_scf_iteration__hartree>{0})\s+(?P<x_cp2k_energy_change_scf_iteration__hartree>{0})".format(flt)),
                    ]
                ),
                SM( r"  \*\*\* SCF run converged in\s+(\d+) steps \*\*\*",
                    otherMetaInfo=["single_configuration_calculation_converged"],
                    adHoc=self.adHoc_single_point_converged()
                ),
                SM( r"  \*\*\* SCF run NOT converged \*\*\*",
                    otherMetaInfo=["single_configuration_calculation_converged"],
                    adHoc=self.adHoc_single_point_not_converged()
                ),
                SM( r"  Electronic kinetic energy:\s+(?P<x_cp2k_electronic_kinetic_energy__hartree>{})".format(flt)),
                # The adHoc function consumes the lines of the numerical
                # stress block up to its end banner.
                SM( r" **************************** NUMERICAL STRESS ********************************".replace("*", r"\*"),
                    # endReStr=" **************************** NUMERICAL STRESS END *****************************".replace("*", "\*"),
                    adHoc=self.adHoc_stress_calculation(),
                ),
                SM( r" ENERGY\| Total FORCE_EVAL \( \w+ \) energy \(a\.u\.\):\s+(?P<x_cp2k_energy_total__hartree>{0})".format(flt),
                    otherMetaInfo=["energy_total"],
                ),
                SM( r" ATOMIC FORCES in \[a\.u\.\]"),
                SM( r" # Atom   Kind   Element          X              Y              Z",
                    adHoc=self.adHoc_atom_forces(),
                    otherMetaInfo=["atom_forces", "x_cp2k_atom_forces"],
                ),
                SM( r" (?:NUMERICAL )?STRESS TENSOR \[GPa\]",
                    sections=["x_cp2k_section_stress_tensor"],
                    subMatchers=[
                        SM( r"\s+X\s+Y\s+Z",
                            adHoc=self.adHoc_stress_tensor(),
                            otherMetaInfo=["stress_tensor", "section_stress_tensor"],
                        ),
                        SM( r"  1/3 Trace\(stress tensor\):\s+(?P<x_cp2k_stress_tensor_one_third_of_trace__GPa>{})".format(flt)),
                        SM( r"  Det\(stress tensor\)\s+:\s+(?P<x_cp2k_stress_tensor_determinant__GPa3>{})".format(flt)),
                        SM( r" EIGENVECTORS AND EIGENVALUES OF THE STRESS TENSOR",
                            adHoc=self.adHoc_stress_tensor_eigenpairs()),
                    ]
                )
            ]
        )

186
    # SimpleMatcher the stuff that is done to initialize a quickstep calculation
187
    def quickstep_header(self):
        """Build the matcher tree for the Quickstep initialisation output.

        The tree covers the DFT and DFT+U settings, the QS settings, the
        atomic kind information, the total numbers, the maximum angular
        momenta, the initial atomic coordinates, the SCF parameters and the
        MP2 / RI-RPA blocks.

        Returns:
            SimpleMatcher: Root matcher of the Quickstep header.
        """
        # Patterns are raw string literals so that regex escapes are not
        # interpreted as (invalid) Python string escapes.
        word = self.regexs.regex_word
        integer = self.regexs.regex_i
        flt = self.regexs.regex_f
        eol = self.regexs.regex_eol

        return SM( r" *******************************************************************************".replace("*", r"\*"),
            forwardMatch=True,
            sections=["x_cp2k_section_quickstep_settings"],
            subMatchers=[
                SM( r" DFT\|",
                    forwardMatch=True,
                    subMatchers=[
                        # The parentheses around RKS are literal text and must
                        # be escaped, exactly like \(SIC\) further down;
                        # unescaped they form a group and the line never matches.
                        SM( r" DFT\| Spin restricted Kohn-Sham \(RKS\) calculation\s+(?P<x_cp2k_spin_restriction>{})".format(word)),
                        SM( r" DFT\| Multiplicity\s+(?P<spin_target_multiplicity>{})".format(integer)),
                        SM( r" DFT\| Number of spin states\s+(?P<number_of_spin_channels>{})".format(integer)),
                        SM( r" DFT\| Charge\s+(?P<total_charge>{})".format(integer)),
                        # Kept as a normal string with doubled backslashes so
                        # that the character class still holds a real newline
                        # character, as before.
                        SM( " DFT\\| Self-interaction correction \\(SIC\\)\\s+(?P<self_interaction_correction_method>[^\n]+)"),
                    ],
                    otherMetaInfo=["self_interaction_correction_method"],
                ),
                SM( r" DFT\+U\|",
                    adHoc=self.adHoc_dft_plus_u(),
                ),
                SM( r" QS\|",
                    forwardMatch=True,
                    subMatchers=[
                        SM( r" QS\| Method:\s+(?P<x_cp2k_quickstep_method>{})".format(word)),
                        SM( r" QS\| Density plane wave grid type\s+{}".format(eol)),
                        SM( r" QS\| Number of grid levels:\s+{}".format(integer)),
                        SM( r" QS\| Density cutoff \[a\.u\.\]:\s+(?P<x_cp2k_planewave_cutoff>{})".format(flt)),
                        SM( r" QS\| Multi grid cutoff \[a\.u\.\]: 1\) grid level\s+{}".format(flt)),
                        SM( r" QS\|                           2\) grid level\s+{}".format(flt)),
                        SM( r" QS\|                           3\) grid level\s+{}".format(flt)),
                        SM( r" QS\|                           4\) grid level\s+{}".format(flt)),
                        SM( r" QS\| Grid level progression factor:\s+{}".format(flt)),
                        # This pattern has no placeholder, so the former
                        # .format() call on it was a no-op and is dropped.
                        SM( r" QS\| Relative density cutoff \[a\.u\.\]:"),
                        SM( r" QS\| Consistent realspace mapping and integration"),
                        SM( r" QS\| Interaction thresholds: eps_pgf_orb:\s+{}".format(flt)),
                        SM( r" QS\|                         eps_filter_matrix:\s+{}".format(flt)),
                        SM( r" QS\|                         eps_core_charge:\s+{}".format(flt)),
                        SM( r" QS\|                         eps_rho_gspace:\s+{}".format(flt)),
                        SM( r" QS\|                         eps_rho_rspace:\s+{}".format(flt)),
                        SM( r" QS\|                         eps_gvg_rspace:\s+{}".format(flt)),
                        SM( r" QS\|                         eps_ppl:\s+{}".format(flt)),
                        SM( r" QS\|                         eps_ppnl:\s+{}".format(flt)),
                    ],
                ),
                SM( r" ATOMIC KIND INFORMATION",
                    sections=["x_cp2k_section_atomic_kinds", "section_method_basis_set"],
                    subMatchers=[
                        SM( r"\s+(?P<x_cp2k_kind_number>{0})\. Atomic kind: (?P<x_cp2k_kind_element_symbol>{1})\s+Number of atoms:\s+(?P<x_cp2k_kind_number_of_atoms>{1})".format(integer, word),
                            repeats=True,
                            sections=["x_cp2k_section_atomic_kind", "x_cp2k_section_kind_basis_set"],
                            subMatchers=[
                                SM( r"     Orbital Basis Set\s+(?P<x_cp2k_kind_basis_set_name>{})".format(word)),
                                SM( r"       Number of orbital shell sets:\s+(?P<x_cp2k_basis_set_number_of_orbital_shell_sets>{})".format(integer)),
                                SM( r"       Number of orbital shells:\s+(?P<x_cp2k_basis_set_number_of_orbital_shells>{})".format(integer)),
                                SM( r"       Number of primitive Cartesian functions:\s+(?P<x_cp2k_basis_set_number_of_primitive_cartesian_functions>{})".format(integer)),
                                SM( r"       Number of Cartesian basis functions:\s+(?P<x_cp2k_basis_set_number_of_cartesian_basis_functions>{})".format(integer)),
                                SM( r"       Number of spherical basis functions:\s+(?P<x_cp2k_basis_set_number_of_spherical_basis_functions>{})".format(integer)),
                                SM( r"       Norm type:\s+(?P<x_cp2k_basis_set_norm_type>{})".format(integer)),
                            ]
                        )
                    ]
                ),
                SM( r"  Total number of",
                    forwardMatch=True,
                    sections=["x_cp2k_section_total_numbers"],
                    subMatchers=[
                        SM( r"  Total number of            - Atomic kinds:\s+(?P<x_cp2k_atomic_kinds>\d+)"),
                        SM( r"\s+- Atoms:\s+(?P<x_cp2k_atoms>\d+)",
                            otherMetaInfo=["number_of_atoms"],
                        ),
                        SM( r"\s+- Shell sets:\s+(?P<x_cp2k_shell_sets>\d+)"),
                        SM( r"\s+- Shells:\s+(?P<x_cp2k_shells>\d+)"),
                        SM( r"\s+- Primitive Cartesian functions:\s+(?P<x_cp2k_primitive_cartesian_functions>\d+)"),
                        SM( r"\s+- Cartesian basis functions:\s+(?P<x_cp2k_cartesian_basis_functions>\d+)"),
                        SM( r"\s+- Spherical basis functions:\s+(?P<x_cp2k_spherical_basis_functions>\d+)"),
                    ]
                ),
                SM( r" Maximum angular momentum of",
                    forwardMatch=True,
                    sections=["x_cp2k_section_maximum_angular_momentum"],
                    subMatchers=[
                        # NOTE(review): this sub-matcher starts with two
                        # spaces and ends in a double colon, unlike the parent
                        # pattern above - confirm against real CP2K output.
                        SM( r"  Maximum angular momentum of- Orbital basis functions::\s+(?P<x_cp2k_orbital_basis_functions>\d+)"),
                        SM( r"\s+- Local part of the GTH pseudopotential:\s+(?P<x_cp2k_local_part_of_gth_pseudopotential>\d+)"),
                        SM( r"\s+- Non-local part of the GTH pseudopotential:\s+(?P<x_cp2k_non_local_part_of_gth_pseudopotential>\d+)"),
                    ]
                ),
                SM( r" MODULE QUICKSTEP:  ATOMIC COORDINATES IN angstrom",
                    forwardMatch=True,
                    subMatchers=[
                        SM( r" MODULE QUICKSTEP:  ATOMIC COORDINATES IN angstrom",
                            adHoc=self.adHoc_x_cp2k_section_quickstep_atom_information(),
                            otherMetaInfo=["atom_labels", "atom_positions"]
                        )
                    ]
                ),
                SM( r" SCF PARAMETERS",
                    forwardMatch=True,
                    subMatchers=[
                        SM( r" SCF PARAMETERS         Density guess:\s+{}".format(eol)),
                        SM( r"                        max_scf:\s+(?P<scf_max_iteration>{})".format(integer)),
                        SM( r"                        max_scf_history:\s+{}".format(integer)),
                        SM( r"                        max_diis:\s+{}".format(integer)),
                        SM( r"                        eps_scf:\s+(?P<scf_threshold_energy_change__hartree>{})".format(flt)),
                    ]
                ),
                SM( r" MP2\|",
                    adHoc=self.adHoc_mp2()
                ),
                SM( r" RI-RPA\|",
                    adHoc=self.adHoc_rpa()
                ),
            ]
        )

300
    #===========================================================================
301
302
303
    # onClose triggers
    def onClose_x_cp2k_section_total_numbers(self, backend, gIndex, section):
        """Cache the atom count found in the closed section.

        The latest "x_cp2k_atoms" value, when there is one, is stored in the
        cache service under "number_of_atoms".
        """
        atom_count = section.get_latest_value("x_cp2k_atoms")
        if atom_count is None:
            return
        self.cache_service["number_of_atoms"] = atom_count
307

308
309
310
311
312
313
    # def onClose_x_cp2k_section_quickstep_calculation(self, backend, gIndex, section):
        # print "quickstep CLOSED"

    # def onClose_x_cp2k_section_geometry_optimization_step(self, backend, gIndex, section):
        # print "Optimisation step CLOSED"

314
315
316
317
318
    def onClose_section_method(self, backend, gIndex, section):
        """Store the section index and translate the SIC method name.

        The index of the closed section is remembered in
        ``section_method_index``. The CP2K self-interaction correction string
        of the section is then mapped to its NOMAD correspondent and pushed
        directly to ``backend.superBackend`` to avoid caching.

        Args:
            backend: Backend whose ``superBackend`` receives the value.
            gIndex: Index of the section that was closed.
            section: The closed section, queried for the CP2K SIC string.
        """
        self.section_method_index = gIndex

        # CP2K name -> NOMAD name
        sic_map = {
            "NO": "",
            "AD SIC": "SIC_AD",
            "Explicit Orbital SIC": "SIC_EXPLICIT_ORBITALS",
            "SPZ/MAURI SIC": "SIC_MAURI_SPZ",
            "US/MAURI SIC": "SIC_MAURI_US",
        }

        # The translation is best-effort and must not abort the parsing, but
        # only ordinary errors are absorbed (and reported): KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        try:
            sic_cp2k = section.get_latest_value("self_interaction_correction_method")
            sic_nomad = sic_map.get(sic_cp2k)
            if sic_nomad is not None:
                backend.superBackend.addValue('self_interaction_correction_method', sic_nomad)
            else:
                logger.warning("Unknown self-interaction correction method used.")
        except Exception:
            logger.warning(
                "Could not process the self-interaction correction method.",
                exc_info=True
            )
339

340
341
342
    def onClose_section_run(self, backend, gIndex, section):
        """Write the program name to the backend when section_run closes."""
        program_name = "CP2K"
        backend.addValue("program_name", program_name)

343
344
    def onClose_x_cp2k_section_quickstep_settings(self, backend, gIndex, section):
        """Write the method level values known once the settings are read.

        The basis set type and the electronic structure method are always
        written. When the section holds a plane wave cutoff, it is written
        into a new section_basis_set_cell_dependent as well.
        """
        backend.addValue("program_basis_set_type", "gaussian")
        backend.addValue("electronic_structure_method", self.test_electronic_structure_method)

        # Nothing more to do when the cutoff was not found
        raw_cutoff = section.get_latest_value("x_cp2k_planewave_cutoff")
        if raw_cutoff is None:
            return

        section_id = backend.openSection("section_basis_set_cell_dependent")
        # NOTE(review): the parsed value is doubled before the conversion from
        # hartree; the reason for the factor is not visible here - confirm.
        converted_cutoff = convert_unit(2*raw_cutoff, "hartree")
        backend.addValue("basis_set_planewave_cutoff", converted_cutoff)
        backend.closeSection("section_basis_set_cell_dependent", section_id)

355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
    def onClose_section_method_basis_set(self, backend, gIndex, section):
        """Write the basis set kind and the collected basis-to-kind mapping.

        The mapping pairs are the ones gathered in ``basis_to_kind_mapping``;
        their count is written as the number of atom centered basis sets.
        """
        mapping = self.basis_to_kind_mapping
        backend.addValue("method_basis_set_kind", "wavefunction")
        backend.addValue("number_of_basis_sets_atom_centered", len(mapping))
        backend.addArrayValues(
            "mapping_section_method_basis_set_atom_centered",
            np.array(mapping)
        )

    def onClose_x_cp2k_section_atomic_kind(self, backend, gIndex, section):
        """Write one atom kind and its basis set from the closed section.

        A section_method_atom_kind and a section_basis_set_atom_centered are
        opened, filled with the atomic number, the kind label and the basis
        set name, and closed again. The ids of the two sections are appended
        to ``basis_to_kind_mapping``.
        """
        kind_section_id = backend.openSection("section_method_atom_kind")
        basis_section_id = backend.openSection("section_basis_set_atom_centered")

        # Values parsed for this kind
        symbol = section.get_latest_value("x_cp2k_kind_element_symbol")
        number = section.get_latest_value("x_cp2k_kind_number")
        basis_name = section.get_latest_value(
            ["x_cp2k_section_kind_basis_set", "x_cp2k_kind_basis_set_name"]
        )
        atomic_number = self.get_atomic_number(symbol)
        label = symbol + str(number)

        for name, value in (
            ("method_atom_kind_atom_number", atomic_number),
            ("method_atom_kind_label", label),
            ("basis_set_atom_number", atomic_number),
            ("basis_set_atom_centered_short_name", basis_name),
        ):
            backend.addValue(name, value)

        # Add the reference based mapping between basis and atomic kind
        self.basis_to_kind_mapping.append([basis_section_id, kind_section_id])

        backend.closeSection("section_basis_set_atom_centered", basis_section_id)
        backend.closeSection("section_method_atom_kind", kind_section_id)
379

380
    def onClose_x_cp2k_section_program_information(self, backend, gIndex, section):
        """Register the parsed input file name in the file service.

        The latest "x_cp2k_input_filename" value of the section is stored
        under the file id "input".
        """
        self.file_service.set_file_id(
            section.get_latest_value("x_cp2k_input_filename"),
            "input"
        )

    def onClose_x_cp2k_section_global_settings(self, backend, gIndex, section):
        """Parse the CP2K input file when the file service knows its path.

        The path is looked up with the file id "input". A warning is logged
        when no path is available.
        """
        input_path = self.file_service.get_file_by_id("input")
        if input_path is None:
            logger.warning("The input file of the calculation could not be found.")
            return

        # The input file is available, so parse it
        CP2KInputParser(input_path, self.parser_context).parse()

393
394
395
396
397
398
    def onClose_section_system(self, backend, gIndex, section):
        """Store the index of the section system and push cached values.

        The index should always be 0, but it is read dynamically in case
        something is wrong. The cached number of atoms, periodic dimensions
        and atom labels are pushed through the cache service.
        """
        self.section_system_index = gIndex

        cache = self.cache_service
        cache.push_value("number_of_atoms")
        # cache.push_array_values("simulation_cell", unit="angstrom")
        for array_name in ("configuration_periodic_dimensions", "atom_labels"):
            cache.push_array_values(array_name)

    def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
        """Write the references to section_method and section_system.

        The referenced indices are the ones stored in
        ``section_method_index`` and ``section_system_index``.
        """
        references = (
            ('single_configuration_to_calculation_method_ref', self.section_method_index),
            ('single_configuration_calculation_to_system_ref', self.section_system_index),
        )
        for name, index in references:
            backend.addValue(name, index)

408
    #===========================================================================
409
410
    # adHoc functions
    def adHoc_x_cp2k_section_cell(self):
        """Create the adHoc function that extracts the cell vectors.

        Returns:
            A function taking the parser. It reads the next three lines of
            the file, extracts one cell vector from each of them and stores
            the resulting 3x3 array in the cache as "simulation_cell".
        """
        def wrapper(parser):
            # One line for each of the three cell vectors
            vector_lines = [parser.fIn.readline() for _ in range(3)]

            # Three float components are captured from every line
            vector_pattern = re.compile(
                r" CELL\| Vector \w \[angstrom\]:\s+({0})\s+({0})\s+({0})".format(self.regexs.regex_f)
            )
            found = [vector_pattern.match(text) for text in vector_lines]

            # Each matched line fills one row of the cell matrix
            cell = np.zeros((3, 3))
            for row, result in enumerate(found):
                cell[row, :] = [float(x) for x in result.groups()]

            # Push the results to cache
            self.cache_service["simulation_cell"] = cell

        return wrapper

436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
    def adHoc_atom_forces(self):
        """Create the adHoc function that extracts the final atomic forces
        printed at the end of a calculation.

        Returns:
            A function taking the parser. It reads lines until the line that
            starts with " SUM OF ATOMIC FORCES" or until the end of the file,
            and converts the last three columns of every line to floats. When
            any forces were found, they are written to the backend as
            "x_cp2k_atom_forces".
        """
        def wrapper(parser):

            end_str = " SUM OF ATOMIC FORCES"
            force_array = []

            # Loop through the force lines until the sum of forces is read
            while True:
                line = parser.fIn.readline()

                # readline() returns an empty string at the end of the file.
                # Without this check a truncated output that lacks the sum
                # line would keep the loop running forever.
                if not line or line.startswith(end_str):
                    break

                # Lines with fewer than three columns (e.g. blank lines) hold
                # no force vector and would make the array ragged.
                columns = line.split()
                if len(columns) < 3:
                    continue
                force_array.append([float(x) for x in columns[-3:]])
            force_array = np.array(force_array)

            # If anything found, push the results to the correct section
            if len(force_array) != 0:
                # self.cache_service["atom_forces"] = force_array
                self.backend.addArrayValues("x_cp2k_atom_forces", force_array, unit="forceAu")

        return wrapper

    def adHoc_stress_tensor(self):
        """Create the adHoc function that extracts the stress tensor printed
        at the end of a calculation.

        Returns:
            A function taking the parser. It reads three lines, converts the
            last three columns of each to floats and writes the resulting
            array to the parser backend as "x_cp2k_stress_tensor" in GPa.
        """
        def wrapper(parser):
            tensor_rows = []
            for _ in range(3):
                columns = parser.fIn.readline().split()
                tensor_rows.append([float(x) for x in columns[-3:]])
            parser.backend.addArrayValues(
                "x_cp2k_stress_tensor",
                np.array(tensor_rows),
                unit="GPa"
            )

        return wrapper

    def adHoc_stress_calculation(self):
        """Create the adHoc function that skips over the details of the
        stress tensor calculation.

        Returns:
            A function taking the parser. It consumes lines up to and
            including the "NUMERICAL STRESS END" banner, or up to the end of
            the file when the banner is missing.
        """
        def wrapper(parser):
            end_line = " **************************** NUMERICAL STRESS END *****************************\n"
            while True:
                line = parser.fIn.readline()
                # readline() returns an empty string at the end of the file;
                # stopping there keeps a truncated output from looping forever
                if not line or line == end_line:
                    break
        return wrapper

    def adHoc_stress_tensor_eigenpairs(self):
        """Create the adHoc function that parses the stress tensor eigenpairs.

        Returns:
            A function taking the parser. It skips one line, reads the
            eigenvalues from the next one, skips another line and reads three
            lines of eigenvector components. Both arrays are written to the
            parser backend.
        """
        def wrapper(parser):
            next_line = parser.fIn.readline

            next_line()
            eigenvalues = np.array([float(x) for x in next_line().split()])

            next_line()
            vector_rows = []
            for _ in range(3):
                vector_rows.append([float(x) for x in next_line().split()])
            eigenvectors = np.array(vector_rows)

            parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvalues", eigenvalues, unit="GPa")
            parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvectors", eigenvectors)
        return wrapper

    def adHoc_single_point_converged(self):
        """Create the adHoc function called when the SCF cycle of a single
        point calculation has converged.

        Returns:
            A function taking the parser that writes
            "x_cp2k_quickstep_converged" as True to the parser backend.
        """
        def wrapper(parser):
            converged = True
            parser.backend.addValue("x_cp2k_quickstep_converged", converged)
        return wrapper

    def adHoc_single_point_not_converged(self):
        """Create the adHoc function called when the SCF cycle of a single
        point calculation did not converge.

        Returns:
            A function taking the parser that writes
            "x_cp2k_quickstep_converged" as False to the parser backend.
        """
        def wrapper(parser):
            converged = False
            parser.backend.addValue("x_cp2k_quickstep_converged", converged)
        return wrapper

    def adHoc_x_cp2k_section_quickstep_atom_information(self):
        """Used to extract the initial atomic coordinates and names in the
        Quickstep module.

        Returns:
            A callback that takes the parser object. It skips three lines of
            ``parser.fIn`` and then reads consecutive lines for as long as
            they match the atom row pattern. For every matching row the label
            is the second captured field followed by the first one, and the
            remaining captured numbers form the coordinate. If at least one
            row was found, the results are stored in ``self.cache_service``
            under "atom_positions" and "atom_labels" as numpy arrays.
        """
        def wrapper(parser):

            # Define the regex that extracts the information
            regex_string = r"\s+\d+\s+(\d+)\s+(\w+)\s+\d+\s+({0})\s+({0})\s+({0})".format(self.regexs.regex_f)
            regex_compiled = re.compile(regex_string)

            coordinates = []
            labels = []

            # Currently these three lines are not processed
            for _ in range(3):
                parser.fIn.readline()

            # Read rows until the first line that does not match. That line is
            # consumed as well. At the end of the file readline() returns an
            # empty string, which cannot match and therefore ends the loop.
            while True:
                result = regex_compiled.match(parser.fIn.readline())
                if result is None:
                    break
                groups = result.groups()
                labels.append(groups[1] + groups[0])
                coordinates.append([float(x) for x in groups[2:]])

            # If anything found, push the results to the correct section
            if coordinates:
                self.cache_service["atom_positions"] = np.array(coordinates)
                self.cache_service["atom_labels"] = np.array(labels)

        return wrapper

559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
    def adHoc_run_dir(self):
        """Used to extract the start path, which may be split over several
        lines.

        Returns:
            A callback that takes the parser object. It reads lines from
            ``parser.fIn`` until a blank line or the end of the file, joins
            the last whitespace separated token of every line read and pushes
            the result to ``parser.backend`` as "x_cp2k_start_path".
        """
        def wrapper(parser):
            path_array = []

            # Loop through the lines that hold the pieces of the path
            while True:
                line = parser.fIn.readline()

                # A blank line ends the path. The same check covers the end
                # of the file, where readline() returns an empty string, and
                # whitespace-only lines; both would otherwise fail below
                # because there is no token to take.
                if not line.strip():
                    break
                path_array.append(line.split()[-1])

            # Form the final path and push to backend
            path = "".join(path_array)
            parser.backend.addValue("x_cp2k_start_path", path)

        return wrapper

580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
    def adHoc_dft_plus_u(self):
        """Returns a callback that records "DFT+U" as the electronic structure
        method on this object.
        """
        method_name = "DFT+U"

        def mark_method(parser):
            # The parser argument is not used, only the stored flag changes
            self.test_electronic_structure_method = method_name

        return mark_method

    def adHoc_mp2(self):
        """Returns a callback that records "MP2" as the electronic structure
        method on this object.
        """
        method_name = "MP2"

        def mark_method(parser):
            # The parser argument is not used, only the stored flag changes
            self.test_electronic_structure_method = method_name

        return mark_method

    def adHoc_rpa(self):
        """Returns a callback that records "RPA" as the electronic structure
        method on this object.
        """
        method_name = "RPA"

        def mark_method(parser):
            # The parser argument is not used, only the stored flag changes
            self.test_electronic_structure_method = method_name

        return mark_method

    # def debug(self):
        # def wrapper(parser):
            # print("FOUND")
        # return wrapper

    #===========================================================================
    # MISC functions
    def get_atomic_number(self, symbol):
        """ Looks up the atomic number that belongs to a chemical symbol.

        Args:
            symbol: chemical symbol of the element as a string, e.g. "Si".
                The placeholder symbol "X" maps to 0.

        Returns:
            The position of the symbol in the table below, which equals the
            atomic number (number of protons) of the element.

        Raises:
            KeyError: if the symbol is not in the table, which covers "X" and
                the elements from H to Lr.
        """
        # One string per period of the periodic table. The position of a
        # symbol in the flattened sequence is its atomic number, with the
        # placeholder "X" at position zero.
        periodic_table = (
            "X "
            "H He "
            "Li Be B C N O F Ne "
            "Na Mg Al Si P S Cl Ar "
            "K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr "
            "Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe "
            "Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu "
            "Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn "
            "Fr Ra Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr"
        ).split()

        number_by_symbol = {
            element: number for number, element in enumerate(periodic_table)
        }
        return number_by_symbol[symbol]