method.py 16.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List
from abc import abstractmethod
from collections import OrderedDict
import numpy as np
19
from pint import UnitRegistry
20
21
22
23
24

from nomad.metainfo.encyclopedia import (
    Material,
    Method,
)
25
from nomad.metainfo import Section
26
27
28
29
30
from nomad.normalizing.encyclopedia.basisset import get_basis_set
from nomad.normalizing.encyclopedia.context import Context
from nomad.utils import RestrictedDict
from nomad import config

31
# Module-wide pint unit registry. It is used below to convert quantities to
# angstrom and rydberg before they are formatted into the hash strings.
ureg = UnitRegistry()
32
33
34
35
36
37
38
39
40


class MethodNormalizer():
    """Common base for the normalizers that populate the method-related
    metainfo of an Encyclopedia entry.

    Subclasses provide the method-specific parts of the hashes through
    ``method_hash_dict`` and ``group_parametervariation_hash_dict`` and
    implement ``normalize``.
    """
    def __init__(self, backend, logger):
        """Store the backend and the logger, and remember the first
        ``section_run`` found in the backend's entry archive.
        """
        self.backend = backend
        self.logger = logger
        archive = backend.entry_archive
        self.section_run = archive.section_run[0]

    def method_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section):
        """Compute the method hash and store it in ``method.method_hash``.

        The hash is built from the program name and the subclass-specific
        settings returned by ``method_hash_dict``. If validating the collected
        values raises ``KeyError`` or ``ValueError``, the problem is logged and
        the hash is left unset.
        """
        hash_source = RestrictedDict(
            mandatory_keys=["program_name", "subsettings"],
            forbidden_values=[None]
        )
        hash_source['program_name'] = self.section_run.program_name

        # Subclasses contribute their own method properties here.
        hash_source["subsettings"] = self.method_hash_dict(method, settings_basis_set, repr_method)

        # Only save the hash when all the required information is present.
        try:
            hash_source.check(recursive=True)
        except (KeyError, ValueError) as e:
            self.logger.info("Could not create method hash: {}".format(e))
            return
        method.method_hash = hash_source.hash()

    @abstractmethod
    def method_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
        """Return the subclass-specific settings that enter the method hash."""

    def group_eos_hash(self, method: Method, material: Material, repr_method: Section):
        """Compute the equation-of-state grouping hash and store it in
        ``method.group_eos_hash``.

        The hash combines the upload id, the method hash and the material
        formula. If validating the collected values raises ``KeyError`` or
        ``ValueError``, the problem is logged and the hash is left unset.
        """
        eos_source = RestrictedDict(
            mandatory_keys=["upload_id", "method_hash", "formula"],
            forbidden_values=[None]
        )

        # Only calculations from the same upload are grouped.
        metadata = self.backend.entry_archive.section_metadata
        eos_source['upload_id'] = metadata.upload_id

        # Same method ...
        eos_source["method_hash"] = method.method_hash

        # ... and same formula (maybe even the symmetries should match).
        eos_source["formula"] = material.formula

        try:
            eos_source.check(recursive=True)
        except (KeyError, ValueError) as e:
            self.logger.info("Could not create EOS hash: {}".format(e))
            return
        method.group_eos_hash = eos_source.hash()

    def group_parametervariation_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_system: Section, repr_method: Section):
        """Compute the parameter-variation grouping hash and store it in
        ``method.group_parametervariation_hash``.

        The hash combines the upload id, program name and version, a string
        representation of the geometry of ``repr_system`` and the
        subclass-specific settings returned by
        ``group_parametervariation_hash_dict``. If validating the collected
        values raises ``KeyError`` or ``ValueError``, the problem is logged and
        the hash is left unset.
        """
        def angstrom_string(quantity):
            # Magnitude in angstrom with six decimals, flattened to one line.
            text = np.array2string(
                quantity.to(ureg.angstrom).magnitude,
                formatter={'float_kind': lambda x: "%.6f" % x},
            )
            return text.replace('\n', '')

        grouping = RestrictedDict(
            mandatory_keys=[
                "upload_id",
                "program_name",
                "program_version",
                "settings_geometry",
                "subsettings",
            ],
            forbidden_values=[None]
        )

        # Only calculations from the same upload are grouped.
        grouping['upload_id'] = self.backend.entry_archive.section_metadata.upload_id

        # The same code and version are required.
        grouping['program_name'] = self.backend["program_name"]
        grouping['program_version'] = self.backend["program_version"]

        # The geometry has to stay the same during a parameter variation, so a
        # string representation of it is included. Using the plain atom labels
        # and positions assumes that their order/translation/rotation does not
        # change between the calculations.
        geometry: OrderedDict = OrderedDict()
        geometry['atom_labels'] = ', '.join(repr_system['atom_labels'])
        geometry['atom_positions'] = angstrom_string(repr_system['atom_positions'])
        geometry['simulation_cell'] = angstrom_string(repr_system['lattice_vectors'])
        grouping['settings_geometry'] = geometry

        # Subclasses contribute their own method properties here.
        grouping["subsettings"] = self.group_parametervariation_hash_dict(method, settings_basis_set, repr_method)

        try:
            grouping.check(recursive=True)
        except (KeyError, ValueError) as e:
            self.logger.info("Could not create parameter variation hash: {}".format(e))
            return
        method.group_parametervariation_hash = grouping.hash()

    @abstractmethod
    def group_parametervariation_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
        """Return the subclass-specific settings that enter the
        parameter-variation grouping hash.
        """

    def group_e_min(self) -> None:
        """Placeholder; currently does nothing."""

    def group_type(self) -> None:
        """Placeholder; currently does nothing."""

    @abstractmethod
    def normalize(self, context: Context) -> None:
        """Fill in the method metainfo for the given context."""


class MethodDFTNormalizer(MethodNormalizer):
    """Normalizer that fills in the method-related Encyclopedia information
    for DFT calculations.
    """
    def core_electron_treatment(self, method: Method) -> None:
        """Set ``method.core_electron_treatment`` from the program name.

        Programs that are not in the lookup table, as well as a missing
        program name, result in ``config.services.unavailable_value``.
        """
        treatment = config.services.unavailable_value
        code_name = self.backend["program_name"]
        if code_name is not None:
            core_electron_treatments = {
                'VASP': 'pseudopotential',
                'FHI-aims': 'full all electron',
                'exciting': 'full all electron',
                'quantum espresso': 'pseudopotential'
            }
            treatment = core_electron_treatments.get(code_name, config.services.unavailable_value)
        method.core_electron_treatment = treatment

    def functional_long_name(self, method: Method, repr_method: Section) -> None:
        """'Long' form of exchange-correlation functional, list of components
        and parameters as a string: see
        https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional

        The result is stored in ``method.functional_long_name``. A warning is
        logged when no name could be composed.
        """
        xc_functional = MethodDFTNormalizer.functional_long_name_from_method(repr_method, self.section_run.section_method)
        # Compare by value: an identity check on a string only works as long
        # as the very same object happens to be returned.
        if xc_functional == config.services.unavailable_value:
            self.logger.warning(
                "Metainfo for 'XC_functional' not found, and could not "
                "compose name from 'section_XC_functionals'."
            )
        method.functional_long_name = xc_functional

    @staticmethod
    def functional_long_name_from_method(repr_method: Section, methods: List[Section]):
        """'Long' form of exchange-correlation functional, list of components
        and parameters as a string: see
        https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional

        Args:
            repr_method: The method section to inspect. Methods that it
                references with the kind "core_settings" are inspected as well.
            methods: Not used by this function; kept so that existing callers
                keep working.

        Returns:
            The component names (each prefixed with ``<weight>*`` when a weight
            is set), sorted by name and joined with '+'. If several of the
            inspected methods define components, the last one wins. If none
            does, ``config.services.unavailable_value`` is returned.
        """
        linked_methods = [repr_method]
        try:
            refs = repr_method.section_method_to_method_refs
        except KeyError:
            pass
        else:
            for ref in refs:
                method_to_method_kind = ref.method_to_method_kind
                referenced_method = ref.method_to_method_ref
                if method_to_method_kind == "core_settings":
                    linked_methods.append(referenced_method)

        xc_functional = config.services.unavailable_value
        for method in linked_methods:
            try:
                section_xc_functionals = method.section_XC_functionals
            except KeyError:
                continue

            # Maps the component name to its "<weight>*<name>" representation.
            components = {}
            for component in section_xc_functionals:
                try:
                    cname = component.XC_functional_name
                except KeyError:
                    continue
                this_component = ''
                if component.XC_functional_weight is not None:
                    this_component = str(component.XC_functional_weight) + '*'
                this_component += cname
                components[cname] = this_component

            # Sorting makes the long name independent of the component order.
            if components:
                xc_functional = '+'.join(components[name] for name in sorted(components))

        return xc_functional

    def functional_type(self, method: Method) -> None:
        """Set ``method.functional_type`` to the short name derived from
        ``method.functional_long_name``, if a long name is available.
        """
        long_name = method.functional_long_name
        if long_name is not None:
            short_name = self.create_xc_functional_shortname(long_name)
            method.functional_type = short_name

    def method_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
        """Return the DFT-specific settings that enter the method hash:
        functional, basis set, SCF threshold and, when present, smearing and
        the number of k-points detected from the eigenvalues.
        """
        hash_dict = RestrictedDict(
            mandatory_keys=(
                "functional_long_name",
                "settings_basis_set",
                "scf_threshold_energy_change",
            ),
            optional_keys=(
                "smearing_kind",
                "smearing_width",
                "number_of_eigenvalues_kpoints",
            ),
            forbidden_values=[None]
        )
        # Functional settings
        hash_dict['functional_long_name'] = method.functional_long_name

        # Basis set settings
        hash_dict['settings_basis_set'] = settings_basis_set

        # Smearing settings if present
        smearing_kind = repr_method.smearing_kind
        if smearing_kind is not None:
            hash_dict['smearing_kind'] = smearing_kind
        smearing_width = repr_method.smearing_width
        if smearing_width is not None:
            smearing_width = '%.4f' % (smearing_width)
            hash_dict['smearing_width'] = smearing_width

        # k-point sampling settings if present. Add number of kpoints as
        # detected from eigenvalues. TODO: we would like to have info on the
        # _reducible_ k-point-mesh:
        #    - grid dimensions (e.g. [ 4, 4, 8 ])
        #    - or list of reducible k-points
        try:
            scc = self.section_run.section_single_configuration_calculation[-1]
            eigenvalues = scc.section_eigenvalues
            kpt = eigenvalues[-1].eigenvalues_kpoints
        except (KeyError, IndexError):
            pass
        else:
            if kpt is not None:
                hash_dict['number_of_eigenvalues_kpoints'] = str(len(kpt))

        # SCF convergence settings
        conv_thr = repr_method.scf_threshold_energy_change
        if conv_thr is not None:
            conv_thr = '%.13f' % (conv_thr.to(ureg.rydberg).magnitude)
            hash_dict['scf_threshold_energy_change'] = conv_thr

        return hash_dict

    def group_parametervariation_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
        """Dictionary containing the parameters used for convergence test
        grouping.

        This is the source for generating the related hash.
        """
        param_dict = RestrictedDict(
            mandatory_keys=(
                "functional_long_name",
                "scf_threshold_energy_change",
            ),
            optional_keys=(
                "atoms_pseudopotentials",
            ),
            forbidden_values=[None]
        )

        # TODO: Add other DFT-specific properties
        # considered variations:
        #   - smearing kind/width
        #   - k point grids
        #   - basis set parameters
        # convergence threshold should be kept constant during convtest
        param_dict['functional_long_name'] = method.functional_long_name
        conv_thr = repr_method.scf_threshold_energy_change
        if conv_thr is not None:
            conv_thr = '%.13f' % (conv_thr.to(ureg.rydberg).magnitude)
            param_dict['scf_threshold_energy_change'] = conv_thr

        # Pseudopotentials are kept constant, if applicable
        if settings_basis_set is not None:
            pseudos = settings_basis_set.get('atoms_pseudopotentials', None)
            if pseudos is not None:
                param_dict['atoms_pseudopotentials'] = pseudos

        return param_dict

    def create_xc_functional_shortname(self, xc_longname):
        """Use lookup table to transform xc functional long- into shortname.

        Args:
            xc_longname: Long functional name, i.e. components joined with
                '+', each optionally prefixed with ``<weight>*``.

        Returns:
            The short name, or None (after logging) when none could be found.
        """
        # Look for "special" functional names listed in this easily editable
        # table of 'short' XC functional names.
        xc_functional_shortname = {
            'HF_X': 'HF',
            'HYB_GGA_XC_B3LYP5': 'hybrid-GGA',
            'HYB_GGA_XC_HSE06': 'hybrid-GGA',
            'BEEF-vdW': 'vdW-DF'
        }
        shortname = xc_functional_shortname.get(xc_longname, None)

        # If not, look into other options:
        if shortname is None:
            xc_functional_starts = {
                "LDA": "LDA",
                "GGA": "GGA",
                "HYB_GGA": "hybrid-GGA",
                "MGGA": "meta-GGA",
                "HYB_MGGA": "hybrid-meta-GGA",
                "HF": "HF"
            }
            # decompose long name, this could be done more consistent with the
            # composition of the long name
            funcnames = set()
            for section in xc_longname.split("+"):
                funcname = section.split('*')[-1]
                for func_start in xc_functional_starts:
                    if funcname.startswith(func_start):
                        funcnames.add(func_start)
                        break

            # Only one functional is defined
            # (usually for correlation and exchange)
            if len(funcnames) == 1:
                # Use the collected family itself. The inner loop variable
                # must not be used here: when the last component matches no
                # known prefix it is left at the last key of the table.
                shortname = xc_functional_starts[next(iter(funcnames))]
            # Two functionals that give a hybrid-GGA functional
            elif "GGA" in funcnames and "HF" in funcnames:
                shortname = "hybrid-GGA"

        if shortname is None:
            self.logger.info(
                "Could not find a functional shortname for xc_functional {}."
                .format(xc_longname)
            )

        return shortname

    def normalize(self, context: Context) -> None:
        """Fill in the DFT method metainfo of the encyclopedia section using
        the representative method and system of the given context.
        """
        # Fetch resources
        repr_method = context.representative_method
        repr_system = context.representative_system
        sec_enc = self.backend.entry_archive.section_encyclopedia
        method = sec_enc.method
        material = sec_enc.material
        settings_basis_set = get_basis_set(context, self.backend, self.logger)

        # Fill metainfo. The hashes read values that are set by the earlier
        # calls (e.g. functional_long_name, method_hash), so order matters.
        self.core_electron_treatment(method)
        self.functional_long_name(method, repr_method)
        self.functional_type(method)
        self.method_hash(method, settings_basis_set, repr_method)
        self.group_eos_hash(method, material, repr_method)
        self.group_parametervariation_hash(method, settings_basis_set, repr_system, repr_method)


class MethodGWNormalizer(MethodDFTNormalizer):
    """Normalizer that fills in the method-related Encyclopedia information
    for GW calculations.
    """
    def gw_starting_point(self, method: Method, repr_method: Section) -> None:
        """Set ``method.gw_starting_point`` to the long functional name of the
        method that the first method reference of ``repr_method`` points to,
        provided that the reference is of the kind "starting_point".

        Nothing is set when the reference cannot be read.
        """
        try:
            ref = repr_method.section_method_to_method_refs[0]
            method_to_method_kind = ref.method_to_method_kind
            start_method = ref.method_to_method_ref
        except (KeyError, IndexError):
            # IndexError: the list of references is empty, so `[0]` fails.
            return

        if method_to_method_kind == "starting_point":
            methods = self.section_run.section_method
            xc_functional = MethodDFTNormalizer.functional_long_name_from_method(start_method, methods)
            method.gw_starting_point = xc_functional

    def functional_type(self, method: Method) -> None:
        """Set ``method.functional_type`` to the fixed value "GW"."""
        method.functional_type = "GW"

    def gw_type(self, method: Method, repr_method: Section) -> None:
        """Set ``method.gw_type`` to the ``electronic_structure_method`` of
        ``repr_method``.
        """
        method.gw_type = repr_method["electronic_structure_method"]

    def normalize(self, context: Context) -> None:
        """Fill in the GW method metainfo of the encyclopedia section using
        the representative method of the given context.
        """
        # Fetch resources
        repr_method = context.representative_method
        sec_enc = self.backend.entry_archive.section_encyclopedia
        method = sec_enc.method

        # Fill metainfo
        self.functional_type(method)
        self.gw_type(method, repr_method)
        self.gw_starting_point(method, repr_method)