# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
DFT specific metadata
"""

from typing import List
import re
from elasticsearch_dsl import Integer
22
import ase.data
23

24
25
from nomadcore.local_backend import ParserEvent

26
27
from nomad import utils, config

28
from .base import CalcWithMetadata, DomainQuantity, Domain, get_optional_backend_value
29
30


31
32
33
34
35
# Names of the sections whose openings are counted as calculations
# (``n_calculations``) when the parsed results are traversed.
calculations_sections = [
    'section_single_configuration_calculation',
    'section_k_band',
    'section_eigenvalues',
]

36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Lookup table from the lowercased first three characters of an XC functional
# name to the treatment label used in the metadata.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
""" https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional """

# Lookup table from a normalized basis set type (lowercase, without '_', '-'
# or spaces) to the label used in the metadata.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves',
}

# Matches a run of digits optionally followed by up to two further
# dot-separated digit groups, e.g. ``1``, ``1.2`` or ``1.2.3``.
version_re = re.compile(
    r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    """
    Map an XC functional name to its treatment label.

    The lowercased first three characters of ``name`` are looked up in
    ``xc_treatments``. The configured unavailable value and names without a
    matching entry are returned unchanged.

    Arguments:
        name: The XC functional name, or ``config.services.unavailable_value``.

    Returns: The treatment label from ``xc_treatments`` or ``name`` itself.
    """
    # The placeholder for missing values must pass through untouched and must
    # not be matched against the three-letter prefixes.
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    """
    Map a basis set type to its label.

    Underscores, dashes and spaces are removed from ``name`` and the result is
    lowercased before it is looked up in ``basis_sets``. If there is no entry,
    ``name`` is returned unchanged.
    """
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')
    normalized = normalized.lower()

    return basis_sets.get(normalized, name)


def simplify_version(version):
    """
    Reduce a version string to the first match of ``version_re``.

    Returns: The matched text, or ``version`` unchanged if nothing matches.
    """
    found = version_re.search(version)
    return version if found is None else found.group(0)


class DFTCalcWithMetadata(CalcWithMetadata):
    """
    Calculation metadata for the DFT domain.

    Extends :class:`CalcWithMetadata` with DFT specific attributes. They are
    initialized with defaults in the constructor and filled from a parser
    backend by :func:`apply_domain_metadata`.
    """

    def __init__(self, **kwargs):
        # The DFT defaults are assigned first; kwargs are then handed to the
        # base class constructor.
        self.formula: str = None
        self.atoms: List[str] = []
        self.n_atoms: int = 0
        self.basis_set: str = None
        self.xc_functional: str = None
        self.system: str = None
        self.crystal_system: str = None
        self.spacegroup: str = None
        self.spacegroup_symbol: str = None
        self.code_name: str = None
        self.code_version: str = None

        self.n_total_energies = 0
        self.n_geometries = 0
        self.n_calculations = 0
        self.n_quantities = 0
        self.quantities = []
        self.geometries = []
        self.group_hash: str = None

        super().__init__(**kwargs)

    def apply_domain_metadata(self, backend):
        """
        Fill the DFT specific metadata of this calculation from a parser backend.

        Reads code, system, symmetry and method values from the backend,
        computes ``group_hash`` from them and gathers statistics by traversing
        ``backend._delegate.results``.

        Arguments:
            backend: The parser backend that holds the parsed results.
        """
        # Imported locally, as in the original code (presumably to avoid an
        # import cycle -- TODO confirm).
        from nomad.normalizing.system import normalized_atom_labels

        logger = utils.get_logger(__name__).bind(
            upload_id=self.upload_id, calc_id=self.calc_id, mainfile=self.mainfile)

        # Only a missing program version is replaced by the unavailable value;
        # the program name lookup is not guarded.
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        self.atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(self.atoms, 'tolist'):
            self.atoms = self.atoms.tolist()
        # n_atoms counts all atoms and therefore has to be taken before the
        # labels are reduced to a sorted list of unique normalized labels.
        self.n_atoms = len(self.atoms)
        self.atoms = sorted(set(normalized_atom_labels(set(self.atoms))))

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', 0, logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        self.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # The hash combines the DFT values read above with attributes that are
        # not set in this class (presumably provided by the base class).
        self.group_hash = utils.hash(
            self.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            self.with_embargo,
            self.comment,
            self.references,
            self.uploader,
            self.coauthors)

        quantities = set()
        geometries = set()
        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for meta_info, event, value in backend._delegate.results.traverse():
            # Every traversed meta info name is recorded, regardless of the event.
            quantities.add(meta_info)

            if event == ParserEvent.add_value or event == ParserEvent.add_array_value:
                n_quantities += 1

                if meta_info == 'energy_total':
                    n_total_energies += 1

                if meta_info == 'configuration_raw_gid':
                    geometries.add(value)

            elif event == ParserEvent.open_section:
                if meta_info in calculations_sections:
                    n_calculations += 1

                if meta_info == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries


182
183
184
185
186
187
def only_atoms(atoms):
    """
    Concatenate the given atom labels into one string, ordered by atomic number.

    The labels are translated to numbers via ``ase.data.atomic_numbers``, the
    numbers are sorted and translated back via ``ase.data.chemical_symbols``.
    """
    ordered_numbers = sorted(ase.data.atomic_numbers[label] for label in atoms)
    return ''.join(ase.data.chemical_symbols[number] for number in ordered_numbers)


188
# Registers the DFT domain with its metadata class and the quantities that are
# declared for it. Each keyword names an attribute of DFTCalcWithMetadata,
# except only_atoms, which is derived from the atoms attribute via only_atoms().
Domain('DFT', DFTCalcWithMetadata, quantities=dict(
    formula=DomainQuantity(
        'The chemical (hill) formula of the simulated system.',
        order_default=True),
    atoms=DomainQuantity(
        'The atom labels of all atoms in the simulated system.',
        aggregations=len(ase.data.chemical_symbols), multi=True, zero_aggs=False),
    only_atoms=DomainQuantity(
        'The atom labels concatenated in species-number order. Used with keyword search '
        'to facilitate exclusive searches.',
        elastic_value=only_atoms, metadata_field='atoms', multi=True),
    basis_set=DomainQuantity(
        'The used basis set functions.', aggregations=20),
    xc_functional=DomainQuantity(
        'The xc functional type used for the simulation.', aggregations=20),
    system=DomainQuantity(
        'The system type of the simulated system.', aggregations=10),
    crystal_system=DomainQuantity(
        'The crystal system type of the simulated system.', aggregations=10),
    code_name=DomainQuantity(
        'The code name.', aggregations=40),
    spacegroup=DomainQuantity('The spacegroup of the simulated system as number'),
    spacegroup_symbol=DomainQuantity('The spacegroup as international short symbol'),
    geometries=DomainQuantity(
        'Hashes that describe unique geometries simulated by this code run.',
        metric=('geometries', 'cardinality')
    ),
    quantities=DomainQuantity(
        'All quantities that are used by this calculation',
        metric=('quantities', 'value_count'), multi=True
    ),
    n_total_energies=DomainQuantity(
        'Number of total energy calculations',
        metric=('total_energies', 'sum'),
        elastic_mapping=Integer()),
    n_calculations=DomainQuantity(
        'Number of calculations (single configuration, k band, and eigenvalues)',
        metric=('calculations', 'sum'),
        elastic_mapping=Integer()),
    n_quantities=DomainQuantity(
        'Number of overall parsed quantities',
        metric=('parsed_quantities', 'sum'),
        elastic_mapping=Integer()),
    n_geometries=DomainQuantity(
        'Number of unique geometries',
        elastic_mapping=Integer()),
    n_atoms=DomainQuantity('Number of atoms in the simulated system', elastic_mapping=Integer())))