dft.py 11.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
DFT specific metadata
"""

from typing import List
import re
21
from elasticsearch_dsl import Integer, Object, InnerDoc, Keyword
22

23
24
from nomadcore.local_backend import ParserEvent

25
from nomad import utils, config
26
27
from nomad.metainfo import optimade, MSection, Section, Quantity, MEnum
from nomad.metainfo.elastic import elastic_mapping, elastic_obj
28

29
from .base import CalcWithMetadata, DomainQuantity, Domain, get_optional_backend_value
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52


# Lookup from the lower-cased first three characters of an XC functional name
# (see map_functional_name_to_xc_treatment) to the XC treatment label.
xc_treatments = dict(
    gga='GGA',
    hf_='HF',
    oep='OEP',
    hyb='hybrid',
    mgg='meta-GGA',
    vdw='vdW',
    lda='LDA',
)
""" https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional """

# Lookup from a normalized basis set type name (lower case, with '_', '-', and
# spaces removed; see map_basis_set_to_basis_set_label) to its display label.
basis_sets = dict(
    gaussians='gaussians',
    realspacegrid='real-space grid',
    planewaves='plane waves',
)

# Matches the first ``major[.minor[.patch]]`` number sequence in a version string.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    """
    Translate an XC functional name into its XC treatment label.

    The configured "unavailable" placeholder value is passed through untouched.
    For any other name, the lower-cased first three characters are looked up in
    ``xc_treatments``; a name without a matching entry is returned as given.
    """
    is_placeholder = name == config.services.unavailable_value
    if not is_placeholder:
        prefix = name[:3].lower()
        return xc_treatments.get(prefix, name)

    return name


def map_basis_set_to_basis_set_label(name):
    """
    Translate a basis set type name into its display label.

    The name is normalized by removing underscores, hyphens, and spaces and by
    lower-casing it before it is looked up in ``basis_sets``. A name without a
    matching entry is returned as given.
    """
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')

    return basis_sets.get(normalized.lower(), name)


def simplify_version(version):
    """
    Reduce a version string to its first ``major[.minor[.patch]]`` number.

    Returns the first substring matched by ``version_re``, or the unchanged
    input when the string contains no such number.
    """
    found = version_re.search(version)
    return version if found is None else found.group(0)


72
73
74
75
76
77
78
79
80
81
82
83
class Label(MSection):
    """
    A single label that further classifies a structure.

    Attributes:
        label: The label text.
        type: The kind of label; one of ``compound_class``, ``classification``,
            ``prototype``, or ``prototype_id``.
        source: Where the label was taken from; either ``springer`` or
            ``aflow_prototype_library``.
    """

    m_def = Section(a_elastic={'type': InnerDoc})

    label = Quantity(type=str, a_elastic={'type': Keyword})

    type = Quantity(
        type=MEnum('compound_class', 'classification', 'prototype', 'prototype_id'),
        a_elastic={'type': Keyword})

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_elastic={'type': Keyword})
93
94
95
96
97


# Elasticsearch counterpart of the ``Label`` section, built from its section definition
# (presumably an ``InnerDoc``-based document class -- confirm in nomad.metainfo.elastic).
# It is used in this module for the mapping and the indexed values of the ``labels``
# domain quantity.
ESLabel = elastic_mapping(Label.m_def, InnerDoc)


98
99
100
101
102
103
104
105
106
107
108
109
110
class DFTCalcWithMetadata(CalcWithMetadata):
    """
    Calculation metadata for the DFT domain.

    Extends :class:`CalcWithMetadata` with DFT specific attributes.

    Attributes:
        basis_set: Display label of the basis set type.
        xc_functional: XC treatment label derived from the XC functional name.
        system: The system type.
        crystal_system: The crystal system.
        spacegroup: The space group number.
        spacegroup_symbol: The international short symbol of the space group.
        code_name: The program name.
        code_version: The simplified program version.
        n_geometries: Number of opened ``section_system`` sections.
        n_calculations: Number of opened ``section_single_configuration_calculation``
            sections.
        n_total_energies: Number of ``energy_total`` values.
        n_quantities: Number of added (array) values overall.
        quantities: All distinct metainfo names encountered while traversing the backend.
        geometries: All distinct ``configuration_raw_gid`` values.
        group_hash: Hash over key metadata, used to group similar entries.
        labels: The :class:`Label` sections (springer and aflow prototype labels).
        optimade: The ``optimade.OptimadeEntry`` section, if available.
    """

    def __init__(self, **kwargs):
        # The defaults are set before the base initializer runs -- presumably because it
        # applies ``kwargs`` on top of them; verify against CalcWithMetadata.
        self.basis_set: str = None
        self.xc_functional: str = None
        self.system: str = None
        self.crystal_system: str = None
        self.spacegroup: str = None
        self.spacegroup_symbol: str = None
        self.code_name: str = None
        self.code_version: str = None

        self.n_geometries = 0
        self.n_calculations = 0
        self.n_total_energies = 0
        self.n_quantities = 0
        self.quantities = []
        self.geometries = []
        self.group_hash: str = None

        self.labels: List[Label] = []
        self.optimade: optimade.OptimadeEntry = None

        super().__init__(**kwargs)

    def update(self, **kwargs):
        """
        Update the attributes from ``kwargs`` and restore metainfo sections from dicts.

        Labels and the optimade entry may be given as plain dicts; those are converted
        into :class:`Label` and ``optimade.OptimadeEntry`` instances. Values that are
        not dicts (e.g. already created sections) are kept as they are.
        """
        super().update(**kwargs)

        if self.labels:
            # Only dicts need to be converted. Labels can already be Label instances,
            # e.g. when update is called after apply_domain_metadata appended labels.
            self.labels = [
                Label.m_from_dict(label) if isinstance(label, dict) else label
                for label in self.labels]

        if isinstance(self.optimade, dict):
            self.optimade = optimade.OptimadeEntry.m_from_dict(self.optimade)

    def __getitem__(self, key):
        """
        Return the value for ``key`` as provided by the base class, with the ``labels``
        and ``optimade`` sections converted to plain dicts via ``m_to_dict``.
        """
        value = super().__getitem__(key)

        if key == 'labels':
            return [item.m_to_dict() for item in value]

        if key == 'optimade':
            return value.m_to_dict()

        return value

    def apply_domain_metadata(self, backend):
        """
        Populate the DFT specific metadata from the given ``backend``.

        Reads code, system, symmetry, and method information, computes the group hash,
        counts parsed quantities and sections, and collects the springer/aflow labels
        as well as the optimade entry.

        Raises:
            KeyError: Presumably when ``program_name`` is not available in the backend;
                only a missing ``program_version`` is handled here.
        """
        # Imported locally rather than at module level -- presumably to avoid an
        # import cycle; confirm before moving it.
        from nomad.normalizing.system import normalized_atom_labels

        logger = utils.get_logger(__name__).bind(
            upload_id=self.upload_id, calc_id=self.calc_id, mainfile=self.mainfile)

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        self.raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', 0)

        # metadata (system, method, chemistry)
        self.atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(self.atoms, 'tolist'):
            self.atoms = self.atoms.tolist()
        # n_atoms counts all atoms; afterwards atoms is reduced to the sorted unique labels
        self.n_atoms = len(self.atoms)
        self.atoms = sorted(set(normalized_atom_labels(set(self.atoms))))

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', 0, logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        self.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping; must run after the attributes above are set, since they go into the hash
        self.group_hash = utils.hash(
            self.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            self.with_embargo,
            self.comment,
            self.references,
            self.uploader,
            self.coauthors)

        # metrics and quantities
        quantities = set()
        geometries = set()
        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        value_events = (ParserEvent.add_value, ParserEvent.add_array_value)
        for meta_info, event, value in backend.traverse():
            # every traversed name is recorded, regardless of the event type
            quantities.add(meta_info)

            if event in value_events:
                n_quantities += 1

                if meta_info == 'energy_total':
                    n_total_energies += 1

                if meta_info == 'configuration_raw_gid':
                    geometries.add(value)

            elif event == ParserEvent.open_section:
                if meta_info == 'section_single_configuration_calculation':
                    n_calculations += 1

                if meta_info == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels; sets de-duplicate values that occur in several springer sections
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # prototype labels are only added when both the id and the label are available
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        # optimade
        self.optimade = backend.get_mi2_section(optimade.OptimadeEntry.m_def)
250

251

252
253
254
255
256
257
258
def _elastic_label_value(label):
    if isinstance(label, str):
        return label
    else:
        return elastic_obj(label, ESLabel)


259
# Registers the 'dft' domain with its metadata class, its domain quantities, metrics,
# groups, and the statistics shown by default.
Domain(
    'dft', DFTCalcWithMetadata,
    quantities={
        'basis_set': DomainQuantity(
            'The used basis set functions.', aggregations=20),
        'xc_functional': DomainQuantity(
            'The xc functional type used for the simulation.', aggregations=20),
        'system': DomainQuantity(
            'The system type of the simulated system.', aggregations=10),
        'crystal_system': DomainQuantity(
            'The crystal system type of the simulated system.', aggregations=10),
        'code_name': DomainQuantity(
            'The code name.', aggregations=40),
        'spacegroup': DomainQuantity(
            'The spacegroup of the simulated system as number'),
        'spacegroup_symbol': DomainQuantity(
            'The spacegroup as international short symbol'),
        'geometries': DomainQuantity(
            'Hashes that describe unique geometries simulated by this code run.',
            multi=True),
        'group_hash': DomainQuantity(
            'A hash from key metadata used to group similar entries.'),
        'quantities': DomainQuantity(
            'All quantities that are used by this calculation',
            metric=('quantities', 'value_count'),
            multi=True),
        'n_total_energies': DomainQuantity(
            'Number of total energy calculations',
            elastic_mapping=Integer()),
        'n_calculations': DomainQuantity(
            'Number of single configuration calculation sections',
            elastic_mapping=Integer()),
        'n_quantities': DomainQuantity(
            'Number of overall parsed quantities',
            elastic_mapping=Integer()),
        'n_geometries': DomainQuantity(
            'Number of unique geometries',
            elastic_mapping=Integer()),
        'labels': DomainQuantity(
            'Search based for springer classification and aflow prototypes',
            elastic_field='labels.label',
            elastic_mapping=Object(ESLabel),
            elastic_value=lambda labels: [_elastic_label_value(label) for label in labels],
            multi=True),
        'optimade': DomainQuantity(
            'Search based on optimade\'s filter query language',
            elastic_mapping=Object(optimade.ESOptimadeEntry),
            elastic_value=lambda entry: elastic_obj(entry, optimade.ESOptimadeEntry)),
    },
    metrics={
        'total_energies': ('n_total_energies', 'sum'),
        'calculations': ('n_calculations', 'sum'),
        'quantities': ('n_quantities', 'sum'),
        'geometries': ('n_geometries', 'sum'),
        'unique_geometries': ('geometries', 'cardinality'),
        'groups': ('group_hash', 'cardinality'),
    },
    groups={
        'groups': ('group_hash', 'groups'),
    },
    default_statistics=[
        'atoms', 'dft.basis_set', 'dft.xc_functional', 'dft.system', 'dft.crystal_system', 'dft.code_name'])