dft.py 11.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
DFT specific metadata
"""

from typing import List
import re
21
from elasticsearch_dsl import Integer, Object, InnerDoc
22
import ase.data
23

24
25
from nomadcore.local_backend import ParserEvent

26
from nomad import utils, config
27
28
from nomad.metainfo import optimade, MSection, Section, Quantity, MEnum
from nomad.metainfo.elastic import elastic_mapping, elastic_obj
29

30
from .base import CalcWithMetadata, DomainQuantity, Domain, get_optional_backend_value
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53


# Lookup table used by map_functional_name_to_xc_treatment: the key is the
# lower-cased three character prefix of an XC functional name, the value is
# the label of the corresponding XC treatment.
xc_treatments = dict(
    gga='GGA',
    hf_='HF',
    oep='OEP',
    hyb='hybrid',
    mgg='meta-GGA',
    vdw='vdW',
    lda='LDA',
)
""" https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional """

# Lookup table used by map_basis_set_to_basis_set_label: the key is the basis
# set type with '_', '-' and ' ' removed and lower-cased, the value is the
# label that is stored instead.
basis_sets = dict(
    gaussians='gaussians',
    realspacegrid='real-space grid',
    planewaves='plane waves',
)

# Matches the first "major[.minor[.patch]]" number group in a version string.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    """
    Maps the name of an XC functional to the label of its XC treatment.

    Arguments:
        name: The XC functional name. The configured placeholder for
            unavailable values (``config.services.unavailable_value``) is
            passed through unchanged.

    Returns:
        The entry of ``xc_treatments`` for the lower-cased first three
        characters of ``name``, or ``name`` itself if there is no such entry.
    """
    # The placeholder for missing values must not be mistaken for a functional name.
    if name == config.services.unavailable_value:
        return name

    # Only the three character prefix identifies the treatment.
    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    """
    Maps a basis set type name to its label.

    Underscores, dashes and blanks are removed and the remainder is
    lower-cased before it is looked up in ``basis_sets``. Names without an
    entry are returned unchanged.
    """
    # Delete all separator characters in a single pass.
    without_separators = name.translate(str.maketrans('', '', '_- '))
    lookup_key = without_separators.lower()

    if lookup_key in basis_sets:
        return basis_sets[lookup_key]
    return name


def simplify_version(version):
    """
    Reduces a version string to the first number group found by ``version_re``.

    Returns:
        The text matched by ``version_re`` or the unchanged ``version`` if
        the pattern does not match anywhere in the string.
    """
    found = version_re.search(version)
    return version if found is None else found.group(0)


73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
class Label(MSection):
    """
    A label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.
    """

    # Section definition; carries an elastic annotation that names InnerDoc as type.
    m_def = Section(a_elastic=dict(type=InnerDoc))

    label = Quantity(type=str)
    # What kind of label this is; restricted to the enumerated values.
    type = Quantity(type=MEnum('compound_class', 'classification', 'prototype', 'prototype_id'))
    # Where the label comes from; restricted to the enumerated values.
    source = Quantity(type=MEnum('springer', 'aflow_prototype_library'))


# Elastic mapping created from the Label section definition; it is referenced by
# the 'labels' domain quantity (Object(ESLabel) / elastic_obj(label, ESLabel)).
ESLabel = elastic_mapping(Label.m_def, InnerDoc)


93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
class DFTCalcWithMetadata(CalcWithMetadata):
    """
    Calculation metadata for the DFT domain.

    Adds the DFT specific attributes (chemistry, method, symmetry, code,
    metrics, labels, optimade entry) to :class:`CalcWithMetadata` and knows how
    to extract them from a parser backend, see :func:`apply_domain_metadata`.

    Attributes:
        formula: Value of ``chemical_composition_bulk_reduced``.
        atoms: Sorted list of the distinct normalized atom labels.
        n_atoms: Number of atom labels before normalization and deduplication.
        basis_set: Label of the basis set type.
        xc_functional: Label of the XC treatment.
        system: Value of ``system_type``.
        crystal_system: Value of ``crystal_system``.
        spacegroup: Value of ``space_group_number``.
        spacegroup_symbol: Value of ``international_short_symbol``.
        code_name: Value of ``program_name``.
        code_version: Simplified value of ``program_version``.
        n_geometries: Number of opened ``section_system`` sections.
        n_calculations: Number of opened
            ``section_single_configuration_calculation`` sections.
        n_total_energies: Number of ``energy_total`` values.
        n_quantities: Number of all added (array) values.
        quantities: Names of all metainfo that occurred during traversal.
        geometries: Distinct ``configuration_raw_gid`` values.
        group_hash: Hash over key metadata, used to group similar entries.
        labels: :class:`Label` objects (springer and aflow prototype labels).
        optimade: The optimade entry section, if there is one.
    """

    def __init__(self, **kwargs):
        self.formula: str = None
        self.atoms: List[str] = []
        self.n_atoms: int = 0
        self.basis_set: str = None
        self.xc_functional: str = None
        self.system: str = None
        self.crystal_system: str = None
        self.spacegroup: str = None
        self.spacegroup_symbol: str = None
        self.code_name: str = None
        self.code_version: str = None

        self.n_geometries = 0
        self.n_calculations = 0
        self.n_total_energies = 0
        self.n_quantities = 0
        self.quantities = []
        self.geometries = []
        self.group_hash: str = None

        self.labels: List[Label] = []
        self.optimade: optimade.OptimadeEntry = None

        # The defaults above must exist before the base class applies kwargs.
        super().__init__(**kwargs)

    def update(self, **kwargs):
        """
        Applies the given values and turns dict representations of labels and
        of the optimade entry into their metainfo section objects.
        """
        super().update(**kwargs)

        if len(self.labels) > 0:
            # Only dicts need conversion. Labels that already are Label objects
            # (e.g. those appended by apply_domain_metadata) are kept as they
            # are, analogous to the isinstance check for optimade below.
            self.labels = [
                Label.m_from_dict(label) if isinstance(label, dict) else label
                for label in self.labels]

        if self.optimade is not None and isinstance(self.optimade, dict):
            self.optimade = optimade.OptimadeEntry.m_from_dict(self.optimade)

    def __getitem__(self, key):
        """
        Returns the value for ``key`` as provided by the base class; labels and
        the optimade entry are returned in their dict representation.
        """
        value = super().__getitem__(key)

        if key == 'labels':
            return [item.m_to_dict() for item in value]

        # optimade defaults to None, there is nothing to convert in that case.
        if key == 'optimade' and value is not None:
            return value.m_to_dict()

        return value

    def apply_domain_metadata(self, backend):
        """
        Extracts all DFT specific metadata from the given parser backend and
        stores it in the attributes of this object.

        Arguments:
            backend: The backend to read from. It is used via ``get_value``,
                ``get_sections``, ``traverse`` and ``get_mi2_section``.
        """
        # Imported here and not at module level; presumably to avoid a circular
        # import -- TODO confirm.
        from nomad.normalizing.system import normalized_atom_labels

        logger = utils.get_logger(__name__).bind(
            upload_id=self.upload_id, calc_id=self.calc_id, mainfile=self.mainfile)

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        self.raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', 0)

        # metadata (system, method, chemistry)
        self.atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(self.atoms, 'tolist'):
            # array-like values are turned into plain lists
            self.atoms = self.atoms.tolist()
        # n_atoms counts all labels, atoms only keeps the distinct normalized ones
        self.n_atoms = len(self.atoms)
        self.atoms = sorted(set(normalized_atom_labels(set(self.atoms))))

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', 0, logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        self.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping
        self.group_hash = utils.hash(
            self.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            self.with_embargo,
            self.comment,
            self.references,
            self.uploader,
            self.coauthors)

        # metrics and quantities
        quantities = set()
        geometries = set()
        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        value_events = (ParserEvent.add_value, ParserEvent.add_array_value)
        for meta_info, event, value in backend.traverse():
            # every metainfo name is recorded, regardless of the event type
            quantities.add(meta_info)

            if event in value_events:
                n_quantities += 1

                if meta_info == 'energy_total':
                    n_total_energies += 1

                if meta_info == 'configuration_raw_gid':
                    geometries.add(value)

            elif event == ParserEvent.open_section:
                if meta_info == 'section_single_configuration_calculation':
                    n_calculations += 1

                if meta_info == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        # Sets are used to only create one label per distinct value over all
        # springer material sections.
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # The prototype labels are only added if both values are there.
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        # optimade
        self.optimade = backend.get_mi2_section(optimade.OptimadeEntry.m_def)
248

249

250
251
252
253
254
255
def only_atoms(atoms):
    """
    Concatenates the given atom labels to a single string, with the chemical
    symbols ordered by atomic number.

    Labels that are not a key of ``ase.data.atomic_numbers`` raise a KeyError.
    """
    ordered_numbers = sorted(ase.data.atomic_numbers[label] for label in atoms)
    return ''.join(ase.data.chemical_symbols[number] for number in ordered_numbers)


256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# Definition of the DFT domain: its metadata class, the quantities (keyed by
# the attribute names of DFTCalcWithMetadata), metrics, groups and the default
# statistics.
Domain(
    'DFT', DFTCalcWithMetadata,
    quantities=dict(
        # chemistry, method, system and code
        formula=DomainQuantity(
            'The chemical (hill) formula of the simulated system.',
            order_default=True),
        atoms=DomainQuantity(
            'The atom labels of all atoms in the simulated system.',
            aggregations=len(ase.data.chemical_symbols), multi=True, zero_aggs=False),
        only_atoms=DomainQuantity(
            'The atom labels concatenated in species-number order. Used with keyword search '
            'to facilitate exclusive searches.',
            elastic_value=only_atoms, metadata_field='atoms', multi=True),
        basis_set=DomainQuantity(
            'The used basis set functions.', aggregations=20),
        xc_functional=DomainQuantity(
            'The xc functional type used for the simulation.', aggregations=20),
        system=DomainQuantity(
            'The system type of the simulated system.', aggregations=10),
        crystal_system=DomainQuantity(
            'The crystal system type of the simulated system.', aggregations=10),
        code_name=DomainQuantity(
            'The code name.', aggregations=40),
        spacegroup=DomainQuantity('The spacegroup of the simulated system as number'),
        spacegroup_symbol=DomainQuantity('The spacegroup as international short symbol'),
        geometries=DomainQuantity(
            'Hashes that describe unique geometries simulated by this code run.', multi=True),
        group_hash=DomainQuantity(
            'A hash from key metadata used to group similar entries.'),
        # quantities and counters
        quantities=DomainQuantity(
            'All quantities that are used by this calculation',
            metric=('quantities', 'value_count'), multi=True),
        n_total_energies=DomainQuantity(
            'Number of total energy calculations',
            elastic_mapping=Integer()),
        n_calculations=DomainQuantity(
            'Number of single configuration calculation sections',
            elastic_mapping=Integer()),
        n_quantities=DomainQuantity(
            'Number of overall parsed quantities',
            elastic_mapping=Integer()),
        # NOTE(review): DFTCalcWithMetadata.apply_domain_metadata counts every
        # opened section_system for n_geometries; distinct geometries are kept
        # in 'geometries'. The description below might be misleading -- confirm.
        n_geometries=DomainQuantity(
            'Number of unique geometries',
            elastic_mapping=Integer()),
        n_atoms=DomainQuantity(
            'Number of atoms in the simulated system',
            elastic_mapping=Integer()),
        # nested objects
        labels=DomainQuantity(
            'Search based for springer classification and aflow prototypes',
            elastic_field='labels.label',
            elastic_mapping=Object(ESLabel),
            elastic_value=lambda labels: [elastic_obj(label, ESLabel) for label in labels],
            multi=True),
        optimade=DomainQuantity(
            'Search based on optimade\'s filter query language',
            elastic_mapping=Object(optimade.ESOptimadeEntry),
            elastic_value=lambda entry: elastic_obj(entry, optimade.ESOptimadeEntry)
        )),
    # metric name -> (quantity, aggregation)
    metrics=dict(
        total_energies=('n_total_energies', 'sum'),
        calculations=('n_calculations', 'sum'),
        quantities=('n_quantities', 'sum'),
        geometries=('n_geometries', 'sum'),
        unique_geometries=('geometries', 'cardinality'),
        groups=('group_hash', 'cardinality')
    ),
    groups=dict(
        groups=('group_hash', 'groups')),
    default_statistics=[
        'atoms', 'basis_set', 'xc_functional', 'system', 'crystal_system', 'code_name'])