dft.py 13.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
DFT specific metadata
'''
18
19
20

import re

21
from nomad import config, utils
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
26
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
27
28
29
30
31
32
33
34
35
36
37


# Lookup table from a three-character, lower-case functional-name prefix to
# the label of the corresponding XC treatment.
xc_treatments = dict(
    gga='GGA',
    hf_='HF',
    oep='OEP',
    hyb='hybrid',
    mgg='meta-GGA',
    vdw='vdW',
    lda='LDA',
)
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
39
40
41
42
43
44
45

# Lookup table from a normalized basis set key (lower-case, without
# underscores, dashes, or spaces) to its display label.
basis_sets = dict(
    gaussians='gaussians',
    realspacegrid='real-space grid',
    planewaves='plane waves',
)

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Compound type names ordered by number of distinct species: the entry at
# index ``i`` names a compound with ``i + 1`` species.
compound_types = (
    'unary binary ternary quaternary quinary '
    'sexinary septenary octanary nonary decinary'
).split()

# Names of quantities, grouped by the kind of property they belong to.
_energy_quantities = [
    'energy_total',
    'energy_total_T0',
    'energy_free',
    'energy_electrostatic',
    'energy_X',
    'energy_XC',
    'energy_sum_eigenvalues',
]

_electronic_quantities = [
    'dos_values',
    'eigenvalues_values',
    'volumetric_data_values',
    'electronic_kinetic_energy',
    'total_charge',
    'atomic_multipole_values',
]

_forces_quantities = [
    'atom_forces_free',
    'atom_forces_raw',
    'atom_forces_T0',
    'atom_forces',
    'stress_tensor',
]

_vibrational_quantities = [
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
    'band_energies',
]

_magnetic_quantities = [
    'spin_S2',
]

_optical_quantities = [
    'excitation_energies',
    'oscillator_strengths',
    'transition_dipole_moments',
]

# The union of all groups above, as a set for membership tests.
_searchable_quantities = set().union(
    _energy_quantities,
    _electronic_quantities,
    _forces_quantities,
    _vibrational_quantities,
    _magnetic_quantities,
    _optical_quantities)

100
101
102
103
# Matches a run of digits, optionally followed by up to two further
# dot-separated digit groups (i.e. ``N``, ``N.N``, or ``N.N.N``).
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Map a functional name to the label of its XC treatment.

    The configured placeholder for unavailable values is passed through
    unchanged. Otherwise the first three characters of ``name``, lower-cased,
    are looked up in ``xc_treatments``; a name without a matching prefix is
    returned as given.
    '''
    is_placeholder = name == config.services.unavailable_value
    if not is_placeholder:
        prefix = name[:3].lower()
        return xc_treatments.get(prefix, name)

    return name


def map_basis_set_to_basis_set_label(name):
    '''
    Map a basis set name to its display label.

    Underscores, dashes, and spaces are removed from ``name`` and the result is
    lower-cased before it is looked up in ``basis_sets``. Names without a
    matching entry are returned as given.
    '''
    stripped = name
    for separator in ('_', '-', ' '):
        stripped = stripped.replace(separator, '')

    return basis_sets.get(stripped.lower(), name)


def simplify_version(version):
    '''
    Reduce a version string to the first part that matches ``version_re``.

    Returns the given string unchanged if nothing in it matches.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


123
class Label(MSection):
    '''
    A label that further classifies a structure.

    Attributes:
        label: The label as a string.
        type: The type of the label.
        source: The source that this label was taken from.
    '''

    label = Quantity(
        type=str,
        a_search=Search())

    type = Quantity(
        type=MEnum('compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
142
143
144
145
146
147
148
149


class DFTMetadata(MSection):
    # Metadata section for the 'dft' domain. The quantities below are declared
    # with search annotations; ``apply_domain_metadata`` fills them from a
    # parser backend.
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_values=[
            '(L)APW+lo', 'FLAPW', 'gaussians', 'numeric AOs', 'plane waves', 'psinc functions',
            'real-space grid', 'unavailable', 'not processed'
        ]))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_values=list(xc_treatments.values()) + ['unavailable', 'not processed']))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(statistic_values=[
            '1D', '2D', 'atom', 'bulk', 'molecule / cluster', 'surface',
            'unavailable', 'not processed'
        ]))

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_values=compound_types + ['not processed'])
    )

    # NOTE(review): 'orthorombic' is kept exactly as declared; confirm it
    # matches the spelling of the values that are actually stored.
    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(
            statistic_values=[
                'cubic', 'hexagonal', 'monoclinic', 'orthorombic', 'tetragonal',
                'triclinic', 'trigonal', 'unavailable', 'not processed']
        ))

    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search())  # when the parser module is imported, it adds the codes here as statistic_values

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, default=0, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int, default=0,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, default=0, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, default=0, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    searchable_quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities with existence filters in the search GUI.',
        a_search=Search(many_and='append', statistic_size=len(_searchable_quantities)))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compund classification.',
        a_search=Search(
            many_and='append', statistic_size=20,
            statistic_order='_count'))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(
            many_and='append', statistic_size=10,
            statistic_order='_count'))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def code_name_from_parser(self):
        '''
        Return the ``code_name`` of the parser that is registered under the
        parent entry's ``parser_name``.

        Falls back to the configured unavailable value if the entry has no
        parser name, or if the looked-up parser has no ``code_name`` attribute
        (which includes the case that no parser is registered under that name).
        '''
        entry = self.m_parent
        if entry.parser_name is not None:
            # imported locally, presumably to avoid a circular import -- TODO confirm
            from nomad.parsing import parser_dict
            parser = parser_dict.get(entry.parser_name)
            if hasattr(parser, 'code_name'):
                return parser.code_name
        return config.services.unavailable_value

    def update_group_hash(self):
        '''
        Recompute ``group_hash`` from the parent entry's formula, embargo flag,
        and uploader, and from this section's spacegroup, basis set,
        xc functional, code name, and code version.
        '''
        self.group_hash = utils.hash(
            self.m_parent.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            self.m_parent.with_embargo,
            self.m_parent.uploader)

    def apply_domain_metadata(self, backend):
        '''
        Populate this section, and some fields of the parent entry, from the
        given backend.

        If ``backend`` is ``None``, only ``code_name`` is set (taken from the
        parser) and the method returns. Otherwise code, system, symmetry, and
        method metadata are read from the backend, the group hash is updated,
        the parsed quantities are collected and counted, and springer and AFLOW
        labels are attached.

        Arguments:
            backend: The backend that holds the parsed data, or ``None``.
        '''
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        if backend is None:
            self.code_name = self.code_name_from_parser()
            return

        # code and code specific ids
        try:
            self.code_name = backend.get_value('program_name', 0)
        except KeyError as e:
            logger.warning('backend after parsing without program_name', exc_info=e)
            self.code_name = self.code_name_from_parser()

        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all labels; entry.atoms holds the sorted distinct normalized labels
        entry.n_atoms = len(atoms)
        atoms = sorted(set(normalized_atom_labels(set(atoms))))
        entry.atoms = atoms

        n_species = len(atoms)
        if n_species == 0:
            # Without this guard the lookup below would use index -1 and
            # wrongly report 'decinary' for an entry without any atom labels.
            self.compound_type = config.services.unavailable_value
        elif n_species <= 10:
            self.compound_type = compound_types[n_species - 1]
        else:
            self.compound_type = '>decinary'

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping; done after the values that go into the hash have been set
        self.update_group_hash()

        # metrics and quantities
        quantities = set()
        searchable_quantities = set()
        geometries = set()

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for section_run in backend.entry_archive.section_run:
            # the run section itself counts as a quantity
            quantities.add(section_run.m_def.name)
            n_quantities += 1

            for section, property_def, _ in section_run.m_traverse():
                property_name = property_def.name
                quantities.add(property_name)
                n_quantities += 1

                if property_name in _searchable_quantities:
                    searchable_quantities.add(property_name)

                if property_name == 'energy_total':
                    n_total_energies += 1

                if property_name == 'configuration_raw_gid':
                    geometries.add(section.m_get(property_def))

                if property_name == 'section_single_configuration_calculation':
                    n_calculations += 1

                if property_name == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.searchable_quantities = list(searchable_quantities)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # AFLOW labels are only added when both the id and the label are present
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))