dft.py 14.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
DFT specific metadata
'''
18
19
20

import re

21
22
from nomadcore.local_backend import ParserEvent

23
from nomad import utils, config
24
25
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
26

27
28
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
29
30
31
32
33
34
35
36
37
38
39


# Lookup table from the lower-cased first three characters of an XC functional
# name to the label of the XC treatment. It is read by
# map_functional_name_to_xc_treatment, which falls back to the full name for
# prefixes that have no entry here.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
40
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
41
42
43
44
45
46
47

# Lookup table from a normalized basis set type (lower case, with '_', '-' and
# spaces removed) to the label stored in the metadata. It is read by
# map_basis_set_to_basis_set_label.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves'
}

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Compound type names ordered by element count: the entry at index n - 1 is the
# name for n different atom labels (see map_atoms_to_compound_type).
compound_types = [
    'unary',
    'binary',
    'ternary',
    'quaternary',
    'quinary',
    'sexinary',
    'septenary',
    'octanary',
    'nonary',
    'decinary'
]

# The following lists group metainfo names by topic. While traversing the
# backend, DFTMetadata.apply_domain_metadata checks each metainfo name against
# these lists (in the order they are defined here) to fill the corresponding
# quantities_* quantities.
_energy_quantities = [
    'energy_total',
    'energy_total_T0',
    'energy_free',
    'energy_electrostatic',
    'energy_X',
    'energy_XC',
    'energy_sum_eigenvalues']

_electronic_quantities = [
    'dos_values',
    'eigenvalues_values',
    'volumetric_data_values',
    'electronic_kinetic_energy',
    'total_charge',
    'atomic_multipole_values']

_forces_quantities = [
    'atom_forces_free',
    'atom_forces_raw',
    'atom_forces_T0',
    'atom_forces',
    'stress_tensor']

_vibrational_quantities = [
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
    'band_energies']

_magnetic_quantities = [
    'spin_S2'
]

_optical_quantities = [
    'excitation_energies',
    'oscillator_strengths',
    'transition_dipole_moments'
]

100
101
102
103
# Matches a run of digits optionally followed by up to two more dot-separated
# runs of digits (e.g. "4", "4.6" or "4.6.35"). simplify_version uses the first
# such match found in a version string.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Maps an XC functional name to the label of its XC treatment.

    The lower-cased first three characters of the name are looked up in
    ``xc_treatments``.

    Arguments:
        name: The XC functional name as a string.

    Returns:
        The treatment label, or the unchanged ``name`` if it equals the configured
        unavailable value or if its prefix has no entry in ``xc_treatments``.
    '''
    # The placeholder for missing values is passed through untouched and never
    # interpreted as a functional name.
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Returns the label for the given basis set type name.

    Underscores, hyphens and spaces are dropped from the name and the rest is
    lower-cased before it is looked up in ``basis_sets``. Names without an entry
    are returned unchanged.
    '''
    normalized = ''.join(char for char in name if char not in '_- ').lower()
    if normalized in basis_sets:
        return basis_sets[normalized]
    return name


115
116
117
118
119
120
def map_atoms_to_compound_type(atoms):
    '''
    Maps a list of different atom labels to the name of the compound type.

    Arguments:
        atoms: A sized collection with one item per different atom label.

    Returns:
        The entry of ``compound_types`` for the number of given labels,
        ``'>decinary'`` if there are more labels than ``compound_types`` has entries,
        or the configured unavailable value if no labels are given.
    '''
    n_atoms = len(atoms)
    if n_atoms == 0:
        # Without this guard the lookup below would use index -1 and report a
        # system without any atom labels as 'decinary'.
        return config.services.unavailable_value

    if n_atoms > len(compound_types):
        return '>decinary'

    return compound_types[n_atoms - 1]


121
122
123
124
125
126
127
128
def simplify_version(version):
    '''
    Reduces a version string to the first version number found in it.

    The first match of ``version_re`` within the string is returned; a string
    without any match is returned unchanged.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


129
class Label(MSection):
    '''
    Label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.
    '''
    label = Quantity(type=str, a_search=Search())

    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
148
149
150
151
152
153
154
155


class DFTMetadata(MSection):
    '''
    Section with the DFT specific metadata of an entry (domain ``dft``).

    Most quantities are filled from a parser backend by ``apply_domain_metadata``.
    The string quantities about code, method and system use ``'not processed'`` as
    default.
    '''
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_size=20, default_statistic=True))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_size=20, default_statistic=True))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(default_statistic=True))

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_size=11, default_statistic=True)
    )

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(default_statistic=True))

    # The default has to be an int to match the declared type; -1 is not a valid
    # spacegroup number and marks entries that were not processed.
    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search(statistic_size=40, default_statistic=True))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    quantities_energy = Quantity(
        type=str, shape=['0..*'],
        description='Energy-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_electronic = Quantity(
        type=str, shape=['0..*'],
        description='Electronic structure-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_forces = Quantity(
        type=str, shape=['0..*'],
        description='Forces-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_vibrational = Quantity(
        type=str, shape=['0..*'],
        description='Vibrational-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_magnetic = Quantity(
        type=str, shape=['0..*'],
        description='Magnetic-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_optical = Quantity(
        type=str, shape=['0..*'],
        description='Optical-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compound classification.',
        a_search=Search(many_and='append', default_statistic=True, statistic_size=15))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(many_and='append', default_statistic=True, statistic_size=15))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def m_update(self, **kwargs):
        '''
        Updates this section from the given keyword arguments.

        The values for ``labels`` and ``optimade`` are taken out of ``kwargs`` and
        turned into sections with ``m_from_dict`` of ``Label`` and ``OptimadeEntry``.
        All remaining arguments are passed on to the super implementation.
        '''
        # TODO necessary?
        if 'labels' in kwargs:
            self.labels = [Label.m_from_dict(label) for label in kwargs.pop('labels')]

        if 'optimade' in kwargs:
            self.optimade = OptimadeEntry.m_from_dict(kwargs.pop('optimade'))

        super().m_update(**kwargs)
293

294
    def apply_domain_metadata(self, backend):
        '''
        Fills this section and its parent entry with metadata read from a backend.

        The method sets code name and version, the system, method and chemistry
        related quantities, the group hash, the quantity lists and counters, the
        springer and AFLOW labels, and the optimade sub section. On the parent
        entry it sets ``n_atoms``, ``atoms``, ``formula`` and, if a value is
        returned for it, ``raw_id``.

        Arguments:
            backend: The backend with the parsed and normalized data. It is
                accessed via ``get_value``, ``get_sections``, ``traverse`` and
                ``get_mi2_section``.

        Raises:
            KeyError: Possibly, if the backend has no ``program_name``; only the
                lookup of ``program_version`` is guarded (NOTE(review): confirm
                what ``backend.get_value`` raises for missing values).
        '''
        # NOTE(review): function-level import, presumably to avoid a circular
        # import between the datamodel and the normalizers -- confirm.
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atom labels, entry.atoms only keeps the different
        # normalized labels in sorted order
        entry.n_atoms = len(atoms)
        atoms = sorted(set(normalized_atom_labels(set(atoms))))
        entry.atoms = atoms
        self.compound_type = map_atoms_to_compound_type(atoms)

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        geometries = set()
        quantities_energy = set()
        quantities_electronic = set()
        quantities_forces = set()
        quantities_vibrational = set()
        quantities_magnetic = set()
        quantities_optical = set()

        # Each metainfo name is added to the set of the first category that
        # lists it; the order of this table is the order of the checks.
        categories = [
            (_energy_quantities, quantities_energy),
            (_electronic_quantities, quantities_electronic),
            (_forces_quantities, quantities_forces),
            (_vibrational_quantities, quantities_vibrational),
            (_magnetic_quantities, quantities_magnetic),
            (_optical_quantities, quantities_optical)]

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for meta_info, event, value in backend.traverse():
            quantities.add(meta_info)

            for category_names, category_quantities in categories:
                if meta_info in category_names:
                    category_quantities.add(meta_info)
                    break

            if event == ParserEvent.add_value or event == ParserEvent.add_array_value:
                n_quantities += 1

                if meta_info == 'energy_total':
                    n_total_energies += 1

                if meta_info == 'configuration_raw_gid':
                    geometries.add(value)

            elif event == ParserEvent.open_section:
                if meta_info == 'section_single_configuration_calculation':
                    n_calculations += 1

                if meta_info == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.quantities_energy = list(quantities_energy)
        self.quantities_electronic = list(quantities_electronic)
        self.quantities_forces = list(quantities_forces)
        self.quantities_vibrational = list(quantities_vibrational)
        self.quantities_magnetic = list(quantities_magnetic)
        self.quantities_optical = list(quantities_optical)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # the prototype labels are only added as a pair
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        # optimade
        self.optimade = backend.get_mi2_section(OptimadeEntry.m_def)