dft.py 14.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
16
DFT specific metadata
17
'''
18
19
20

import re

21
22
from nomadcore.local_backend import ParserEvent

23
from nomad import utils, config
24
25
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
26

27
28
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
29
30
31
32
33
34
35
36
37
38
39


# Lookup from the lower-cased first three characters of an XC functional name
# to the xc treatment label (see map_functional_name_to_xc_treatment).
xc_treatments = dict(
    gga='GGA',
    hf_='HF',
    oep='OEP',
    hyb='hybrid',
    mgg='meta-GGA',
    vdw='vdW',
    lda='LDA',
)
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
41
42
43
44
45
46
47

# Lookup from a normalized basis set type (lower case, without '_', '-' and
# spaces; see map_basis_set_to_basis_set_label) to its display label.
basis_sets = dict(
    gaussians='gaussians',
    realspacegrid='real-space grid',
    planewaves='plane waves',
)

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Compound type names, ordered so that index i names a compound with i + 1
# distinct atom labels (used by DFTMetadata.apply_domain_metadata).
compound_types = [
    'unary', 'binary', 'ternary', 'quaternary', 'quinary',
    'sexinary', 'septenary', 'octanary', 'nonary', 'decinary',
]

# The following lists assign metainfo quantity names to the categories that
# are stored in the DFTMetadata.quantities_* fields.
_energy_quantities = [
    'energy_total',
    'energy_total_T0',
    'energy_free',
    'energy_electrostatic',
    'energy_X',
    'energy_XC',
    'energy_sum_eigenvalues',
]

_electronic_quantities = [
    'dos_values',
    'eigenvalues_values',
    'volumetric_data_values',
    'electronic_kinetic_energy',
    'total_charge',
    'atomic_multipole_values',
]

_forces_quantities = [
    'atom_forces_free',
    'atom_forces_raw',
    'atom_forces_T0',
    'atom_forces',
    'stress_tensor',
]

_vibrational_quantities = [
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
    'band_energies',
]

_magnetic_quantities = ['spin_S2']

_optical_quantities = [
    'excitation_energies',
    'oscillator_strengths',
    'transition_dipole_moments',
]

100
101
102
103
# Matches a run of digits optionally followed by up to two further
# dot-separated runs of digits (e.g. '1', '1.2' or '1.2.3'); used by
# simplify_version.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Maps an XC functional name to its xc treatment label.

    The lower-cased first three characters of ``name`` are looked up in
    ``xc_treatments``. The configured unavailable value and names without a
    matching entry are returned unchanged.
    '''
    if name != config.services.unavailable_value:
        prefix = name[:3].lower()
        return xc_treatments.get(prefix, name)

    return name


def map_basis_set_to_basis_set_label(name):
    '''
    Maps a basis set type to its label in ``basis_sets``.

    Underscores, dashes and spaces are removed and the result is lower-cased
    before the lookup; ``name`` is returned unchanged if there is no entry.
    '''
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')

    return basis_sets.get(normalized.lower(), name)


def simplify_version(version):
    '''
    Reduces a version string to the first number-like part found by
    ``version_re``; returns ``version`` unchanged if nothing matches.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


123
class Label(MSection):
    '''
    Label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.

    '''
    label = Quantity(
        type=str,
        a_search=Search())

    type = Quantity(
        type=MEnum('compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
142
143
144
145
146
147
148
149


class DFTMetadata(MSection):
    # DFT specific metadata of an entry. The quantities below are filled by
    # apply_domain_metadata from a parser backend; the entry itself is reached
    # via self.m_parent.
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_size=20, default_statistic=True))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_size=20, default_statistic=True))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(default_statistic=True))

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_size=11, default_statistic=True)
    )

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(default_statistic=True))

    # NOTE(review): the type is int but the default is the string
    # 'not processed' -- confirm that this mismatch is intended.
    spacegroup = Quantity(
        type=int, default='not processed',
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search(statistic_size=40, default_statistic=True))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    quantities_energy = Quantity(
        type=str, shape=['0..*'],
        description='Energy-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_electronic = Quantity(
        type=str, shape=['0..*'],
        description='Electronic structure-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_forces = Quantity(
        type=str, shape=['0..*'],
        description='Forces-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_vibrational = Quantity(
        type=str, shape=['0..*'],
        description='Vibrational-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_magnetic = Quantity(
        type=str, shape=['0..*'],
        description='Magnetic-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    quantities_optical = Quantity(
        type=str, shape=['0..*'],
        description='Optical-related quantities.',
        a_search=Search(many_and='append', default_statistic=True))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compund classification.',
        a_search=Search(many_and='append', default_statistic=True, statistic_size=15))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(many_and='append', default_statistic=True, statistic_size=15))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def m_update(self, **kwargs):
        '''
        Updates this section from keyword arguments.

        Values given for ``labels`` and ``optimade`` are converted with
        ``m_from_dict`` into ``Label`` and ``OptimadeEntry`` sections. All
        remaining keyword arguments are passed on to the parent class
        implementation of ``m_update``.
        '''
        # TODO necessary?
        if 'labels' in kwargs:
            self.labels = [Label.m_from_dict(label) for label in kwargs.pop('labels')]

        if 'optimade' in kwargs:
            self.optimade = OptimadeEntry.m_from_dict(kwargs.pop('optimade'))

        super().m_update(**kwargs)

    def apply_domain_metadata(self, backend):
        '''
        Fills this section and its parent entry (``self.m_parent``) with the
        metadata read from the given parser backend.

        Arguments:
            backend: The backend to read from. This method uses its
                ``get_value``, ``get_sections``, ``traverse`` and
                ``get_mi2_section`` methods.
        '''
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atom labels, entry.atoms only the distinct ones
        entry.n_atoms = len(atoms)
        atoms = sorted(set(normalized_atom_labels(set(atoms))))
        entry.atoms = atoms

        n_distinct_atoms = len(atoms)
        if n_distinct_atoms == 0:
            # Without this guard the index -1 would select the last compound
            # type and entries without atoms would be labelled 'decinary'.
            self.compound_type = config.services.unavailable_value
        elif n_distinct_atoms <= len(compound_types):
            self.compound_type = compound_types[n_distinct_atoms - 1]
        else:
            self.compound_type = '>decinary'

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        geometries = set()
        quantities_energy = set()
        quantities_electronic = set()
        quantities_forces = set()
        quantities_vibrational = set()
        quantities_magnetic = set()
        quantities_optical = set()

        # Each quantity is added to the first category that lists it.
        categories = (
            (_energy_quantities, quantities_energy),
            (_electronic_quantities, quantities_electronic),
            (_forces_quantities, quantities_forces),
            (_vibrational_quantities, quantities_vibrational),
            (_magnetic_quantities, quantities_magnetic),
            (_optical_quantities, quantities_optical))

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for meta_info, event, value in backend.traverse():
            quantities.add(meta_info)

            for category_names, category_quantities in categories:
                if meta_info in category_names:
                    category_quantities.add(meta_info)
                    break

            if event in (ParserEvent.add_value, ParserEvent.add_array_value):
                n_quantities += 1

                if meta_info == 'energy_total':
                    n_total_energies += 1

                if meta_info == 'configuration_raw_gid':
                    geometries.add(value)

            elif event == ParserEvent.open_section:
                if meta_info == 'section_single_configuration_calculation':
                    n_calculations += 1

                if meta_info == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.quantities_energy = list(quantities_energy)
        self.quantities_electronic = list(quantities_electronic)
        self.quantities_forces = list(quantities_forces)
        self.quantities_vibrational = list(quantities_vibrational)
        self.quantities_magnetic = list(quantities_magnetic)
        self.quantities_optical = list(quantities_optical)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # The prototype labels are only added if both values are available.
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        # optimade
        self.optimade = backend.get_mi2_section(OptimadeEntry.m_def)