dft.py 13.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
DFT specific metadata
'''
18
19
20

import re

21
from nomad import config
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
26
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
27
28
29
30
31
32
33
34
35
36
37


# Lookup from the lower-cased three-character prefix of an XC functional name
# to the label of the XC treatment it stands for.
xc_treatments = dict(
    gga='GGA',
    hf_='HF',
    oep='OEP',
    hyb='hybrid',
    mgg='meta-GGA',
    vdw='vdW',
    lda='LDA',
)
38
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
39
40
41
42
43
44
45

# Display labels for basis set types. Keys are the type names in lower case
# without underscores, dashes or spaces.
basis_sets = dict(
    gaussians='gaussians',
    realspacegrid='real-space grid',
    planewaves='plane waves',
)

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Compound type names ordered by element count: the entry at index ``n - 1``
# names a compound with ``n`` distinct atom labels.
compound_types = (
    'unary binary ternary quaternary quinary '
    'sexinary septenary octanary nonary decinary'
).split()

# Names of the metainfo quantities that are tracked as "searchable", grouped by
# the kind of property they describe.
_energy_quantities = [
    'energy_total', 'energy_total_T0', 'energy_free', 'energy_electrostatic',
    'energy_X', 'energy_XC', 'energy_sum_eigenvalues',
]

_electronic_quantities = [
    'dos_values', 'eigenvalues_values', 'volumetric_data_values',
    'electronic_kinetic_energy', 'total_charge', 'atomic_multipole_values',
]

_forces_quantities = [
    'atom_forces_free', 'atom_forces_raw', 'atom_forces_T0', 'atom_forces',
    'stress_tensor',
]

_vibrational_quantities = [
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
    'band_energies',
]

_magnetic_quantities = ['spin_S2']

_optical_quantities = [
    'excitation_energies', 'oscillator_strengths', 'transition_dipole_moments',
]

# The union of all groups above as a set.
_searchable_quantities = set().union(
    _energy_quantities,
    _electronic_quantities,
    _forces_quantities,
    _vibrational_quantities,
    _magnetic_quantities,
    _optical_quantities,
)

100
101
102
103
# Matches a dotted numeric version with one to three components,
# e.g. ``4``, ``4.1`` or ``4.1.2``.
version_re = re.compile(
    r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Map an XC functional name to the label of its XC treatment.

    The lookup key is the lower-cased first three characters of ``name``.

    Arguments:
        name: The XC functional name, or ``config.services.unavailable_value``
            if no name is available.

    Returns:
        The matching label from ``xc_treatments``. ``name`` itself is returned if
        it is the unavailable placeholder or if its prefix is not a known key.
    '''
    # The placeholder for unavailable values is passed through untouched.
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Translate a basis set type name into its label from ``basis_sets``.

    Underscores, dashes and spaces are dropped and the name is lower-cased before
    the lookup. If there is no entry for the result, ``name`` is returned as given.
    '''
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')

    return basis_sets.get(normalized.lower(), name)


def simplify_version(version):
    '''
    Reduce a version string to the first part of it matched by ``version_re``.

    Returns ``version`` unchanged if the pattern does not match anywhere.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


123
class Label(MSection):
    '''
    Label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.
    '''
    label = Quantity(type=str, a_search=Search())

    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
142
143
144
145
146
147
148
149


class DFTMetadata(MSection):
    '''
    DFT specific metadata of an entry.

    The quantities are populated from a parser backend by ``apply_domain_metadata``.
    The owning entry is reached through ``m_parent``.
    '''
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_size=20))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_size=20))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search())

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_size=11)
    )

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search())

    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search(statistic_size=40))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, default=0, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int, default=0,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, default=0, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, default=0, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    searchable_quantities = Quantity(
        type=str, shape=['0..*'],
        description='Energy-related quantities.',
        a_search=Search(many_and='append', statistic_size=len(_searchable_quantities)))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compund classification.',
        a_search=Search(
            many_and='append', statistic_size=10,
            statistic_order='_count'))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(
            many_and='append', statistic_size=10,
            statistic_order='_count'))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def code_name_from_parser(self):
        '''
        Return the code name declared by the parser of the parent entry.

        Returns:
            The ``code_name`` attribute of the parser found in ``parser_dict`` under
            the entry's ``parser_name``. ``config.services.unavailable_value`` is
            returned if the entry has no parser name or if the looked up parser has
            no ``code_name`` attribute.
        '''
        entry = self.m_parent
        if entry.parser_name is not None:
            from nomad.parsing import parser_dict
            parser = parser_dict.get(entry.parser_name)
            if hasattr(parser, 'code_name'):
                return parser.code_name
        return config.services.unavailable_value

    def apply_domain_metadata(self, backend):
        '''
        Fill this section and its parent entry with metadata read from ``backend``.

        Besides the quantities of this section, ``raw_id``, ``n_atoms``, ``atoms`` and
        ``formula`` of the parent entry are set.

        Arguments:
            backend: The parser backend to read from. If ``None``, only ``code_name``
                is set (taken from the parser) and everything else is left untouched.
        '''
        from nomad import utils
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        if backend is None:
            self.code_name = self.code_name_from_parser()
            return

        # code and code specific ids
        try:
            self.code_name = backend.get_value('program_name', 0)
        except KeyError as e:
            # ``warning`` instead of the deprecated ``warn`` alias
            logger.warning('backend after parsing without program_name', exc_info=e)
            self.code_name = self.code_name_from_parser()

        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        # array-like values (anything offering ``tolist``) are turned into plain lists
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all labels, entry.atoms only keeps the distinct normalized ones
        entry.n_atoms = len(atoms)
        atoms = sorted(set(normalized_atom_labels(set(atoms))))
        entry.atoms = atoms

        n_distinct_atoms = len(atoms)
        if n_distinct_atoms == 0:
            # Without atoms there is no compound type; indexing with ``-1`` would
            # wrongly select the last entry ('decinary').
            self.compound_type = config.services.unavailable_value
        elif n_distinct_atoms <= len(compound_types):
            self.compound_type = compound_types[n_distinct_atoms - 1]
        else:
            self.compound_type = '>decinary'

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        searchable_quantities = set()
        geometries = set()

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for section_run in backend.entry_archive.section_run:
            # the run section itself counts as a quantity, too
            quantities.add(section_run.m_def.name)
            n_quantities += 1

            for section, property_def, _ in section_run.m_traverse():
                property_name = property_def.name
                quantities.add(property_name)
                n_quantities += 1

                if property_name in _searchable_quantities:
                    searchable_quantities.add(property_name)

                if property_name == 'energy_total':
                    n_total_energies += 1

                if property_name == 'configuration_raw_gid':
                    geometries.add(section.m_get(property_def))

                if property_name == 'section_single_configuration_calculation':
                    n_calculations += 1

                if property_name == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.searchable_quantities = list(searchable_quantities)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # the prototype labels are only added when both id and label are given
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))