dft.py 12.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
DFT specific metadata
'''
18
19
20

import re

21
from nomad import config
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
26
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
27
28
29
30
31
32
33
34
35
36
37


# Maps the lower-cased first three characters of an XC functional name to the
# label of the corresponding XC treatment. Looked up by
# map_functional_name_to_xc_treatment.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
38
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
39
40
41
42
43
44
45

# Maps a normalized basis set type (lower-cased, with '_', '-' and spaces
# removed) to its display label. Looked up by map_basis_set_to_basis_set_label.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves'
}

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Compound type names ordered by element count: the entry at index i names a
# compound with i + 1 distinct elements. DFTMetadata.apply_domain_metadata
# indexes this list with the number of distinct atom labels minus one.
compound_types = [
    'unary',
    'binary',
    'ternary',
    'quaternary',
    'quinary',
    'sexinary',
    'septenary',
    'octanary',
    'nonary',
    'decinary'
]

# Names of metainfo quantities, grouped by topic. The union of all groups
# forms _searchable_quantities.

# Energy quantities.
_energy_quantities = [
    'energy_total',
    'energy_total_T0',
    'energy_free',
    'energy_electrostatic',
    'energy_X',
    'energy_XC',
    'energy_sum_eigenvalues']

# Electronic-structure quantities.
_electronic_quantities = [
    'dos_values',
    'eigenvalues_values',
    'volumetric_data_values',
    'electronic_kinetic_energy',
    'total_charge',
    'atomic_multipole_values']

# Force and stress quantities.
_forces_quantities = [
    'atom_forces_free',
    'atom_forces_raw',
    'atom_forces_T0',
    'atom_forces',
    'stress_tensor']

# Vibrational quantities.
_vibrational_quantities = [
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
    'band_energies']

# Magnetic quantities.
_magnetic_quantities = [
    'spin_S2'
]

# Optical quantities.
_optical_quantities = [
    'excitation_energies',
    'oscillator_strengths',
    'transition_dipole_moments'
]

98
99
# Union of all quantity groups. Used for membership tests while collecting an
# entry's searchable quantities, and its size sets the statistic_size of the
# DFTMetadata.searchable_quantities search annotation.
_searchable_quantities = set(_energy_quantities + _electronic_quantities + _forces_quantities + _vibrational_quantities + _magnetic_quantities + _optical_quantities)

100
101
102
103
# Matches a run of digits optionally followed by up to two dotted numeric
# components, e.g. '1', '1.2' or '1.2.3'. Used by simplify_version.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Maps an XC functional name to the label of its XC treatment.

    Arguments:
        name: The XC functional name, or ``config.services.unavailable_value``
            if no name is available.

    Returns:
        The matching value from ``xc_treatments``, looked up by the lower-cased
        first three characters of ``name``. The given ``name`` is returned
        unchanged if it is the unavailable value or if no entry matches.
    '''
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Maps a basis set type name to its display label.

    Underscores, hyphens and spaces are dropped and the rest is lower-cased
    before the lookup in ``basis_sets``. The given ``name`` is returned
    unchanged if there is no matching entry.
    '''
    separators = str.maketrans('', '', '_- ')
    normalized = name.translate(separators).lower()
    if normalized in basis_sets:
        return basis_sets[normalized]
    return name


def simplify_version(version):
    '''
    Reduces a version string to its first numeric version pattern.

    Returns the first substring of ``version`` that matches ``version_re``, or
    ``version`` itself if nothing matches.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


123
class Label(MSection):
    '''
    Label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.

    '''
    label = Quantity(type=str, a_search=Search())

    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
142
143
144
145
146
147
148
149


class DFTMetadata(MSection):
    '''
    DFT specific entry metadata: code, method, system, and symmetry information
    together with counts and lists of the quantities found in an entry.
    '''
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_size=20))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_size=20))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search())

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_size=11)
    )

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search())

    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search(statistic_size=40))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, default=0, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int, default=0,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, default=0, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, default=0, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    searchable_quantities = Quantity(
        type=str, shape=['0..*'],
        description='Energy-related quantities.',
        a_search=Search(many_and='append', statistic_size=len(_searchable_quantities)))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compund classification.',
        a_search=Search(
            many_and='append', statistic_size=10,
            statistic_order='_count'))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(
            many_and='append', statistic_size=10,
            statistic_order='_count'))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def apply_domain_metadata(self, backend):
        '''
        Fills this section, and some quantities of the parent entry, from the
        given parser backend.

        Arguments:
            backend: The backend to read values from. If ``None``, only
                ``code_name`` is set, taken from the parser registered under
                the entry's ``parser_name`` when that parser has a
                ``code_name`` attribute.

        Raises:
            KeyError: Presumably when the backend has no ``program_name``;
                only the ``program_version`` lookup is guarded here.
        '''
        from nomad import utils
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        if backend is None:
            # Without a backend, the parser is the only source for the code name.
            if entry.parser_name is not None:
                from nomad.parsing import parser_dict
                parser = parser_dict.get(entry.parser_name)
                if hasattr(parser, 'code_name'):
                    self.code_name = parser.code_name
            return

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atom labels; entry.atoms holds the distinct,
        # normalized labels in sorted order.
        entry.n_atoms = len(atoms)
        atoms = sorted(set(normalized_atom_labels(set(atoms))))
        entry.atoms = atoms

        # An entry without atom labels has no compound type. Without this guard
        # the index -1 would wrongly select the last name ('decinary').
        # NOTE(review): this adds one more possible compound_type value;
        # confirm that statistic_size=11 on compound_type is still sufficient.
        n_elements = len(atoms)
        if n_elements == 0:
            self.compound_type = config.services.unavailable_value
        elif n_elements <= len(compound_types):
            self.compound_type = compound_types[n_elements - 1]
        else:
            self.compound_type = '>decinary'

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        searchable_quantities = set()
        geometries = set()

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for section_run in backend.entry_archive.section_run:
            # The run section itself counts as a quantity, too.
            quantities.add(section_run.m_def.name)
            n_quantities += 1

            for section, property_def, _ in section_run.m_traverse():
                property_name = property_def.name
                quantities.add(property_name)
                n_quantities += 1

                if property_name in _searchable_quantities:
                    searchable_quantities.add(property_name)

                if property_name == 'energy_total':
                    n_total_energies += 1

                if property_name == 'configuration_raw_gid':
                    geometries.add(section.m_get(property_def))

                if property_name == 'section_single_configuration_calculation':
                    n_calculations += 1

                if property_name == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.searchable_quantities = list(searchable_quantities)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # AFLOW labels are only added when both the id and the label are available.
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))