dft.py 13.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
DFT specific metadata
'''
18
19
20

import re

21
from nomad import config, utils
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
26
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
27
28
29
30
31
32
33
34
35
36
37


# Lookup from the lower-cased first three characters of a functional name to the
# xc treatment label. Insertion order is kept on purpose: the values are reused,
# in this order, as search statistic values.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
39
40
41
42
43
44
45

# Lookup from a normalized basis set type (lower case, without '_', '-' or
# spaces) to the label that is stored as basis set metadata.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves',
}

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Compound type labels ordered by the number of distinct elements: the label for
# n elements is stored at index n - 1.
compound_types = [
    'unary', 'binary', 'ternary', 'quaternary', 'quinary',
    'sexinary', 'septenary', 'octanary', 'nonary', 'decinary',
]

_energy_quantities = [
    'energy_total',
    'energy_total_T0',
    'energy_free',
    'energy_electrostatic',
    'energy_X',
    'energy_XC',
    'energy_sum_eigenvalues']

_electronic_quantities = [
    'dos_values',
    'eigenvalues_values',
    'volumetric_data_values',
    'electronic_kinetic_energy',
    'total_charge',
Markus Scheidgen's avatar
Markus Scheidgen committed
74
75
    # 'atomic_multipole_values'
]
76
77
78
79

_forces_quantities = [
    'atom_forces_free',
    'atom_forces_raw',
Markus Scheidgen's avatar
Markus Scheidgen committed
80
    # 'atom_forces_T0',
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
    'atom_forces',
    'stress_tensor']

_vibrational_quantities = [
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
    'band_energies']

_magnetic_quantities = [
    'spin_S2'
]

_optical_quantities = [
    'excitation_energies',
    'oscillator_strengths',
    'transition_dipole_moments'
]

99
100
_searchable_quantities = set(_energy_quantities + _electronic_quantities + _forces_quantities + _vibrational_quantities + _magnetic_quantities + _optical_quantities)

101
102
103
104
# Matches one to three dot-separated groups of digits, e.g. '4', '4.6' or '4.6.1'.
version_re = re.compile(
    r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Maps a functional name to its xc treatment label.

    The lower-cased first three characters of the name are used as key into
    ``xc_treatments``.

    Arguments:
        name: The functional name, or the configured unavailable value.

    Returns:
        The xc treatment label. The name is returned unchanged if it is the
        configured unavailable value or if its prefix is not a known key.
    '''
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Maps a basis set type to its label in ``basis_sets``.

    Underscores, hyphens and spaces are removed and the result is lower-cased
    before the lookup. The name is returned unchanged if there is no match.
    '''
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')
    normalized = normalized.lower()

    return basis_sets.get(normalized, name)


def simplify_version(version):
    '''
    Reduces a version string to the first match of ``version_re``, i.e. the
    first run of one to three dot-separated numbers. The version is returned
    unchanged if it contains no such run.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


124
class Label(MSection):
    '''
    A label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.

    '''
    label = Quantity(type=str, a_search=Search())

    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
143
144
145
146
147
148
149
150


class DFTMetadata(MSection):
    # Section with the DFT specific metadata of an entry. The quantities are
    # declared with search annotations; `apply_domain_metadata` fills them from
    # a parsed backend. The entry itself is reached via `self.m_parent`.
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_values=[
            '(L)APW+lo', 'FLAPW', 'gaussians', 'numeric AOs', 'plane waves', 'psinc functions',
            'real-space grid', 'unavailable', 'not processed'
        ]))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_values=list(xc_treatments.values()) + ['unavailable', 'not processed']))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(statistic_values=[
            '1D', '2D', 'atom', 'bulk', 'molecule / cluster', 'surface',
            'unavailable', 'not processed'
        ]))

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_values=compound_types + ['not processed'])
    )

    # NOTE(review): 'orthorombic' looks like a misspelling of 'orthorhombic'. It
    # is left untouched because it has to match the values that are actually
    # stored and indexed -- confirm against the normalizer output before changing.
    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(
            statistic_values=[
                'cubic', 'hexagonal', 'monoclinic', 'orthorombic', 'tetragonal',
                'triclinic', 'trigonal', 'unavailable', 'not processed']
        ))

    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    # According to the original note, the statistic_values for code_name are not
    # given here: the parsing module adds the known codes when it is imported.
    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search())

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, default=0, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int, default=0,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, default=0, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, default=0, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    searchable_quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities with existence filters in the search GUI.',
        a_search=Search(many_and='append', statistic_size=len(_searchable_quantities)))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    # NOTE(review): the description below contains the typo 'compund'. It is a
    # runtime string and therefore left as is here.
    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compund classification.',
        a_search=Search(
            many_and='append', statistic_size=20,
            statistic_order='_count'))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(
            many_and='append', statistic_size=20,
            statistic_order='_count'))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def code_name_from_parser(self):
        '''
        Returns the ``code_name`` attribute of the parser registered under the
        parent entry's ``parser_name``. Falls back to the configured
        unavailable value if the entry has no parser name, or the parser is
        unknown or has no ``code_name``.
        '''
        entry = self.m_parent
        if entry.parser_name is not None:
            # imported locally, as in the original code
            from nomad.parsing import parser_dict
            parser = parser_dict.get(entry.parser_name)
            # also covers an unknown parser name, where ``parser`` is None
            if hasattr(parser, 'code_name'):
                return parser.code_name
        return config.services.unavailable_value

    def update_group_hash(self):
        '''
        Recomputes ``group_hash`` from the parent entry's formula, embargo flag
        and uploader id together with this section's spacegroup, basis set,
        xc functional, code name and code version.
        '''
        self.group_hash = utils.hash(
            self.m_parent.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            self.m_parent.with_embargo,
            self.m_parent.uploader.user_id)

    def apply_domain_metadata(self, backend):
        '''
        Fills this section, and some quantities of the parent entry
        (``raw_id``, ``n_atoms``, ``atoms``, ``formula``), from a backend.

        Arguments:
            backend: The backend to read from; it is accessed through
                ``get_value``, ``get_sections`` and ``entry_archive``. If it is
                ``None``, only ``code_name`` is set (from the parser) and
                everything else keeps its current value.
        '''
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        if backend is None:
            self.code_name = self.code_name_from_parser()
            return

        # code and code specific ids
        try:
            self.code_name = backend.get_value('program_name', 0)
        except KeyError as e:
            # `warning` instead of the deprecated `warn` alias
            logger.warning('backend after parsing without program_name', exc_info=e)
            self.code_name = self.code_name_from_parser()

        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atoms, entry.atoms only the distinct normalized labels
        entry.n_atoms = len(atoms)
        atoms = list(set(normalized_atom_labels(set(atoms))))
        atoms.sort()
        entry.atoms = atoms

        n_elements = len(atoms)
        if n_elements == 0:
            # Without atoms, `compound_types[n_elements - 1]` would index -1 and
            # wrongly report 'decinary'.
            self.compound_type = config.services.unavailable_value
        elif n_elements <= len(compound_types):
            self.compound_type = compound_types[n_elements - 1]
        else:
            self.compound_type = '>decinary'

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping; has to come after the quantities that go into the hash are set
        self.update_group_hash()

        # metrics and quantities
        quantities = set()
        searchable_quantities = set()
        geometries = set()

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for section_run in backend.entry_archive.section_run:
            # the run section itself counts as one quantity
            quantities.add(section_run.m_def.name)
            n_quantities += 1

            # every traversed property counts, the sets keep the distinct names
            for section, property_def, _ in section_run.m_traverse():
                property_name = property_def.name
                quantities.add(property_name)
                n_quantities += 1

                if property_name in _searchable_quantities:
                    searchable_quantities.add(property_name)

                if property_name == 'energy_total':
                    n_total_energies += 1

                if property_name == 'configuration_raw_gid':
                    geometries.add(section.m_get(property_def))

                if property_name == 'section_single_configuration_calculation':
                    n_calculations += 1

                if property_name == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.searchable_quantities = list(searchable_quantities)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # the prototype labels are only added as a pair
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))