# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
DFT specific metadata
'''
18
19
20

import re

21
from nomad import config, utils
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
26
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
27
28
29
30
31
32
33
34
35
36
37


# Lookup table used by map_functional_name_to_xc_treatment: the keys are the first
# three characters of a functional name in lower case, the values are the treatment
# labels that end up in the ``xc_functional`` metadata.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''

# Lookup table used by map_basis_set_to_basis_set_label: the keys are basis set names
# with '_', '-' and ' ' removed and lower-cased, the values are the display labels.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves'
}

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Compound type names ordered by number of distinct elements: index ``i`` holds the
# name for ``i + 1`` elements.
compound_types = [
    'unary',
    'binary',
    'ternary',
    'quaternary',
    'quinary',
    'sexinary',
    'septenary',
    'octanary',
    'nonary',
    'decinary'
]

# The following lists name the metainfo quantities that are collected as
# "searchable quantities" when they occur in an entry, grouped by topic.
_energy_quantities = [
    'energy_total',
    'energy_total_T0',
    'energy_free',
    'energy_electrostatic',
    'energy_X',
    'energy_XC',
    'energy_sum_eigenvalues']

_electronic_quantities = [
    'dos_values',
    'eigenvalues_values',
    'volumetric_data_values',
    'electronic_kinetic_energy',
    'total_charge',
    # 'atomic_multipole_values'
]

_forces_quantities = [
    'atom_forces_free',
    'atom_forces_raw',
    # 'atom_forces_T0',
    'atom_forces',
    'stress_tensor']

_vibrational_quantities = [
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
    'band_energies']

_magnetic_quantities = [
    'spin_S2'
]

_optical_quantities = [
    'excitation_energies',
    'oscillator_strengths',
    'transition_dipole_moments'
]

# Union of all topic lists above; a set, because it is only used for membership
# tests and for its size.
_searchable_quantities = set(
    _energy_quantities
    + _electronic_quantities
    + _forces_quantities
    + _vibrational_quantities
    + _magnetic_quantities
    + _optical_quantities)

# Matches the first ``major[.minor[.patch]]`` number sequence in a version string.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Map an XC functional name to its treatment label.

    The configured "unavailable" marker (``config.services.unavailable_value``) is
    passed through unchanged. For any other name, the first three characters are
    lower-cased and looked up in ``xc_treatments``; if there is no entry for that
    prefix, ``name`` itself is returned.
    '''
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Translate a basis set name into its display label.

    Underscores, hyphens and spaces are removed and the result is lower-cased before
    it is looked up in ``basis_sets``. Unknown names are returned unchanged.
    '''
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')
    normalized = normalized.lower()

    if normalized in basis_sets:
        return basis_sets[normalized]
    return name


def simplify_version(version):
    '''
    Reduce a version string to the first number sequence that ``version_re`` finds.

    If the pattern does not match anywhere, the given string is returned as it is.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


124
class Label(MSection):
    '''
    A label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.

    '''
    label = Quantity(type=str, a_search=Search())

    # the kind of information the label carries
    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    # the database/library the label originates from
    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
143
144
145
146
147
148
149
150


class DFTMetadata(MSection):
    # Section with the DFT specific metadata of an entry. The quantities below are
    # filled by ``apply_domain_metadata``; string quantities start out as
    # 'not processed'.
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_values=[
            '(L)APW+lo', 'FLAPW', 'gaussians', 'numeric AOs', 'plane waves', 'psinc functions',
            'real-space grid', 'unavailable', 'not processed'
        ]))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_values=list(xc_treatments.values()) + ['unavailable', 'not processed']))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(statistic_values=[
            '1D', '2D', 'atom', 'bulk', 'molecule / cluster', 'surface',
            'unavailable', 'not processed'
        ]))

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_values=compound_types + ['not processed'])
    )

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(
            statistic_values=[
                'cubic', 'hexagonal', 'monoclinic', 'orthorombic', 'tetragonal',
                'triclinic', 'trigonal', 'unavailable', 'not processed']
        ))

    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search())  # on import, the parser module adds the codes here as statistic_values

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, default=0, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int, default=0,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, default=0, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, default=0, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    searchable_quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities with existence filters in the search GUI.',
        a_search=Search(many_and='append', statistic_size=len(_searchable_quantities)))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compund classification.',
        a_search=Search(
            many_and='append', statistic_size=20,
            statistic_order='_count'))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(
            many_and='append', statistic_size=20,
            statistic_order='_count'))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def code_name_from_parser(self):
        '''
        Return the ``code_name`` attribute of the parser registered under the parent
        entry's ``parser_name``. Falls back to the configured "unavailable" value if
        the entry has no parser name or the parser has no ``code_name``.
        '''
        entry = self.m_parent
        if entry.parser_name is not None:
            # imported here and not at module level
            from nomad.parsing import parser_dict
            parser = parser_dict.get(entry.parser_name)
            if hasattr(parser, 'code_name'):
                return parser.code_name
        return config.services.unavailable_value

    def update_group_hash(self):
        '''
        Recompute ``group_hash`` from the parent entry's formula, embargo flag and
        uploader id together with this section's spacegroup, basis set, xc functional,
        code name and code version.
        '''
        uploader = self.m_parent.uploader
        # entries without uploader contribute None to the hash
        user_id = uploader.user_id if uploader is not None else None

        self.group_hash = utils.hash(
            self.m_parent.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            self.m_parent.with_embargo,
            user_id)

    def apply_domain_metadata(self, backend):
        '''
        Fill this section and the domain related quantities of the parent entry
        (``raw_id``, ``n_atoms``, ``atoms``, ``formula``) from the given backend.

        Arguments:
            backend: The backend with the parsed (and normalized) data. If ``None``,
                only ``code_name`` is set (derived from the entry's parser) and
                everything else is left untouched.
        '''
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        if backend is None:
            self.code_name = self.code_name_from_parser()
            return

        # code and code specific ids
        try:
            self.code_name = backend.get_value('program_name', 0)
        except KeyError as e:
            logger.warn('backend after parsing without program_name', exc_info=e)
            self.code_name = self.code_name_from_parser()

        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atoms, entry.atoms only holds the distinct normalized labels
        entry.n_atoms = len(atoms)
        atoms = sorted(set(normalized_atom_labels(set(atoms))))
        entry.atoms = atoms
        if not atoms:
            # Without atoms, ``compound_types[len(atoms) - 1]`` would silently pick the
            # last list element ('decinary'); mark the compound type as unavailable.
            self.compound_type = config.services.unavailable_value
        elif len(atoms) <= len(compound_types):
            self.compound_type = compound_types[len(atoms) - 1]
        else:
            self.compound_type = '>decinary'

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping; must run after the quantities that go into the hash are set
        self.update_group_hash()

        # metrics and quantities
        quantities = set()
        searchable_quantities = set()
        geometries = set()

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for section_run in backend.entry_archive.section_run:
            # the run section itself counts as a quantity
            quantities.add(section_run.m_def.name)
            n_quantities += 1

            for section, property_def, _ in section_run.m_traverse():
                property_name = property_def.name
                quantities.add(property_name)
                n_quantities += 1

                if property_name in _searchable_quantities:
                    searchable_quantities.add(property_name)

                if property_name == 'energy_total':
                    n_total_energies += 1

                if property_name == 'configuration_raw_gid':
                    geometries.add(section.m_get(property_def))

                if property_name == 'section_single_configuration_calculation':
                    n_calculations += 1

                if property_name == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.searchable_quantities = list(searchable_quantities)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # prototype labels are only added when both id and label are available
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))