dft.py 15.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
DFT specific metadata
'''

import re

21
from nomad import config, utils
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
from .optimade import OptimadeEntry
26
from .metainfo.public import Workflow, fast_access
27
from .metainfo.public import section_XC_functionals
28
29
30
31
32
33
34
35
36
37
38


# Lookup table from the lower-cased first three characters of a functional
# name to the label of the corresponding XC treatment. The insertion order is
# significant: the values are listed in this order wherever the table is
# turned into a list.
xc_treatments = dict(
    gga='GGA',
    hf_='HF',
    oep='OEP',
    hyb='hybrid',
    mgg='meta-GGA',
    vdw='vdW',
    lda='LDA',
)
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''

# Lookup table from a normalized basis set name (lower case, with '_', '-'
# and ' ' removed) to the human readable basis set label.
basis_sets = dict(
    gaussians='gaussians',
    realspacegrid='real-space grid',
    planewaves='plane waves',
)

47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Compound type names ordered by the number of distinct atom labels: the name
# at index ``i`` belongs to a system with ``i + 1`` distinct labels.
compound_types = [
    'unary', 'binary', 'ternary', 'quaternary', 'quinary',
    'sexinary', 'septenary', 'octanary', 'nonary', 'decinary',
]

_electronic_quantities = [
61
62
    'electronic_band_structure',
    'electronic_dos',
63
    'eigenvalues_values',
Markus Scheidgen's avatar
Markus Scheidgen committed
64
]
65

66
67
68
_mechanical_quantities = [
    'stress_tensor'
]
69

70
_thermal_quantities = [
71
72
    'thermodynamical_property_heat_capacity_C_v',
    'vibrational_free_energy_at_constant_volume',
73
74
75
    'phonon_band_structure',
    'phonon_dos',
]
76
77
78
79
80
81
82
83
84
85

_magnetic_quantities = [
    'spin_S2'
]

_optical_quantities = [
    'oscillator_strengths',
    'transition_dipole_moments'
]

86
_searchable_quantities = set(_electronic_quantities + _mechanical_quantities + _thermal_quantities + _magnetic_quantities + _optical_quantities)
87

88
89
90
91
# Matches a dotted numeric version with one to three components
# (e.g. "5", "5.4" or "5.4.4"); used with ``search`` to pick the first such
# occurrence out of a longer version string.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Map an XC functional name to the label of its XC treatment.

    Arguments:
        name: The functional name as a string, or the configured
            "unavailable" placeholder value.

    Returns:
        The placeholder itself if ``name`` is the placeholder. Otherwise the
        ``xc_treatments`` entry for the lower-cased first three characters of
        ``name``, or the placeholder if there is no such entry.
    '''
    if name == config.services.unavailable_value:
        return name

    # Only the three character prefix decides the treatment, e.g. 'GGA_X_PBE' -> 'gga'.
    return xc_treatments.get(name[:3].lower(), config.services.unavailable_value)
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110


def map_basis_set_to_basis_set_label(name):
    '''
    Translate a basis set name into its human readable label.

    The name is normalized (underscores, dashes and spaces removed, then lower
    cased) before it is looked up in ``basis_sets``. Names without an entry
    are returned unchanged.
    '''
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')
    return basis_sets.get(normalized.lower(), name)


def simplify_version(version):
    '''
    Reduce a version string to its first dotted numeric part.

    Returns the first match of ``version_re`` within ``version`` or, if the
    string contains no such match, ``version`` itself.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


111
class Label(MSection):
    '''
    A label that further classifies a structure.

    Attributes:
        label: The label as a string.
        type: The type of the label.
        source: The source that this label was taken from.
    '''
    label = Quantity(type=str, a_search=Search())

    type = Quantity(
        type=MEnum('compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
130
131
132
133
134
135
136
137


class DFTMetadata(MSection):
    '''
    Searchable, DFT specific metadata of an entry.

    The quantities are filled by :func:`apply_domain_metadata` from the
    archive of the entry. String quantities default to ``'not processed'``
    until that has happened.
    '''
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_values=[
            '(L)APW+lo', 'gaussians', 'numeric AOs', 'plane waves', 'psinc functions',
            'real-space grid', 'unavailable', 'not processed'
        ]))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(
            statistic_values=list(xc_treatments.values()) + ['unavailable', 'not processed'],
            statistic_size=100))

    xc_functional_names = Quantity(
        type=str, default=[], shape=['*'],
        description='The list of libXC functional names that where used in this entry.',
        a_search=Search(many_and='append'))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(statistic_values=[
            '1D', '2D', 'atom', 'bulk', 'molecule / cluster', 'surface',
            'unavailable', 'not processed'
        ]))

    compound_type = Quantity(
        type=str, default='not processed',
        description='The compound type of the simulated system.',
        a_search=Search(statistic_values=compound_types + ['not processed'])
    )

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(
            statistic_values=[
                'cubic', 'hexagonal', 'monoclinic', 'orthorhombic', 'tetragonal',
                'triclinic', 'trigonal', 'unavailable', 'not processed']
        ))

    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    # NOTE: according to the original author's remark, the known codes are
    # added here as statistic_values when the parser module is imported.
    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search())

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, default=0, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int, default=0,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, default=0, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, default=0, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    searchable_quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities with existence filters in the search GUI.',
        a_search=Search(many_and='append', statistic_size=len(_searchable_quantities) + 15))  # Temporarily increased the statistics size while migrating from old set to new one.

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True, categories=[fast_access],
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    labels_springer_compound_class = Quantity(
        type=str, shape=['0..*'],
        description='Springer compund classification.',
        a_search=Search(
            many_and='append', statistic_size=20,
            statistic_order='_count'))

    labels_springer_classification = Quantity(
        type=str, shape=['0..*'],
        description='Springer classification by property.',
        a_search=Search(
            many_and='append', statistic_size=20,
            statistic_order='_count'))

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    workflow = Quantity(type=Workflow, a_search=Search())

    def code_name_from_parser(self):
        '''
        Determine the code name from the parser that is named by the parent entry.

        Returns:
            The ``code_name`` attribute of the parser registered under the
            parent's ``parser_name``. The configured "unavailable" value is
            returned if the parent has no parser name, or if the looked up
            parser has no ``code_name`` attribute.
        '''
        entry = self.m_parent
        if entry.parser_name is not None:
            # Imported locally, presumably to avoid a circular import - TODO confirm.
            from nomad.parsing.parsers import parser_dict
            parser = parser_dict.get(entry.parser_name)
            if hasattr(parser, 'code_name'):
                return parser.code_name
        return config.services.unavailable_value

    def update_group_hash(self):
        '''
        Recompute ``group_hash`` from the current metadata.

        The hash covers the parent's formula and embargo flag, the spacegroup,
        basis set, xc functional, code name and code version of this section,
        and the uploader's user id (``None`` if the parent has no uploader).
        '''
        user_id = None
        uploader = self.m_parent.uploader
        if uploader is not None:
            user_id = uploader.user_id

        self.group_hash = utils.hash(
            self.m_parent.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            self.m_parent.with_embargo,
            user_id)

    def apply_domain_metadata(self, entry_archive):
        '''
        Fill this section, and some quantities of the parent entry, from an archive.

        ``code_name`` is always set from the parser. Everything else is only
        derived if ``entry_archive`` is given and has at least one
        ``section_run``; the first run and its first representative system
        are used. Besides the quantities of this section, the parent's
        ``raw_id``, ``n_atoms``, ``atoms`` and ``formula`` are written.

        Arguments:
            entry_archive: The archive of the entry, or ``None`` if there is none.
        '''
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent
        unavailable = config.services.unavailable_value

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        self.code_name = self.code_name_from_parser()

        if entry_archive is None:
            return

        section_run = entry_archive.section_run
        if not section_run:
            logger.warn('no section_run found')
            return
        section_run = section_run[0]

        # default values, overwritten below where the archive provides data
        self.system = unavailable
        self.crystal_system = unavailable
        self.spacegroup_symbol = unavailable
        self.basis_set = unavailable
        self.xc_functional = unavailable

        # the first system that is flagged as representative, if any
        section_system = None
        for section in section_run.section_system:
            if section.is_representative:
                section_system = section
                break

        # code and code specific ids
        # The KeyError is raised on purpose so that the warning carries
        # exception information; the parser based code name set above is kept.
        try:
            code_name = section_run.program_name
            if code_name:
                self.code_name = code_name
            else:
                raise KeyError
        except KeyError as e:
            logger.warn('archive without program_name', exc_info=e)

        try:
            version = section_run.program_version
            if version:
                self.code_version = simplify_version(version)
            else:
                raise KeyError
        except KeyError:
            self.code_version = unavailable

        def get_value(value):
            # replaces empty/missing values with the "unavailable" placeholder
            return value if value else unavailable

        raw_id = section_run.raw_id
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atom_labels = section_system.atom_labels if section_system else []
        atoms = atom_labels if atom_labels else []
        entry.n_atoms = len(atoms)
        atoms = sorted(set(normalized_atom_labels(set(atoms))))
        entry.atoms = atoms

        # Without any atom label there is no compound type. Indexing with
        # ``len(atoms) - 1 == -1`` would wrongly yield the last list entry
        # ('decinary').
        n_atom_types = len(atoms)
        if n_atom_types == 0:
            self.compound_type = unavailable
        elif n_atom_types <= len(compound_types):
            self.compound_type = compound_types[n_atom_types - 1]
        else:
            self.compound_type = '>decinary'

        if section_system and len(section_system.section_symmetry) > 0:
            section_symmetry = section_system.section_symmetry[0]
            self.crystal_system = get_value(section_symmetry.crystal_system)
            spacegroup = section_symmetry.space_group_number
            self.spacegroup = 0 if not spacegroup else int(spacegroup)
            self.spacegroup_symbol = get_value(section_symmetry.international_short_symbol)

        program_basis_set_type = section_run.program_basis_set_type
        if program_basis_set_type:
            self.basis_set = map_basis_set_to_basis_set_label(program_basis_set_type)

        if section_system:
            self.system = get_value(section_system.system_type)
            entry.formula = get_value(section_system.chemical_composition_bulk_reduced)

        # metrics and quantities
        quantities = set()
        searchable_quantities = set()
        geometries = set()
        xc_functionals = set()

        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for section, property_def, _ in entry_archive.m_traverse():
            property_name = property_def.name
            quantities.add(property_name)
            n_quantities += 1

            if property_name in _searchable_quantities:
                searchable_quantities.add(property_name)

            if property_def == section_XC_functionals.XC_functional_name:
                functional_name = getattr(section, property_name)
                if functional_name:
                    xc_functionals.add(functional_name)

            if property_name == 'energy_total':
                n_total_energies += 1

            if property_name == 'configuration_raw_gid':
                geometries.add(section.m_get(property_def))

            if property_name == 'section_single_configuration_calculation':
                n_calculations += 1

            if property_name == 'section_system':
                n_geometries += 1

        # the treatment is derived from the alphabetically first functional name
        self.xc_functional_names = sorted(xc_functionals)
        if len(self.xc_functional_names) > 0:
            self.xc_functional = map_functional_name_to_xc_treatment(
                get_value(self.xc_functional_names[0]))
        else:
            self.xc_functional = unavailable

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.searchable_quantities = list(searchable_quantities)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # grouping
        self.update_group_hash()

        # labels
        compounds = set()
        classifications = set()
        if section_system:
            for section in section_system.section_springer_material:
                compounds.update(section.springer_compound_class)
                classifications.update(section.springer_classification)

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))
        self.labels_springer_compound_class = list(compounds)
        self.labels_springer_classification = list(classifications)

        aflow_id, aflow_label = None, None
        section_prototype = section_system.section_prototype if section_system else []
        if section_prototype:
            aflow_id = get_value(section_prototype[0].prototype_aflow_id)
            aflow_label = get_value(section_prototype[0].prototype_label)

        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        if entry_archive.section_workflow:
            self.workflow = entry_archive.section_workflow