dft.py 10 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
DFT specific metadata
'''
18
19
20
21

import re

from nomad import utils, config
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
26
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
27
28
29
30
31
32
33
34
35
36
37


# Maps the lower-cased first three characters of an XC functional name to the
# label of the XC treatment; used by map_functional_name_to_xc_treatment.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
38
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
39
40
41
42
43
44
45
46
47
48
49

# Display labels for basis set types. The keys are basis set type names with
# all '_', '-', and ' ' characters removed and converted to lower case; see
# map_basis_set_to_basis_set_label for the lookup.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves'
}

# Matches a run of digits optionally followed by up to two more dot separated
# runs of digits, e.g. the '1.2.3' in 'v1.2.3-beta'. Used by simplify_version.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Translates an XC functional name into the label of its XC treatment.

    The lower-cased first three characters of the name are looked up in
    ``xc_treatments``. The configured unavailable value and names without a
    known prefix are returned unchanged.

    Arguments:
        name: The XC functional name as a string.

    Returns: The treatment label, or the given name if no treatment matches.
    '''
    if name != config.services.unavailable_value:
        prefix = name[:3].lower()
        return xc_treatments.get(prefix, name)

    return name


def map_basis_set_to_basis_set_label(name):
    '''
    Translates a basis set type name into its display label.

    Underscores, hyphens, and spaces are removed and the result is lower-cased
    before it is looked up in ``basis_sets``.

    Arguments:
        name: The basis set type name as a string.

    Returns: The label, or the given name if it is not in ``basis_sets``.
    '''
    stripped = name
    # Separators are removed before lower-casing, same as the chained form.
    for separator in ('_', '-', ' '):
        stripped = stripped.replace(separator, '')

    return basis_sets.get(stripped.lower(), name)


def simplify_version(version):
    '''
    Reduces a version string to the first numeric version that it contains.

    Arguments:
        version: The raw version string.

    Returns: The first match of ``version_re`` within the given string, or the
        unchanged string if it contains no match.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


69
class Label(MSection):
    '''
    A label that further classifies a structure.

    Attributes:
        label: The label as a string.
        type: The type of the label.
        source: The source that this label was taken from.
    '''
    # the label text itself
    label = Quantity(type=str, a_search=Search())

    # what kind of information the label carries
    type = Quantity(
        type=MEnum('compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    # where the label originates from
    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
88
89
90
91
92
93
94
95


class DFTMetadata(MSection):
    '''
    Metadata section for entries of the ``dft`` domain.

    All quantities and sub sections are filled by :func:`apply_domain_metadata`
    from a parser backend. The ``a_search`` annotations configure how the
    values are searched and aggregated.
    '''
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_size=20, default_statistic=True))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_size=20, default_statistic=True))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(default_statistic=True))

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(default_statistic=True))

    # The default has to be an int to match the declared type; the string
    # 'not processed' that the str quantities use is not a valid value here.
    # -1 is used as the "not processed" marker. It differs from the 0 that
    # apply_domain_metadata passes as fallback when reading the space group.
    # NOTE(review): confirm that -1 is acceptable for the search index.
    spacegroup = Quantity(
        type=int, default=-1,
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search(statistic_size=40, default_statistic=True))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def apply_domain_metadata(self, backend):
        '''
        Fills this section, and some quantities of its parent entry, with the
        data of the given backend.

        On the parent entry (``self.m_parent``) this sets ``n_atoms``,
        ``atoms``, ``formula``, and, if the backend provides one, ``raw_id``.

        Arguments:
            backend: The backend to read from. The method uses its
                ``get_value``, ``get_sections``, and ``entry_archive``.

        Raises:
            KeyError: Propagated from ``backend.get_value`` for
                ``program_name``; only the lookup of ``program_version`` is
                guarded.
        '''
        # imported locally; presumably to avoid a circular import (TODO confirm)
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atoms, entry.atoms only keeps the distinct labels
        entry.n_atoms = len(atoms)
        entry.atoms = sorted(set(normalized_atom_labels(set(atoms))))

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping; has to come after the quantities above, because their
        # values are part of the hash
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        geometries = set()
        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for section_run in backend.entry_archive.section_run:
            # the run section itself counts as a quantity
            quantities.add(section_run.m_def.name)
            n_quantities += 1

            for section, property_def, _ in section_run.m_traverse():
                property_name = property_def.name
                quantities.add(property_name)
                n_quantities += 1

                if property_name == 'energy_total':
                    n_total_energies += 1

                if property_name == 'configuration_raw_gid':
                    geometries.add(section.m_get(property_def))

                if property_name == 'section_single_configuration_calculation':
                    n_calculations += 1

                if property_name == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels; sets are used to add each springer label only once
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # the prototype labels are only added as a pair
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))