# dft.py
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
DFT specific metadata
'''

import re

from nomad import utils, config
22
23
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
24

25
26
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
27
from .metainfo.public import section_run
28
29
30
31
32
33
34
35
36
37
38


# Maps the lower-cased first three characters of an XC functional name to the
# label of its treatment class (used by map_functional_name_to_xc_treatment).
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''

# Maps basis set type names, lower-cased and stripped of '_', '-' and ' '
# (see map_basis_set_to_basis_set_label), to their display labels.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves',
}

# Matches the first run of digits with up to two further dot-separated numeric
# components, e.g. '4.6.1' in 'v4.6.1-rc2'.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Maps an XC functional name to the label of its treatment class.

    Arguments:
        name: The XC functional name as a string.

    Returns:
        ``name`` unchanged if it equals ``config.services.unavailable_value``.
        Otherwise the ``xc_treatments`` entry for the lower-cased first three
        characters of ``name``, or ``name`` itself if there is no such entry.
    '''
    # Pass the "unavailable" marker through untouched, so that its prefix is
    # never looked up in the treatment table.
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Maps a basis set type name to its display label.

    The lookup key is ``name`` without any '_', '-' or ' ' characters and in
    lower case. Names without an entry in ``basis_sets`` are returned unchanged.
    '''
    without_separators = name.translate(str.maketrans('', '', '_- '))
    lookup_key = without_separators.lower()
    return basis_sets.get(lookup_key, name)


def simplify_version(version):
    '''
    Reduces a version string to the first numeric version found in it.

    Returns the first match of ``version_re`` in ``version``, or ``version``
    unchanged if the pattern does not match anywhere.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


70
class Label(MSection):
    '''
    Label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.
    '''
    label = Quantity(type=str, a_search=Search())

    # The attribute name shadows the builtin ``type`` within this class body
    # only; the ``type=`` keyword arguments below are unaffected by that.
    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
89
90
91
92
93
94
95
96


# Section with the DFT specific metadata of an entry. The quantities are filled
# from a parser backend by apply_domain_metadata, which also writes a few
# quantities on the parent section (self.m_parent).
class DFTMetadata(MSection):
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_size=20, default_statistic=True))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_size=20, default_statistic=True))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(default_statistic=True))

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(default_statistic=True))

    # NOTE(review): the declared type is int but the default is a string;
    # confirm whether this is intended before relying on the default value.
    spacegroup = Quantity(
        type=int, default='not processed',
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search(statistic_size=40, default_statistic=True))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def apply_domain_metadata(self, backend):
        '''
        Fills this section and parts of its parent section from ``backend``.

        Sets code name and version, symmetry, basis set, system type and xc
        functional on this section, and ``raw_id`` (if available), ``n_atoms``,
        ``atoms`` and ``formula`` on the parent section. Computes the group
        hash, collects the quantity and geometry metrics from all root
        sections that follow ``section_run``, adds the springer and AFLOW
        prototype labels, and copies the optimade section if the backend
        provides one.

        Arguments:
            backend: The backend that the metadata values are read from.
        '''
        # NOTE(review): imported locally, presumably to avoid a circular
        # import at module load time; confirm before moving it to the top.
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atom labels; entry.atoms only keeps the sorted,
        # distinct normalized labels.
        entry.n_atoms = len(atoms)
        entry.atoms = sorted(set(normalized_atom_labels(set(atoms))))

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        geometries = set()
        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for root_section in backend.resource.contents:
            # only root sections that follow section_run contribute
            if not root_section.m_follows(section_run.m_def):
                continue

            # the root section itself counts as one quantity
            quantities.add(root_section.m_def.name)
            n_quantities += 1

            for section, property_def, _ in root_section.m_traverse():
                property_name = property_def.name
                quantities.add(property_name)
                n_quantities += 1

                # a property has exactly one name, the branches are exclusive
                if property_name == 'energy_total':
                    n_total_energies += 1
                elif property_name == 'configuration_raw_gid':
                    geometries.add(section.m_get(property_def))
                elif property_name == 'section_single_configuration_calculation':
                    n_calculations += 1
                elif property_name == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))

        # NOTE(review): no explicit default is passed here; confirm that
        # get_optional_backend_value returns None for missing values,
        # otherwise the check below never filters anything.
        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        # optimade
        optimade = backend.get_mi2_section(OptimadeEntry.m_def)
        if optimade is not None:
            self.optimade = optimade.m_copy()