dft.py 10.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
DFT specific metadata
'''
18
19
20

import re

21
22
from nomadcore.local_backend import ParserEvent

23
from nomad import utils, config
24
25
from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search_extension import Search
26

27
28
from .common import get_optional_backend_value
from .optimade import OptimadeEntry
29
30
31
32
33
34
35
36
37
38
39


# Maps the lower-cased first three characters of an XC functional name to the
# label of the corresponding XC treatment.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''

# Display labels for basis set types, keyed by the normalized name that
# map_basis_set_to_basis_set_label uses for the lookup.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves'
}

# Matches the first number of the form N, N.N or N.N.N in a string.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Maps an XC functional name to the label of its XC treatment.

    The lower-cased first three characters of ``name`` are looked up in
    ``xc_treatments``.

    Arguments:
        name: The XC functional name as a string.

    Returns:
        The matching treatment label. ``name`` is returned unchanged if it equals
        ``config.services.unavailable_value`` or if its prefix is not a key of
        ``xc_treatments``.
    '''
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Maps a basis set type name to its display label.

    Underscores, dashes and spaces are removed and the result is lower-cased
    before it is looked up in ``basis_sets``. Names without a matching entry are
    returned unchanged.
    '''
    normalized = name
    for separator in ('_', '-', ' '):
        normalized = normalized.replace(separator, '')

    lookup_key = normalized.lower()
    return basis_sets.get(lookup_key, name)


def simplify_version(version):
    '''
    Reduces a version string to the first number matched by ``version_re``.

    Returns the given string unchanged if ``version_re`` finds no match.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


71
class Label(MSection):
    '''
    Label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.
    '''
    label = Quantity(type=str, a_search=Search())

    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=Search())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=Search())
90
91
92
93
94
95
96
97


class DFTMetadata(MSection):
    '''
    Section with the DFT specific metadata of an entry.

    The quantities are declared with search annotations. Their values are filled
    from a parser backend by :func:`apply_domain_metadata`.
    '''
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=Search(statistic_size=20, default_statistic=True))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=Search(statistic_size=20, default_statistic=True))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=Search(default_statistic=True))

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=Search(default_statistic=True))

    # NOTE(review): the type is int, but the default is a string. The default is
    # kept as is, because consumers might rely on it. Confirm and fix separately.
    spacegroup = Quantity(
        type=int, default='not processed',
        description='The spacegroup of the simulated system as number.',
        a_search=Search())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=Search())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=Search(statistic_size=40, default_statistic=True))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=Search())

    n_geometries = Quantity(
        type=int, description='Number of unique geometries.',
        a_search=Search(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int,
        description='Number of single configuration calculation sections',
        a_search=Search(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, description='Number of total energy calculations',
        a_search=Search(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, description='Number of metainfo quantities parsed from the entry.',
        a_search=Search(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=Search(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=Search(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=Search(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    optimade = SubSection(
        sub_section=OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def m_update(self, **kwargs):
        '''
        Updates this section from the given keyword arguments.

        The values for ``labels`` and ``optimade`` are turned into sections via
        ``m_from_dict`` and assigned directly. All remaining keyword arguments are
        passed on to the base implementation.
        '''
        # TODO necessary?
        if 'labels' in kwargs:
            self.labels = [Label.m_from_dict(label) for label in kwargs.pop('labels')]

        if 'optimade' in kwargs:
            self.optimade = OptimadeEntry.m_from_dict(kwargs.pop('optimade'))

        super().m_update(**kwargs)

    def apply_domain_metadata(self, backend):
        '''
        Fills this section and its parent entry with values read from the backend.

        Arguments:
            backend: The backend to read from. It is used through ``get_value``,
                ``get_sections``, ``traverse`` and ``get_mi2_section``.
        '''
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        # code and code specific ids
        self.code_name = backend.get_value('program_name', 0)
        try:
            self.code_version = simplify_version(backend.get_value('program_version', 0))
        except KeyError:
            self.code_version = config.services.unavailable_value

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atom labels; entry.atoms only keeps the sorted, distinct
        # normalized labels
        entry.n_atoms = len(atoms)
        entry.atoms = sorted(set(normalized_atom_labels(set(atoms))))

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping, the hash is computed from the values that were set above
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        geometries = set()
        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        for meta_info, event, value in backend.traverse():
            # every traversed name is recorded, regardless of the event type
            quantities.add(meta_info)

            if event in (ParserEvent.add_value, ParserEvent.add_array_value):
                n_quantities += 1

                if meta_info == 'energy_total':
                    n_total_energies += 1

                if meta_info == 'configuration_raw_gid':
                    geometries.add(value)

            elif event == ParserEvent.open_section:
                if meta_info == 'section_single_configuration_calculation':
                    n_calculations += 1

                if meta_info == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # the prototype labels are only added if both values are available
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        # optimade
        self.optimade = backend.get_mi2_section(OptimadeEntry.m_def)