# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
DFT specific metadata
'''
18
19
20

import re

21
22
from nomadcore.local_backend import ParserEvent

23
from nomad import utils, config
24
25
from nomad.metainfo import optimade, MSection, Section, Quantity, MEnum, SubSection
from nomad.metainfo.search import SearchQuantity
26

27
from .base import get_optional_backend_value
28
29
30
31
32
33
34
35
36
37
38


# Maps the lowercased first three characters of an XC functional name to the
# label of the corresponding XC treatment. It is looked up by
# map_functional_name_to_xc_treatment.
xc_treatments = {
    'gga': 'GGA',
    'hf_': 'HF',
    'oep': 'OEP',
    'hyb': 'hybrid',
    'mgg': 'meta-GGA',
    'vdw': 'vdW',
    'lda': 'LDA',
}
39
''' https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info/wikis/metainfo/XC-functional '''
40
41
42
43
44
45
46
47
48
49
50

# Maps normalized basis set type names to their labels. The keys are lowercase
# and contain no '_', '-' or space characters, which is the form that
# map_basis_set_to_basis_set_label produces before the lookup.
basis_sets = {
    'gaussians': 'gaussians',
    'realspacegrid': 'real-space grid',
    'planewaves': 'plane waves'
}

# Matches a run of digits optionally followed by up to two more dot-separated
# runs of digits (e.g. '1', '1.2' or '1.2.3'). Used by simplify_version.
version_re = re.compile(r'(\d+(\.\d+(\.\d+)?)?)')


def map_functional_name_to_xc_treatment(name):
    '''
    Maps an XC functional name to the label of its XC treatment.

    The treatment is looked up in ``xc_treatments`` by the lowercased first
    three characters of the given name.

    Arguments:
        name: The XC functional name, or ``config.services.unavailable_value``.

    Returns:
        The matching label from ``xc_treatments``. The unchanged ``name`` is
        returned if it equals the unavailable value or if its prefix is not a
        key of ``xc_treatments``.
    '''
    # The placeholder for unavailable values is passed through untouched.
    if name == config.services.unavailable_value:
        return name

    return xc_treatments.get(name[:3].lower(), name)


def map_basis_set_to_basis_set_label(name):
    '''
    Maps a basis set type name to its label.

    The lookup ignores case as well as any '_', '-' and space characters.

    Arguments:
        name: The basis set type name.

    Returns:
        The matching label from ``basis_sets``, or the unchanged ``name`` if
        there is no match.
    '''
    normalized = ''.join(char for char in name if char not in '_- ').lower()
    if normalized in basis_sets:
        return basis_sets[normalized]
    return name


def simplify_version(version):
    '''
    Reduces a version string to the first numeric version it contains.

    Arguments:
        version: The version string.

    Returns:
        The first match of ``version_re`` in ``version``, or the unchanged
        ``version`` if there is no match.
    '''
    found = version_re.search(version)
    return version if found is None else found.group(0)


70
class Label(MSection):
    '''
    A label that further classifies a structure.

    Attributes:
        label: The label as a string
        type: The type of the label
        source: The source that this label was taken from.
    '''
    label = Quantity(type=str, a_search=SearchQuantity())

    type = Quantity(type=MEnum(
        'compound_class', 'classification', 'prototype', 'prototype_id'),
        a_search=SearchQuantity())

    source = Quantity(
        type=MEnum('springer', 'aflow_prototype_library'),
        a_search=SearchQuantity())


class DFTMetadata(MSection):
    # Metainfo section with the DFT specific metadata of an entry. The parent of
    # this section (``m_parent``) is treated as the entry; some of its quantities
    # are read and written by ``apply_domain_metadata``.
    m_def = Section(a_domain='dft')

    basis_set = Quantity(
        type=str, default='not processed',
        description='The used basis set functions.',
        a_search=SearchQuantity(statistic_size=20, default_statistic=True))

    xc_functional = Quantity(
        type=str, default='not processed',
        description='The libXC based xc functional classification used in the simulation.',
        a_search=SearchQuantity(statistic_size=20, default_statistic=True))

    system = Quantity(
        type=str, default='not processed',
        description='The system type of the simulated system.',
        a_search=SearchQuantity(default_statistic=True))

    crystal_system = Quantity(
        type=str, default='not processed',
        description='The crystal system type of the simulated system.',
        a_search=SearchQuantity(default_statistic=True))

    # NOTE(review): the declared type is int but the default is a str. Confirm
    # whether this mismatch is intended before changing it.
    spacegroup = Quantity(
        type=int, default='not processed',
        description='The spacegroup of the simulated system as number.',
        a_search=SearchQuantity())

    spacegroup_symbol = Quantity(
        type=str, default='not processed',
        description='The spacegroup as international short symbol.',
        a_search=SearchQuantity())

    code_name = Quantity(
        type=str, default='not processed',
        description='The name of the used code.',
        a_search=SearchQuantity(statistic_size=40, default_statistic=True))

    code_version = Quantity(
        type=str, default='not processed',
        description='The version of the used code.',
        a_search=SearchQuantity())

    n_geometries = Quantity(
        type=int, description='Number of unique geometries.',
        a_search=SearchQuantity(metric_name='geometries', metric='sum'))

    n_calculations = Quantity(
        type=int,
        description='Number of single configuration calculation sections',
        a_search=SearchQuantity(metric_name='calculations', metric='sum'))

    n_total_energies = Quantity(
        type=int, description='Number of total energy calculations',
        a_search=SearchQuantity(metric_name='total_energies', metric='sum'))

    n_quantities = Quantity(
        type=int, description='Number of metainfo quantities parsed from the entry.',
        a_search=SearchQuantity(metric='sum', metric_name='quantities'))

    quantities = Quantity(
        type=str, shape=['0..*'],
        description='All quantities that are used by this entry.',
        a_search=SearchQuantity(
            metric_name='distinct_quantities', metric='cardinality', many_and='append'))

    geometries = Quantity(
        type=str, shape=['0..*'],
        description='Hashes for each simulated geometry',
        a_search=SearchQuantity(metric_name='unique_geometries', metric='cardinality'))

    group_hash = Quantity(
        type=str,
        description='Hashes that describe unique geometries simulated by this code run.',
        a_search=SearchQuantity(many_or='append', group='groups_grouped', metric_name='groups', metric='cardinality'))

    labels = SubSection(
        sub_section=Label, repeats=True,
        description='The labels taken from AFLOW prototypes and springer.',
        a_search='labels')

    optimade = SubSection(
        sub_section=optimade.OptimadeEntry,
        description='Metadata used for the optimade API.',
        a_search='optimade')

    def m_update(self, **kwargs):
        '''
        Updates this section from the given keyword arguments.

        The values for ``labels`` and ``optimade`` are converted from dicts into
        sections via ``m_from_dict``. All remaining keyword arguments are passed
        on to the inherited ``m_update``.
        '''
        # TODO necessary?
        if 'labels' in kwargs:
            self.labels = [Label.m_from_dict(label) for label in kwargs.pop('labels')]

        if 'optimade' in kwargs:
            self.optimade = optimade.OptimadeEntry.m_from_dict(kwargs.pop('optimade'))

        super().m_update(**kwargs)

    def apply_domain_metadata(self, backend):
        '''
        Fills this section and parts of the parent entry from the given backend.

        On the parent entry (``self.m_parent``) this sets ``n_atoms``, ``atoms``
        and ``formula``, and ``raw_id`` if the backend provides one.

        Arguments:
            backend: The backend to read from. It is accessed via ``get_value``,
                ``get_sections``, ``traverse`` and ``get_mi2_section``.
        '''
        from nomad.normalizing.system import normalized_atom_labels
        entry = self.m_parent

        logger = utils.get_logger(__name__).bind(
            upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)

        # code and code specific ids
        # NOTE(review): unlike program_version below, a failing program_name lookup
        # is not caught here. Confirm that this is intended.
        self.code_name = backend.get_value('program_name', 0)
        try:
            program_version = backend.get_value('program_version', 0)
        except KeyError:
            self.code_version = config.services.unavailable_value
        else:
            self.code_version = simplify_version(program_version)

        raw_id = get_optional_backend_value(backend, 'raw_id', 'section_run', None)
        if raw_id is not None:
            entry.raw_id = raw_id

        # metadata (system, method, chemistry)
        atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', [], logger=logger)
        if hasattr(atoms, 'tolist'):
            atoms = atoms.tolist()
        # n_atoms counts all atom labels, entry.atoms only keeps the distinct
        # normalized labels in sorted order
        entry.n_atoms = len(atoms)
        entry.atoms = sorted(set(normalized_atom_labels(set(atoms))))

        self.crystal_system = get_optional_backend_value(
            backend, 'crystal_system', 'section_symmetry', logger=logger)
        self.spacegroup = get_optional_backend_value(
            backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
        self.spacegroup_symbol = get_optional_backend_value(
            backend, 'international_short_symbol', 'section_symmetry', logger=logger)
        self.basis_set = map_basis_set_to_basis_set_label(
            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
        self.system = get_optional_backend_value(
            backend, 'system_type', 'section_system', logger=logger)
        entry.formula = get_optional_backend_value(
            backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
        self.xc_functional = map_functional_name_to_xc_treatment(
            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))

        # grouping
        self.group_hash = utils.hash(
            entry.formula,
            self.spacegroup,
            self.basis_set,
            self.xc_functional,
            self.code_name,
            self.code_version,
            entry.with_embargo,
            entry.uploader)

        # metrics and quantities
        quantities = set()
        geometries = set()
        n_quantities = 0
        n_calculations = 0
        n_total_energies = 0
        n_geometries = 0

        value_events = (ParserEvent.add_value, ParserEvent.add_array_value)
        for meta_info, event, value in backend.traverse():
            # every traversed name is recorded, independent of the event type
            quantities.add(meta_info)

            if event in value_events:
                n_quantities += 1

                if meta_info == 'energy_total':
                    n_total_energies += 1

                if meta_info == 'configuration_raw_gid':
                    geometries.add(value)

            elif event == ParserEvent.open_section:
                if meta_info == 'section_single_configuration_calculation':
                    n_calculations += 1

                if meta_info == 'section_system':
                    n_geometries += 1

        self.quantities = list(quantities)
        self.geometries = list(geometries)
        self.n_quantities = n_quantities
        self.n_calculations = n_calculations
        self.n_total_energies = n_total_energies
        self.n_geometries = n_geometries

        # labels
        compounds = set()
        classifications = set()
        for index in backend.get_sections('section_springer_material'):
            compounds.update(backend.get_value('springer_compound_class', index))
            classifications.update(backend.get_value('springer_classification', index))

        for compound in compounds:
            self.labels.append(Label(label=compound, type='compound_class', source='springer'))
        for classification in classifications:
            self.labels.append(Label(label=classification, type='classification', source='springer'))

        aflow_id = get_optional_backend_value(backend, 'prototype_aflow_id', 'section_prototype')
        aflow_label = get_optional_backend_value(backend, 'prototype_label', 'section_prototype')

        # prototype labels are only added if both the id and the label are given
        if aflow_id is not None and aflow_label is not None:
            self.labels.append(Label(label=aflow_label, type='prototype', source='aflow_prototype_library'))
            self.labels.append(Label(label=aflow_id, type='prototype_id', source='aflow_prototype_library'))

        # optimade
        self.optimade = backend.get_mi2_section(optimade.OptimadeEntry.m_def)