encyclopedia.py 76.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Dict, List, Any
16
17
from math import gcd as gcd
from functools import reduce
18
from abc import abstractmethod
19
from collections import OrderedDict
20
import re
21
import json
22
import ase
23
import ase.data
24
from ase import Atoms
25
import numpy as np
26
from matid import SymmetryAnalyzer
27
import matid.geometry
28

29
30
from nomad.normalizing.normalizer import (
    Normalizer,
31
    s_run,
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
    s_scc,
    s_system,
    s_method,
    s_frame_sequence,
    r_frame_sequence_to_sampling,
    s_sampling_method,
    r_frame_sequence_local_frames,
)
from nomad.metainfo.encyclopedia import (
    Encyclopedia,
    Material,
    Method,
    Properties,
    RunType,
    WyckoffSet,
    WyckoffVariables,
    ElectronicBandStructure,
    BandGap,
)
51
from nomad.parsing.backend import Section, LocalBackend
52
from nomad.normalizing.settingsbasisset import get_basis_set_settings
53
from nomad.normalizing import structure
54
from nomad.utils import hash, RestrictedDict, timer
55
from nomad import config
56

57
58
# Conversion factor from joule to rydberg: 1 J is approximately 4.587425e+17 Ry.
J_to_Ry = 4.587425e+17

59

Lauri Himanen's avatar
Lauri Himanen committed
60
61
62
63
64
65
66
67
68
69
class Context():
    """Plain container for the details that describe one Encyclopedia entry.

    All constructor arguments are stored unchanged as attributes of the same
    name. ``greatest_common_divisor`` is not a constructor argument: it starts
    out as ``None`` and is meant to be assigned later.
    """
    def __init__(
        self,
        system_type: str,
        method_type: str,
        run_type: str,
        representative_system,
        representative_method,
        representative_scc,
        representative_scc_idx,
    ):
        # Classification of the entry.
        self.system_type, self.method_type, self.run_type = (
            system_type,
            method_type,
            run_type,
        )

        # Representative sections and the index of the representative scc.
        self.representative_system = representative_system
        self.representative_method = representative_method
        self.representative_scc = representative_scc
        self.representative_scc_idx = representative_scc_idx

        # Not known at construction time.
        self.greatest_common_divisor: int = None
Lauri Himanen's avatar
Lauri Himanen committed
81
82


83
84
85
86
87
88
89
class EncyclopediaNormalizer(Normalizer):
    """
    This normalizer emulates the functionality of the old Encyclopedia backend.
    The data used by the encyclopedia have been assigned under new metainfo
    within section_encyclopedia. In the future these separate metainfos could
    be absorbed into the existing metainfo hierarchy.
    """
90
    def __init__(self, backend: LocalBackend):
        """Initialise the base normalizer and keep a handle to the backend.

        Args:
            backend: Backend that is forwarded to the base class initialiser
                and additionally stored as ``self.backend``.
        """
        super().__init__(backend)
        self.backend: LocalBackend = backend
93

94
    def run_type(self, run_type_sec: RunType) -> str:
        """Decide what kind of calculation this entry represents.

        The decision uses the number of single configuration calculations and
        frame sequences found in the backend. For exactly one frame sequence
        the sampling method it refers to selects the type.

        Args:
            run_type_sec: Section that receives the detected run type. It is
                only written when the end of the method is reached; the early
                returns below leave it untouched.

        Returns:
            The detected run type, or the ``unavailable`` enum value when the
            type could not be determined.
        """
        run_enums = RunType.run_type.type
        detected = run_enums.unavailable

        # A section that cannot be read is handled like an empty one.
        try:
            sccs = self._backend[s_scc]
        except Exception:
            sccs = []
        try:
            frame_sequences = self._backend[s_frame_sequence]
        except Exception:
            frame_sequences = []

        n_scc = len(sccs)
        n_frame_seq = len(frame_sequences)

        if n_frame_seq == 0 and n_scc <= 3:
            # No sequences, only a few calculations.
            # TODO move to taylor expansion as soon as data is correct in archive
            if self._backend["program_name"] == "elastic":
                detected = run_enums.elastic_constants
            else:
                detected = run_enums.single_point

        elif n_frame_seq == 1:
            # One sequence. Calculations with multiple sequences are currently
            # unsupported and keep the "unavailable" type.
            frame_seq = frame_sequences[0]

            # The sequence must reference a sampling method.
            try:
                i_sampling_method = frame_seq[r_frame_sequence_to_sampling]
            except KeyError:
                self.logger.info(
                    "Cannot determine encyclopedia run type because missing "
                    "value for frame_sequence_to_sampling_ref."
                )
                return detected

            # The sequence must reference at least one local frame.
            try:
                frames = frame_seq[r_frame_sequence_local_frames]
            except KeyError:
                self.logger.info(
                    "section_frame_sequence_local_frames not found although a "
                    "frame_sequence exists."
                )
                return detected
            if len(frames) == 0:
                self.logger.info("No frames referenced in section_frame_sequence_local_frames.")
                return detected

            sampling_method = self._backend[s_sampling_method][i_sampling_method]["sampling_method"]

            if sampling_method == "molecular_dynamics":
                detected = run_enums.molecular_dynamics
            elif sampling_method == "geometry_optimization":
                detected = run_enums.geometry_optimization
            elif sampling_method == "taylor_expansion":
                detected = run_enums.phonon_calculation

        run_type_sec.run_type = detected
        return detected
162

163
    def system_type(self, material: Material) -> tuple:
        """Find the representative system and its system type.

        1D and 2D systems are accepted as they are. A bulk system is only
        accepted when its first ``section_symmetry`` can be accessed. In every
        other case the type stays at ``config.services.unavailable_value``.

        Args:
            material: Section whose ``system_type`` is set to the result.

        Returns:
            Tuple ``(system, system_type)``; ``system`` is ``None`` when no
            representative system index is stored or the lookup failed.
        """
        system = None
        system_type = config.services.unavailable_value
        system_enums = Material.system_type.type

        system_idx = self._backend["section_run"][0].tmp["representative_system_idx"]
        if system_idx is not None:
            # Look up the type reported by the backend for the selected system.
            try:
                system = self._backend[s_system][system_idx]
                stype = system["system_type"]
            except KeyError:
                type_found = False
            else:
                type_found = True

            if type_found:
                if stype in (system_enums.one_d, system_enums.two_d):
                    system_type = stype

                # Bulk systems additionally need symmetry information.
                if stype == system_enums.bulk:
                    try:
                        system["section_symmetry"][0]
                    except (KeyError, IndexError):
                        self.logger.info("Symmetry information is not available for a bulk system. No Encylopedia entry created.")
                    else:
                        system_type = stype

        material.system_type = system_type
        return system, system_type
190

191
    def method_type(self, method: Method) -> tuple:
        """Select the representative method section and its method type.

        With a single method section its ``electronic_structure_method`` is
        used directly. With several sections a GW method ("G0W0" or "scGW")
        wins immediately; otherwise sections linked through "core_settings"
        references are inspected. Assignments made while scanning linked
        methods are not final: later matches overwrite earlier ones.

        Args:
            method: Section whose ``method_type`` is set to the result.

        Returns:
            Tuple ``(repr_method, method_id)``. ``repr_method`` is ``None`` and
            ``method_id`` is ``config.services.unavailable_value`` when nothing
            could be determined.
        """
        repr_method = None
        method_id = config.services.unavailable_value
        methods = self._backend[s_method]
        n_methods = len(methods)

        if n_methods == 1:
            repr_method = methods[0]
            method_id = repr_method.get("electronic_structure_method", config.services.unavailable_value)
        elif n_methods > 1:
            for sec_method in self._backend[s_method]:
                # GW takes precedence over everything else.
                if sec_method.get("electronic_structure_method", None) in {"G0W0", "scGW"}:
                    repr_method = sec_method
                    method_id = "GW"
                    break

                # Methods can be linked to each other through references.
                # Sections without references are skipped.
                try:
                    refs = sec_method["section_method_to_method_refs"]
                except KeyError:
                    continue

                # Collect this section plus every "core_settings" target.
                linked_methods = [sec_method]
                for ref in refs:
                    ref_kind = ref["method_to_method_kind"]
                    ref_target = ref["method_to_method_ref"]
                    if ref_kind == "core_settings":
                        linked_methods.append(methods[ref_target])

                # Any linked section that defines the method sets the result.
                for linked in linked_methods:
                    try:
                        linked_esm = linked["electronic_structure_method"]
                    except KeyError:
                        continue
                    repr_method = sec_method
                    method_id = linked_esm

        method.method_type = method_id
        return repr_method, method_id
235

236
    def mainfile_uri(self, encyclopedia: Encyclopedia):
        """Store the ``nmd://`` URI of the mainfile in ``encyclopedia``.

        The URI is built from the ``upload_id`` and ``mainfile`` values of the
        first ``section_entry_info`` section of the backend.
        """
        entry_info = self._backend["section_entry_info"][0]
        upload_id, mainfile_path = entry_info["upload_id"], entry_info["mainfile"]
        encyclopedia.mainfile_uri = f"nmd://R{upload_id}/data/{mainfile_path}"
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266

    # def similar_materials(self) -> None:
        # pass

    # def calculation_pid(self):
        # pass

    # def calculation(self) -> None:
        # pass

    # def contributor_first_name(self) -> None:
        # pass

    # def contributor_last_name(self) -> None:
        # pass

    # def contributor_type(self) -> None:
        # pass

    # def contributors(self) -> None:
        # pass

    # def number_of_calculations(self) -> None:
        # pass

267
    def fill(self, ctx: Context):
        """Run the structure, method and properties normalizers for ``ctx``.

        The structure normalizer is chosen by ``ctx.system_type`` and the
        method normalizer by ``ctx.method_type``; either one is skipped when
        the type matches none of the handled cases. The properties normalizer
        always runs.
        """
        # Structure related metainfo
        system_enums = Material.system_type.type
        struct_cls: Any = None
        if ctx.system_type == system_enums.bulk:
            struct_cls = MaterialBulkNormalizer
        elif ctx.system_type == system_enums.two_d:
            struct_cls = Material2DNormalizer
        elif ctx.system_type == system_enums.one_d:
            struct_cls = Material1DNormalizer
        if struct_cls is not None:
            struct_cls(self.backend, self.logger).normalize(ctx)

        # Method related metainfo
        method_enums = Method.method_type.type
        method_cls = None
        if ctx.method_type == method_enums.DFT or ctx.method_type == method_enums.DFTU:
            method_cls = MethodDFTNormalizer
        elif ctx.method_type == method_enums.GW:
            method_cls = MethodGWNormalizer
        if method_cls is not None:
            method_cls(self._backend, self.logger).normalize(ctx)

        # Properties related metainfo
        PropertiesNormalizer(self.backend, self.logger).normalize(ctx)

292
    def normalize(self, logger=None) -> None:
        """Create the Encyclopedia section for the current entry.

        The outcome is reported through the ``enc_status`` and ``enc_message``
        keys of the timer log. Processing stops early, without adding the
        section to the backend, when the run type or the system type is
        unknown. Any exception is caught and reported as a failure.
        """
        with timer(self.logger, 'EncyclopediaNormalizer finished') as log:

            def report(status, message):
                log["enc_status"] = status
                log["enc_message"] = message

            try:
                super().normalize(logger)

                # Build the skeleton of the metainfo structure.
                sec_enc = Encyclopedia()
                material = sec_enc.m_create(Material)
                method = sec_enc.m_create(Method)
                sec_enc.m_create(Properties)
                run_type = sec_enc.m_create(RunType)

                # Generic data
                self.mainfile_uri(sec_enc)

                # Run type: stop if unknown.
                run_type_name = self.run_type(run_type)
                if run_type_name == config.services.unavailable_value:
                    report("unknown_run_type", "Unknown run type for encyclopedia, encyclopedia metainfo not created.")
                    return

                # System type: stop unless it is bulk, 2D or 1D.
                system_enums = Material.system_type.type
                representative_system, system_type = self.system_type(material)
                if system_type not in (system_enums.bulk, system_enums.two_d, system_enums.one_d):
                    report("unknown_system_type", "Unknown system type for encyclopedia, encyclopedia metainfo not created.")
                    return

                # Method type: an unknown method does not stop the processing.
                representative_method, method_type = self.method_type(method)

                # Representative scc, if one has been selected.
                try:
                    representative_scc_idx = self._backend[s_run][0].tmp["representative_scc_idx"]
                    representative_scc = self._backend[s_scc][representative_scc_idx]
                except (KeyError, IndexError):
                    representative_scc = None
                    representative_scc_idx = None

                # One context object carries all details to the sub-normalizers.
                context = Context(
                    system_type=system_type,
                    method_type=method_type,
                    run_type=run_type_name,
                    representative_system=representative_system,
                    representative_method=representative_method,
                    representative_scc=representative_scc,
                    representative_scc_idx=representative_scc_idx,
                )

                # The section is added to the backend before it is filled.
                self._backend.add_mi2_section(sec_enc)
                self.fill(context)

            except Exception as e:
                report("failure", "Failed to create an Encyclopedia entry due to an unhandlable exception.")
                log["exc_info"] = e
            else:
                report("success", "Successfully created metainfo for Encyclopedia.")
355
356


357
class MaterialNormalizer():
    """Base class for processing material-related information in the
    Encyclopedia.

    ``cell_normalized`` and ``normalize`` are marked with ``@abstractmethod``.
    The class declares no ABC base or metaclass, so the markers document
    intent and are not enforced at instantiation.
    """
    def __init__(self, backend: LocalBackend, logger):
        self.backend, self.logger = backend, logger

    def atom_labels(self, material: Material, std_atoms: Atoms) -> None:
        """Store the chemical symbols of ``std_atoms``."""
        symbols = std_atoms.get_chemical_symbols()
        material.atom_labels = symbols

    def atom_positions(self, material: Material, std_atoms: Atoms) -> None:
        """Store the scaled positions of ``std_atoms`` without wrapping."""
        scaled = std_atoms.get_scaled_positions(wrap=False)
        material.atom_positions = scaled

    @abstractmethod
    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        """Store the normalized cell; implemented by subclasses."""
        pass

    def cell_volume(self, material: Material, std_atoms: Atoms) -> None:
        """Store the volume of ``std_atoms`` scaled by ``(1e-10)**3``.

        Presumably this converts cubic angstrom to cubic metre.
        """
        scaled_volume = std_atoms.get_volume() * (1e-10)**3
        material.cell_volume = float(scaled_volume)

    def formula(self, material: Material, names: List[str], counts: List[int]) -> None:
        """Store the formula string built from ``names`` and ``counts``."""
        material.formula = structure.get_formula_string(names, counts)

    def formula_reduced(self, material: Material, names: list, counts_reduced: list) -> None:
        """Store the formula string built from ``names`` and the reduced counts."""
        material.formula_reduced = structure.get_formula_string(names, counts_reduced)

    def material_hash(self, material: Material, spg_number: int, wyckoff_sets: List[WyckoffSet]) -> None:
        """Store the hash of the symmetry string for the given space group
        number and Wyckoff sets.
        """
        symmetry_string = structure.get_symmetry_string(spg_number, wyckoff_sets)
        material.material_hash = hash(symmetry_string)

    def number_of_atoms(self, material: Material, std_atoms: Atoms) -> None:
        """Store the number of atoms in ``std_atoms``."""
        material.number_of_atoms = len(std_atoms)

    @abstractmethod
    def normalize(self, ctx: Context) -> None:
        """Fill the material metainfo for ``ctx``; implemented by subclasses."""
        pass

398

399
class MaterialBulkNormalizer(MaterialNormalizer):
400
401
    """Processes structure related metainfo for Encyclopedia bulk structures.
    """
402
    def atomic_density(self, properties: Properties, repr_system: Atoms) -> None:
        """Store the number of atoms per volume of ``repr_system``.

        The volume is scaled by ``(1e-10)**3`` before dividing (presumably
        cubic angstrom to cubic metre).
        """
        n_atoms = len(repr_system)
        volume = repr_system.get_volume() * (1e-10)**3
        density = n_atoms / volume
        properties.atomic_density = float(density)
406

407
    def bravais_lattice(self, material: Material, section_symmetry: Section) -> None:
        """Copy ``bravais_lattice`` from the symmetry section to ``material``."""
        material.bravais_lattice = section_symmetry["bravais_lattice"]

411
    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        """Store the cell of ``std_atoms`` scaled by ``1e-10``.

        The object returned by ``get_cell()`` is scaled in place. Presumably
        the factor converts angstrom to metre.
        """
        scaled_cell = std_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_normalized = scaled_cell

416
    def cell_primitive(self, material: Material, prim_atoms: Atoms) -> None:
        """Store the cell of ``prim_atoms`` scaled by ``1e-10``.

        The object returned by ``get_cell()`` is scaled in place. Presumably
        the factor converts angstrom to metre.
        """
        scaled_cell = prim_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_primitive = scaled_cell

421
    def crystal_system(self, material: Material, section_symmetry: Section) -> None:
        """Copy ``crystal_system`` from the symmetry section to ``material``."""
        crystal_system = section_symmetry["crystal_system"]
        material.crystal_system = crystal_system

424
425
426
427
    def has_free_wyckoff_parameters(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        """Store what the analyzer's ``get_has_free_wyckoff_parameters()`` returns."""
        material.has_free_wyckoff_parameters = symmetry_analyzer.get_has_free_wyckoff_parameters()

428
    def lattice_parameters(self, material: Material, std_atoms: Atoms) -> None:
        """Store the lattice parameters of the cell of ``std_atoms``.

        The cell is scaled by ``1E-10`` before it is handed to
        ``structure.get_lattice_parameters``.
        """
        material.lattice_parameters = structure.get_lattice_parameters(
            std_atoms.get_cell() * 1E-10
        )
431

432
    def mass_density(self, properties: Properties, repr_system: Atoms) -> None:
        """Store the summed atomic mass per volume of ``repr_system``.

        The volume is scaled by ``(1e-10)**3`` before dividing (presumably
        cubic angstrom to cubic metre).
        """
        atomic_numbers = repr_system.get_atomic_numbers()
        total_mass = structure.get_summed_atomic_mass(atomic_numbers)
        volume = repr_system.get_volume() * (1e-10)**3
        properties.mass_density = float(total_mass / volume)
436

437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
    def material_name(self, material: Material, symbols: list, numbers: list) -> None:
        """Store a human readable name for elemental and binary materials.

        Systems with one element are named after that element. Binary systems
        are named "<first element> <second element with -ide suffix>". For
        selected anions an oxidation state in roman numerals is added to the
        first element, based on the stoichiometry, unless the first element is
        in the fixed-valence list. Systems with any other number of elements
        are left without a name.

        Args:
            material: Section that receives ``material_name``.
            symbols: Chemical symbols of the elements.
            numbers: Count of each element, in the same order as ``symbols``.
                Entries are only compared with ``==`` against integers.
        """
        n_species = len(symbols)

        # Systems with one element are named after it.
        if n_species == 1:
            number = ase.data.atomic_numbers[symbols[0]]
            material.material_name = ase.data.atomic_names[number]
            return

        # Only binary systems get a compound name.
        if n_species != 2:
            return

        names = [ase.data.atomic_names[ase.data.atomic_numbers[symbol]] for symbol in symbols]
        cation = names[0]

        # Non-metal elements are anions in the binary compounds and receive
        # the -ide suffix. Elements not listed keep their element name.
        anion_names = {
            "Antimony": "Antimonide",
            "Arsenic": "Arsenide",
            "Boron": "Boride",
            "Carbon": "Carbide",
            "Chlorine": "Chloride",
            "Germanium": "Germanide",
            "Selenium": "Selenide",
            "Bromine": "Bromide",
            "Tellurium": "Telluride",
            "Iodine": "Iodide",
            "Polonium": "Polonide",
            "Astatine": "Astatide",
            "Fluorine": "Fluoride",
            "Silicon": "Silicide",
            "Sulfur": "Sulfide",
            "Nitrogen": "Nitride",
            "Oxygen": "Oxide",
            "Hydrogen": "Hydride",
            "Phosphorus": "Phosphide",
        }
        anion = anion_names.get(names[1], names[1])

        # Alkaline, alkaline-earth elements and aluminium have a fixed valence
        # and never receive an oxidation state. Both the US and the IUPAC
        # spellings are listed because the spelling used by the element-name
        # table decides which one matches.
        fixed_valence_cations = {
            "Lithium", "Sodium", "Potassium", "Rubidium", "Cesium", "Caesium",
            "Francium", "Beryllium", "Magnesium", "Calcium", "Strontium",
            "Barium", "Radium", "Aluminum", "Aluminium",
        }

        # Halides and hydride: the state follows from the anion count alone.
        halides = {"Fluoride", "Chloride", "Bromide", "Iodide", "Hydride"}
        halide_states = (
            ((2,), "II"), ((3,), "III"), ((4,), "IV"),
            ((5,), "V"), ((6,), "VI"), ((7,), "VII"),
        )

        # Other anions: the state follows from (cation count, anion count).
        chalcogenide_states = (
            ((1, 1), "II"), ((2, 1), "I"), ((1, 2), "IV"), ((2, 3), "III"),
            ((2, 5), "V"), ((1, 3), "VI"), ((2, 7), "VII"),
        )
        pnictide_states = (
            ((1, 1), "III"), ((1, 2), "VI"), ((3, 2), "II"), ((3, 4), "IV"),
            ((3, 5), "V"), ((3, 7), "VII"),
        )
        carbide_states = (
            ((1, 1), "IV"), ((2, 1), "II"), ((4, 1), "I"), ((4, 3), "III"),
            ((4, 5), "V"), ((2, 3), "VI"), ((4, 7), "VII"),
        )
        stoichiometry_states = {
            "Oxide": chalcogenide_states,
            "Sulfide": chalcogenide_states,
            "Selenide": chalcogenide_states,
            "Nitride": pnictide_states,
            "Phosphide": pnictide_states,
            "Carbide": carbide_states,
        }

        def lookup(table, *observed):
            # Compare with == so that float counts such as 2.0 match 2.
            for expected, roman in table:
                if all(o == e for o, e in zip(observed, expected)):
                    return roman
            return None

        roman = None
        if cation not in fixed_valence_cations:
            if anion in halides:
                roman = lookup(halide_states, numbers[1])
            elif anion in stoichiometry_states:
                roman = lookup(stoichiometry_states[anion], numbers[0], numbers[1])

        if roman is None:
            name = cation + " " + anion
        else:
            name = cation + "(" + roman + ")" + " " + anion

        material.material_name = name

558
    def periodicity(self, material: Material) -> None:
        """Mark the material as periodic along all three cell directions.

        The builtin ``bool`` is used as dtype: the ``np.bool`` alias was
        deprecated in NumPy 1.20 and removed in NumPy 1.24, where it raises an
        ``AttributeError``. The resulting array is identical.
        """
        material.periodicity = np.array([True, True, True], dtype=bool)
560

561
    def point_group(self, material: Material, section_symmetry: Section) -> None:
        """Copy ``point_group`` from the symmetry section to ``material``."""
        material.point_group = section_symmetry["point_group"]
564

565
    def space_group_number(self, material: Material, spg_number: int) -> None:
        """Store the given space group number in ``material``."""
        material.space_group_number = spg_number

    def space_group_international_short_symbol(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        """Store what the analyzer's ``get_space_group_international_short()`` returns."""
        material.space_group_international_short_symbol = symmetry_analyzer.get_space_group_international_short()

572
    def material_classification(self, material: Material, section_system: Section) -> None:
        """Store the Springer classifications of the system as a JSON string.

        Nothing is stored when the first ``section_springer_material`` cannot
        be accessed or when it provides neither of the two classification
        values.
        """
        try:
            sec_springer = section_system["section_springer_material"][0]
        except Exception:
            return

        # (key in the springer section, key in the stored JSON object)
        key_pairs = (
            ('springer_classification', "material_class_springer"),
            ('springer_compound_class', "compound_class_springer"),
        )
        classes: Dict[str, List[str]] = {}
        for source_key, target_key in key_pairs:
            try:
                classes[target_key] = sec_springer[source_key]
            except KeyError:
                continue

        if classes:
            material.material_classification = json.dumps(classes)

594
    def structure_type(self, material: Material, section_system: Section) -> None:
        """Store a short structure type derived from the prototype notes.

        The notes are read from the ``tmp`` storage of the first
        ``section_prototype``. Nothing is stored when they cannot be read or
        when they are not one of the known note texts.
        """
        try:
            notes = section_system["section_prototype"][0].tmp['prototype_notes']
        except Exception:
            return

        # Only relevant information hidden in "notes" is handed over.
        # TODO: review and eventually add more ****ites which are commonly
        # used (see wurtzite)
        note_map = {
            "CaTiO<sub>3</sub> Pnma Perovskite Structure": "perovskite",
            "Hypothetical Tetrahedrally Bonded Carbon with 4&ndash;Member Rings": "4-member ring",
            "In (A6) Structure": "fct",
            "$\\alpha$&ndash;Pa (A<sub>a</sub>) Structure": "bct",
            "Hypothetical BCT5 Si Structure": "bct5",
            "Wurtzite (ZnS, B4) Structure": "wurtzite",
            "Hexagonal Close Packed (Mg, A3) Structure": "hcp",
            "Half&ndash;Heusler (C1<sub>b</sub>) Structure": "half-Heusler",
            "Zincblende (ZnS, B3) Structure": "zincblende",
            "Cubic Perovskite (CaTiO<sub>3</sub>, E2<sub>1</sub>) Structure": "cubic perovskite",
            "$\\alpha$&ndash;Po (A<sub>h</sub>) Structure": "simple cubic",
            "Si<sub>46</sub> Clathrate Structure": "clathrate",
            "Cuprite (Cu<sub>2</sub>O, C3) Structure": "cuprite",
            "Heusler (L2<sub>1</sub>) Structure": "Heusler",
            "Rock Salt (NaCl, B1) Structure": "rock salt",
            "Face&ndash;Centered Cubic (Cu, A1) Structure": "fcc",
            "Diamond (A4) Structure": "diamond",
            "Body&ndash;Centered Cubic (W, A2) Structure": "bcc",
        }
        if notes in note_map:
            material.structure_type = note_map[notes]
627

628
    def structure_prototype(self, material: Material, section_system: Section) -> None:
        """Store the prototype name of the system.

        The name is read from the ``tmp`` storage of the first
        ``section_prototype``; nothing is stored when it cannot be read.
        """
        try:
            prototype_name = section_system["section_prototype"][0].tmp['prototype_name']
        except Exception:
            return

        material.structure_prototype = prototype_name

637
    def strukturbericht_designation(self, material: Material, section_system: Section) -> None:
        """Store the Strukturbericht designation of the system as plain text.

        The designation is read from the ``tmp`` storage of the first
        ``section_prototype``; nothing is stored when it cannot be read.
        """
        try:
            designation = section_system["section_prototype"][0].tmp["strukturbericht_designation"]
        except Exception:
            return

        # In the current GUI LaTeX is replaced with plain text: the characters
        # "$", "_", "{" and "}" are removed.
        material.strukturbericht_designation = re.sub('[$_{}]', '', designation)
647

648
    def wyckoff_sets(self, material: Material, wyckoff_sets: Dict) -> None:
        """Create one ``WyckoffSet`` section in ``material`` per given group.

        A ``WyckoffVariables`` subsection is created only for groups that have
        at least one of ``x``, ``y``, ``z`` set; each set value is stored as a
        float.
        """
        for group in wyckoff_sets:
            wset = material.m_create(WyckoffSet)

            # Collect the free parameters that are actually set.
            free_params = {}
            for axis in ("x", "y", "z"):
                value = getattr(group, axis)
                if value is not None:
                    free_params[axis] = value

            if free_params:
                variables = wset.m_create(WyckoffVariables)
                for axis, value in free_params.items():
                    setattr(variables, axis, float(value))

            wset.indices = group.indices
            wset.element = group.element
            wset.wyckoff_letter = group.wyckoff_letter
662

663
    def normalize(self, ctx: Context) -> None:
        """Fill the material and properties metainfo for a bulk system.

        The representative system from ``ctx`` and the symmetry analyzer
        stored in the ``tmp`` storage of its first ``section_symmetry`` are
        the inputs. The greatest common divisor of the element counts of the
        primitive system is written to ``ctx.greatest_common_divisor``.
        """
        # Fetch resources
        sec_system = ctx.representative_system
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
        material = sec_enc.material
        properties = sec_enc.properties
        sec_symmetry = sec_system["section_symmetry"][0]
        symmetry_analyzer = sec_system["section_symmetry"][0].tmp["symmetry_analyzer"]

        # Derived structures and symmetry data
        spg_number = symmetry_analyzer.get_space_group_number()
        std_atoms = symmetry_analyzer.get_conventional_system()
        prim_atoms = symmetry_analyzer.get_primitive_system()
        repr_atoms = sec_system.tmp["representative_atoms"]  # Temporary value stored by SystemNormalizer
        wyckoff_sets = symmetry_analyzer.get_wyckoff_sets_conventional(return_parameters=True)

        # Element counts of the primitive system and their reduced form
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        common_divisor = reduce(gcd, counts)
        ctx.greatest_common_divisor = common_divisor
        reduced_counts = np.array(counts) / common_divisor

        # Fill structural information
        self.mass_density(properties, repr_atoms)
        self.material_hash(material, spg_number, wyckoff_sets)
        self.number_of_atoms(material, std_atoms)
        self.atom_labels(material, std_atoms)
        self.atom_positions(material, std_atoms)
        self.atomic_density(properties, repr_atoms)
        self.bravais_lattice(material, sec_symmetry)
        self.cell_normalized(material, std_atoms)
        self.cell_volume(material, std_atoms)
        self.crystal_system(material, sec_symmetry)
        self.cell_primitive(material, prim_atoms)
        self.formula(material, names, counts)
        self.formula_reduced(material, names, reduced_counts)
        self.has_free_wyckoff_parameters(material, symmetry_analyzer)
        self.lattice_parameters(material, std_atoms)
        self.material_name(material, names, reduced_counts)
        self.material_classification(material, sec_system)
        self.periodicity(material)
        self.point_group(material, sec_symmetry)
        self.space_group_number(material, spg_number)
        self.space_group_international_short_symbol(material, symmetry_analyzer)
        self.structure_type(material, sec_system)
        self.structure_prototype(material, sec_system)
        self.strukturbericht_designation(material, sec_system)
        self.wyckoff_sets(material, wyckoff_sets)
Lauri Himanen's avatar
Lauri Himanen committed
707
708


709
class Material2DNormalizer(MaterialNormalizer):
    """Fills the structure related Encyclopedia metainfo for 2D materials.
    """
    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the cell of the standardized system, scaled by 1e-10.

        Args:
            material: Section that receives ``cell_normalized``.
            std_atoms: Standardized system whose cell is read.
        """
        # NOTE(review): the 1e-10 factor presumably converts angstrom to
        # meters - confirm against the metainfo definition.
        scaled_cell = std_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_normalized = scaled_cell

    def cell_primitive(self, material: Material, prim_atoms: Atoms) -> None:
        """Stores the cell of the primitive system, scaled by 1e-10.

        Args:
            material: Section that receives ``cell_primitive``.
            prim_atoms: Primitive system whose cell is read.
        """
        scaled_cell = prim_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_primitive = scaled_cell

    def lattice_parameters(self, material: Material, std_atoms: Atoms, periodicity: np.array) -> None:
        """Stores the lattice parameters of a 2D system.

        Only three values are meaningful for a 2D system: the lengths of the
        two periodic cell vectors and the angle between them. They are written
        to slots 0, 1 and 3 of a six element array; the other slots are zero.

        Args:
            material: Section that receives ``lattice_parameters``.
            std_atoms: Standardized system whose cell is read.
            periodicity: Per-axis flags; the first two axes that compare equal
                to True are taken as the periodic ones.
        """
        periodic_axes = np.where(np.array(periodicity) == True)[0]  # noqa: E712
        lattice = std_atoms.get_cell()
        vec_a = lattice[periodic_axes[0], :] * 1e-10
        vec_b = lattice[periodic_axes[1], :] * 1e-10
        len_a = np.linalg.norm(vec_a)
        len_b = np.linalg.norm(vec_b)

        # Clip the cosine so that rounding errors cannot push it outside the
        # domain of arccos.
        cos_angle = np.clip(np.dot(vec_a, vec_b) / (len_a * len_b), -1.0, 1.0)
        angle = np.arccos(cos_angle)
        material.lattice_parameters = np.array([len_a, len_b, 0.0, angle, 0.0, 0.0])

    def periodicity(self, material: Material, std_atoms: Atoms) -> None:
        """Copies the periodicity flags of the standardized system.

        Args:
            material: Section that receives ``periodicity``.
            std_atoms: Standardized system; according to the original author
                MatID already provides the correct periodicity for it.
        """
        material.periodicity = std_atoms.get_pbc()

    def get_symmetry_analyzer(self, original_system: Atoms) -> SymmetryAnalyzer:
        """Creates the SymmetryAnalyzer for a 2D system.

        The periodic directions are detected, the system is centered in the
        non-periodic direction and the detected periodicity is set on a copy
        of the system before the analyzer is instantiated.

        Args:
            original_system: The original simulation system. It is not
                modified; a copy is used.

        Returns:
            SymmetryAnalyzer instantiated with the centered copy.

        Raises:
            ValueError: If the number of detected periodic directions is not
                exactly two.
        """
        # Get dimension of system by also taking into account the covalent
        # radii, and compare it against the lengths of the cell vectors.
        extents = matid.geometry.get_dimensions(original_system, [True, True, True])
        basis_lengths = np.linalg.norm(original_system.get_cell(), axis=1)
        periodicity = (basis_lengths - extents) <= config.normalize.cluster_threshold

        # Anything other than exactly two periodic axes is an error. This
        # only happens if the vacuum gap is not aligned with a cell vector or
        # if the linear gap search is insufficient (the structure is "wavy",
        # making also the gap highly nonlinear).
        if sum(periodicity) != 2:
            raise ValueError("Could not detect the periodic dimensions in a 2D system.")

        # Center the system in the non-periodic direction, also taking
        # periodicity into account. The get_center_of_mass()-function in MatID
        # takes into account periodicity and can produce the correct CM unlike
        # the similar function in ASE.
        center_of_mass = matid.geometry.get_center_of_mass(original_system)
        shift = 0.5 * np.sum(original_system.get_cell(), axis=0) - center_of_mass
        shift[periodicity] = 0
        centered_system = original_system.copy()
        centered_system.translate(shift)
        centered_system.wrap()

        # Set the periodicity according to detected periodicity in order for
        # SymmetryAnalyzer to use the symmetry analysis designed for 2D
        # systems.
        centered_system.set_pbc(periodicity)
        return SymmetryAnalyzer(
            centered_system,
            config.normalize.symmetry_tolerance,
            config.normalize.flat_dim_threshold
        )

    def normalize(self, ctx: Context) -> None:
        """Fills the material metainfo of a 2D system.

        Also stores the greatest common divisor of the element counts of the
        primitive system in ``ctx.greatest_common_divisor``.

        Args:
            ctx: Context of the entry; its representative system provides the
                atoms to analyze.
        """
        # Fetch resources
        material = self.backend.get_mi2_section(Encyclopedia.m_def).material
        # Temporary value stored by SystemNormalizer
        repr_atoms = ctx.representative_system.tmp["representative_atoms"]
        analyzer = self.get_symmetry_analyzer(repr_atoms)
        spg_number = analyzer.get_space_group_number()
        wyckoff_sets = analyzer.get_wyckoff_sets_conventional(return_parameters=False)
        std_atoms = analyzer.get_conventional_system()
        prim_atoms = analyzer.get_primitive_system()

        # Element names and counts of the primitive system, plus the counts
        # divided by their greatest common divisor.
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        common_divisor = reduce(gcd, counts)
        ctx.greatest_common_divisor = common_divisor
        reduced_counts = np.array(counts) / common_divisor

        # Fill metainfo. Periodicity goes first because the lattice parameters
        # at the end read it back from the material section. Helpers that are
        # not defined in this class come from outside it (presumably the base
        # class).
        self.periodicity(material, std_atoms)
        self.material_hash(material, spg_number, wyckoff_sets)
        self.number_of_atoms(material, std_atoms)
        self.atom_labels(material, std_atoms)
        self.atom_positions(material, std_atoms)
        self.cell_normalized(material, std_atoms)
        self.cell_primitive(material, prim_atoms)
        self.formula(material, names, counts)
        self.formula_reduced(material, names, reduced_counts)
        self.lattice_parameters(material, std_atoms, material.periodicity)
801
802


803
class Material1DNormalizer(MaterialNormalizer):
804
805
    """Processes structure related metainfo for Encyclopedia 1D structures.
    """
806
    def material_hash_1d(self, material: Material, prim_atoms: Atoms) -> None:
        """Stores the hash that identifies a 1D material.

        Different 1D materials are told apart by their Hill formula and by
        the structure fingerprint built from the Coulomb matrix eigenvalues.

        Args:
            material: Section whose ``formula`` is read and whose
                ``material_hash`` is written.
            prim_atoms: Primitive system used for the fingerprint.
        """
        fingerprint = self.get_structure_fingerprint(prim_atoms)
        formula = material.formula

        # Seed is "<formula>, <fingerprint>".
        hash_seed = ", ".join([formula, fingerprint])
        material.material_hash = hash(hash_seed)
819

820
    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the cell of the standardized system, scaled by 1e-10.

        Args:
            material: Section that receives ``cell_normalized``.
            std_atoms: Standardized system whose cell is read.
        """
        # NOTE(review): the 1e-10 factor presumably converts angstrom to
        # meters - confirm against the metainfo definition.
        scaled_cell = std_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_normalized = scaled_cell

825
    def lattice_parameters(self, material: Material, std_atoms: Atoms, periodicity: np.array) -> None:
        """Stores the lattice parameters of a 1D system.

        A 1D system only has one lattice parameter: the length of the cell
        vector in the periodic dimension. It is written to slot 0 of a six
        element array; the other slots are zero.

        Args:
            material: Section that receives ``lattice_parameters``.
            std_atoms: Standardized system whose cell is read.
            periodicity: Per-axis flags; the first axis that compares equal to
                True is taken as the periodic one.
        """
        periodic_axes = np.where(np.array(periodicity) == True)[0]  # noqa: E712
        lattice = std_atoms.get_cell()
        periodic_vector = lattice[periodic_axes[0], :]
        length = np.linalg.norm(periodic_vector) * 1e-10
        material.lattice_parameters = np.array([length, 0.0, 0.0, 0.0, 0.0, 0.0])
831
832

    def periodicity(self, material: Material, prim_atoms: Atoms) -> None:
        """Detects and stores the periodicity of a 1D system.

        Args:
            material: Section that receives ``periodicity``.
            prim_atoms: Primitive system to analyze.

        Raises:
            ValueError: If the number of detected periodic directions is not
                exactly one.
        """
        # Get dimension of system by also taking into account the covalent
        # radii, and compare it against the lengths of the cell vectors.
        extents = matid.geometry.get_dimensions(prim_atoms, [True, True, True])
        basis_lengths = np.linalg.norm(prim_atoms.get_cell(), axis=1)
        detected = (basis_lengths - extents) <= config.normalize.cluster_threshold

        # Anything other than exactly one periodic axis is an error. This only
        # happens if the vacuum gap is not aligned with a cell vector.
        if sum(detected) != 1:
            raise ValueError("Could not detect the periodic dimensions in a 1D system.")

        material.periodicity = detected
845

846
    def get_structure_fingerprint(self, prim_atoms: Atoms) -> str:
        """Builds a coarse numeric fingerprint from the atomic positions and
        species.

        The fingerprint is a discretized version of a sorted Coulomb matrix
        eigenspectrum (Grégoire Montavon, Katja Hansen, Siamac Fazli, Matthias
        Rupp, Franziska Biegler, Andreas Ziehe, Alexandre Tkatchenko, Anatole
        V. Lilienfeld, and Klaus-Robert Müller. Learning invariant
        representations of molecules for atomization energy prediction. In F.
        Pereira, C. J. C. Burges, L. Bottou, and K. Q. Weinberger, editors,
        Advances in Neural Information Processing Systems 25, pages 440–448.
        Curran Associates, Inc., 2012.).

        Discretizing the eigenvalues allows O(n) matching between structures,
        because fingerprints do not have to be compared against each other.
        Since a regular discretization suffers from the "edge problem", a
        robust discretization is used instead (Birget, Jean-Camille & Hong,
        Dawei & Memon, Nasir. (2003). Robust discretization, with an
        application to graphical passwords. IACR Cryptology ePrint Archive.
        2003. 168.). For the 1-dimensional domain two grids are created and
        each point is mapped to the first grid in which it is robust using a
        minimum tolerance parameter r, with the maximum tolerance being 5r.

        Other robust discretization methods can guarantee exact r-tolerance
        (e.g. Sonia Chiasson, Jayakumar Srinivasan, Robert Biddle, and P. C.
        van Oorschot. 2008. Centered discretization with application to
        graphical passwords. In Proceedings of the 1st Conference on
        Usability, Psychology, and Security (UPSEC’08). USENIX Association,
        USA, Article 6, 1–9.). Such a method however requires a predefined
        "correct" structure against which the search is done.

        Args:
            prim_atoms: Primitive system.

        Returns:
            The discretized eigenvalues as integers joined with ";".
        """
        # Charge part: square root of the pairwise product of atomic numbers.
        atomic_numbers = prim_atoms.get_atomic_numbers()
        charge_term = np.sqrt(atomic_numbers[None, :] * atomic_numbers[:, None])

        # Distance part. Notice that the minimum image convention must be
        # used. Without it, differently oriented atoms in the same cell may be
        # detected as the same material.
        positions = prim_atoms.get_positions()
        lattice = prim_atoms.get_cell()
        distances = matid.geometry.get_distance_matrix(positions, positions, lattice, pbc=True, mic=True)
        coulomb = np.clip(10 - distances, a_min=0, a_max=None)
        np.fill_diagonal(coulomb, 0)
        coulomb = charge_term * coulomb

        # Sorted eigenvalue spectrum.
        eigenvalues, _ = np.linalg.eigh(coulomb)
        eigenvalues = np.array(sorted(eigenvalues))

        # Perform robust discretization (see function docstring for details).
        # r = 0.5 ensures that all grids are integers which can be uniquely
        # mapped to strings. If a finer grid is needed adjust the eigenvalue
        # scale instead.
        eigenvalues /= 25  # Go to smaller scale where integer numbers are meaningful
        dimension = 1
        r = 0.5
        two_r = 2 * r
        spacing = two_r * (dimension + 1)
        grid_offsets = two_r * np.array(range(dimension + 1))

        # Position of every eigenvalue inside the cell of each shifted grid.
        in_cell = np.mod(eigenvalues[None, :] + grid_offsets[:, None], spacing)
        is_safe = (r <= in_cell) & (in_cell < r * (2 * dimension + 1))

        # Index of the first grid in which the eigenvalue is safe.
        grid_index = np.argmax(is_safe, axis=0)
        discretized = spacing * np.floor((eigenvalues + two_r * grid_index) / spacing)
        discretized[grid_index == 1] += two_r

        # Form string
        return ";".join([str(int(value)) for value in discretized])

    def get_symmetry_analyzer(self, original_system: Atoms) -> SymmetryAnalyzer:
        """Creates the SymmetryAnalyzer for a 1D system.

        The symmetry of a 1D system is analyzed from a copy of the original
        system on which full periodicity is enforced.

        Args:
            original_system: The original simulation system. It is not
                modified; a copy is used.

        Returns:
            SymmetryAnalyzer instantiated with the fully periodic copy.
        """
        fully_periodic = original_system.copy()
        fully_periodic.set_pbc(True)
        return SymmetryAnalyzer(
            fully_periodic,
            config.normalize.symmetry_tolerance,
            config.normalize.flat_dim_threshold
        )
947

948
949
950
951
    def get_std_atoms(self, periodicity: np.array, prim_atoms: Atoms) -> Atoms:
        """Builds the standardized system of a 1D material.

        The standardized system is based on the primitive system. A copy of
        it is centered in the non-periodic directions and the cell is then
        shrunk in those directions so that the atoms just fit.

        Args:
            periodicity: Per-axis periodicity flags. The value is iterated
                axis by axis and also used to index the translation vector.
            prim_atoms: Primitive system. It is not modified; a copy is used.

        Returns:
            Standardized structure that represents this material and from
            which the material hash will be constructed.
        """
        std_atoms = prim_atoms.copy()

        # Move the center of mass to the cell center, leaving the periodic
        # directions untouched.
        center_of_mass = matid.geometry.get_center_of_mass(prim_atoms)
        cell_center = 0.5 * np.sum(std_atoms.get_cell(), axis=0)
        centering = cell_center - center_of_mass
        centering[periodicity] = 0
        std_atoms.translate(centering)
        std_atoms.wrap()

        # Reduce cell size to just fit the system in the non-periodic
        # dimensions.
        scaled_positions = std_atoms.get_scaled_positions(wrap=False)
        old_cell = std_atoms.get_cell()
        shrunk_cell = np.array(old_cell)
        shift = np.zeros(3)
        for axis, is_periodic in enumerate(periodicity):
            if is_periodic:
                continue
            axis_coordinates = scaled_positions[:, axis]
            lowest = np.min(axis_coordinates)
            highest = np.max(axis_coordinates)
            # Move the lowest atom to the cell origin along this axis and
            # scale the cell vector to the occupied range.
            shift -= old_cell[axis, :] * lowest
            shrunk_cell[axis] = old_cell[axis, :] * (highest - lowest)
        std_atoms.translate(shift)
        std_atoms.set_cell(shrunk_cell)

        return std_atoms
Lauri Himanen's avatar
Lauri Himanen committed
987

988
    def normalize(self, ctx: Context) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
989
        # Fetch resources
Lauri Himanen's avatar
Lauri Himanen committed
990
        sec_system = ctx.representative_system
991
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
Lauri Himanen's avatar
Lauri Himanen committed
992
        material = sec_enc.material
993
        repr_atoms = sec_system.tmp["representative_atoms"]  # Temporary value stored by SystemNormalizer
994
995
        symmetry_analyzer = self.get_symmetry_analyzer(repr_atoms)
        prim_atoms = symmetry_analyzer.get_primitive_system()
996
        prim_atoms.set_pbc(True)
Lauri Himanen's avatar
Lauri Himanen committed
997
998
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        greatest_common_divisor = reduce(gcd, counts)
999
        ctx.greatest_common_divisor = greatest_common_divisor
Lauri Himanen's avatar
Lauri Himanen committed
1000
        reduced_counts = np.array(counts) / greatest_common_divisor
For faster browsing, not all history is shown. View entire blame