encyclopedia.py 76.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
from typing import Dict, List, Any
16
17
from math import gcd as gcd
from functools import reduce
18
from abc import abstractmethod
19
from collections import OrderedDict
20
import re
21
import json
22
import ase
23
import ase.data
24
from ase import Atoms
25
import numpy as np
26
from matid import SymmetryAnalyzer
27
import matid.geometry
28

29
30
from nomad.normalizing.normalizer import (
    Normalizer,
31
    s_run,
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
    s_scc,
    s_system,
    s_method,
    s_frame_sequence,
    r_frame_sequence_to_sampling,
    s_sampling_method,
    r_frame_sequence_local_frames,
)
from nomad.metainfo.encyclopedia import (
    Encyclopedia,
    Material,
    Method,
    Properties,
    RunType,
    WyckoffSet,
    WyckoffVariables,
    ElectronicBandStructure,
    BandGap,
)
51
from nomad.parsing.backend import Section, LocalBackend
52
from nomad.normalizing.settingsbasisset import get_basis_set_settings
53
from nomad.normalizing import structure
Lauri Himanen's avatar
Lauri Himanen committed
54
from nomad.utils import hash, RestrictedDict
55
from nomad import config
56

57
58
# Unit conversion factor from joules to rydbergs, i.e. the number of rydbergs
# in one joule (1 Ry is approximately 2.1798724e-18 J).
J_to_Ry = 4.587425e+17

59

Lauri Himanen's avatar
Lauri Himanen committed
60
61
62
63
64
65
66
67
68
69
class Context():
    """A simple class for holding the context related to an Encyclopedia entry.

    The instance is a plain attribute container: every constructor argument is
    stored unchanged under an attribute of the same name.

    Attributes:
        system_type: Type of the representative system.
        method_type: Type of the representative method.
        run_type: Type of the calculation run.
        representative_system: The section chosen as the representative system.
        representative_method: The section chosen as the representative method.
        representative_scc: The section chosen as the representative single
            configuration calculation, or None if there is none.
        representative_scc_idx: Index of the representative single
            configuration calculation, or None if there is none.
        greatest_common_divisor: Initialised to None here; it is assigned
            later by the code that processes the structure.
    """
    def __init__(
        self,
        system_type: str,
        method_type: str,
        run_type: str,
        representative_system,
        representative_method,
        representative_scc,
        representative_scc_idx,
    ):
        self.system_type = system_type
        self.method_type = method_type
        self.run_type = run_type
        self.representative_system = representative_system
        self.representative_method = representative_method
        self.representative_scc = representative_scc
        self.representative_scc_idx = representative_scc_idx
        # Not known at construction time, filled in during normalization.
        self.greatest_common_divisor: int = None
Lauri Himanen's avatar
Lauri Himanen committed
81
82


83
84
85
86
class EncyclopediaNormalizer(Normalizer):
    """
    This normalizer emulates the functionality of the old Encyclopedia backend.
    The data used by the encyclopedia have been assigned under new metainfo
    within a new section called "Encyclopedia". In the future these separate
    metainfos could be absorbed into the existing metainfo hierarchy.
    """
    def __init__(self, backend: LocalBackend):
        """Stores the backend both through the base class and as
        ``self.backend``.
        """
        super().__init__(backend)
        self.backend: LocalBackend = backend

    def run_type(self, run_type_sec: RunType) -> str:
        """Decides what type of calculation this is: single_point, md,
        geometry_optimization, etc.

        Args:
            run_type_sec: The section whose ``run_type`` is set to the
                detected value. It is left untouched on the early-return
                paths taken when frame sequence information is incomplete.

        Returns:
            The detected run type, or the ``unavailable`` enum value when the
            type could not be determined.
        """
        run_enums = RunType.run_type.type
        run_type = run_enums.unavailable

        # A missing section is treated the same as an empty one.
        try:
            sccs = self._backend[s_scc]
        except Exception:
            sccs = []
        try:
            frame_sequences = self._backend[s_frame_sequence]
        except Exception:
            frame_sequences = []

        n_scc = len(sccs)
        n_frame_seq = len(frame_sequences)

        # No sequences, only a few calculations
        if n_scc <= 3 and n_frame_seq == 0:
            program_name = self._backend["program_name"]
            if program_name == "elastic":
                # TODO move to taylor expansion as soon as data is correct in archive
                run_type = run_enums.elastic_constants
            else:
                run_type = run_enums.single_point

        # One sequence. Currently calculations with multiple sequences are
        # unsupported.
        elif n_frame_seq == 1:
            frame_seq = frame_sequences[0]

            # See if sampling_method is present
            try:
                i_sampling_method = frame_seq[r_frame_sequence_to_sampling]
            except KeyError:
                self.logger.info(
                    "Cannot determine encyclopedia run type because missing "
                    "value for frame_sequence_to_sampling_ref."
                )
                return run_type

            # See if local frames are present
            try:
                frames = frame_seq[r_frame_sequence_local_frames]
            except KeyError:
                self.logger.info(
                    "section_frame_sequence_local_frames not found although a "
                    "frame_sequence exists."
                )
                return run_type
            if len(frames) == 0:
                self.logger.info("No frames referenced in section_frame_sequence_local_frames.")
                return run_type

            section_sampling_method = self._backend[s_sampling_method][i_sampling_method]
            sampling_method = section_sampling_method["sampling_method"]

            # Any other sampling method leaves the run type as unavailable.
            if sampling_method == "molecular_dynamics":
                run_type = run_enums.molecular_dynamics
            elif sampling_method == "geometry_optimization":
                run_type = run_enums.geometry_optimization
            elif sampling_method == "taylor_expansion":
                run_type = run_enums.phonon_calculation

        run_type_sec.run_type = run_type
        return run_type

    def system_type(self, material: Material) -> tuple:
        """Determines the type of the representative system.

        Only 1D, 2D and bulk systems are recognised, and a bulk system is
        accepted only when it also has symmetry information.

        Args:
            material: The section whose ``system_type`` is set to the result.

        Returns:
            A tuple ``(system, system_type)``. ``system`` is the representative
            system section, or None if it could not be fetched. ``system_type``
            is ``config.services.unavailable_value`` when the type is not
            recognised.
        """
        # Try to fetch representative system
        system = None
        system_type = config.services.unavailable_value
        system_enums = Material.system_type.type
        system_idx = self._backend["section_run"][0].tmp["representative_system_idx"]
        if system_idx is not None:
            # Try to find system type information from backend for the selected system.
            try:
                system = self._backend[s_system][system_idx]
                stype = system["system_type"]
            except KeyError:
                pass
            else:
                if stype == system_enums.one_d or stype == system_enums.two_d:
                    system_type = stype
                # For bulk systems we also ensure that the symmetry information is available
                elif stype == system_enums.bulk:
                    try:
                        system["section_symmetry"][0]
                    except (KeyError, IndexError):
                        self.logger.info("Symmetry information is not available for a bulk system. No Encylopedia entry created.")
                    else:
                        system_type = stype

        material.system_type = system_type
        return system, system_type

    def method_type(self, method: Method) -> tuple:
        """Determines the representative method section and its type.

        With a single method section, that section is used directly. With
        several sections, a GW section (G0W0 or scGW) takes precedence and
        ends the search. Otherwise, for each section that has method-to-method
        references, the section itself and its "core_settings" targets are
        examined for ``electronic_structure_method``. There is no early exit
        in that branch, so the last value found is the one reported.

        Args:
            method: The section whose ``method_type`` is set to the result.

        Returns:
            A tuple ``(repr_method, method_id)``. ``repr_method`` is None and
            ``method_id`` is ``config.services.unavailable_value`` when nothing
            could be determined.
        """
        repr_method = None
        method_id = config.services.unavailable_value
        methods = self._backend[s_method]
        n_methods = len(methods)

        if n_methods == 1:
            repr_method = methods[0]
            method_id = repr_method.get("electronic_structure_method", config.services.unavailable_value)
        elif n_methods > 1:
            for sec_method in self._backend[s_method]:
                # GW
                electronic_structure_method = sec_method.get("electronic_structure_method", None)
                if electronic_structure_method in {"G0W0", "scGW"}:
                    repr_method = sec_method
                    method_id = "GW"
                    break

                # Methods linked to each other through references. Get all
                # linked methods, try to get electronic_structure_method from
                # each.
                try:
                    refs = sec_method["section_method_to_method_refs"]
                except KeyError:
                    pass
                else:
                    linked_methods = [sec_method]
                    for ref in refs:
                        method_to_method_kind = ref["method_to_method_kind"]
                        method_to_method_ref = ref["method_to_method_ref"]
                        if method_to_method_kind == "core_settings":
                            linked_methods.append(methods[method_to_method_ref])

                    for i_method in linked_methods:
                        try:
                            electronic_structure_method = i_method["electronic_structure_method"]
                        except KeyError:
                            pass
                        else:
                            # The referring section is kept as the
                            # representative one, even when the value comes
                            # from a linked section.
                            repr_method = sec_method
                            method_id = electronic_structure_method

        method.method_type = method_id
        return repr_method, method_id

    def mainfile_uri(self, encyclopedia: Encyclopedia):
        """Builds the ``nmd://`` URI of the mainfile from the upload id and
        mainfile path in the entry info, and stores it in
        ``encyclopedia.mainfile_uri``.
        """
        entry_info = self._backend["section_entry_info"][0]
        upload_id = entry_info["upload_id"]
        mainfile_path = entry_info["mainfile"]
        uri = f"nmd://R{upload_id}/data/{mainfile_path}"
        encyclopedia.mainfile_uri = uri

    # def similar_materials(self) -> None:
        # pass

    # def calculation_pid(self):
        # pass

    # def calculation(self) -> None:
        # pass

    # def contributor_first_name(self) -> None:
        # pass

    # def contributor_last_name(self) -> None:
        # pass

    # def contributor_type(self) -> None:
        # pass

    # def contributors(self) -> None:
        # pass

    # def number_of_calculations(self) -> None:
        # pass

    def fill(self, ctx: Context):
        """Runs the structure, method and properties sub-normalizers that
        match the given context.

        The structure normalizer is picked by ``ctx.system_type`` and the
        method normalizer by ``ctx.method_type``; either is skipped when no
        normalizer matches. The properties normalizer always runs.
        """
        # Fill structure related metainfo
        struct: Any = None
        if ctx.system_type == Material.system_type.type.bulk:
            struct = MaterialBulkNormalizer(self.backend, self.logger)
        elif ctx.system_type == Material.system_type.type.two_d:
            struct = Material2DNormalizer(self.backend, self.logger)
        elif ctx.system_type == Material.system_type.type.one_d:
            struct = Material1DNormalizer(self.backend, self.logger)
        if struct is not None:
            struct.normalize(ctx)

        # Fill method related metainfo
        method = None
        if ctx.method_type == Method.method_type.type.DFT or ctx.method_type == Method.method_type.type.DFTU:
            method = MethodDFTNormalizer(self._backend, self.logger)
        elif ctx.method_type == Method.method_type.type.GW:
            method = MethodGWNormalizer(self._backend, self.logger)
        if method is not None:
            method.normalize(ctx)

        # Fill properties related metainfo
        properties = PropertiesNormalizer(self.backend, self.logger)
        properties.normalize(ctx)

    def normalize(self, logger=None) -> None:
        """The caller will automatically log if the normalizer succeeds or ends
        up with an exception.

        The Encyclopedia section is added to the backend only when both the
        run type and the system type are supported; otherwise an info message
        is logged and the method returns without adding it. Any exception is
        logged with ``enc_status="failure"`` and re-raised.
        """
        try:
            super().normalize(logger)

            # Initialise metainfo structure
            sec_enc = Encyclopedia()
            material = sec_enc.m_create(Material)
            method = sec_enc.m_create(Method)
            sec_enc.m_create(Properties)
            run_type = sec_enc.m_create(RunType)

            # Get generic data
            self.mainfile_uri(sec_enc)

            # Determine run type, stop if unknown
            run_type_name = self.run_type(run_type)
            if run_type_name == config.services.unavailable_value:
                self.logger.info(
                    "Unsupported run type for encyclopedia, encyclopedia metainfo not created.",
                    enc_status="unsupported_run_type",
                )
                return

            # Get the system type, stop if unknown
            system_enums = Material.system_type.type
            representative_system, system_type = self.system_type(material)
            if system_type != system_enums.bulk and system_type != system_enums.two_d and system_type != system_enums.one_d:
                self.logger.info(
                    "Unsupported system type for encyclopedia, encyclopedia metainfo not created.",
                    enc_status="unsupported_system_type",
                )
                return

            # Get the method type. An unknown method type does not stop the
            # processing.
            representative_method, method_type = self.method_type(method)

            # Get representative scc
            try:
                representative_scc_idx = self._backend[s_run][0].tmp["representative_scc_idx"]
                representative_scc = self._backend[s_scc][representative_scc_idx]
            except (KeyError, IndexError):
                representative_scc = None
                representative_scc_idx = None

            # Create one context that holds all details
            context = Context(
                system_type=system_type,
                method_type=method_type,
                run_type=run_type_name,
                representative_system=representative_system,
                representative_method=representative_method,
                representative_scc=representative_scc,
                representative_scc_idx=representative_scc_idx,
            )

            # Put the encyclopedia section into backend
            self._backend.add_mi2_section(sec_enc)
            self.fill(context)
        except Exception:
            self.logger.error(
                "Failed to create an Encyclopedia entry due to an unhandlable exception.",
                enc_status="failure",
            )
            raise  # Reraise for the caller to log the exception as well
        else:
            self.logger.info(
                "Successfully created metainfo for Encyclopedia.",
                enc_status="success",
            )
364
365


366
class MaterialNormalizer():
    """A base class that is used for processing material-related information
    in the Encyclopedia.

    Each helper method computes one quantity and stores it on the metainfo
    section passed in as ``material``.
    """
    def __init__(self, backend: LocalBackend, logger):
        self.backend = backend
        self.logger = logger

    def atom_labels(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the chemical symbols of ``std_atoms`` as the atom labels."""
        material.atom_labels = std_atoms.get_chemical_symbols()

    def atom_positions(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the scaled positions of ``std_atoms``, requested with
        ``wrap=False``.
        """
        material.atom_positions = std_atoms.get_scaled_positions(wrap=False)

    @abstractmethod
    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the normalized cell. To be implemented by subclasses."""
        pass

    def cell_volume(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the volume of ``std_atoms`` scaled by (1e-10)**3.

        NOTE(review): presumably converts cubic angstroms to cubic metres --
        confirm the length unit of ``std_atoms``.
        """
        material.cell_volume = float(std_atoms.get_volume() * (1e-10)**3)

    def formula(self, material: Material, names: List[str], counts: List[int]) -> None:
        """Stores the formula string built from ``names`` and ``counts``."""
        formula = structure.get_formula_string(names, counts)
        material.formula = formula

    def formula_reduced(self, material: Material, names: list, counts_reduced: list) -> None:
        """Stores the formula string built from ``names`` and the reduced
        counts.
        """
        formula = structure.get_formula_string(names, counts_reduced)
        material.formula_reduced = formula

    def material_hash(self, material: Material, spg_number: int, wyckoff_sets: List[WyckoffSet]) -> None:
        """Stores a hash of the symmetry string derived from the space group
        number and the Wyckoff sets.
        """
        # Create and store hash based on SHA512
        norm_hash_string = structure.get_symmetry_string(spg_number, wyckoff_sets)
        material.material_hash = hash(norm_hash_string)

    def number_of_atoms(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the number of atoms in ``std_atoms``."""
        material.number_of_atoms = len(std_atoms)

    @abstractmethod
    def normalize(self, ctx: Context) -> None:
        """Fills the material metainfo for the given context. To be
        implemented by subclasses.
        """
        pass

407

408
class MaterialBulkNormalizer(MaterialNormalizer):
    """Processes structure related metainfo for Encyclopedia bulk structures.
    """
    # Element name -> name of the corresponding anion in a binary compound.
    _ANION_NAMES = {
        "Antimony": "Antimonide",
        "Arsenic": "Arsenide",
        "Boron": "Boride",
        "Carbon": "Carbide",
        "Chlorine": "Chloride",
        "Germanium": "Germanide",
        "Selenium": "Selenide",
        "Bromine": "Bromide",
        "Tellurium": "Telluride",
        "Iodine": "Iodide",
        "Polonium": "Polonide",
        "Astatine": "Astatide",
        "Fluorine": "Fluoride",
        "Silicon": "Silicide",
        "Sulfur": "Sulfide",
        "Nitrogen": "Nitride",
        "Oxygen": "Oxide",
        "Hydrogen": "Hydride",
        "Phosphorus": "Phosphide",
    }

    # Alkaline, alkaline-earth elements and aluminium have a fixed valence and
    # never receive a Roman numeral. Both the American and the IUPAC spellings
    # are listed so the check works whichever one the element name table uses.
    _FIXED_VALENCE_CATIONS = frozenset({
        "Lithium", "Sodium", "Potassium", "Rubidium", "Cesium", "Caesium",
        "Francium", "Beryllium", "Magnesium", "Calcium", "Strontium",
        "Barium", "Radium", "Aluminum", "Aluminium",
    })

    # Rows are (cation count, anion count, Roman numeral). A cation count of
    # None means that only the anion count is checked.
    _HALIDE_NUMERALS = (
        (None, 2, "II"),
        (None, 3, "III"),
        (None, 4, "IV"),
        (None, 5, "V"),
        (None, 6, "VI"),
        (None, 7, "VII"),
    )
    _CHALCOGENIDE_NUMERALS = (
        (1, 1, "II"),
        (2, 1, "I"),
        (1, 2, "IV"),
        (2, 3, "III"),
        (2, 5, "V"),
        (1, 3, "VI"),
        (2, 7, "VII"),
    )
    _PNICTIDE_NUMERALS = (
        (1, 1, "III"),
        (1, 2, "VI"),
        (3, 2, "II"),
        (3, 4, "IV"),
        (3, 5, "V"),
        (3, 7, "VII"),
    )
    _CARBIDE_NUMERALS = (
        (1, 1, "IV"),
        (2, 1, "II"),
        (4, 1, "I"),
        (4, 3, "III"),
        (4, 5, "V"),
        (2, 3, "VI"),
        (4, 7, "VII"),
    )
    # Anion name -> table used to look up the Roman numeral of the cation.
    _NUMERAL_TABLES = {
        "Fluoride": _HALIDE_NUMERALS,
        "Chloride": _HALIDE_NUMERALS,
        "Bromide": _HALIDE_NUMERALS,
        "Iodide": _HALIDE_NUMERALS,
        "Hydride": _HALIDE_NUMERALS,
        "Oxide": _CHALCOGENIDE_NUMERALS,
        "Sulfide": _CHALCOGENIDE_NUMERALS,
        "Selenide": _CHALCOGENIDE_NUMERALS,
        "Nitride": _PNICTIDE_NUMERALS,
        "Phosphide": _PNICTIDE_NUMERALS,
        "Carbide": _CARBIDE_NUMERALS,
    }

    def atomic_density(self, properties: Properties, repr_system: Atoms) -> None:
        """Stores the number of atoms of ``repr_system`` divided by its volume
        scaled by (1e-10)**3.
        """
        orig_n_atoms = len(repr_system)
        orig_volume = repr_system.get_volume() * (1e-10)**3
        properties.atomic_density = float(orig_n_atoms / orig_volume)

    def bravais_lattice(self, material: Material, section_symmetry: Section) -> None:
        """Copies ``bravais_lattice`` from the symmetry section."""
        bravais_lattice = section_symmetry["bravais_lattice"]
        material.bravais_lattice = bravais_lattice

    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the cell of ``std_atoms`` scaled by 1e-10."""
        cell_normalized = std_atoms.get_cell()
        cell_normalized *= 1e-10
        material.cell_normalized = cell_normalized

    def cell_primitive(self, material: Material, prim_atoms: Atoms) -> None:
        """Stores the cell of ``prim_atoms`` scaled by 1e-10."""
        cell_prim = prim_atoms.get_cell()
        cell_prim *= 1e-10
        material.cell_primitive = cell_prim

    def crystal_system(self, material: Material, section_symmetry: Section) -> None:
        """Copies ``crystal_system`` from the symmetry section."""
        material.crystal_system = section_symmetry["crystal_system"]

    def has_free_wyckoff_parameters(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        """Stores whether the analyzer reports free Wyckoff parameters."""
        has_free_param = symmetry_analyzer.get_has_free_wyckoff_parameters()
        material.has_free_wyckoff_parameters = has_free_param

    def lattice_parameters(self, material: Material, std_atoms: Atoms) -> None:
        """Stores the lattice parameters computed from the cell of
        ``std_atoms`` scaled by 1e-10.
        """
        cell_normalized = std_atoms.get_cell() * 1E-10
        material.lattice_parameters = structure.get_lattice_parameters(cell_normalized)

    def mass_density(self, properties: Properties, repr_system: Atoms) -> None:
        """Stores the summed atomic mass of ``repr_system`` divided by its
        volume scaled by (1e-10)**3.
        """
        mass = structure.get_summed_atomic_mass(repr_system.get_atomic_numbers())
        orig_volume = repr_system.get_volume() * (1e-10)**3
        properties.mass_density = float(mass / orig_volume)

    def material_name(self, material: Material, symbols: list, numbers: list) -> None:
        """Stores a human-readable name for systems with one or two elements.

        A single-element system is named after the element. A binary system
        is named "<cation> <anion>", where the second element receives its
        anion name. When the anion has a numeral table and the cation is not
        a fixed-valence element, a Roman numeral chosen from the element
        counts is appended to the cation, e.g. "Iron(III) Oxide". Systems
        with any other number of elements are left unnamed.

        Args:
            material: The section whose ``material_name`` is set.
            symbols: Chemical symbols of the elements.
            numbers: Element counts, in the same order as ``symbols``.
        """
        # Systems with one element are named after it
        if len(symbols) == 1:
            number = ase.data.atomic_numbers[symbols[0]]
            material.material_name = ase.data.atomic_names[number]

        # Binary systems have specific names
        elif len(symbols) == 2:
            atomicnumbers = [ase.data.atomic_numbers[i] for i in symbols]
            cation, anion = [ase.data.atomic_names[i] for i in atomicnumbers]

            # Non-metal elements are anions in the binary compounds and receive the -ide suffix
            anion = self._ANION_NAMES.get(anion, anion)
            name = cation + " " + anion

            # Only cations of variable valence get a Roman numeral.
            table = self._NUMERAL_TABLES.get(anion)
            if table is not None and cation not in self._FIXED_VALENCE_CATIONS:
                for n_cation, n_anion, numeral in table:
                    if (n_cation is None or numbers[0] == n_cation) and numbers[1] == n_anion:
                        name = cation + "(" + numeral + ")" + " " + anion
                        break

            material.material_name = name

    def periodicity(self, material: Material) -> None:
        """Stores a periodicity that is True in all three directions."""
        # The builtin bool is used because the np.bool alias was removed from
        # NumPy.
        material.periodicity = np.array([True, True, True], dtype=bool)

    def point_group(self, material: Material, section_symmetry: Section) -> None:
        """Copies ``point_group`` from the symmetry section."""
        point_group = section_symmetry["point_group"]
        material.point_group = point_group

    def space_group_number(self, material: Material, spg_number: int) -> None:
        """Stores the given space group number."""
        material.space_group_number = spg_number

    def space_group_international_short_symbol(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        """Stores the short international space group symbol reported by the
        analyzer.
        """
        spg_int_symb = symmetry_analyzer.get_space_group_international_short()
        material.space_group_international_short_symbol = spg_int_symb

    def material_classification(self, material: Material, section_system: Section) -> None:
        """Stores the Springer classifications as a JSON string.

        Nothing is stored when the system has no Springer section or when the
        section carries neither of the two classification values.
        """
        try:
            sec_springer = section_system["section_springer_material"][0]
        except Exception:
            return

        classes: Dict[str, List[str]] = {}
        try:
            classifications = sec_springer['springer_classification']
        except KeyError:
            pass
        else:
            classes["material_class_springer"] = classifications
        try:
            compound_classes = sec_springer['springer_compound_class']
        except KeyError:
            pass
        else:
            classes["compound_class_springer"] = compound_classes
        if classes:
            material.material_classification = json.dumps(classes)

    def structure_type(self, material: Material, section_system: Section) -> None:
        """Stores a short structure type derived from the prototype notes.

        Nothing is stored when the prototype notes are unavailable or when
        they do not match one of the known notes.
        """
        try:
            sec_prototype = section_system["section_prototype"][0]
            notes = sec_prototype.tmp['prototype_notes']
        except Exception:
            return

        # Only relevant information hidden in "notes" is handed over TODO:
        # review and eventually add more ****ites which are commonly used
        # (see wurzite)
        note_map = {
            "CaTiO<sub>3</sub> Pnma Perovskite Structure": "perovskite",
            "Hypothetical Tetrahedrally Bonded Carbon with 4&ndash;Member Rings": "4-member ring",
            "In (A6) Structure": "fct",
            "$\\alpha$&ndash;Pa (A<sub>a</sub>) Structure": "bct",
            "Hypothetical BCT5 Si Structure": "bct5",
            "Wurtzite (ZnS, B4) Structure": "wurtzite",
            "Hexagonal Close Packed (Mg, A3) Structure": "hcp",
            "Half&ndash;Heusler (C1<sub>b</sub>) Structure": "half-Heusler",
            "Zincblende (ZnS, B3) Structure": "zincblende",
            "Cubic Perovskite (CaTiO<sub>3</sub>, E2<sub>1</sub>) Structure": "cubic perovskite",
            "$\\alpha$&ndash;Po (A<sub>h</sub>) Structure": "simple cubic",
            "Si<sub>46</sub> Clathrate Structure": "clathrate",
            "Cuprite (Cu<sub>2</sub>O, C3) Structure": "cuprite",
            "Heusler (L2<sub>1</sub>) Structure": "Heusler",
            "Rock Salt (NaCl, B1) Structure": "rock salt",
            "Face&ndash;Centered Cubic (Cu, A1) Structure": "fcc",
            "Diamond (A4) Structure": "diamond",
            "Body&ndash;Centered Cubic (W, A2) Structure": "bcc",
        }
        enc_note = note_map.get(notes, None)
        if enc_note is not None:
            material.structure_type = enc_note

    def structure_prototype(self, material: Material, section_system: Section) -> None:
        """Stores the prototype name, if one is available."""
        try:
            sec_prototype = section_system["section_prototype"][0]
            name = sec_prototype.tmp['prototype_name']
        except Exception:
            return

        material.structure_prototype = name

    def strukturbericht_designation(self, material: Material, section_system: Section) -> None:
        """Stores the Strukturbericht designation, if one is available, with
        the characters ``$``, ``_``, ``{`` and ``}`` removed.
        """
        try:
            sec_prototype = section_system["section_prototype"][0]
            strukturbericht = sec_prototype.tmp["strukturbericht_designation"]
        except Exception:
            return

        # In the current GUI we replace LaTeX with plain text
        strukturbericht = re.sub('[$_{}]', '', strukturbericht)
        material.strukturbericht_designation = strukturbericht

    def wyckoff_sets(self, material: Material, wyckoff_sets: List) -> None:
        """Creates one WyckoffSet subsection per given group.

        A WyckoffVariables subsection is added only for groups that have at
        least one of the x, y and z parameters set.
        """
        for group in wyckoff_sets:
            wset = material.m_create(WyckoffSet)
            if group.x is not None or group.y is not None or group.z is not None:
                variables = wset.m_create(WyckoffVariables)
                if group.x is not None:
                    variables.x = float(group.x)
                if group.y is not None:
                    variables.y = float(group.y)
                if group.z is not None:
                    variables.z = float(group.z)
            wset.indices = group.indices
            wset.element = group.element
            wset.wyckoff_letter = group.wyckoff_letter

    def normalize(self, ctx: Context) -> None:
        """Fills the material and properties metainfo of a bulk system.

        The greatest common divisor of the element counts in the primitive
        system is also stored in ``ctx.greatest_common_divisor``.
        """
        # Fetch resources
        sec_system = ctx.representative_system
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
        material = sec_enc.material
        properties = sec_enc.properties
        sec_symmetry = sec_system["section_symmetry"][0]
        symmetry_analyzer = sec_symmetry.tmp["symmetry_analyzer"]
        spg_number = symmetry_analyzer.get_space_group_number()
        std_atoms = symmetry_analyzer.get_conventional_system()
        prim_atoms = symmetry_analyzer.get_primitive_system()
        repr_atoms = sec_system.tmp["representative_atoms"]  # Temporary value stored by SystemNormalizer
        wyckoff_sets = symmetry_analyzer.get_wyckoff_sets_conventional(return_parameters=True)
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        greatest_common_divisor = reduce(gcd, counts)
        ctx.greatest_common_divisor = greatest_common_divisor
        # True division: the reduced counts are floating point values.
        reduced_counts = np.array(counts) / greatest_common_divisor

        # Fill structural information
        self.mass_density(properties, repr_atoms)
        self.material_hash(material, spg_number, wyckoff_sets)
        self.number_of_atoms(material, std_atoms)
        self.atom_labels(material, std_atoms)
        self.atom_positions(material, std_atoms)
        self.atomic_density(properties, repr_atoms)
        self.bravais_lattice(material, sec_symmetry)
        self.cell_normalized(material, std_atoms)
        self.cell_volume(material, std_atoms)
        self.crystal_system(material, sec_symmetry)
        self.cell_primitive(material, prim_atoms)
        self.formula(material, names, counts)
        self.formula_reduced(material, names, reduced_counts)
        self.has_free_wyckoff_parameters(material, symmetry_analyzer)
        self.lattice_parameters(material, std_atoms)
        self.material_name(material, names, reduced_counts)
        self.material_classification(material, sec_system)
        self.periodicity(material)
        self.point_group(material, sec_symmetry)
        self.space_group_number(material, spg_number)
        self.space_group_international_short_symbol(material, symmetry_analyzer)
        self.structure_type(material, sec_system)
        self.structure_prototype(material, sec_system)
        self.strukturbericht_designation(material, sec_system)
        self.wyckoff_sets(material, wyckoff_sets)
Lauri Himanen's avatar
Lauri Himanen committed
716
717


718
class Material2DNormalizer(MaterialNormalizer):
    """Fills the structure related Encyclopedia metainfo for 2D materials.
    """
    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        # The cell is scaled by 1e-10 before it is stored (presumably an
        # angstrom to meter conversion -- confirm against the metainfo
        # definition).
        scaled_cell = std_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_normalized = scaled_cell

    def cell_primitive(self, material: Material, prim_atoms: Atoms) -> None:
        # Same 1e-10 scaling as in cell_normalized, applied to the primitive
        # cell.
        scaled_cell = prim_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_primitive = scaled_cell

    def lattice_parameters(self, material: Material, std_atoms: Atoms, periodicity: np.array) -> None:
        # A 2D system is described by three lattice parameters: the lengths of
        # the two periodic cell vectors and the angle between them. The
        # remaining slots of the six-element array are filled with zeros.
        is_periodic = np.array(periodicity) == True  # noqa: E712
        periodic_indices = np.flatnonzero(is_periodic)

        cell = std_atoms.get_cell()
        vec_a = cell[periodic_indices[0], :] * 1e-10
        vec_b = cell[periodic_indices[1], :] * 1e-10
        len_a = np.linalg.norm(vec_a)
        len_b = np.linalg.norm(vec_b)

        # The cosine is clipped to [-1, 1] so that rounding errors cannot push
        # it outside the domain of arccos.
        cos_angle = np.clip(np.dot(vec_a, vec_b) / (len_a * len_b), -1.0, 1.0)
        angle = np.arccos(cos_angle)

        material.lattice_parameters = np.array([len_a, len_b, 0.0, angle, 0.0, 0.0])

    def periodicity(self, material: Material, std_atoms: Atoms) -> None:
        # The periodicity reported by the MatID conventional system is used
        # as is.
        material.periodicity = std_atoms.get_pbc()

    def get_symmetry_analyzer(self, original_system: Atoms) -> SymmetryAnalyzer:
        """Creates the SymmetryAnalyzer for a 2D system.

        The periodic directions are detected from the vacuum gaps along the
        cell vectors, the system is centered in the non-periodic direction and
        the detected periodicity is set on the analyzed copy.

        Args:
            original_system: The original simulation system. It is not
                modified, a copy is analyzed instead.

        Returns:
            SymmetryAnalyzer instantiated with the centered copy.

        Raises:
            ValueError: If the number of detected periodic directions is not
                exactly two.
        """
        # Size of the system along each direction, also taking the covalent
        # radii into account, compared against the cell vector lengths.
        occupied = matid.geometry.get_dimensions(original_system, [True, True, True])
        basis_lengths = np.linalg.norm(original_system.get_cell(), axis=1)
        vacuum_gaps = basis_lengths - occupied
        periodicity = vacuum_gaps <= config.normalize.cluster_threshold

        # Exactly two periodic axes are required. Anything else only happens
        # if the vacuum gap is not aligned with a cell vector or if the linear
        # gap search is insufficient (the structure is "wavy" making also the
        # gap highly nonlinear).
        if sum(periodicity) != 2:
            raise ValueError("Could not detect the periodic dimensions in a 2D system.")

        # Center the system in the non-periodic direction. The MatID
        # get_center_of_mass()-function takes periodicity into account and can
        # produce the correct center of mass, unlike the similar function in
        # ASE.
        center_of_mass = matid.geometry.get_center_of_mass(original_system)
        cell_midpoint = 0.5 * np.sum(original_system.get_cell(), axis=0)
        shift = cell_midpoint - center_of_mass
        shift[periodicity] = 0
        symm_system = original_system.copy()
        symm_system.translate(shift)
        symm_system.wrap()

        # With the detected periodicity set, SymmetryAnalyzer uses the
        # symmetry analysis designed for 2D systems.
        symm_system.set_pbc(periodicity)
        return SymmetryAnalyzer(
            symm_system,
            config.normalize.symmetry_tolerance,
            config.normalize.flat_dim_threshold
        )

    def normalize(self, ctx: Context) -> None:
        # Fetch the target section and the representative atoms (a temporary
        # value stored by SystemNormalizer)
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
        material = sec_enc.material
        repr_atoms = ctx.representative_system.tmp["representative_atoms"]

        # Symmetry analysis results
        symmetry_analyzer = self.get_symmetry_analyzer(repr_atoms)
        spg_number = symmetry_analyzer.get_space_group_number()
        wyckoff_sets = symmetry_analyzer.get_wyckoff_sets_conventional(return_parameters=False)
        std_atoms = symmetry_analyzer.get_conventional_system()
        prim_atoms = symmetry_analyzer.get_primitive_system()

        # Formula information. The greatest common divisor of the element
        # counts is also stored on the context.
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        greatest_common_divisor = reduce(gcd, counts)
        ctx.greatest_common_divisor = greatest_common_divisor
        reduced_counts = np.array(counts) / greatest_common_divisor

        # Fill metainfo. The periodicity is filled first because the lattice
        # parameters are computed from material.periodicity.
        self.periodicity(material, std_atoms)
        self.material_hash(material, spg_number, wyckoff_sets)
        for fill in (self.number_of_atoms, self.atom_labels, self.atom_positions, self.cell_normalized):
            fill(material, std_atoms)
        self.cell_primitive(material, prim_atoms)
        self.formula(material, names, counts)
        self.formula_reduced(material, names, reduced_counts)
        self.lattice_parameters(material, std_atoms, material.periodicity)
810
811


812
class Material1DNormalizer(MaterialNormalizer):
813
814
    """Processes structure related metainfo for Encyclopedia 1D structures.
    """
815
    def material_hash_1d(self, material: Material, prim_atoms: Atoms) -> None:
        """Stores the hash that identifies a 1D material. Different 1D
        materials are told apart by their formula (as stored in
        material.formula) and by the Coulomb matrix eigenvalue based structure
        fingerprint of the primitive system.

        Args:
            material: Section from which the formula is read and into which
                the hash is written.
            prim_atoms: Primitive system used for the fingerprint.
        """
        fingerprint = self.get_structure_fingerprint(prim_atoms)
        formula = material.formula

        # Note that 'hash' is the function imported from nomad.utils at the top
        # of the file, not the Python builtin.
        hash_seed = ", ".join([formula, fingerprint])
        material.material_hash = hash(hash_seed)
828

829
    def cell_normalized(self, material: Material, std_atoms: Atoms) -> None:
        # The cell is scaled by 1e-10 before it is stored (presumably an
        # angstrom to meter conversion -- confirm against the metainfo
        # definition).
        scaled_cell = std_atoms.get_cell()
        scaled_cell *= 1e-10
        material.cell_normalized = scaled_cell

834
    def lattice_parameters(self, material: Material, std_atoms: Atoms, periodicity: np.array) -> None:
        # A 1D system has a single lattice parameter: the length of the cell
        # vector in the periodic direction. The remaining slots of the
        # six-element array are filled with zeros.
        is_periodic = np.array(periodicity) == True  # noqa: E712
        periodic_indices = np.flatnonzero(is_periodic)

        cell = std_atoms.get_cell()
        length = np.linalg.norm(cell[periodic_indices[0], :]) * 1e-10
        material.lattice_parameters = np.array([length, 0.0, 0.0, 0.0, 0.0, 0.0])
840
841

    def periodicity(self, material: Material, prim_atoms: Atoms) -> None:
        # Size of the system along each direction, also taking the covalent
        # radii into account, compared against the cell vector lengths. A
        # direction is treated as periodic when its vacuum gap does not exceed
        # the configured cluster threshold.
        occupied = matid.geometry.get_dimensions(prim_atoms, [True, True, True])
        basis_lengths = np.linalg.norm(prim_atoms.get_cell(), axis=1)
        vacuum_gaps = basis_lengths - occupied
        periodicity = vacuum_gaps <= config.normalize.cluster_threshold

        # Exactly one periodic axis is required, otherwise an error is raised.
        # This only happens if the vacuum gap is not aligned with a cell
        # vector.
        if sum(periodicity) != 1:
            raise ValueError("Could not detect the periodic dimensions in a 1D system.")

        material.periodicity = periodicity
854

855
    def get_structure_fingerprint(self, prim_atoms: Atoms) -> str:
        """Builds a numeric fingerprint that coarsely encodes the atomic
        species and positions of the given system.

        The fingerprint is a discretized version of a sorted Coulomb matrix
        eigenspectrum (Grégoire Montavon, Katja Hansen, Siamac Fazli, Matthias
        Rupp, Franziska Biegler, Andreas Ziehe, Alexandre Tkatchenko, Anatole
        V. Lilienfeld, and Klaus-Robert Müller. Learning invariant
        representations of molecules for atomization energy prediction. In F.
        Pereira, C. J. C. Burges, L. Bottou, and K. Q. Weinberger, editors,
        Advances in Neural Information Processing Systems 25, pages 440–448.
        Curran Associates, Inc., 2012.).

        The eigenvalues are discretized so that structures can be matched in
        O(n) without comparing fingerprints against each other. Because a
        regular discretization suffers from the "edge problem", a robust
        discretization is used (Birget, Jean-Camille & Hong, Dawei & Memon,
        Nasir. (2003). Robust discretization, with an application to graphical
        passwords. IACR Cryptology ePrint Archive. 2003. 168.): for the
        1-dimensional domain two grids are created and each point is mapped to
        the first grid in which it is robust using a minimum tolerance
        parameter r, with the maximum tolerance being 5r.

        Other robust discretization methods can guarantee exact r-tolerance
        (e.g. Sonia Chiasson, Jayakumar Srinivasan, Robert Biddle, and P. C.
        van Oorschot. 2008. Centered discretization with application to
        graphical passwords. In Proceedings of the 1st Conference on
        Usability, Psychology, and Security (UPSEC’08). USENIX Association,
        USA, Article 6, 1–9.), but they require a predefined "correct"
        structure against which the search is done.

        Args:
            prim_atoms: Primitive system.

        Returns:
            The discretized eigenvalues converted to integers and joined into
            a single string with ";".
        """
        # Charge part: sqrt(Z_i * Z_j) for every pair of atoms
        atomic_numbers = prim_atoms.get_atomic_numbers()
        charge_part = np.sqrt(atomic_numbers[np.newaxis, :] * atomic_numbers[:, np.newaxis])

        # Distance part: (10 - distance) with negative values set to zero and a
        # zero diagonal. The minimum image convention must be used, without it
        # differently oriented atoms in the same cell may be detected as the
        # same material.
        positions = prim_atoms.get_positions()
        cell = prim_atoms.get_cell()
        distances = matid.geometry.get_distance_matrix(positions, positions, cell, pbc=True, mic=True)
        cmat = np.clip(10 - distances, a_min=0, a_max=None)
        np.fill_diagonal(cmat, 0)
        cmat = charge_part * cmat

        # Sorted eigenvalues, divided by 25 to reach a smaller scale where
        # integer numbers are meaningful.
        eigval, _ = np.linalg.eigh(cmat)
        eigval = np.sort(eigval) / 25

        # Robust discretization (see the docstring for details). r = 0.5
        # ensures that all grids are integers which can be uniquely mapped to
        # strings. If a finer grid is needed, adjust the eigenvalue scale
        # instead.
        dimension = 1
        r = 0.5
        spacing = 2 * r * (dimension + 1)
        phi_k = 2 * r * np.arange(dimension + 1)
        t = np.mod(eigval[np.newaxis, :] + phi_k[:, np.newaxis], spacing)
        grid_mask = (r <= t) & (t < r * (2 * dimension + 1))

        # For each eigenvalue pick the first grid in which it is safe
        safe_grid_k = np.argmax(grid_mask, axis=0)
        discretization = spacing * np.floor((eigval + (2 * r * safe_grid_k)) / spacing)
        discretization[safe_grid_k == 1] += 2 * r

        # Encode the discretized values into a string
        return ";".join(str(int(value)) for value in discretization)

    def get_symmetry_analyzer(self, original_system: Atoms) -> SymmetryAnalyzer:
        """Creates the SymmetryAnalyzer for a 1D system. The symmetry is
        analyzed from a copy of the original system on which full periodicity
        is enforced.

        Args:
            original_system: The original simulation system. It is not
                modified.

        Returns:
            SymmetryAnalyzer instantiated with the fully periodic copy of the
            original system.
        """
        fully_periodic = original_system.copy()
        fully_periodic.set_pbc(True)

        return SymmetryAnalyzer(
            fully_periodic,
            config.normalize.symmetry_tolerance,
            config.normalize.flat_dim_threshold
        )
956

957
958
959
960
    def get_std_atoms(self, periodicity: np.array, prim_atoms: Atoms) -> Atoms:
        """Creates the standardized system for a 1D material from a primitive
        system. A copy of the primitive system is translated so that its
        center of mass lies at the cell center in the non-periodic directions,
        after which the non-periodic cell vectors are shrunk so that the atoms
        just fit.

        Args:
            periodicity: Periodicity flags, one per cell axis. The flags are
                iterated axis by axis and are also used to index the
                translation vector, so a boolean array of length three is
                presumably expected (confirm against the caller).
            prim_atoms: Primitive system. It is not modified.

        Returns:
            Standardized structure that represents this material and from
            which the material hash will be constructed.
        """
        std_atoms = prim_atoms.copy()

        # Move the center of mass to the cell center, leaving the periodic
        # directions untouched.
        center_of_mass = matid.geometry.get_center_of_mass(prim_atoms)
        cell_midpoint = 0.5 * np.sum(std_atoms.get_cell(), axis=0)
        shift = cell_midpoint - center_of_mass
        shift[periodicity] = 0
        std_atoms.translate(shift)
        std_atoms.wrap()

        # Shrink the cell to just fit the system in the non-periodic
        # directions: the lowest scaled coordinate is moved to the origin and
        # the cell vector is scaled by the occupied fraction.
        # NOTE(review): if all atoms share the same scaled coordinate along a
        # non-periodic axis, that cell vector becomes a zero vector -- verify
        # that callers can handle this.
        scaled_pos = std_atoms.get_scaled_positions(wrap=False)
        cell = std_atoms.get_cell()
        shrunk_cell = np.array(cell)
        origin_shift = np.zeros(3)
        for axis, periodic in enumerate(periodicity):
            if periodic:
                continue
            lowest = np.min(scaled_pos[:, axis])
            highest = np.max(scaled_pos[:, axis])
            origin_shift -= cell[axis, :] * lowest
            shrunk_cell[axis] = cell[axis, :] * (highest - lowest)
        std_atoms.translate(origin_shift)
        std_atoms.set_cell(shrunk_cell)

        return std_atoms
Lauri Himanen's avatar
Lauri Himanen committed
996

997
    def normalize(self, ctx: Context) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
998
        # Fetch resources
Lauri Himanen's avatar
Lauri Himanen committed
999
        sec_system = ctx.representative_system
1000
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
For faster browsing, not all history is shown. View entire blame