encyclopedia.py 46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
17
from typing import Dict, List
from math import gcd as gcd
from functools import reduce
18
from abc import abstractmethod
19
from collections import OrderedDict
20
import json
21
import ase
22
import ase.data
23
from ase import Atoms
24
import numpy as np
25
from matid import SymmetryAnalyzer
26
import matid.geometry
27

28
from nomad.normalizing.normalizer import Normalizer, s_scc, s_system, s_frame_sequence, r_frame_sequence_to_sampling, s_sampling_method, r_frame_sequence_local_frames, r_scc_to_system
29
from nomad.metainfo.encyclopedia import Encyclopedia, Material, Calculation
30
from nomad.normalizing import structure
31
from nomad.utils import hash
32
from nomad import config
33
34
35
36
37
38
39
40
41
42


class EncyclopediaNormalizer(Normalizer):
    """
    This normalizer emulates the functionality of the old Encyclopedia backend.
    The data used by the encyclopedia have been assigned under new metainfo
    within section_encyclopedia. In the future these separate metainfos could
    be absorbed into the existing metainfo hiearchy.
    """
    def __init__(self, backend):
43
44
        super().__init__(backend)

45
    def run_type(self, calculation) -> str:
46
47
48
        """Decides what type of calculation this is: single_point, md,
        geometry_optimization, etc.
        """
49
50
51
        run_enums = Calculation.run_type.type
        run_type = run_enums.unavailable

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
        try:
            sccs = self._backend[s_scc]
        except Exception:
            sccs = []
        try:
            frame_sequences = self._backend[s_frame_sequence]
        except Exception:
            frame_sequences = []

        n_scc = len(sccs)
        n_frame_seq = len(frame_sequences)

        # Only one system, no sequences
        if n_scc == 1 and n_frame_seq == 0:
            program_name = self._backend["program_name"]
            if program_name == "elastic":
                # TODO move to taylor expansion as soon as data is correct in archive
69
                run_type = run_enums.elastic_constants
70
            else:
71
                run_type = run_enums.single_point
72
73
74
75
        # One sequence. Currently calculations with multiple sequences are
        # unsupported.
        elif n_frame_seq == 1:
            frame_seq = frame_sequences[0]
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

            # See if sampling_method is present
            try:
                i_sampling_method = frame_seq[r_frame_sequence_to_sampling]
            except KeyError:
                self.logger.info(
                    "Cannot determine encyclopedia run type because missing "
                    "value for frame_sequence_to_sampling_ref"
                )
                return run_type

            # See if local frames are present
            try:
                frames = frame_seq[r_frame_sequence_local_frames]
            except KeyError:
                self.logger.info(
                    "section_frame_sequence_local_frames not found although a "
                    "frame_sequence exists"
                )
                return run_type
            if len(frames) == 0:
                self.logger.info("No frames referenced in section_frame_sequence_local_frames")
                return run_type

100
            section_sampling_method = self._backend[s_sampling_method][i_sampling_method]
101
            sampling_method = section_sampling_method["sampling_method"]
102
103

            if sampling_method == "molecular_dynamics":
104
                run_type = run_enums.molecular_dynamics
105
            if sampling_method == "geometry_optimization":
106
                run_type = run_enums.geometry_optimization
107
            if sampling_method == "taylor_expansion":
108
                run_type = run_enums.phonon_calculation
109
110
111

        calculation.run_type = run_type
        return run_type
112

113
    def system_type(self, material: Material, calculation: Calculation) -> tuple:
114
115
116
117
118
        # Select the representative system from which system type is retrieved.
        # For geometry optimizations system type is analyzed from last relaxed
        # frame. For phonon calculations system type is analyzed from first
        # undistorted frame. For molecular dynamics system type is analyzed
        # from first frame
119
120
        system_type = config.services.unavailable_value
        system = None
121
122
        run_enums = Calculation.run_type.type
        system_enums = Material.system_type.type
123
124
125
126
        if calculation.run_type in {
                run_enums.geometry_optimization,
                run_enums.molecular_dynamics,
                run_enums.phonon_calculation}:
127
128
129
            frame_seqs = self._backend[s_frame_sequence]
            frame_seq = frame_seqs[0]
            frames = frame_seq[r_frame_sequence_local_frames]
130
            sccs = self._backend[s_scc]
131
            systems = self._backend[s_system]
132
            if calculation.run_type == run_enums.geometry_optimization:
133
134
135
                idx = -1
            elif calculation.run_type == run_enums.phonon_calculation:
                idx = 0
136
            elif calculation.run_type == run_enums.molecular_dynamics:
137
138
139
140
                idx = 0
            scc = sccs[frames[idx]]
            r_system = scc[r_scc_to_system]
            system = systems[r_system]
141
        elif calculation.run_type == run_enums.single_point:
142
            system = self._backend[s_system][0]
143
144

        # Try to find system type information from backend for the selected system.
145
        try:
146
            stype = system["system_type"]
147
148
        except KeyError:
            self.logger.info("System type information not available for encyclopedia")
149
        else:
Lauri Himanen's avatar
Lauri Himanen committed
150
            if stype == system_enums.bulk or stype == system_enums.one_d or stype == system_enums.two_d:
151
                system_type = stype
152
153
154

        material.system_type = system_type
        return system, system_type
155

156
    def method_type(self) -> str:
157
158
        pass

159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
    def mainfile_uri(self, calculation: Calculation):
        entry_info = self._backend["section_entry_info"][0]
        upload_id = entry_info["upload_id"]
        mainfile_path = entry_info["mainfile"]
        uri = f"nmd://R{upload_id}/data/{mainfile_path}"
        calculation.mainfile_uri = uri

    # def similar_materials(self) -> None:
        # pass

    # def calculation_pid(self):
        # pass

    # def calculation(self) -> None:
        # pass

    # def contributor_first_name(self) -> None:
        # pass

    # def contributor_last_name(self) -> None:
        # pass

    # def contributor_type(self) -> None:
        # pass

    # def contributors(self) -> None:
        # pass

    # def number_of_calculations(self) -> None:
        # pass

190
    def fill(self, run_type, system_type, representative_system):
191
        # Fill structure related metainfo
Lauri Himanen's avatar
Lauri Himanen committed
192
        struct = None
193
        if system_type == Material.system_type.type.bulk:
194
            struct = StructureBulk(self._backend, self.logger)
Lauri Himanen's avatar
Lauri Himanen committed
195
        elif system_type == Material.system_type.type.two_d:
196
            struct = Structure2D(self._backend, self.logger)
197
198
        elif system_type == Material.system_type.type.one_d:
            struct = Structure1D(self._backend, self.logger)
Lauri Himanen's avatar
Lauri Himanen committed
199
        if struct is not None:
200
            struct.fill(representative_system)
201

202
203
204
205
        # Fill method related metainfo
        method = Method(self._backend, self.logger)
        method.fill()

206
207
    def normalize(self, logger=None) -> None:
        super().normalize(logger)
208
        system_enums = Material.system_type.type
209

210
        # Initialise metainfo structure
211
212
213
        sec_enc = Encyclopedia()
        material = sec_enc.m_create(Material)
        calculation = sec_enc.m_create(Calculation)
214

215
216
217
        # Get generic data
        self.mainfile_uri(calculation)

218
        # Determine run type, stop if unknown
219
        run_type = self.run_type(calculation)
220
221
222
        if run_type == config.services.unavailable_value:
            self.logger.info("unknown run type for encyclopedia")
            return
223

224
        # Get the system type, stop if unknown
225
        representative_system, system_type = self.system_type(material, calculation)
226
        if system_type != system_enums.bulk and system_type != system_enums.two_d and system_type != system_enums.one_d:
227
            self.logger.info("unknown system type for encyclopedia")
228
229
            return

230
231
        # Get the method type, stop if unknown
        # TODO
232

233
234
235
        # Process all present properties
        # TODO

236
        # Put the encyclopedia section into backend
237
        self._backend.add_mi2_section(sec_enc)
238
239
240
241
242
243
244
        self.fill(run_type, system_type, representative_system)


class Structure():
    """A base class that is used for processing structure related information
    in the Encylopedia.
    """
245
246
247
248
    def __init__(self, backend, logger):
        self.backend = backend
        self.logger = logger

249
    def atom_labels(self, material: Material, std_atoms: ase.Atoms) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
250
        material.atom_labels = std_atoms.get_chemical_symbols()
251
252

    def atom_positions(self, material: Material, std_atoms: ase.Atoms) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
253
        material.atom_positions = std_atoms.get_scaled_positions(wrap=False)
254
255
256
257
258

    @abstractmethod
    def cell_normalized(self, material: Material, std_atoms: ase.Atoms) -> None:
        pass

259
260
261
    @abstractmethod
    def cell_primitive(self, material: Material, std_atoms: ase.Atoms) -> None:
        pass
262

263
    def cell_volume(self, calculation: Calculation, std_atoms: ase.Atoms) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
264
        calculation.cell_volume = float(std_atoms.get_volume() * 1e-10**3)
265

Lauri Himanen's avatar
Lauri Himanen committed
266
267
268
    def formula(self, material: Material, names: List[str], counts: List[int]) -> None:
        formula = structure.get_formula_string(names, counts)
        material.formula = formula
269

270
    def formula_reduced(self, material: Material, names: list, counts_reduced: list) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
271
272
        formula = structure.get_formula_string(names, counts_reduced)
        material.formula_reduced = formula
273

274
275
276
    def material_hash(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        wyckoff_sets = symmetry_analyzer.get_wyckoff_sets_conventional()
        space_group_number = symmetry_analyzer.get_space_group_number()
277

278
279
        # Create and store hash based on SHA512
        norm_hash_string = structure.get_symmetry_string(space_group_number, wyckoff_sets)
280
        material.material_hash = hash(norm_hash_string)
281
282

    def number_of_atoms(self, material: Material, std_atoms: ase.Atoms) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
283
        material.number_of_atoms = len(std_atoms)
284

Lauri Himanen's avatar
Lauri Himanen committed
285
286
287
288
    @abstractmethod
    def fill(self, sec_system) -> None:
        pass

289
290
291
292

class StructureBulk(Structure):
    """Processes structure related metainfo for Encyclopedia bulk structures.
    """
293
294
295
296
    def atomic_density(self, calculation: Calculation, repr_system: ase.Atoms) -> None:
        orig_n_atoms = len(repr_system)
        orig_volume = repr_system.get_volume() * (1e-10)**3
        calculation.atomic_density = float(orig_n_atoms / orig_volume)
297

298
299
    def bravais_lattice(self, material: Material, section_symmetry: Dict) -> None:
        bravais_lattice = section_symmetry["bravais_lattice"]
300
301
302
303
304
305
306
        material.bravais_lattice = bravais_lattice

    def cell_normalized(self, material: Material, std_atoms: ase.Atoms) -> None:
        cell_normalized = std_atoms.get_cell()
        cell_normalized *= 1e-10
        material.cell_normalized = cell_normalized

307
308
309
310
311
    def cell_primitive(self, material: Material, prim_atoms: ase.Atoms) -> None:
        cell_prim = prim_atoms.get_cell()
        cell_prim *= 1e-10
        material.cell_primitive = cell_prim

312
313
314
    def crystal_system(self, material: Material, section_symmetry: Dict) -> None:
        material.crystal_system = section_symmetry["crystal_system"]

315
316
317
318
    def has_free_wyckoff_parameters(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        has_free_param = symmetry_analyzer.get_has_free_wyckoff_parameters()
        material.has_free_wyckoff_parameters = has_free_param

319
320
321
322
    def lattice_parameters(self, calculation: Calculation, std_atoms: ase.Atoms) -> None:
        cell_normalized = std_atoms.get_cell()
        calculation.lattice_parameters = structure.get_lattice_parameters(cell_normalized)

323
324
    def mass_density(self, calculation: Calculation, repr_system: ase.Atoms) -> None:
        mass = structure.get_summed_atomic_mass(repr_system.get_atomic_numbers())
325
        orig_volume = repr_system.get_volume() * (1e-10)**3
326
327
        calculation.mass_density = float(mass / orig_volume)

328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
    def material_name(self, material: Material, symbols: list, numbers: list) -> None:
        # Systems with one element are named after it
        if len(symbols) == 1:
            number = ase.data.atomic_numbers[symbols[0]]
            name = ase.data.atomic_names[number]
            material.material_name = name

        # Binary systems have specific names
        if len(symbols) == 2:
            atomicnumbers = [ase.data.atomic_numbers[i] for i in symbols]
            names = [ase.data.atomic_names[i] for i in atomicnumbers]

            # Non-metal elements are anions in the binary compounds and receive the -ide suffix
            if names[1] == "Antimony":
                names[1] = names[1][:-1] + "ide"
            if names[1] == "Arsenic":
                names[1] = names[1][:-1] + "de"
            if names[1] == "Boron" or names[1] == "Carbon":
                names[1] = names[1][:-2] + "ide"
            if names[1] == "Chlorine" or names[1] == "Germanium" or names[1] == "Selenium" or names[1] == "Bromine" \
               or names[1] == "Tellurium" or names[1] == "Iodine" or names[1] == "Polonium" or names[1] == "Astatine" or \
               names[1] == "Fluorine":
                names[1] = names[1][:-2] + "de"
            if names[1] == "Silicon" or names[1] == "Sulfur":
                names[1] = names[1][:-2] + "ide"
            if names[1] == "Nitrogen" or names[1] == "Oxygen" or names[1] == "Hydrogen" or names[1] == "Phosphorus":
                names[1] = names[1][:-4] + "ide"

            name = names[0] + " " + names[1]

            if names[1] == "Fluoride" or names[1] == "Chloride" or names[1] == "Bromide" or \
               names[1] == "Iodide" or names[1] == "Hydride":

                # Non-metals with elements of variable valence, therefore we remove alkaline and
                # alkaline-earth elements, which have fixed valence
                # Only the most electronegative non-metals are supposed to make ionic compounds
                if names[0] != "Lithium" and names[0] != "Sodium" and names[0] != "Potassium" and \
                   names[0] != "Rubidium" and names[0] != "Cesium" and names[0] != "Francium" and \
                   names[0] != "Beryllium" and names[0] != "Magnesium" and names[0] != "Calcium" and \
                   names[0] != "Strontium" and names[0] != "Barium" and names[0] != "Radium" and \
                   names[0] != "Aluminum":

                    if numbers[1] == 2:
                        name = names[0] + "(II)" + " " + names[1]
                    elif numbers[1] == 3:
                        name = names[0] + "(III)" + " " + names[1]
                    elif numbers[1] == 4:
                        name = names[0] + "(IV)" + " " + names[1]
                    elif numbers[1] == 5:
                        name = names[0] + "(V)" + " " + names[1]
                    elif numbers[1] == 6:
                        name = names[0] + "(VI)" + " " + names[1]
                    elif numbers[1] == 7:
                        name = names[0] + "(VII)" + " " + names[1]

            if names[1] == "Oxide" or names[1] == "Sulfide" or names[1] == "Selenide":
                if names[0] != "Lithium" and names[0] != "Sodium" and names[0] != "Potassium" and \
                   names[0] != "Rubidium" and names[0] != "Cesium" and names[0] != "Francium" and \
                   names[0] != "Beryllium" and names[0] != "Magnesium" and names[0] != "Calcium" and \
                   names[0] != "Strontium" and names[0] != "Barium" and names[0] != "Radium" and \
                   names[0] != "Aluminum":

                    if numbers[0] == 1 and numbers[1] == 1:
                        name = names[0] + "(II)" + " " + names[1]
                    elif numbers[0] == 2 and numbers[1] == 1:
                        name = names[0] + "(I)" + " " + names[1]
                    elif numbers[0] == 1 and numbers[1] == 2:
                        name = names[0] + "(IV)" + " " + names[1]
                    elif numbers[0] == 2 and numbers[1] == 3:
                        name = names[0] + "(III)" + " " + names[1]
                    elif numbers[0] == 2 and numbers[1] == 5:
                        name = names[0] + "(V)" + " " + names[1]
                    elif numbers[0] == 1 and numbers[1] == 3:
                        name = names[0] + "(VI)" + " " + names[1]
                    elif numbers[0] == 2 and numbers[1] == 7:
                        name = names[0] + "(VII)" + " " + names[1]

            if names[1] == "Nitride" or names[1] == "Phosphide":
                if names[0] != "Lithium" and names[0] != "Sodium" and names[0] != "Potassium" and \
                   names[0] != "Rubidium" and names[0] != "Cesium" and names[0] != "Francium" and \
                   names[0] != "Beryllium" and names[0] != "Magnesium" and names[0] != "Calcium" and \
                   names[0] != "Strontium" and names[0] != "Barium" and names[0] != "Radium" and \
                   names[0] != "Aluminum":

                    if numbers[0] == 1 and numbers[1] == 1:
                        name = names[0] + "(III)" + " " + names[1]
                    if numbers[0] == 1 and numbers[1] == 2:
                        name = names[0] + "(VI)" + " " + names[1]
                    elif numbers[0] == 3 and numbers[1] == 2:
                        name = names[0] + "(II)" + " " + names[1]
                    elif numbers[0] == 3 and numbers[1] == 4:
                        name = names[0] + "(IV)" + " " + names[1]
                    elif numbers[0] == 3 and numbers[1] == 5:
                        name = names[0] + "(V)" + " " + names[1]
                    elif numbers[0] == 3 and numbers[1] == 7:
                        name = names[0] + "(VII)" + " " + names[1]

            if names[1] == "Carbide":
                if names[0] != "Lithium" and names[0] != "Sodium" and names[0] != "Potassium" and \
                   names[0] != "Rubidium" and names[0] != "Cesium" and names[0] != "Francium" and \
                   names[0] != "Beryllium" and names[0] != "Magnesium" and names[0] != "Calcium" and \
                   names[0] != "Strontium" and names[0] != "Barium" and names[0] != "Radium" and \
                   names[0] != "Aluminum":

                    if numbers[0] == 1 and numbers[1] == 1:
                        name = names[0] + "(IV)" + " " + names[1]
                    if numbers[0] == 2 and numbers[1] == 1:
                        name = names[0] + "(II)" + " " + names[1]
                    if numbers[0] == 4 and numbers[1] == 1:
                        name = names[0] + "(I)" + " " + names[1]
                    if numbers[0] == 4 and numbers[1] == 3:
                        name = names[0] + "(III)" + " " + names[1]
                    if numbers[0] == 4 and numbers[1] == 5:
                        name = names[0] + "(V)" + " " + names[1]
                    if numbers[0] == 2 and numbers[1] == 3:
                        name = names[0] + "(VI)" + " " + names[1]
                    if numbers[0] == 4 and numbers[1] == 7:
                        name = names[0] + "(VII)" + " " + names[1]

            material.material_name = name

449
450
    def periodicity(self, material: Material) -> None:
        material.periodicity = np.array([0, 1, 2], dtype=np.int8)
451
452
453
454

    def point_group(self, material: Material, section_symmetry: Dict) -> None:
        point_group = section_symmetry["point_group"]
        material.point_group = point_group
455

456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
    def space_group_number(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        spg_number = symmetry_analyzer.get_space_group_number()
        material.space_group_number = spg_number

    def space_group_international_short_symbol(self, material: Material, symmetry_analyzer: SymmetryAnalyzer) -> None:
        spg_int_symb = symmetry_analyzer.get_space_group_international_short()
        material.space_group_international_short_symbol = spg_int_symb

    def material_classification(self, material: Material, section_system) -> None:
        try:
            sec_springer = section_system["section_springer_material"][0]
        except Exception:
            return

        classes: Dict[str, List[str]] = {}
        try:
            classifications = sec_springer['springer_classification']
        except KeyError:
            pass
        else:
            classes["material_class_springer"] = classifications
        try:
            compound_classes = sec_springer['springer_compound_class']
        except KeyError:
            pass
        else:
            classes["compound_class_springer"] = compound_classes
        if classes:
            material.material_classification = json.dumps(classes)

    def structure_type(self, material: Material, section_system) -> None:
        try:
            sec_prototype = section_system["section_prototype"][0]
489
            notes = sec_prototype.tmp['prototype_notes']
490
491
492
        except Exception:
            return

493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
        # Only relevant information hidden in "notes" is handed over TODO:
        # review and eventually add more ****ites which are commonly used
        # (see wurzite)
        if notes in {
           'perovskite',
           '4-member ring',
           'fct',
           'bct',
           'bct5',
           'wurtzite',
           'hcp',
           'half-Heusler',
           'zincblende',
           'cubic perovskite',
           'simple cubic',
           'clathrate',
           'cuprite',
           'Heusler',
           'rock salt',
           'fcc',
           'diamond',
           'bcc'}:
            material.structure_type = notes

    def structure_prototype(self, material: Material, section_system) -> None:
518
        try:
519
520
            sec_prototype = section_system["section_prototype"][0]
            name = sec_prototype.tmp['prototype_name']
521
        except Exception:
522
523
524
525
526
527
528
529
530
531
532
533
            return

        material.structure_prototype = name

    def strukturbericht_designation(self, material: Material, section_system) -> None:
        try:
            sec_prototype = section_system["section_prototype"][0]
            strukturbericht = sec_prototype.tmp["strukturbericht_designation"]
        except Exception:
            return

        material.strukturbericht_designation = strukturbericht
534

535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
    def wyckoff_groups(self, material: Material, wyckoff_sets: Dict) -> None:
        wyckoff_list = []
        for group in wyckoff_sets:
            data = {
                "wyckoff_letter": group.wyckoff_letter,
                "element": group.element,
                "indices": group.indices,
                "variables": {
                    "x": group.x,
                    "y": group.y,
                    "z": group.z,
                },
            }
            wyckoff_list.append(data)
        material.wyckoff_groups = json.dumps(wyckoff_list, sort_keys=True)

551
    def fill(self, sec_system) -> None:
552
        # Fetch resources
553
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
554
555
        material = sec_enc.material
        calculation = sec_enc.calculation
556
557
        sec_symmetry = sec_system["section_symmetry"][0]
        symmetry_analyzer = sec_system["section_symmetry"][0].tmp["symmetry_analyzer"]
558
559
        std_atoms = symmetry_analyzer.get_conventional_system()
        prim_atoms = symmetry_analyzer.get_primitive_system()
560
        repr_atoms = sec_system.tmp["representative_atoms"]  # Temporary value stored by SystemNormalizer
561
        wyckoff_sets = symmetry_analyzer.get_wyckoff_sets_conventional()
562
563
564
565
566
567
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        greatest_common_divisor = reduce(gcd, counts)
        reduced_counts = np.array(counts) / greatest_common_divisor

        # Fill structural information
        self.mass_density(calculation, repr_atoms)
568
        self.material_hash(material, symmetry_analyzer)
569
570
        self.number_of_atoms(material, std_atoms)
        self.atom_labels(material, std_atoms)
571
        self.atom_positions(material, std_atoms)
572
573
574
575
576
577
578
579
580
581
        self.atomic_density(calculation, repr_atoms)
        self.bravais_lattice(material, sec_symmetry)
        self.cell_normalized(material, std_atoms)
        self.cell_volume(calculation, std_atoms)
        self.crystal_system(material, sec_symmetry)
        self.cell_primitive(material, prim_atoms)
        self.formula(material, names, counts)
        self.formula_reduced(material, names, reduced_counts)
        self.has_free_wyckoff_parameters(material, symmetry_analyzer)
        self.lattice_parameters(calculation, std_atoms)
582
        self.material_name(material, names, reduced_counts)
583
        self.material_classification(material, sec_system)
584
585
        self.periodicity(material)
        self.point_group(material, sec_symmetry)
586
587
588
        self.space_group_number(material, symmetry_analyzer)
        self.space_group_international_short_symbol(material, symmetry_analyzer)
        self.structure_type(material, sec_system)
589
590
        self.structure_prototype(material, sec_system)
        self.strukturbericht_designation(material, sec_system)
591
        self.wyckoff_groups(material, wyckoff_sets)
Lauri Himanen's avatar
Lauri Himanen committed
592
593
594
595
596


class Structure2D(Structure):
    """Processes structure related metainfo for Encyclopedia 2D structures.
    """
597
598
599
600
601
602
603
604
605
606
    def cell_normalized(self, material: Material, std_atoms: ase.Atoms) -> None:
        cell_normalized = std_atoms.get_cell()
        cell_normalized *= 1e-10
        material.cell_normalized = cell_normalized

    def cell_primitive(self, material: Material, prim_atoms: ase.Atoms) -> None:
        cell_prim = prim_atoms.get_cell()
        cell_prim *= 1e-10
        material.cell_primitive = cell_prim

Lauri Himanen's avatar
Lauri Himanen committed
607
608
609
610
611
612
613
614
615
    def lattice_parameters(self, calculation: Calculation, std_atoms: Atoms, periodic_indices: np.array) -> None:
        # 2D systems only have three lattice parameter: two length and angle between them
        cell = std_atoms.get_cell()
        a_vec = cell[periodic_indices[0], :] * 1e-10
        b_vec = cell[periodic_indices[1], :] * 1e-10
        a = np.linalg.norm(a_vec)
        b = np.linalg.norm(b_vec)
        alpha = np.clip(np.dot(a_vec, b_vec) / (a * b), -1.0, 1.0)
        alpha = np.arccos(alpha)
616
617
        calculation.lattice_parameters = np.array([a, b, 0.0, alpha, 0.0, 0.0])

Lauri Himanen's avatar
Lauri Himanen committed
618
619
620
621
622
623
624
625
626
627
628
629
630
631
    def periodicity(self, material: Material, std_atoms: Atoms) -> None:
        # Determine the periodicity by examining vacuum gaps
        vacuum_directions = structure.find_vacuum_directions(
            std_atoms,
            threshold=config.normalize.cluster_threshold
        )

        # If one axis is not periodic, return. This only happens if the vacuum
        # gap is not aligned with a cell vector.
        if sum(vacuum_directions) != 1:
            raise ValueError("Could not detect the periodic dimensions in a 2D system.")

        periodic_indices = np.where(vacuum_directions == False)[0]  # noqa: E712
        material.periodicity = np.sort(np.array(periodic_indices, dtype=np.int8))
632

633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
    def get_symmetry_analyzer(self, original_system: Atoms) -> SymmetryAnalyzer:
        # Determine the periodicity by examining vacuum gaps
        vacuum_directions = structure.find_vacuum_directions(original_system, threshold=7.0)
        periodicity = np.invert(vacuum_directions)

        # If two axis are not periodic, return. This only happens if the vacuum
        # gap is not aligned with a cell vector.
        if sum(periodicity) != 2:
            self.logger.warn("Could not detect the periodic dimensions in a 2D system.")
            return False

        # Center the system in the non-periodic direction, also taking
        # periodicity into account. The get_center_of_mass()-function in MatID
        # takes into account periodicity and can produce the correct CM unlike
        # the similar function in ASE.
        pbc_cm = matid.geometry.get_center_of_mass(original_system)
        cell_center = 0.5 * np.sum(original_system.get_cell(), axis=0)
        translation = cell_center - pbc_cm
        translation[periodicity] = 0
        symm_system = original_system.copy()
        symm_system.translate(translation)
654
        symm_system.wrap()
655
656
657

        # Set the periodicity according to detected periodicity in order for
        # SymmetryAnalyzer to use the symmetry analysis designed for 2D
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
        # systems.
        symm_system.set_pbc(periodicity)
        symmetry_analyzer = SymmetryAnalyzer(
            symm_system,
            config.normalize.symmetry_tolerance,
            config.normalize.flat_dim_threshold
        )
        return symmetry_analyzer

    def fill(self, representative_system) -> None:
        # Fetch resources
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
        material = sec_enc.material
        calculation = sec_enc.calculation
        repr_atoms = representative_system.tmp["representative_atoms"]  # Temporary value stored by SystemNormalizer
        symmetry_analyzer = self.get_symmetry_analyzer(repr_atoms)
        std_atoms = symmetry_analyzer.get_conventional_system()
        prim_atoms = symmetry_analyzer.get_primitive_system()
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        greatest_common_divisor = reduce(gcd, counts)
        reduced_counts = np.array(counts) / greatest_common_divisor
Lauri Himanen's avatar
Lauri Himanen committed
679
        # non_periodic_index_std = self.get_non_periodic_index_std(symmetry_analyzer)
680
681

        # Fill metainfo
Lauri Himanen's avatar
Lauri Himanen committed
682
        self.periodicity(material, std_atoms)
683
684
685
686
687
688
689
690
        self.material_hash(material, symmetry_analyzer)
        self.number_of_atoms(material, std_atoms)
        self.atom_labels(material, std_atoms)
        self.atom_positions(material, std_atoms)
        self.cell_normalized(material, std_atoms)
        self.cell_primitive(material, prim_atoms)
        self.formula(material, names, counts)
        self.formula_reduced(material, names, reduced_counts)
Lauri Himanen's avatar
Lauri Himanen committed
691
        self.lattice_parameters(calculation, std_atoms, material.periodicity)
692
693
694
695
696


class Structure1D(Structure):
    """Processes structure related metainfo for Encyclopedia 1D structures.
    """
697
    def material_hash(self, material: Material, prim_atoms: Atoms) -> None:
698
699
700
701
        """Hash to be used as identifier for a material. Different 1D
        materials are defined by their Coulomb matrix eigenvalues and their
        Hill formulas.
        """
702
703
        fingerprint = self.get_structure_fingerprint(prim_atoms)
        formula = material.formula
704
705
706
707
        id_strings = []
        id_strings.append(formula)
        id_strings.append(fingerprint)
        hash_seed = ", ".join(id_strings)
708
        hash_val = hash(hash_seed)
709
        material.material_hash = hash_val
710

711
712
713
714
715
716
717
718
719
720
721
722
    def cell_normalized(self, material: Material, std_atoms: ase.Atoms) -> None:
        cell_normalized = std_atoms.get_cell()
        cell_normalized *= 1e-10
        material.cell_normalized = cell_normalized

    def lattice_parameters(self, calculation: Calculation, std_atoms: Atoms, periodic_indices: np.array) -> None:
        # 1D systems only have one lattice parameter: length in periodic dimension
        cell = std_atoms.get_cell()
        a = np.linalg.norm(cell[periodic_indices[0], :]) * 1e-10
        calculation.lattice_parameters = np.array([a, 0.0, 0.0, 0.0, 0.0, 0.0])

    def periodicity(self, material: Material, prim_atoms: Atoms) -> None:
723
        # Determine the periodicity by examining vacuum gaps
724
725
726
727
        vacuum_directions = structure.find_vacuum_directions(
            prim_atoms,
            threshold=config.normalize.cluster_threshold
        )
728

729
        # If one axis is not periodic, return. This only happens if the vacuum
730
        # gap is not aligned with a cell vector.
731
732
        if sum(vacuum_directions) != 2:
            raise ValueError("Could not detect the periodic dimensions in a 1D system.")
733

734
        periodic_indices = np.where(vacuum_directions == False)[0]  # noqa: E712
Lauri Himanen's avatar
Lauri Himanen committed
735
        material.periodicity = np.sort(np.array(periodic_indices, dtype=np.int8))
736

737
    def get_structure_fingerprint(self, prim_atoms: Atoms) -> str:
738
739
        """Calculates a numeric fingerprint that coarsely encodes the atomic
        positions and species.
740
741

        The fingerprint is based on calculating a discretized version of a
742
743
744
745
746
747
748
        sorted Coulomb matrix eigenspectrum (Grégoire Montavon, Katja Hansen,
        Siamac Fazli, Matthias Rupp, Franziska Biegler, Andreas Ziehe,
        Alexandre Tkatchenko, Anatole V. Lilienfeld, and Klaus-Robert Müller.
        Learning invariant representations of molecules for atomization energy
        prediction. In F. Pereira, C. J. C. Burges, L. Bottou, and K. Q.
        Weinberger, editors, Advances in Neural Information Processing Systems
        25, pages 440–448. Curran Associates, Inc., 2012.).
749
750
751
752

        The fingerprints are discretized in order to perform O(n) matching
        between structures (no need to compare fingerprints against each
        other). As regular discretization is susceptible to the "edge problem",
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
        a robust discretization is used instead (Birget, Jean-Camille & Hong,
        Dawei & Memon, Nasir. (2003). Robust discretization, with an
        application to graphical passwords. IACR Cryptology ePrint Archive.
        2003. 168.) Basically for the 1-dimensional domain two grids are
        created and the points are mapped to the first grid in which they are
        robust using a minimum tolerance parameter r, with the maximum
        tolerance being 5r.

        There are other robust discretization methods that can guarantee exact
        r-tolerance (e.g. Sonia Chiasson, Jayakumar Srinivasan, Robert Biddle,
        and P. C. van Oorschot. 2008. Centered discretization with application
        to graphical passwords. In Proceedings of the 1st Conference on
        Usability, Psychology, and Security (UPSEC’08). USENIX Association,
        USA, Article 6, 1–9.). This method however requires that a predefined
        "correct" structure exists against which the search is done.

        Args:
            prim_atoms: Primitive system.

        Returns:
            The numeric fingerprint for the system encoded as a string.
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
        """
        # Calculate charge part
        q = prim_atoms.get_atomic_numbers()
        qiqj = np.sqrt(q[None, :] * q[:, None])

        # Calculate distance part. Notice that the minimum image convention
        # must be used. Without it, differently oriented atoms in the same cell
        # may be detected as the same material.
        pos = prim_atoms.get_positions()
        cell = prim_atoms.get_cell()
        cmat = 10 - matid.geometry.get_distance_matrix(pos, pos, cell, pbc=True, mic=True)
        cmat = np.clip(cmat, a_min=0, a_max=None)
        np.fill_diagonal(cmat, 0)
        cmat = qiqj * cmat

        # Calculate eigenvalues
        eigval, _ = np.linalg.eigh(cmat)

        # Sort eigenvalues
        eigval = np.array(sorted(eigval))

795
796
797
798
799
        # Perform robust discretization (see function docstring for details). r
        # = 0.5 ensures that all grids are integers which can be uniquely
        # mapped to strings. If finer grid is needed adjust the eigenvalue scale
        # instead.
        eigval /= 25  # Go to smaller scale where integer numbers are meaningful
800
801
802
803
804
805
806
807
808
        dimension = 1
        r = 0.5
        spacing = 2 * r * (dimension + 1)
        phi_k = 2 * r * np.array(range(dimension + 1))
        t = np.mod((eigval[None, :] + phi_k[:, None]), (2 * r * (dimension + 1)))
        grid_mask = (r <= t) & (t < r * (2 * dimension + 1))
        safe_grid_k = np.argmax(grid_mask == True, axis=0)   # noqa: E712
        discretization = spacing * np.floor((eigval + (2 * r * safe_grid_k)) / spacing)
        discretization[safe_grid_k == 1] += 2 * r
809

810
        # Form string
811
        strings = []
812
        for number in discretization:
813
            num_str = str(int(number))
814
            strings.append(num_str)
815
        fingerprint = ";".join(strings)
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830

        return fingerprint

    def get_symmetry_analyzer(self, original_system: Atoms) -> SymmetryAnalyzer:
        """For 1D systems the symmetery is analyzed from the original system
        with enforced full periodicity.

        Args:
            original_system: The original simulation system.

        Returns:
            The SymmetryAnalyzer that is instantiated with the original system.
        """
        symm_system = original_system.copy()
        symm_system.set_pbc(True)
831
832
833
834
835
836
        symmetry_analyzer = SymmetryAnalyzer(
            symm_system,
            config.normalize.symmetry_tolerance,
            config.normalize.flat_dim_threshold
        )

837
        return symmetry_analyzer
838

839
840
841
842
    def get_std_atoms(self, periodicity: np.array, prim_atoms: Atoms) -> Atoms:
        """For 1D systems the standardized system is based on a primitive
        system. This primitive system is translated to the center of mass and
        the non-periodic dimensions are minimized so that the atoms just fit.
Lauri Himanen's avatar
Lauri Himanen committed
843

844
845
846
847
        Args:
            periodicity: List of periodic indices, in 1D case a list containing
                one index.
            prim_atoms: Primitive system
Lauri Himanen's avatar
Lauri Himanen committed
848

849
850
851
852
853
        Returns
            Standardized structure that represents this material and from which
            the material hash will be constructed from.
        """
        std_atoms = prim_atoms.copy()
854

855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
        # Translate to center of mass
        pbc_cm = matid.geometry.get_center_of_mass(prim_atoms)
        cell_center = 0.5 * np.sum(std_atoms.get_cell(), axis=0)
        translation = cell_center - pbc_cm
        translation[periodicity] = 0
        std_atoms.translate(translation)
        std_atoms.wrap()

        # Reduce cell size to just fit the system in the non-periodic dimensions.
        indices = [0, 1, 2]
        for idx in periodicity:
            del indices[idx]
        pos = std_atoms.get_scaled_positions(wrap=False)
        cell = std_atoms.get_cell()
        new_cell = np.array(cell)
        translation = np.zeros(3)
        for index in indices:
            imin = np.min(pos[:, index])
            imax = np.max(pos[:, index])
            translation -= cell[index, :] * imin
            new_cell[index] = cell[index, :] * (imax - imin)
        std_atoms.translate(translation)
        std_atoms.set_cell(new_cell)

        return std_atoms
Lauri Himanen's avatar
Lauri Himanen committed
880

881
    def fill(self, representative_system) -> None:
Lauri Himanen's avatar
Lauri Himanen committed
882
        # Fetch resources
883
        sec_enc = self.backend.get_mi2_section(Encyclopedia.m_def)
Lauri Himanen's avatar
Lauri Himanen committed
884
885
        material = sec_enc.material
        calculation = sec_enc.calculation
886
887
888
        repr_atoms = representative_system.tmp["representative_atoms"]  # Temporary value stored by SystemNormalizer
        symmetry_analyzer = self.get_symmetry_analyzer(repr_atoms)
        prim_atoms = symmetry_analyzer.get_primitive_system()
889
        prim_atoms.set_pbc(True)
Lauri Himanen's avatar
Lauri Himanen committed
890
891
892
893
        names, counts = structure.get_hill_decomposition(prim_atoms.get_chemical_symbols(), reduced=False)
        greatest_common_divisor = reduce(gcd, counts)
        reduced_counts = np.array(counts) / greatest_common_divisor

894
        # Fill metainfo
895
896
        self.periodicity(material, prim_atoms)
        std_atoms = self.get_std_atoms(material.periodicity, prim_atoms)
Lauri Himanen's avatar
Lauri Himanen committed
897
898
        self.number_of_atoms(material, std_atoms)
        self.atom_labels(material, std_atoms)
899
        self.atom_positions(material, std_atoms)
Lauri Himanen's avatar
Lauri Himanen committed
900
901
902
        self.cell_normalized(material, std_atoms)
        self.formula(material, names, counts)
        self.formula_reduced(material, names, reduced_counts)
903
904
        self.material_hash(material, std_atoms)
        self.lattice_parameters(calculation, std_atoms, material.periodicity)
905
906
907
908
909
910
911
912
913
914


class Method():
    """A base class that is used for processing method related information
    in the Encylopedia.
    """
    def __init__(self, backend, logger):
        self.backend = backend
        self.logger = logger

915
916
    def code_name(self, calculation: Calculation) -> None:
        calculation.code_name = self.backend["program_name"]
917

918
919
    def code_version(self, calculation: Calculation) -> None:
        calculation.code_version = self.backend["program_version"]
920

921
922
923
924
925
926
927
    def method_hash(self, calculation: Calculation):
        # Create ordered dictionary with the values. Order is important for
        # consistent hashing. Not all the settings are set for this
        # calculation, in which case they will be simply set as None.
        hash_dict: OrderedDict = OrderedDict()
        hash_dict['program_name'] = calculation.code_name
        hash_dict['program_version'] = calculation.code_version
928

929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
        # The subclasses may define their own method properties that are to be
        # included here.
        hash_dict.update(self.method_hash_dict())

        # Form a hash from the dictionary
        method_hash = self.group_dict_to_hash("method_hash", hash_dict)
        calculation.method_hash = method_hash

    def group_eos_hash(self, calculation: Calculation):
        # Create ordered dictionary with the values. Order is important for
        # consistent hashing.
        hash_dict: OrderedDict = OrderedDict()
        hash_dict['upload_id'] = self.backend["section_entry_info"][0]["upload_id"]  # Only calculations from the same upload are grouped

        # The subclasses may define their own method properties that are to be
        # included here.
        hash_dict.update(self.group_eos_hash_dict())

        # Form a hash from the dictionary
        group_eos_hash = self.group_dict_to_hash('group_eos_hash', hash_dict)
        calculation.group_eos_hash = group_eos_hash
950

951
    def group_parametervariation_hash(self, calculation: Calculation):
952
953
954
955
956
957
958
959
960
961
962
        # Create ordered dictionary with the values. Order is important for
        # consistent hashing.
        hash_dict: OrderedDict = OrderedDict()
        hash_dict['upload_id'] = self.backend["section_entry_info"][0]["upload_id"]  # Only calculations from the same upload are grouped

        # The subclasses may define their own method properties that are to be
        # included here.
        hash_dict.update(self.group_parametervariation_hash_dict())

        # Form a hash from the dictionary
        group_eos_hash = self.group_dict_to_hash('group_eos_hash', hash_dict)
963
        calculation.group_parametervariation_hash = group_eos_hash
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000

    def group_e_min(self) -> None:
        pass

    def group_type(self) -> None:
        pass

    def k_point_grid_description(self) -> None:
        pass

    def basis_set_short_name(self) -> None:
        pass

    def basis_set_type(self) -> None:
        pass

    def core_electron_treatment(self) -> None:
        pass

    def functional_long_name(self) -> None:
        pass

    def functional_type(self) -> None:
        pass

    def gw_starting_point(self) -> None:
        pass

    def gw_type(self) -> None:
        pass

    def pseudopotential_type(self) -> None:
        pass

    def scf_threshold(self) -> None:
        pass

For faster browsing, not all history is shown. View entire blame