system.py 19.7 KB
Newer Older
1
#
Markus Scheidgen's avatar
Markus Scheidgen committed
2
3
4
5
6
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
7
8
9
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
18

19
from typing import Any, Dict
20
from nptyping import NDArray
21
import ase
Lauri Himanen's avatar
Lauri Himanen committed
22
from ase import Atoms
23
import numpy as np
24
import json
25
import re
Lauri Himanen's avatar
Lauri Himanen committed
26
27
from matid import SymmetryAnalyzer, Classifier
from matid.classifications import Class0D, Atom, Class1D, Material2D, Surface, Class3D
28

29
from nomad import atomutils, archive
Markus Scheidgen's avatar
Markus Scheidgen committed
30
from nomad import utils, config
31
32
33
from nomad.datamodel.metainfo.public import section_symmetry, section_std_system, \
    section_primitive_system, section_original_system, section_springer_material, \
    section_prototype, section_system
34
35

from .normalizer import SystemBasedNormalizer
36

37
38
39
40
41
42
43
# use a regular expression to check atom labels; expression is build from list of
# all labels sorted desc to find Br and not B when searching for Br.
atom_label_re = re.compile('|'.join(
    sorted(ase.data.chemical_symbols, key=lambda x: len(x), reverse=True)))


def normalized_atom_labels(atom_labels):
44
    '''
45
46
47
    Normalizes the given atom labels: they either are labels right away, or contain
    additional numbers (to distinguish same species but different labels, see meta-info),
    or we replace them with ase placeholder atom for unknown elements 'X'.
48
    '''
49
50
51
52
    return [
        ase.data.chemical_symbols[0] if match is None else match.group(0)
        for match in [re.search(atom_label_re, atom_label) for atom_label in atom_labels]]

53

54
def formula_normalizer(atoms):
55
    '''
56
57
    Reads the chemical symbols in ase.atoms and returns a normalized formula.
    Formula normalization is on the basis of atom counting,
58
    e.g., Tc ->  Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33
59
    '''
60
    #
Alvin Noe Ladines's avatar
Alvin Noe Ladines committed
61
    atoms_counter = atoms.symbols.formula.count()  # dictionary
62
63
64
65
    atoms_total = sum(atoms_counter.values())

    atoms_normed = []
    for key in atoms_counter.keys():
66
67
        norm = str(round(100 * atoms_counter[key] / atoms_total))
        atoms_normed.append(key + norm)
68
69
70
    #
    atoms_normed.sort()
    return ''.join(atoms_normed)
71

72

73
class SystemNormalizer(SystemBasedNormalizer):
74
    '''
75
76
    This normalizer performs all system (atoms, cells, etc.) related normalizations
    of the legacy NOMAD-coe *stats* normalizer.
77
    '''
78
79
    @staticmethod
    def atom_label_to_num(atom_label):
80
        # Take first three characters and make first letter capitalized.
81
82
83
84
85
86
87
88
89
        atom_label = atom_label[:3].title()

        for symbol_length in reversed(range(1, 4)):
            symbol = atom_label[:symbol_length]
            if symbol in ase.data.chemical_symbols:
                return ase.data.chemical_symbols.index(symbol)

        return 0

90
    def normalize_system(self, system, is_representative) -> bool:
91
        '''
92
93
94
        The 'main' method of this :class:`SystemBasedNormalizer`.
        Normalizes the section with the given `index`.
        Normalizes geometry, classifies, system_type, and runs symmetry analysis.
95
96

        Returns: True, iff the normalization was successful
97
        '''
98
99
100
        if self.section_run is None:
            self.logger.error('section_run is not present.')
            return False
101

102
        def get_value(quantity_def, default: Any = None, numpy: bool = True) -> Any:
103
            try:
104
                value = system.m_get(quantity_def)
105
                if not numpy and type(value).__module__ == np.__name__:
106
                    value = value.tolist()
107
108
109
110

                elif numpy and isinstance(value, list):
                    value = np.array(value)

111
                return value
112
            except (KeyError, IndexError):
113
114
                return default

115
        if is_representative:
116
            system.is_representative = is_representative
117

118
        # analyze atoms labels
119
        atom_labels = get_value(section_system.atom_labels, numpy=False)
120
121
122
        if atom_labels is not None:
            atom_labels = normalized_atom_labels(atom_labels)

123
        atom_species = get_value(section_system.atom_species, numpy=False)
124
        if atom_labels is None and atom_species is None:
Markus Scheidgen's avatar
Markus Scheidgen committed
125
            self.logger.warn('system has neither atom species nor labels')
126
            return False
127

Daniel Speckhard's avatar
Daniel Speckhard committed
128
        # If there are no atom labels we create them from atom species data.
129
        if atom_labels is None:
130
131
132
            try:
                atom_labels = list(ase.data.chemical_symbols[species] for species in atom_species)
            except IndexError:
133
134
                self.logger.error('system has atom species that are out of range')
                return False
135

136
            system.atom_labels = atom_labels
137
138

        # At this point we should have atom labels.
139
        try:
Daniel Speckhard's avatar
Daniel Speckhard committed
140
141
142
143
144
            atoms = ase.Atoms(symbols=atom_labels)
            chemical_symbols = list(atoms.get_chemical_symbols())
            if atom_labels != chemical_symbols:
                self.logger.error('atom labels are ambiguous', atom_labels=atom_labels[:10])
            atom_labels = chemical_symbols
145
        except Exception as e:
146
            self.logger.error(
147
                'cannot build ase atoms from atom labels',
Markus Scheidgen's avatar
Markus Scheidgen committed
148
                atom_labels=atom_labels[:10], exc_info=e, error=str(e))
149
            raise e
150
151
152

        if atom_species is None:
            atom_species = atoms.get_atomic_numbers().tolist()
153
            system.atom_species = atom_species
154
        else:
Markus Scheidgen's avatar
Markus Scheidgen committed
155
156
            if not isinstance(atom_species, list):
                atom_species = [atom_species]
157
158
159
            if atom_species != atoms.get_atomic_numbers().tolist():
                self.logger.warning(
                    'atom species do not match labels',
Markus Scheidgen's avatar
Markus Scheidgen committed
160
                    atom_labels=atom_labels[:10], atom_species=atom_species[:10])
161
                atom_species = atoms.get_atomic_numbers().tolist()
162
            system.atom_species = atom_species
163
164

        # periodic boundary conditions
165
        pbc = get_value(section_system.configuration_periodic_dimensions, numpy=False)
166
167
168
        if pbc is None:
            pbc = [False, False, False]
            self.logger.warning('missing configuration_periodic_dimensions')
169
            system.configuration_periodic_dimensions = pbc
170
171
172
        try:
            atoms.set_pbc(pbc)
        except Exception as e:
173
174
            self.logger.error(
                'cannot use pbc with ase atoms', exc_info=e, pbc=pbc, error=str(e))
175
            return False
176
177

        # formulas
178
179
180
        system.chemical_composition = atoms.get_chemical_formula(mode='all')
        system.chemical_composition_reduced = atoms.get_chemical_formula(mode='reduce')
        system.chemical_composition_bulk_reduced = atoms.get_chemical_formula(mode='hill')
181

182
        # positions
183
        atom_positions = get_value(section_system.atom_positions, numpy=True)
184
185
        if atom_positions is None:
            self.logger.warning('no atom positions, skip further system analysis')
186
            return False
187
        if len(atom_positions) != len(atoms):
188
189
            self.logger.error(
                'len of atom position does not match number of atoms',
190
                n_atom_positions=len(atom_positions), n_atoms=len(atoms))
191
            return False
192
        try:
193
            atoms.set_positions(1e10 * atom_positions.magnitude)
194
        except Exception as e:
195
196
            self.logger.error(
                'cannot use positions with ase atoms', exc_info=e, error=str(e))
197
            return False
198
199

        # lattice vectors
200
        lattice_vectors = get_value(section_system.lattice_vectors, numpy=True)
201
        if lattice_vectors is None:
202
            lattice_vectors = get_value(section_system.simulation_cell, numpy=True)
203
            if lattice_vectors is not None:
204
                system.lattice_vectors = lattice_vectors
205
206
207
        if lattice_vectors is None:
            if any(pbc):
                self.logger.error('no lattice vectors but periodicity', pbc=pbc)
208
209
        else:
            try:
210
                atoms.set_cell(1e10 * lattice_vectors.magnitude)
211
            except Exception as e:
212
213
                self.logger.error(
                    'cannot use lattice_vectors with ase atoms', exc_info=e, error=str(e))
214
                return False
215
216
217
218
219
220
221

        # configuration
        configuration = [
            atom_labels, atoms.positions.tolist(),
            atoms.cell.tolist() if atoms.cell is not None else None,
            atoms.pbc.tolist()]
        configuration_id = utils.hash(json.dumps(configuration).encode('utf-8'))
222
        system.configuration_raw_gid = configuration_id
223

224
        if is_representative:
225
            # Save the Atoms as a temporary variable
226
            system.m_cache["representative_atoms"] = atoms
227

228
            # System type analysis
229
230
231
            if atom_positions is not None:
                with utils.timer(
                        self.logger, 'system classification executed',
232
                        system_size=len(atoms)):
233
                    self.system_type_analysis(atoms)
234

235
            system_type = system.system_type
236
            # Symmetry analysis
237
            if atom_positions is not None and (lattice_vectors is not None or not any(pbc)) and system_type == "bulk":
238
239
                with utils.timer(
                        self.logger, 'symmetry analysis executed',
240
                        system_size=len(atoms)):
241
                    self.symmetry_analysis(system, atoms)
242

243
244
        return True

Lauri Himanen's avatar
Lauri Himanen committed
245
    def system_type_analysis(self, atoms: Atoms) -> None:
246
        '''
Lauri Himanen's avatar
Lauri Himanen committed
247
        Determine the system type with MatID. Write the system type to the
248
        entry_archive.
Lauri Himanen's avatar
Lauri Himanen committed
249
250

        Args:
Lauri Himanen's avatar
Lauri Himanen committed
251
            atoms: The structure to analyse
252
        '''
Markus Scheidgen's avatar
Markus Scheidgen committed
253
        system_type = config.services.unavailable_value
254
        if len(atoms) <= config.normalize.system_classification_with_clusters_threshold:
Lauri Himanen's avatar
Lauri Himanen committed
255
            try:
256
                classifier = Classifier(radii="covalent", cluster_threshold=config.normalize.cluster_threshold)
Lauri Himanen's avatar
Lauri Himanen committed
257
258
259
260
                cls = classifier.classify(atoms)
            except Exception as e:
                self.logger.error(
                    'matid project system classification failed', exc_info=e, error=str(e))
261
            else:
262
263
                classification = type(cls)
                if classification == Class3D:
264
                    system_type = 'bulk'
265
                elif classification == Atom:
266
                    system_type = 'atom'
267
                elif classification == Class0D:
268
                    system_type = 'molecule / cluster'
269
                elif classification == Class1D:
270
                    system_type = '1D'
271
                elif classification == Surface:
272
                    system_type = 'surface'
273
                elif classification == Material2D:
274
275
                    system_type = '2D'
        else:
Lauri Himanen's avatar
Lauri Himanen committed
276
            self.logger.info("system type analysis not run due to large system size")
277
278
279
        idx = self.section_run.m_cache["representative_system_idx"]
        self.section_run.section_system[idx].system_type = system_type
        self.section_run.section_system[-1].system_type = system_type
280

281
282
    def symmetry_analysis(self, system, atoms: ase.Atoms) -> None:
        '''Analyze the symmetry of the material being simulated. Only performed
283
        for bulk materials.
284

285
        We feed in the parsed values in section_system to the the symmetry
286
        analyzer. The analysis results are written to the entry_archive.
287
288

        Args:
289
            atoms: The atomistic structure to analyze.
290
291
292

        Returns:
            None: The method should write symmetry variables
293
            to the entry_archive which is member of this class.
294
        '''
295
        # Try to use MatID's symmetry analyzer to analyze the ASE object.
296
        try:
297
            symm = SymmetryAnalyzer(atoms, symmetry_tol=config.normalize.symmetry_tolerance)
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328

            space_group_number = symm.get_space_group_number()

            hall_number = symm.get_hall_number()
            hall_symbol = symm.get_hall_symbol()

            crystal_system = symm.get_crystal_system()
            bravais_lattice = symm.get_bravais_lattice()
            point_group = symm.get_point_group()

            orig_wyckoff = symm.get_wyckoff_letters_original()
            prim_wyckoff = symm.get_wyckoff_letters_primitive()
            conv_wyckoff = symm.get_wyckoff_letters_conventional()

            orig_equivalent_atoms = symm.get_equivalent_atoms_original()
            prim_equivalent_atoms = symm.get_equivalent_atoms_primitive()
            conv_equivalent_atoms = symm.get_equivalent_atoms_conventional()
            international_short = symm.get_space_group_international_short()

            conv_sys = symm.get_conventional_system()
            conv_pos = conv_sys.get_scaled_positions()
            conv_cell = conv_sys.get_cell()
            conv_num = conv_sys.get_atomic_numbers()

            prim_sys = symm.get_primitive_system()
            prim_pos = prim_sys.get_scaled_positions()
            prim_cell = prim_sys.get_cell()
            prim_num = prim_sys.get_atomic_numbers()

            transform = symm._get_spglib_transformation_matrix()
            origin_shift = symm._get_spglib_origin_shift()
329
330
331
        except ValueError as e:
            self.logger.debug('symmetry analysis is not available', details=str(e))
            return
332
333
334
335
        except Exception as e:
            self.logger.error('matid symmetry analysis fails with exception', exc_info=e)
            return

336
337
        # Write data extracted from MatID's symmetry analysis to the
        # representative section_system.
338
339

        sec_symmetry = system.m_create(section_symmetry)
340
        sec_symmetry.m_cache["symmetry_analyzer"] = symm
341

342
343
344
345
346
347
348
349
350
351
352
        sec_symmetry.symmetry_method = 'MatID (spg)'
        sec_symmetry.space_group_number = space_group_number
        sec_symmetry.hall_number = hall_number
        sec_symmetry.hall_symbol = hall_symbol
        sec_symmetry.international_short_symbol = international_short
        sec_symmetry.point_group = point_group
        sec_symmetry.crystal_system = crystal_system
        sec_symmetry.bravais_lattice = bravais_lattice
        sec_symmetry.origin_shift = origin_shift
        sec_symmetry.transformation_matrix = transform

353
        sec_std = sec_symmetry.m_create(section_std_system)
354
355
356
357
358
359
        sec_std.lattice_vectors_std = conv_cell
        sec_std.atom_positions_std = conv_pos
        sec_std.atomic_numbers_std = conv_num
        sec_std.wyckoff_letters_std = conv_wyckoff
        sec_std.equivalent_atoms_std = conv_equivalent_atoms

360
        sec_prim = sec_symmetry.m_create(section_primitive_system)
361
362
363
364
365
366
        sec_prim.lattice_vectors_primitive = prim_cell
        sec_prim.atom_positions_primitive = prim_pos
        sec_prim.atomic_numbers_primitive = prim_num
        sec_prim.wyckoff_letters_primitive = prim_wyckoff
        sec_prim.equivalent_atoms_primitive = prim_equivalent_atoms

367
        sec_orig = sec_symmetry.m_create(section_original_system)
368
369
370
        sec_orig.wyckoff_letters_original = orig_wyckoff
        sec_orig.equivalent_atoms_original = orig_equivalent_atoms

371
        self.springer_classification(atoms, space_group_number)  # Springer Normalizer
372
        self.prototypes(system, conv_num, conv_wyckoff, space_group_number)
373

374
    def springer_classification(self, atoms, space_group_number):
375
        normalized_formula = formula_normalizer(atoms)
376
        springer_data = query_springer_data(normalized_formula, space_group_number)
377
        idx = self.section_run.m_cache["representative_system_idx"]
378

379
        for material in springer_data.values():
380
            sec_springer_mat = self.section_run.section_system[idx].m_create(section_springer_material)
381

382
383
384
            sec_springer_mat.springer_id = material['spr_id']
            sec_springer_mat.springer_alphabetical_formula = material['spr_aformula']
            sec_springer_mat.springer_url = material['spr_url']
Markus Scheidgen's avatar
Markus Scheidgen committed
385
386
387
388

            compound_classes = material['spr_compound']
            if compound_classes is None:
                compound_classes = []
389
            sec_springer_mat.springer_compound_class = compound_classes
Markus Scheidgen's avatar
Markus Scheidgen committed
390
391
392
393

            classifications = material['spr_classification']
            if classifications is None:
                classifications = []
394
            sec_springer_mat.springer_classification = classifications
395

396
397
        # Check the 'springer_classification' and 'springer_compound_class' information
        # found is the same for all springer_id's
398
399
400
401
        springer_data_keys = list(springer_data.keys())
        if len(springer_data_keys) != 0:
            class_0 = springer_data[springer_data_keys[0]]['spr_classification']
            comp_0 = springer_data[springer_data_keys[0]]['spr_compound']
402
403

            # compare 'class_0' and 'comp_0' against the rest
404
405
406
            for ii in range(1, len(springer_data_keys)):
                class_test = (class_0 == springer_data[springer_data_keys[ii]]['spr_classification'])
                comp_test = (comp_0 == springer_data[springer_data_keys[ii]]['spr_compound'])
407
408

                if (class_test or comp_test) is False:
Markus Scheidgen's avatar
Markus Scheidgen committed
409
                    self.logger.info('Mismatch in Springer classification or compounds')
410

411
    def prototypes(self, system, atom_species: NDArray, wyckoffs: NDArray, spg_number: int) -> None:
412
        '''Tries to match the material to an entry in the AFLOW prototype data.
413
        If a match is found, a section_prototype is added to section_system.
414

415
416
417
418
        Args:
            atomic_numbers: Array of atomic numbers.
            wyckoff_letters: Array of Wyckoff letters as strings.
            spg_number: Space group number.
419
        '''
420
421
        norm_wyckoff = atomutils.get_normalized_wyckoff(atom_species, wyckoffs)
        protoDict = atomutils.search_aflow_prototype(spg_number, norm_wyckoff)
422
423
424
        if protoDict is not None:
            aflow_prototype_id = protoDict["aflow_prototype_id"]
            aflow_prototype_url = protoDict["aflow_prototype_url"]
425
426
427
            aflow_prototype_notes = protoDict["Notes"]
            aflow_prototype_name = protoDict["Prototype"]
            aflow_strukturbericht_designation = protoDict["Strukturbericht Designation"]
428
429
            prototype_label = '%d-%s-%s' % (
                spg_number,
430
                aflow_prototype_name,
431
432
                protoDict.get("Pearsons Symbol", "-")
            )
433
434
            idx = self.section_run.m_cache["representative_system_idx"]
            sec_prototype = self.section_run.section_system[idx].m_create(section_prototype)
435
436
437
438
439
440
            sec_prototype.prototype_label = prototype_label
            sec_prototype.prototype_aflow_id = aflow_prototype_id
            sec_prototype.prototype_aflow_url = aflow_prototype_url
            sec_prototype.prototype_assignment_method = "normalized-wyckoff"
            sec_prototype.m_cache["prototype_notes"] = aflow_prototype_notes
            sec_prototype.m_cache["prototype_name"] = aflow_prototype_name
441
            if aflow_strukturbericht_designation != "None":
442
                sec_prototype.m_cache["strukturbericht_designation"] = aflow_strukturbericht_designation
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461


def query_springer_data(normalized_formula: str, space_group_number: int) -> Dict[str, Any]:
    ''' Queries a msgpack database for springer-related quantities. '''
    if config.normalize.springer_db_path is None:
        return {}

    entries = archive.query_archive(config.normalize.springer_db_path, {str(space_group_number): {normalized_formula: '*'}})
    db_dict = {}
    entries = entries.get(str(space_group_number), {}).get(normalized_formula, {})

    for sp_id, entry in entries.items():
        db_dict[sp_id] = {
            'spr_id': sp_id,
            'spr_aformula': entry['aformula'],
            'spr_url': entry['url'],
            'spr_compound': entry['compound'],
            'spr_classification': entry['classification']}
    return db_dict