Commit 4a56c805 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'v0.7.3' of gitlab.mpcdf.mpg.de:nomad-lab/nomad-FAIR into v0.7.3

parents c6c92fba 71a85632
Pipeline #68106 passed with stages
in 30 minutes and 50 seconds
......@@ -181,7 +181,17 @@ mail = NomadConfig(
)
normalize = NomadConfig(
system_classification_with_clusters_threshold=50
# The system size limit for running the dimensionality analysis. For very
# large systems the dimensionality analysis will get too expensive.
system_classification_with_clusters_threshold=50,
# Symmetry tolerance controls the precision used by spglib in order to find
# symmetries. The atoms are allowed to move 1/2*symmetry_tolerance from
# their symmetry positions in order for spglib to still detect symmetries.
# The unit is angstroms.
symmetry_tolerance=0.1,
# The distance tolerance between atoms for grouping them into the same
# cluster. Used in detecting system type.
cluster_threshold=3.1,
)
client = NomadConfig(
......
This source diff could not be displayed because it is too large. You can view the blob instead.
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import re
import functools
import fractions
import json
import uuid
from typing import Dict
from matid import SymmetryAnalyzer
import numpy as np
from ase import Atoms
from nomad.normalizing.data.aflow_prototypes import aflow_prototypes
from nomad import config
# Verify once, at import time, that the stored AFLOW prototype data was
# generated with the symmetry tolerance that is currently configured.
old_symmetry_tolerance = aflow_prototypes["matid_symmetry_tolerance"]
symmetry_tolerance = config.normalize.symmetry_tolerance
if symmetry_tolerance != old_symmetry_tolerance:
    _outdated_message = (
        "The AFLOW prototype information is outdated due to changed "
        "tolerance for symmetry detection. Please update the AFLOW "
        "prototype information by running once the function "
        "'update_aflow_prototype_information'."
    )
    raise AssertionError(_outdated_message)
def get_normalized_wyckoff(atomic_numbers: np.array, wyckoff_letters: np.array) -> Dict[str, Dict[str, int]]:
    """Returns a normalized Wyckoff sequence for the given atomic numbers and
    corresponding Wyckoff letters. In a normalized sequence the chemical
    species are "anonymized" by replacing them with labels of the form
    "X_<index>".

    The index is assigned by ordering the species: the most abundant species
    comes first, and ties are broken by comparing the occupation of each
    Wyckoff letter (in alphabetical order of the letters), higher occupation
    first. Finally all occupations are divided by their greatest common
    divisor.

    Args:
        atomic_numbers: Array of atomic numbers.
        wyckoff_letters: Array of Wyckoff letters as strings, one per atom.

    Returns:
        A dictionary that maps each present Wyckoff letter to a dictionary.
        The inner dictionary contains the (gcd-reduced) number of atoms for
        each species, where the species names have been anonymized in the
        form "X_<index>". An empty dictionary is returned for empty input.
    """
    # Local import: fractions.gcd no longer exists on Python >= 3.9, math.gcd
    # is its replacement.
    import math

    # Count the occurrence of each chemical species
    atom_count: Dict[int, int] = {}
    for atomic_number in atomic_numbers:
        atom_count[atomic_number] = atom_count.get(atomic_number, 0) + 1

    # Form a dictionary that maps Wyckoff letters to a dictionary with the
    # number of atoms of that Wyckoff letter for each atomic number
    wyc_dict: dict = {}
    for i, wyckoff_letter in enumerate(wyckoff_letters):
        occupations = wyc_dict.setdefault(wyckoff_letter, {})
        atomic_number = atomic_numbers[i]
        occupations[atomic_number] = occupations.get(atomic_number, 0) + 1

    sorted_wyckoff_letters = sorted(wyc_dict)

    # Anonymize the atomic species to X_<index>, where the index is calculated
    # by ordering the species. The key is negated so that an ascending, stable
    # sort yields "largest count first" for the total count and then for each
    # Wyckoff letter in turn.
    def species_sort_key(atomic_number):
        per_letter = tuple(
            -wyc_dict[letter].get(atomic_number, 0)
            for letter in sorted_wyckoff_letters
        )
        return (-atom_count[atomic_number],) + per_letter

    sorted_species = sorted(atom_count, key=species_sort_key)
    standard_atom_names = {
        atomic_number: "X_%d" % index
        for index, atomic_number in enumerate(sorted_species)
    }

    # Rename with anonymized species labels
    standard_wyc: dict = {
        letter: {
            standard_atom_names[atomic_number]: count
            for atomic_number, count in occupations.items()
        }
        for letter, occupations in wyc_dict.items()
    }

    # Divide atom counts with greatest common divisor
    if standard_wyc:
        counts = [c for occupations in standard_wyc.values() for c in occupations.values()]
        gcd = functools.reduce(math.gcd, counts)
        if gcd != 1:
            for occupations in standard_wyc.values():
                for name in occupations:
                    occupations[name] //= gcd

    return standard_wyc
def search_aflow_prototype(space_group: int, norm_wyckoff: dict) -> dict:
    """Looks up the AFLOW prototype library for an entry that matches the
    given space group and normalized Wyckoff sequence. The normalized Wyckoff
    sequence is assumed to come from the MatID symmetry routine.

    Currently only contains Part I of the prototype library (M. J. Mehl, D.
    Hicks, C. Toher, O. Levy, R. M. Hanson, G. L. W. Hart, and S. Curtarolo,
    The AFLOW Library of Crystallographic Prototypes: Part 1, Comp. Mat. Sci.
    136, S1-S828 (2017), 10.1016/j.commatsci.2017.01.017)

    Args:
        space_group: Space group number
        norm_wyckoff: Normalized Wyckoff occupations

    Returns:
        Dictionary containing the AFLOW prototype information of the first
        matching entry, or None if no entry matches.
    """
    candidates = aflow_prototypes["prototypes_by_spacegroup"].get(space_group, [])
    for candidate in candidates:
        candidate_wyckoff = candidate.get("normalized_wyckoff_matid")
        # Entries without MatID Wyckoff information can never match
        if candidate_wyckoff and candidate_wyckoff == norm_wyckoff:
            return candidate
    return None
def update_aflow_prototype_information(filepath: str) -> None:
    """Used to update the AFLOW prototype information. Creates a new python
    module with updated symmetry tolerance parameter and the wyckoff positions
    as detected by MatID.

    The in-memory prototype data imported by this module is left untouched;
    all changes are made on a copy that is then written to the given file.

    This function is relatively heavy and should only be run if the symmetry
    tolerance has been changed or the symmetry detection routine has been
    updated.

    Args:
        filepath: Path to the python file in which the new symmetry information
            will be written.
    """
    # Local import: only this maintenance routine needs it.
    import copy

    class NoIndent(object):
        """Wrapper marking a value that is serialized without indentation."""
        def __init__(self, value):
            self.value = value

    class NoIndentEncoder(json.JSONEncoder):
        """A custom JSON encoder that can pretty-print objects wrapped in the
        NoIndent class.
        """
        def __init__(self, *args, **kwargs):
            super(NoIndentEncoder, self).__init__(*args, **kwargs)
            # Keep the encoder options for serializing wrapped values, but
            # without 'indent' so that they end up on a single line.
            self.kwargs = dict(kwargs)
            self.kwargs.pop('indent', None)
            self._replacement_map = {}

        def default(self, o):  # pylint: disable=E0202
            if isinstance(o, NoIndent):
                # Emit a unique placeholder now; encode() swaps it for the
                # compact serialization afterwards.
                key = uuid.uuid4().hex
                self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
                return "@@%s@@" % (key,)
            else:
                return super(NoIndentEncoder, self).default(o)

        def encode(self, o):
            result = super(NoIndentEncoder, self).encode(o)
            for k, v in self._replacement_map.items():
                result = result.replace('"@@%s@@"' % (k,), v)
            return result

    tolerance = config.normalize.symmetry_tolerance

    # Work on a copy: wrapping values of the imported data in NoIndent would
    # otherwise break later comparisons against it in the same process.
    updated_prototypes = copy.deepcopy(aflow_prototypes)
    # Record the tolerance used for this update; the import-time check of this
    # module compares it against the configured value.
    updated_prototypes["matid_symmetry_tolerance"] = tolerance

    n_prototypes = 0
    n_failed = 0
    n_unmatched = 0
    prototype_dict = updated_prototypes["prototypes_by_spacegroup"]
    for aflow_spg_number, prototypes in prototype_dict.items():
        n_prototypes += len(prototypes)
        for prototype in prototypes:

            # Read prototype structure
            # NOTE(review): the 1E10 factor presumably converts meters to
            # angstroms - confirm against the stored prototype data.
            pos = np.array(prototype["atom_positions"]) * 1E10
            labels = prototype["atom_labels"]
            cell = np.array(prototype["lattice_vectors"]) * 1E10
            atoms = Atoms(
                symbols=labels,
                positions=pos,
                cell=cell,
                pbc=True
            )

            # Try to first see if the space group can be matched with the one in AFLOW
            try:
                symm = SymmetryAnalyzer(atoms, tolerance)
                spg_number = symm.get_space_group_number()
                wyckoff_matid = symm.get_wyckoff_letters_conventional()
                norm_system = symm.get_conventional_system()
            except Exception:
                # Best effort: a failing prototype is only counted and the
                # remaining prototypes are still processed.
                n_failed += 1
            else:
                # If the space group is matched, add the MatID normalized Wyckoff
                # letters to the data.
                if spg_number == aflow_spg_number:
                    atomic_numbers = norm_system.get_atomic_numbers()
                    normalized_wyckoff_matid = get_normalized_wyckoff(atomic_numbers, wyckoff_matid)
                    prototype["normalized_wyckoff_matid"] = NoIndent(normalized_wyckoff_matid)
                else:
                    n_unmatched += 1

            # Save the information back in a prettified form
            prototype["atom_positions"] = NoIndent(prototype["atom_positions"])
            prototype["atom_labels"] = NoIndent(prototype["atom_labels"])
            prototype["lattice_vectors"] = NoIndent(prototype["lattice_vectors"])
            if "normalized_wyckoff" in prototype:
                prototype["normalized_wyckoff"] = NoIndent(prototype["normalized_wyckoff"])

    print(f"Updated AFLOW prototype library. Total number of prototypes: {n_prototypes}, unmatched: {n_unmatched}, failed: {n_failed}")

    # Serialize before opening the target so that a serialization error does
    # not leave a truncated file behind.
    json_dump = json.dumps(updated_prototypes, ensure_ascii=False, indent=4, sort_keys=True, cls=NoIndentEncoder)
    json_dump = re.sub(r"\"(-?\d+(?:[\.,]\d+)?)\"", r'\1', json_dump)  # Removes quotes around numbers

    # Save the updated data
    with io.open(filepath, "w", encoding="utf8") as f:
        f.write("# -*- coding: utf-8 -*-\naflow_prototypes = {}\n".format(json_dump))
......@@ -15,19 +15,17 @@
from collections import Counter
from typing import Any
import ase
from ase import Atoms
import numpy as np
import json
import re
import os
import sqlite3
import functools
import fractions
from matid import SymmetryAnalyzer
from matid.geometry import get_dimensionality
from nomadcore.structure_types import structure_types_by_spacegroup as str_types_by_spg
from matid import SymmetryAnalyzer, Classifier
from matid.classifications import Class0D, Atom, Class1D, Material2D, Surface, Class3D
from nomad.normalizing import structure
from nomad import utils, config
from nomad.normalizing.normalizer import SystemBasedNormalizer
......@@ -262,36 +260,38 @@ class SystemNormalizer(SystemBasedNormalizer):
return True
def system_type_analysis(self, atoms) -> None:
def system_type_analysis(self, atoms: Atoms) -> None:
"""
Determine the dimensionality and hence the system type of the system with
Matid. Write the system type to the backend.
Determine the system type with MatID. Write the system type to the
backend.
Args:
atoms: The structure to analyse
"""
system_type = config.services.unavailable_value
try:
if atoms.get_number_of_atoms() > config.normalize.system_classification_with_clusters_threshold:
# it is too expensive to run Matid's cluster detection, just check pbc
dimensionality = np.sum(atoms.get_pbc())
if atoms.get_number_of_atoms() <= config.normalize.system_classification_with_clusters_threshold:
try:
classifier = Classifier(cluster_threshold=config.normalize.cluster_threshold)
cls = classifier.classify(atoms)
except Exception as e:
self.logger.error(
'matid project system classification failed', exc_info=e, error=str(e))
else:
dimensionality = get_dimensionality(
atoms, cluster_threshold=3.1, return_clusters=False)
if dimensionality is None:
pass
elif dimensionality == 0:
if atoms.get_number_of_atoms() == 1:
classification = type(cls)
if classification == Class3D:
system_type = 'bulk'
elif classification == Atom:
system_type = 'atom'
else:
elif classification == Class0D:
system_type = 'molecule / cluster'
elif dimensionality == 1:
system_type = '1D'
elif dimensionality == 2:
system_type = '2D / surface'
elif dimensionality == 3:
system_type = 'bulk'
except Exception as e:
self.logger.error(
'matid project system classification failed', exc_info=e, error=str(e))
elif classification == Class1D:
system_type = '1D'
elif classification == Surface:
system_type = 'surface'
elif classification == Material2D:
system_type = '2D'
else:
self.logger.info("system type analysis not run due to large system size")
self._backend.addValue('system_type', system_type)
......@@ -393,7 +393,7 @@ class SystemNormalizer(SystemBasedNormalizer):
self.springer_classification(atoms, space_group_number) # Springer Normalizer
self.prototypes(prim_num, prim_wyckoff, space_group_number)
self.prototypes(conv_num, conv_wyckoff, space_group_number)
self._backend.closeSection('section_symmetry', symmetry_gid)
......@@ -482,8 +482,8 @@ class SystemNormalizer(SystemBasedNormalizer):
def prototypes(self, atomSpecies, wyckoffs, spg_nr):
try:
norm_wyckoff = SystemNormalizer.get_normalized_wyckoff(atomSpecies, wyckoffs)
protoDict = SystemNormalizer.get_structure_type(spg_nr, norm_wyckoff)
norm_wyckoff = structure.get_normalized_wyckoff(atomSpecies, wyckoffs)
protoDict = structure.search_aflow_prototype(spg_nr, norm_wyckoff)
if protoDict is None:
proto = "%d-_" % spg_nr
......@@ -512,72 +512,3 @@ class SystemNormalizer(SystemBasedNormalizer):
if aurl:
self._backend.addValue("prototype_aflow_url", aurl)
self._backend.closeSection("section_prototype", pSect)
@staticmethod
def get_normalized_wyckoff(atomic_number, wyckoff):
    """Returns a normalized Wyckoff sequence.

    Species are replaced by anonymous labels "X_<index>", ordered by
    decreasing total count with ties broken by the occupation of each
    Wyckoff letter (letters taken in sorted order). All occupations are then
    divided by their greatest common divisor.

    Args:
        atomic_number: Sequence of atomic numbers, one per atom.
        wyckoff: Sequence of Wyckoff letters, one per atom.

    Returns:
        Dictionary mapping each Wyckoff letter to a dictionary of
        "X_<index>" labels and their reduced atom counts. Empty input gives
        an empty dictionary.
    """
    # Local import: fractions.gcd no longer exists on Python >= 3.9, math.gcd
    # is its replacement.
    import math

    # Total number of atoms per species
    atomCount = {}
    for nr in atomic_number:
        atomCount[nr] = atomCount.get(nr, 0) + 1

    # Number of atoms per species for each Wyckoff letter
    wycDict = {}
    for i, wk in enumerate(wyckoff):
        perSpecies = wycDict.setdefault(wk, {})
        nr = atomic_number[i]
        perSpecies[nr] = perSpecies.get(nr, 0) + 1

    sortedWyc = sorted(wycDict)

    def cmpp(a, b):
        # Reversed three-way comparison: larger values sort first
        return ((a < b) - (a > b))

    def compareAtNr(at1, at2):
        c = cmpp(atomCount[at1], atomCount[at2])
        if c != 0:
            return c
        for wk in sortedWyc:
            p = wycDict[wk]
            c = cmpp(p.get(at1, 0), p.get(at2, 0))
            if c != 0:
                return c
        return 0

    sortedAt = sorted(atomCount, key=functools.cmp_to_key(compareAtNr))
    standardAtomNames = {at: "X_%d" % i for i, at in enumerate(sortedAt)}

    # Rename the species with their anonymous labels
    standardWyc = {
        wk: {standardAtomNames[at]: count for at, count in ats.items()}
        for wk, ats in wycDict.items()
    }

    # Reduce all occupations by their greatest common divisor
    if standardWyc:
        counts = [c for x in standardWyc.values() for c in x.values()]
        gcd = functools.reduce(math.gcd, counts)
        if gcd != 1:
            for d in standardWyc.values():
                for at in d:
                    d[at] //= gcd

    return standardWyc
@staticmethod
def get_structure_type(space_group, norm_wyckoff):
    """Returns the prototype information for the first structure type of the
    given space group whose normalized Wyckoff sequence equals norm_wyckoff,
    or None if there is no such structure type.
    """
    for candidate in str_types_by_spg.get(space_group, []):
        candidate_wyckoff = candidate.get("normalized_wysytax")
        if candidate_wyckoff and candidate_wyckoff == norm_wyckoff:
            # A falsy description is reported as "not found"
            return candidate or None
    return None
images:
nomad:
tag: "v0.6.3"
tag: "v0.7.2"
frontend:
tag: "v0.6.3"
services:
apiSecret: 'nomad-keycloak-prod-api-secret'
tag: "v0.7.2"
proxy:
nodePort: 30012
external:
host: "repository.nomad-coe.eu"
path: "/uploads"
path: "/app"
gui:
debug: false
app:
replicas: 2
worker: 8
workerClass: "sync"
nomadNodeType: "public"
worker:
replicas: 1
......@@ -19,13 +25,20 @@ worker:
processes: 10
nomadNodeType: "prod-worker"
dbname: fairdi_nomad_prod
elastic:
port: 9202
uploadurl: 'https://repository.nomad-coe.eu/uploads/gui/upload'
dbname: fairdi_nomad_prod_v0_7
uploadurl: 'https://repository.nomad-coe.eu/app/gui/upload'
keycloak:
serverUrl: "https://repository.nomad-coe.eu/fairdi/keycloak/auth/"
passwordSecret: 'nomad-keycloak-password'
realmName: 'fairdi_nomad_prod'
clientId: 'nomad_api_dev'
clientSecret: 'nomad-keycloak-prod-api-secret'
admin_user_id: '82efac55-6187-408c-8027-b98580c0e1c5'
volumes:
prefixSize: 1
......@@ -39,3 +52,7 @@ mail:
host: 'mailrelay.mpcdf.mpg.de'
port: 25
from: 'webmaster@nomad-repository.eu'
datacite:
secret: 'nomad-datacite'
enabled: True
images.nomad.tag: "stable"
images.frontend.tag: "stable"
images:
nomad: "v0.7.2"
frontend:
tag: "v0.7.2"
proxy:
nodePort: 30010
......@@ -12,7 +14,10 @@ gui:
app:
replicas: 2
threads: 16
threads: 4
worker: 8
workerClass: "sync"
nomadNodeType: "public"
worker:
replicas: 1
......@@ -44,3 +49,4 @@ volumes:
datacite:
secret: 'nomad-datacite'
enabled: True
......@@ -64,10 +64,12 @@ metadata:
data:
gunicorn.conf: |
secure_scheme_headers = {'X-FORWARDED-PROTOCOL': 'ssl', 'X-FORWARDED-PROTO': 'https', 'X-FORWARDED-SSL': 'on'}
worker_class = '{{ .Values.app.workerClass }}'
threads = {{ .Values.app.threads }}
{{ if ne .Values.app.workerClass "sync" }}
worker_class = '{{ .Values.app.workerClass }}'
threads = {{ .Values.app.threads }}
{{ end }}
worker_connections = 1000
worker = {{ .Values.app.worker }}
workers = {{ .Values.app.worker }}
---
apiVersion: apps/v1
kind: Deployment
......
# Step Nr. Time[fs] Kin.[a.u.] Temp[K] Pot.[a.u.] Cons Qty[a.u.] UsedTime[s]
0 0.000000 0.009975468 300.000000000 -31.297885373 -31.287909904 0.000000000
1 0.500000 0.009980435 300.149355595 -31.297890335 -31.287909901 20.881730749
2 1.000000 0.009995385 300.598964458 -31.297905285 -31.287909900 10.199698742
3 1.500000 0.010020440 301.352455416 -31.297930343 -31.287909903 9.527669955
4 2.000000 0.010055749 302.414350563 -31.297965611 -31.287909862 9.869754617
8
i = 0, time = 0.000, E = -31.2978853728
Si 0.0000000000 0.0000000000 0.0000000000
Si 0.0000000000 2.7153487000 2.7153487000
Si 2.7153487000 2.7153487000 0.0000000000
Si 2.7153487000 0.0000000000 2.7153487000
Si 4.0730231000 1.3576744000 4.0730231000
Si 1.3576744000 1.3576744000 1.3576744000
Si 1.3576744000 4.0730231000 4.0730231000
Si 4.0730231000 4.0730231000 1.3576744000
8
i = 1, time = 0.500, E = -31.2978903354
Si 0.0016383385 -0.0001721499 -0.0006238612
Si -0.0000657767 2.7160970060 2.7143725504
Si 2.7136670506 2.7156518890 -0.0018471601
Si 2.7177623600 -0.0010352377 2.7164197878
Si 4.0752014230 1.3578495204 4.0733437423
Si 1.3577499527 1.3579175611 1.3598641839
Si 1.3566870268 4.0732906063 4.0743347685
Si 4.0694520250 4.0724932049 1.3562283885
8
i = 2, time = 1.000, E = -31.2979052852
Si 0.0032756726 -0.0003437104 -0.0012419769
Si -0.0001344763 2.7168490487 2.7133950184
Si 2.7119819817 2.7159506528 -0.0036953940
Si 2.7201761805 -0.0020699682 2.7174949846
Si 4.0773833263 1.3580258103 4.0736642410
Si 1.3578288759 1.3581590860 1.3620556567
Si 1.3556996868 4.0735588711 4.0756425358
Si 4.0658811525 4.0719626098 1.3547773344
8
i = 3, time = 1.500, E = -31.2979303428
Si 0.0049109769 -0.0005141199 -0.0018486869
Si -0.0002090025 2.7176085071 2.7124147394
Si 2.7102900940 2.7162405146 -0.0055457615
Si 2.7225903290 -0.0031037049 2.7185784065
Si 4.0795723667 1.3582045036 4.0739844738
Si 1.3579144916 1.3583973571 1.3642504927
Si 1.3547124450 4.0738286606 4.0769425026
Si 4.0623106994 4.0714306817 1.3533162334
8
i = 4, time = 2.000, E = -31.2979656110
Si 0.0065431987 -0.0006828491 -0.0024384492
Si -0.0002922176 2.7183789795 2.7114303865
Si 2.7085880403 2.7165169709 -0.0073992905
Si 2.7250049622 -0.0041359798 2.7196741472
Si 4.0817720397 1.3583868923 4.0743043346
Si 1.3580100514 1.3586307841 1.3664503252
Si 1.3537254076 4.0741007430 4.0782307899
Si 4.0587409176 4.0708968590 1.3518401563
&GLOBAL
PROJECT si_md
RUN_TYPE MD
PRINT_LEVEL MEDIUM
&END GLOBAL