Commit 827e261a authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge remote-tracking branch 'origin/normalizer' into dev-merge.

parents 7b0a1bea f87ba5e2
......@@ -92,8 +92,10 @@ tests:
NOMAD_MONGO_HOST: mongo
NOMAD_KEYCLOAK_CLIENT_SECRET: ${CI_KEYCLOAK_TEST_CLIENT_SECRET}
NOMAD_KEYCLOAK_PASSWORD: ${CI_KEYCLOAK_ADMIN_PASSWORD}
NOMAD_SPRINGER_DB_PATH: /nomad/fairdi/db/data/springer.db
script:
- cd /app
- ls /builds
- python -m pytest --cov=nomad -sv tests
except:
refs:
......
......@@ -207,6 +207,10 @@ max_upload_size = 32 * (1024 ** 3)
raw_file_strip_cutoff = 1000
springer_db_relative_path = 'normalizing/data/SM_all08.db'
springer_db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), springer_db_relative_path)
def normalize_loglevel(value, default_level=logging.INFO):
plain_value = value
if plain_value is None:
......
......@@ -33,14 +33,15 @@ There is one ABC for all normalizer:
from typing import List, Any, Iterable, Type
from .normalizer import Normalizer
from .system import SystemNormalizer
from .dos import DosNormalizer
from .fhiaims import FhiAimsBaseNormalizer
from .normalizer import Normalizer
from .optimade import OptimadeNormalizer
from .system import SystemNormalizer
normalizers: Iterable[Type[Normalizer]] = [
SystemNormalizer,
OptimadeNormalizer,
FhiAimsBaseNormalizer
FhiAimsBaseNormalizer,
DosNormalizer
]
SM_all08.db
\ No newline at end of file
SM_all08.db
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .normalizer import Normalizer
import numpy as np
class DosNormalizer(Normalizer):
    """
    Normalizer that stores a normalized copy of the density of states (DOS).

    For every ``section_dos`` that has ``dos_values``, the values are divided by
    the number of atoms times the unit cell volume of the referenced system and
    written back as ``dos_values_normalized`` in the same section.
    """

    def normalize(self, logger=None) -> None:
        """
        Adds ``dos_values_normalized`` to every ``section_dos`` with ``dos_values``.

        Arguments:
            logger: Optional logger; if given, it is bound to this normalizer's
                class name and stored as ``self.logger``.
        """
        if logger is not None:
            self.logger = logger.bind(normalizer=self.__class__.__name__)

        # 'scc': single_configuration_calculation
        section_scc_indices = self._backend.get_sections('section_single_configuration_calculation')

        for scc_index in section_scc_indices:
            section_dos_indices = self._backend.get_sections('section_dos', scc_index)

            for dos_index in section_dos_indices:
                try:
                    dos = self._backend.get_value('dos_values', dos_index)  # a numpy.ndarray
                except KeyError:
                    # section dos without dos_values
                    continue

                # the system (geometry) this calculation refers to
                system_index = self._backend.get_value(
                    'single_configuration_calculation_to_system_ref', scc_index)
                atom_positions = self._backend.get_value('atom_positions', system_index)
                lattice_vectors = self._backend.get_value('lattice_vectors', system_index)

                number_of_atoms = np.shape(atom_positions)[0]
                # The determinant is a *signed* volume: it is negative when the
                # lattice vectors form a left-handed set. Take the absolute value
                # so that the normalization never flips the sign of the DOS.
                unit_cell_volume = abs(np.linalg.det(lattice_vectors))

                # Final quantities
                dos_normed = dos / (number_of_atoms * unit_cell_volume)

                # Add quantities to NOMAD's Metainfo
                self._backend.addArrayValues('dos_values_normalized', dos_normed, dos_index)
......@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import Counter
from typing import Any
import ase
import numpy as np
import json
import re
import os
import sqlite3
import functools
import fractions
......@@ -28,13 +31,34 @@ from nomadcore.structure_types import structure_types_by_spacegroup as str_types
from nomad import utils, config
from nomad.normalizing.normalizer import SystemBasedNormalizer
# use a regular expression to check atom labels; the expression is built from the list of
# all labels sorted by descending length, so that Br is found and not B when searching for Br.
atom_label_re = re.compile('|'.join(
sorted(ase.data.chemical_symbols, key=lambda x: len(x), reverse=True)))
springer_db_connection = None
def open_springer_database():
    """
    Create a global connection to the Springer database in a way that
    each worker opens the database just once.

    Returns:
        The shared ``sqlite3`` connection, or ``None`` (after logging an error)
        if the database file configured in ``config.springer_db_path`` does not
        exist.
    """
    global springer_db_connection
    if springer_db_connection is None:
        # function-scope import, only needed to build the database URI
        import pathlib

        # filepath definition in 'nomad-FAIR/nomad/config.py'
        db_file = config.springer_db_path
        if not os.path.exists(db_file):
            utils.get_logger(__name__).error('Springer database not found')
            return None

        # 'uri=True' alone does not make the connection read-only: a plain path
        # is still opened read-write. The read-only mode has to be requested
        # explicitly through a 'file:' URI with the 'mode=ro' query parameter.
        # 'as_uri()' also percent-encodes characters that are special in URIs.
        db_uri = pathlib.Path(db_file).absolute().as_uri() + '?mode=ro'

        # we lift the thread check because we share the connection among workers
        springer_db_connection = sqlite3.connect(db_uri, check_same_thread=False, uri=True)

    return springer_db_connection
def normalized_atom_labels(atom_labels):
"""
Normalizes the given atom labels: they either are labels right away, or contain
......@@ -46,6 +70,26 @@ def normalized_atom_labels(atom_labels):
for match in [re.search(atom_label_re, atom_label) for atom_label in atom_labels]]
def formula_normalizer(atoms):
    """
    Builds a normalized formula from the chemical symbols of the given atoms.

    Each distinct symbol is followed by its share of all atoms in percent
    (rounded to an integer); the resulting pieces are sorted as strings and
    concatenated, e.g., Tc -> Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33.

    Arguments:
        atoms: An object providing ``get_chemical_symbols()``, e.g. ase atoms.

    Returns:
        The normalized formula as a string (empty if there are no atoms).
    """
    symbol_counts = Counter(atoms.get_chemical_symbols())
    total_count = sum(symbol_counts.values())

    pieces = [
        symbol + str(round(100 * count / total_count))
        for symbol, count in symbol_counts.items()]

    return ''.join(sorted(pieces))
class SystemNormalizer(SystemBasedNormalizer):
"""
......@@ -345,11 +389,97 @@ class SystemNormalizer(SystemBasedNormalizer):
self._backend.addArrayValues('wyckoff_letters_original', orig_wyckoff)
self._backend.addArrayValues('equivalent_atoms_original', orig_equivalent_atoms)
self._backend.closeSection('section_original_system', orig_gid)
self._backend.closeSection('section_symmetry', symmetry_gid)
self.springer_classification(atoms, space_group_number) # Springer Normalizer
self.prototypes(prim_num, prim_wyckoff, space_group_number)
self._backend.closeSection('section_symmetry', symmetry_gid)
def springer_classification(self, atoms, space_group_number):
    """
    Looks the system up in the Springer database and adds all matches to the backend.

    Entries are matched by normalized formula (see ``formula_normalizer``) and
    space group number. For each matching entry one
    ``section_springer_material`` is written with five quantities: id,
    alphabetical formula, url, compound classes, and classifications.
    Nothing is written if the database is not available. A warning is logged
    for each entry whose classification or compound classes differ from those
    of the first entry.

    Arguments:
        atoms: The atoms of the system; passed to ``formula_normalizer``.
        space_group_number: The space group number of the system.
    """
    # SPRINGER NORMALIZER
    normalized_formula = formula_normalizer(atoms)

    springer_db_connection = open_springer_database()
    if springer_db_connection is None:
        return

    # SQL QUERY
    # (this replaces the four queries done in the old 'classify4me_SM_normalizer.py')
    # The values are bound as parameters instead of being formatted into the
    # statement, so quoting is done by sqlite3. The space group number is still
    # passed as text ('%d'), exactly like the literal in the former statement.
    # Two former LEFT JOINs (entry_reference, reference) were dropped: none of
    # the selected columns came from them and a LEFT JOIN cannot remove rows,
    # so the (DISTINCT, grouped) result is unchanged.
    cur = springer_db_connection.cursor()
    try:
        cur.execute("""
            SELECT
                entry.entry_id,
                entry.alphabetic_formula,
                GROUP_CONCAT(DISTINCT compound_classes.compound_class_name),
                GROUP_CONCAT(DISTINCT classification.classification_name)
            FROM entry
            LEFT JOIN entry_compound_class as ecc ON ecc.entry_nr = entry.entry_nr
            LEFT JOIN compound_classes ON ecc.compound_class_nr = compound_classes.compound_class_nr
            LEFT JOIN entry_classification as ec ON ec.entry_nr = entry.entry_nr
            LEFT JOIN classification ON ec.classification_nr = classification.classification_nr
            WHERE entry.normalized_formula = ? and entry.space_group_number = ?
            GROUP BY entry.entry_id;
            """, (normalized_formula, '%d' % space_group_number))
        # 'results' is a list of tuples, i.e. '[(a,b,c,d), ..., (a,b,c,d)]'
        results = cur.fetchall()
    finally:
        cur.close()

    def split_sorted(concatenated):
        # GROUP_CONCAT yields NULL (None) if the LEFT JOINs found no partner rows
        if concatenated is None:
            return []
        return sorted(concatenated.split(','))

    # Storing 'results' in a dictionary; 'spr' means 'springer'
    dbdict = {}
    for spr_id, spr_aformula, compound_classes, classifications in results:
        dbdict[spr_id] = {
            'spr_id': spr_id,
            'spr_aformula': spr_aformula,  # alphabetical formula
            'spr_url': 'http://materials.springer.com/isp/crystallographic/docs/' + spr_id,
            'spr_compound': split_sorted(compound_classes),
            'spr_classification': split_sorted(classifications)}

    # =============
    # SPRINGER's METAINFO UPDATE
    # LAYOUT: Five quantities under 'section_springer_material' for each material ID:
    # id, alphabetical formula, url, compound_class, classification.
    # As per Markus/Luca's emails, we don't expose Springer bib references (Springer's paywall)
    for material in dbdict.values():
        self._backend.openNonOverlappingSection('section_springer_material')

        self._backend.addValue('springer_id', material['spr_id'])
        self._backend.addValue('springer_alphabetical_formula', material['spr_aformula'])
        self._backend.addValue('springer_url', material['spr_url'])
        self._backend.addArrayValues('springer_compound_class', material['spr_compound'])
        self._backend.addArrayValues('springer_classification', material['spr_classification'])

        self._backend.closeNonOverlappingSection('section_springer_material')

    # Check the 'springer_classification' and 'springer_compound_class' information
    # found is the same for all springer_id's: compare the first entry against the rest
    materials = list(dbdict.values())
    if materials:
        first = materials[0]
        for other in materials[1:]:
            same_classification = first['spr_classification'] == other['spr_classification']
            same_compound = first['spr_compound'] == other['spr_compound']
            # warn as soon as either of the two differs
            if not (same_classification and same_compound):
                self.logger.warning('Mismatch in Springer classification or compounds')
def prototypes(self, atomSpecies, wyckoffs, spg_nr):
try:
norm_wyckoff = SystemNormalizer.get_normalized_wyckoff(atomSpecies, wyckoffs)
......
......@@ -60,3 +60,5 @@ data:
client_id: "{{ .Values.keycloak.clientId }}"
client_secret: "*"
password: "*"
springer_db_path: "{{ .Values.springerDbPath }}"
......@@ -150,3 +150,5 @@ volumes:
## Everything else
# The domain configuration, currently there is DFT and EMS
domain: DFT
springerDbPath: /nomad/fairdi/db/data/springer.db
This diff is collapsed.
......@@ -13,6 +13,7 @@
# limitations under the License.
import pytest
import numpy as np
from nomad import datamodel, config
from nomad.parsing import LocalBackend
......@@ -37,6 +38,12 @@ unknown_atom_label = (
fcc_symmetry = (
'parsers/template', 'tests/data/normalizers/fcc_crystal_structure.json')
vasp_parser = (
'parsers/vasp', 'tests/data/parsers/vasp/vasp.xml')
vasp_parser_dos = (
'parsers/vasp', 'tests/data/parsers/vasp/vasp_dos.xml')
glucose_atom_labels = (
'parsers/template', 'tests/data/normalizers/glucose_atom_labels.json')
......@@ -194,3 +201,55 @@ def test_reduced_chemical_formula():
expected_red_chem_formula = 'C6H12O6'
reduced_chemical_formula = backend.get_value('chemical_composition_bulk_reduced')
assert expected_red_chem_formula == reduced_chemical_formula
def test_vasp_incar_system():
    """
    Checks that an INCAR value of the VASP example can be read after normalizing.
    """
    normalized_backend = run_normalize(parse_file(vasp_parser))

    backend_value = normalized_backend.get_value('x_vasp_incar_SYSTEM')
    print("backend_value: ", backend_value)

    # 'SrTiO3' is the material's formula in vasp.xml
    assert 'SrTiO3' == backend_value
def test_springer_normalizer():
    """
    Checks the Springer quantities the normalizer adds for the VASP example.
    """
    normalized_backend = run_normalize(parse_file(vasp_parser))

    # all three quantities are read with the same index (89)
    # NOTE(review): presumably the index of the springer section - confirm
    section_index = 89
    expected_by_quantity = (
        ('springer_id', 'sd_1932539'),
        ('springer_alphabetical_formula', 'O3SrTi'),
        ('springer_url', 'http://materials.springer.com/isp/crystallographic/docs/sd_1932539'),
    )

    for quantity, expected_value in expected_by_quantity:
        backend_value = normalized_backend.get_value(quantity, section_index)
        assert expected_value == backend_value
def test_dos_normalizer():
    """
    Checks that the DOS normalizer wrote normalized DOS values for a VASP example.
    """
    normalized_backend = run_normalize(parse_file(vasp_parser_dos))

    # 'dos_values_normalized' of the section with index 0; only the last value
    # of its first row is compared
    dos_values_normalized = normalized_backend.get_value('dos_values_normalized', 0)
    reference_value = 1.7362195274239454e+47

    # floats are compared with numpy's tolerance based check
    assert np.allclose(dos_values_normalized[0, -1], reference_value)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment