Commit 1995952d authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'v0.8.4' into 'master'

V0.8.4

See merge request !139
parents cc540ed5 8d9744c5
Pipeline #79921 passed with stages
in 20 minutes and 15 seconds
......@@ -33,10 +33,10 @@ class MethodNormalizer():
"""A base class that is used for processing method related information
in the Encyclopedia.
"""
def __init__(self, backend, logger):
self.backend = backend
def __init__(self, entry_archive, logger):
self.logger = logger
self.section_run = backend.entry_archive.section_run[0]
self.entry_archive = entry_archive
self.section_run = entry_archive.section_run[0]
def method_id(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section):
method_dict = RestrictedDict(
......@@ -76,7 +76,7 @@ class MethodNormalizer():
)
# Only calculations from the same upload are grouped
eos_dict['upload_id'] = self.backend.entry_archive.section_metadata.upload_id
eos_dict['upload_id'] = self.entry_archive.section_metadata.upload_id
# Method
eos_dict["method_id"] = method.method_id
......@@ -106,11 +106,11 @@ class MethodNormalizer():
)
# Only calculations from the same upload are grouped
param_dict['upload_id'] = self.backend.entry_archive.section_metadata.upload_id
param_dict['upload_id'] = self.entry_archive.section_metadata.upload_id
# The same code and functional type is required
param_dict['program_name'] = self.backend["program_name"]
param_dict['program_version'] = self.backend["program_version"]
param_dict['program_name'] = self.section_run.program_name
param_dict['program_version'] = self.section_run.program_version
# Get a string representation of the geometry. It is included as the
# geometry should remain the same during parameter variation. By simply
......@@ -166,7 +166,7 @@ class MethodDFTNormalizer(MethodNormalizer):
"""
def core_electron_treatment(self, method: Method) -> None:
treatment = config.services.unavailable_value
code_name = self.backend["program_name"]
code_name = self.section_run.program_name
if code_name is not None:
core_electron_treatments = {
'VASP': 'pseudopotential',
......@@ -382,10 +382,10 @@ class MethodDFTNormalizer(MethodNormalizer):
# Fetch resources
repr_method = context.representative_method
repr_system = context.representative_system
sec_enc = self.backend.entry_archive.section_metadata.encyclopedia
sec_enc = self.entry_archive.section_metadata.encyclopedia
method = sec_enc.method
material = sec_enc.material
settings_basis_set = get_basis_set(context, self.backend, self.logger)
settings_basis_set = get_basis_set(context, self.entry_archive, self.logger)
# Fill metainfo
self.core_electron_treatment(method)
......@@ -421,7 +421,7 @@ class MethodGWNormalizer(MethodDFTNormalizer):
def normalize(self, context: Context) -> None:
# Fetch resources
repr_method = context.representative_method
sec_enc = self.backend.entry_archive.section_metadata.encyclopedia
sec_enc = self.entry_archive.section_metadata.encyclopedia
method = sec_enc.method
# Fill metainfo
......
......@@ -17,7 +17,6 @@ from nomad.datamodel.encyclopedia import (
Properties,
Energies,
)
from nomad.parsing.legacy import Backend
from nomad.metainfo import Section
from nomad.normalizing.encyclopedia.context import Context
......@@ -26,8 +25,8 @@ class PropertiesNormalizer():
"""A base class that is used for processing calculated quantities that
should be extracted to Encyclopedia.
"""
def __init__(self, backend: Backend, logger):
self.backend = backend
def __init__(self, entry_archive, logger):
self.entry_archive = entry_archive
self.logger = logger
def electronic_band_structure(self, properties: Properties, calc_type: str, material_type: str, context: Context, sec_system: Section) -> None:
......@@ -131,7 +130,7 @@ class PropertiesNormalizer():
"""
try:
resolved_section = None
frame_sequences = self.backend.entry_archive.section_run[0].section_frame_sequence
frame_sequences = self.entry_archive.section_run[0].section_frame_sequence
for frame_sequence in reversed(frame_sequences):
thermodynamical_props = frame_sequence.section_thermodynamical_properties
for thermodynamical_prop in thermodynamical_props:
......@@ -224,7 +223,7 @@ class PropertiesNormalizer():
return
# Fetch resources
sec_enc = self.backend.entry_archive.section_metadata.encyclopedia
sec_enc = self.entry_archive.section_metadata.encyclopedia
properties = sec_enc.properties
calc_type = context.calc_type
material_type = context.material_type
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os.path
import json
import numpy as np
......@@ -25,12 +24,9 @@ controlIn_nucleus = 'x_fhi_aims_controlIn_nucleus'
pure_types_json = dict()
files = glob.glob(os.path.join(__file__, "data/*.json"))
for file in files:
pure_types_str = os.path.basename(os.path.split(file)[-1])
with open(file) as data_file:
json_data = json.load(data_file)
for pure_types_str in ['light', 'really_tight', 'tight']:
with open(os.path.join(os.path.dirname(__file__), 'data', pure_types_str + '.json')) as f:
json_data = json.load(f)
section_method = json_data['sections']['section_run-0']['sections']['section_method-0']
pure_types_json[pure_types_str] = section_method[controlIn_basis_set]
......@@ -90,14 +86,13 @@ class FhiAimsBaseNormalizer(Normalizer):
def normalize(self, logger=None) -> None:
super().normalize(logger)
if not self.section_run or self.section_run.program_name != 'FHI-aims':
return
for index in self._backend.get_sections('section_method'):
try:
to_compare = self._backend.get_value(controlIn_basis_set, index)
if to_compare is None:
# not fhi aims data
continue
except KeyError:
for method in self.section_run.section_method:
to_compare = getattr(method, controlIn_basis_set, None)
if to_compare is None:
# not fhi aims data
continue
matrix_hits_int = dict.fromkeys(pure_types_json, 0)
......@@ -112,9 +107,6 @@ class FhiAimsBaseNormalizer(Normalizer):
# matrix_hits[key]=matrix_hits[key]+CompareToDefaults(val[AtomIndex],to_compare[i])
context_uri = '/section_run/0/section_method/%d' % index
self._backend.openContext(context_uri)
closest_base_int = min(matrix_hits_int, key=matrix_hits_int.get)
if (matrix_hits_basis[min(matrix_hits_basis, key=matrix_hits_basis.get)] == 0):
closest_base_base = ''
......@@ -122,18 +114,11 @@ class FhiAimsBaseNormalizer(Normalizer):
closest_base_base = '+'
if (matrix_hits_int[closest_base_int] == 0):
# print(closest_base_int +closest_base_base)
self._backend.addValue('basis_set', closest_base_int + closest_base_base)
method.basis_set = closest_base_int + closest_base_base
elif(matrix_hits_int[closest_base_int] <= 5):
# print('~'+closest_base_int+closest_base_base)
self._backend.addValue('basis_set', '~' + closest_base_int + closest_base_base)
method.basis_set = '~' + closest_base_int + closest_base_base
elif(matrix_hits_int[closest_base_int] > 5):
self._backend.addValue('basis_set', 'custom-' + closest_base_int)
# print('custom-'+closest_base_int)
self._backend.closeContext(context_uri)
self._backend.finishedParsingSession("ParseSuccess", None)
method.basis_set = 'custom-' + closest_base_int
# import setup_paths
......@@ -251,8 +236,6 @@ class FhiAimsBaseNormalizer(Normalizer):
# # matrix_hits[key]=matrix_hits[key]+CompareToDefaults(val[AtomIndex],to_compare[i])
# Copen=backend.openContext(context)
# closest_base_int=min(matrix_hits_int, key=matrix_hits_int.get)
# if (matrix_hits_basis[min(matrix_hits_basis, key=matrix_hits_basis.get)] ==0):
......@@ -269,8 +252,6 @@ class FhiAimsBaseNormalizer(Normalizer):
# backend.addValue("basis_set",'custom-'+closest_base_int)
# # print('custom-'+closest_base_int)
# backend.closeContext(context)
# backend.finishedParsingSession("ParseSuccess", None)
# sys.stdout.flush()
# return
......
......@@ -15,27 +15,27 @@
from abc import ABCMeta, abstractmethod
from typing import List
from nomad.parsing import Backend
from nomad.utils import get_logger
from nomad.metainfo import MSection
from nomad.datamodel import EntryArchive
class Normalizer(metaclass=ABCMeta):
'''
A base class for normalizers. Normalizers work on a :class:`Backend` instance
A base class for normalizers. Normalizers work on a :class:`EntryArchive` section
for read and write. Normalizer instances are reused.
Arguments:
backend: The backend used to read and write data from and to.
entry_archive: The entry_archive root section of the archive to normalize.
'''
domain = 'dft'
''' The domain this normalizer should be used in. The default for all normalizers is 'dft'. '''
def __init__(self, backend: Backend) -> None:
self._backend = backend
def __init__(self, entry_archive: EntryArchive) -> None:
self.entry_archive = entry_archive
try:
self.section_run = backend.entry_archive.section_run[0]
self.section_run = entry_archive.section_run[0]
except (AttributeError, IndexError):
self.section_run = None
self.logger = get_logger(__name__)
......@@ -57,8 +57,8 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
Args:
only_representatives: Will only normalize the `representative` systems.
'''
def __init__(self, backend: Backend, only_representatives: bool = False):
super().__init__(backend)
def __init__(self, entry_archive: EntryArchive, only_representatives: bool = False):
super().__init__(entry_archive)
self.only_representatives = only_representatives
@property
......@@ -73,13 +73,7 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
]
def _normalize_system(self, system, is_representative):
context = '/section_run/0/section_system/%d' % system.m_parent_index
self._backend.openContext(context)
try:
return self.normalize_system(system, is_representative)
finally:
self._backend.closeContext(context)
return self.normalize_system(system, is_representative)
@abstractmethod
def normalize_system(self, system: MSection, is_representative: bool) -> bool:
......@@ -193,6 +187,6 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
# All the rest if requested
if not self.only_representatives:
for isys, system in enumerate(self._backend.entry_archive.section_run[0].section_system):
for isys, system in enumerate(self.section_run.section_system):
if isys != repr_sys_idx:
self.__normalize_system(system, False, logger)
......@@ -12,17 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Dict, cast
from typing import Any, Dict
import numpy as np
import re
import ase.data
from string import ascii_uppercase
import pint.quantity
from nomad.parsing.legacy import Backend
from nomad.normalizing.normalizer import SystemBasedNormalizer
from nomad.units import ureg
from nomad.datamodel import OptimadeEntry, Species, DFTMetadata, EntryMetadata
from nomad.datamodel.metainfo.public import section_system
species_re = re.compile(r'^([A-Z][a-z]?)(\d*)$')
......@@ -43,16 +43,15 @@ class OptimadeNormalizer(SystemBasedNormalizer):
Normalizes geometry, classifies, system_type, and runs symmetry analysis.
'''
backend = cast(Backend, self._backend)
if backend.entry_archive.section_metadata is None:
backend.entry_archive.m_create(EntryMetadata)
if backend.entry_archive.section_metadata.dft is None:
backend.entry_archive.section_metadata.m_create(DFTMetadata)
optimade = backend.entry_archive.section_metadata.dft.m_create(OptimadeEntry)
if self.entry_archive.section_metadata is None:
self.entry_archive.m_create(EntryMetadata)
if self.entry_archive.section_metadata.dft is None:
self.entry_archive.section_metadata.m_create(DFTMetadata)
optimade = self.entry_archive.section_metadata.dft.m_create(OptimadeEntry)
def get_value(key: str, default: Any = None, numpy: bool = False, unit=None) -> Any:
def get_value(quantity_def, default: Any = None, numpy: bool = False, unit=None) -> Any:
try:
value = self._backend.get_value(key, index)
value = self.section_run.section_system[-1].m_get(quantity_def)
if type(value) == np.ndarray and not numpy:
return value.tolist()
if isinstance(value, list) and numpy:
......@@ -70,7 +69,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
from nomad.normalizing.system import normalized_atom_labels
nomad_species = get_value('atom_labels')
nomad_species = get_value(section_system.atom_labels)
# elements
atoms = normalized_atom_labels(nomad_species)
......@@ -89,8 +88,8 @@ class OptimadeNormalizer(SystemBasedNormalizer):
for element in optimade.elements]
# formulas
optimade.chemical_formula_reduced = get_value('chemical_composition_reduced')
optimade.chemical_formula_hill = get_value('chemical_composition_bulk_reduced')
optimade.chemical_formula_reduced = get_value(section_system.chemical_composition_reduced)
optimade.chemical_formula_hill = get_value(section_system.chemical_composition_bulk_reduced)
optimade.chemical_formula_descriptive = optimade.chemical_formula_hill
optimade.chemical_formula_anonymous = ''
for i in range(len(optimade.elements)):
......@@ -102,11 +101,11 @@ class OptimadeNormalizer(SystemBasedNormalizer):
# sites
optimade.nsites = len(nomad_species)
optimade.species_at_sites = nomad_species
optimade.lattice_vectors = get_value('lattice_vectors', numpy=True, unit=ureg.m)
optimade.cartesian_site_positions = get_value('atom_positions', numpy=True, unit=ureg.m)
optimade.lattice_vectors = get_value(section_system.lattice_vectors, numpy=True, unit=ureg.m)
optimade.cartesian_site_positions = get_value(section_system.atom_positions, numpy=True, unit=ureg.m)
optimade.dimension_types = [
1 if value else 0
for value in get_value('configuration_periodic_dimensions')]
for value in get_value(section_system.configuration_periodic_dimensions)]
# species
for species_label in set(nomad_species):
......
......@@ -24,6 +24,9 @@ from matid.classifications import Class0D, Atom, Class1D, Material2D, Surface, C
from nomad import atomutils, archive
from nomad import utils, config
from nomad.datamodel.metainfo.public import section_symmetry, section_std_system, \
section_primitive_system, section_original_system, section_springer_material, \
section_prototype, section_system
from .normalizer import SystemBasedNormalizer
......@@ -89,10 +92,13 @@ class SystemNormalizer(SystemBasedNormalizer):
Returns: True, iff the normalization was successful
'''
if self.section_run is None:
self.logger.error('section_run is not present.')
return False
def get_value(key: str, default: Any = None, numpy: bool = True) -> Any:
def get_value(quantity_def, default: Any = None, numpy: bool = True) -> Any:
try:
value = self._backend.get_value(key, system.m_parent_index)
value = system.m_get(quantity_def)
if not numpy and type(value).__module__ == np.__name__:
value = value.tolist()
......@@ -103,18 +109,15 @@ class SystemNormalizer(SystemBasedNormalizer):
except (KeyError, IndexError):
return default
def set_value(key: str, value: Any):
self._backend.addValue(key, value)
if is_representative:
self._backend.addValue('is_representative', is_representative)
system.is_representative = is_representative
# analyze atoms labels
atom_labels = get_value('atom_labels', numpy=False)
atom_labels = get_value(section_system.atom_labels, numpy=False)
if atom_labels is not None:
atom_labels = normalized_atom_labels(atom_labels)
atom_species = get_value('atom_species', numpy=False)
atom_species = get_value(section_system.atom_species, numpy=False)
if atom_labels is None and atom_species is None:
self.logger.warn('system has neither atom species nor labels')
return False
......@@ -127,7 +130,7 @@ class SystemNormalizer(SystemBasedNormalizer):
self.logger.error('system has atom species that are out of range')
return False
self._backend.addArrayValues('atom_labels', atom_labels)
system.atom_labels = atom_labels
# At this point we should have atom labels.
try:
......@@ -144,7 +147,7 @@ class SystemNormalizer(SystemBasedNormalizer):
if atom_species is None:
atom_species = atoms.get_atomic_numbers().tolist()
self._backend.addArrayValues('atom_species', atom_species)
system.atom_species = atom_species
else:
if not isinstance(atom_species, list):
atom_species = [atom_species]
......@@ -153,14 +156,14 @@ class SystemNormalizer(SystemBasedNormalizer):
'atom species do not match labels',
atom_labels=atom_labels[:10], atom_species=atom_species[:10])
atom_species = atoms.get_atomic_numbers().tolist()
set_value('atom_species', atom_species)
system.atom_species = atom_species
# periodic boundary conditions
pbc = get_value('configuration_periodic_dimensions', numpy=False)
pbc = get_value(section_system.configuration_periodic_dimensions, numpy=False)
if pbc is None:
pbc = [False, False, False]
self.logger.warning('missing configuration_periodic_dimensions')
set_value('configuration_periodic_dimensions', pbc)
system.configuration_periodic_dimensions = pbc
try:
atoms.set_pbc(pbc)
except Exception as e:
......@@ -169,12 +172,12 @@ class SystemNormalizer(SystemBasedNormalizer):
return False
# formulas
set_value('chemical_composition', atoms.get_chemical_formula(mode='all'))
set_value('chemical_composition_reduced', atoms.get_chemical_formula(mode='reduce'))
set_value('chemical_composition_bulk_reduced', atoms.get_chemical_formula(mode='hill'))
system.chemical_composition = atoms.get_chemical_formula(mode='all')
system.chemical_composition_reduced = atoms.get_chemical_formula(mode='reduce')
system.chemical_composition_bulk_reduced = atoms.get_chemical_formula(mode='hill')
# positions
atom_positions = get_value('atom_positions', None, numpy=True)
atom_positions = get_value(section_system.atom_positions, numpy=True)
if atom_positions is None:
self.logger.warning('no atom positions, skip further system analysis')
return False
......@@ -191,11 +194,11 @@ class SystemNormalizer(SystemBasedNormalizer):
return False
# lattice vectors
lattice_vectors = get_value('lattice_vectors', numpy=True)
lattice_vectors = get_value(section_system.lattice_vectors, numpy=True)
if lattice_vectors is None:
lattice_vectors = get_value('simulation_cell', numpy=True)
lattice_vectors = get_value(section_system.simulation_cell, numpy=True)
if lattice_vectors is not None:
set_value('lattice_vectors', lattice_vectors)
system.lattice_vectors = lattice_vectors
if lattice_vectors is None:
if any(pbc):
self.logger.error('no lattice vectors but periodicity', pbc=pbc)
......@@ -213,7 +216,7 @@ class SystemNormalizer(SystemBasedNormalizer):
atoms.cell.tolist() if atoms.cell is not None else None,
atoms.pbc.tolist()]
configuration_id = utils.hash(json.dumps(configuration).encode('utf-8'))
set_value('configuration_raw_gid', configuration_id)
system.configuration_raw_gid = configuration_id
if is_representative:
# Save the Atoms as a temporary variable
......@@ -226,8 +229,7 @@ class SystemNormalizer(SystemBasedNormalizer):
system_size=len(atoms)):
self.system_type_analysis(atoms)
system_type = self._backend.get_value("system_type")
system_type = system.system_type
# Symmetry analysis
if atom_positions is not None and (lattice_vectors is not None or not any(pbc)) and system_type == "bulk":
with utils.timer(
......@@ -240,7 +242,7 @@ class SystemNormalizer(SystemBasedNormalizer):
def system_type_analysis(self, atoms: Atoms) -> None:
'''
Determine the system type with MatID. Write the system type to the
backend.
entry_archive.
Args:
atoms: The structure to analyse
......@@ -248,7 +250,7 @@ class SystemNormalizer(SystemBasedNormalizer):
system_type = config.services.unavailable_value
if len(atoms) <= config.normalize.system_classification_with_clusters_threshold:
try:
classifier = Classifier(cluster_threshold=config.normalize.cluster_threshold)
classifier = Classifier(radii="covalent", cluster_threshold=config.normalize.cluster_threshold)
cls = classifier.classify(atoms)
except Exception as e:
self.logger.error(
......@@ -269,22 +271,23 @@ class SystemNormalizer(SystemBasedNormalizer):
system_type = '2D'
else:
self.logger.info("system type analysis not run due to large system size")
self._backend.addValue('system_type', system_type)
idx = self.section_run.m_cache["representative_system_idx"]
self.section_run.section_system[idx].system_type = system_type
self.section_run.section_system[-1].system_type = system_type
def symmetry_analysis(self, system, atoms: ase.Atoms) -> None:
'''Analyze the symmetry of the material being simulated. Only performed
for bulk materials.
We feed in the parsed values in section_system to the symmetry
analyzer. The analysis results are written to the backend.
analyzer. The analysis results are written to the entry_archive.
Args:
atoms: The atomistic structure to analyze.
Returns:
None: The method should write symmetry variables
to the backend which is member of this class.
to the entry_archive, which is a member of this class.
'''
# Try to use MatID's symmetry analyzer to analyze the ASE object.
try:
......@@ -329,7 +332,8 @@ class SystemNormalizer(SystemBasedNormalizer):
# Write data extracted from MatID's symmetry analysis to the
# representative section_system.
sec_symmetry = self._backend.openSection("section_symmetry", return_section=True)
sec_symmetry = system.m_create(section_symmetry)
sec_symmetry.m_cache["symmetry_analyzer"] = symm
sec_symmetry.symmetry_method = 'MatID (spg)'
......@@ -343,21 +347,21 @@ class SystemNormalizer(SystemBasedNormalizer):
sec_symmetry.origin_shift = origin_shift
sec_symmetry.transformation_matrix = transform
sec_std = self._backend.openSection("section_std_system", return_section=True)
sec_std = sec_symmetry.m_create(section_std_system)
sec_std.lattice_vectors_std = conv_cell
sec_std.atom_positions_std = conv_pos
sec_std.atomic_numbers_std = conv_num
sec_std.wyckoff_letters_std = conv_wyckoff
sec_std.equivalent_atoms_std = conv_equivalent_atoms
sec_prim = self._backend.openSection("section_primitive_system", return_section=True)
sec_prim = sec_symmetry.m_create(section_primitive_system)
sec_prim.lattice_vectors_primitive = prim_cell
sec_prim.atom_positions_primitive = prim_pos
sec_prim.atomic_numbers_primitive = prim_num
sec_prim.wyckoff_letters_primitive = prim_wyckoff
sec_prim.equivalent_atoms_primitive = prim_equivalent_atoms
sec_orig = self._backend.openSection("section_original_system", return_section=True)
sec_orig = sec_symmetry.m_create(section_original_system)
sec_orig.wyckoff_letters_original = orig_wyckoff
sec_orig.equivalent_atoms_original = orig_equivalent_atoms
......@@ -367,25 +371,24 @@ class SystemNormalizer(SystemBasedNormalizer):
def springer_classification(self, atoms, space_group_number):
normalized_formula = formula_normalizer(atoms)
springer_data = query_springer_data(normalized_formula, space_group_number)
idx = self.section_run.m_cache["representative_system_idx"]
for material in springer_data.values():
self._backend.openNonOverlappingSection('section_springer_material')
sec_springer_mat = self.section_run.section_system[idx].m_create(section_springer_material)
self._backend.addValue('springer_id', material['spr_id'])
self._backend.addValue('springer_alphabetical_formula', material['spr_aformula'])
self._backend.addValue('springer_url', material['spr_url'])
sec_springer_mat.springer_id = material['spr_id']
sec_springer_mat.springer_alphabetical_formula = material['spr_aformula']
sec_springer_mat.springer_url = material['spr_url']
compound_classes = material['spr_compound']
if compound_classes is None:
compound_classes = []
self._backend.addArrayValues('springer_compound_class', compound_classes)
sec_springer_mat.springer_compound_class = compound_classes
classifications = material['spr_classification']
if classifications is None:
classifications = []
self._backend.addArrayValues('springer_classification', classifications)
self._backend.closeNonOverlappingSection('section_springer_material')
sec_springer_mat.springer_classification = classifications
# Check the 'springer_classification' and 'springer_compound_class' information
# found is the same for all springer_id's
......@@ -424,7 +427,8 @@ class SystemNormalizer(SystemBasedNormalizer):
aflow_prototype_name,
protoDict.get("Pearsons Symbol", "-")
)
sec_prototype = self._backend.openSection("section_prototype", return_section=True)
idx = self.section_run.m_cache["representative_system_idx"]
sec_prototype = self.section_run.section_system[idx].m_create(section_prototype)
sec_prototype.prototype_label = prototype_label
sec_prototype.prototype_aflow_id = aflow_prototype_id
sec_prototype.prototype_aflow_url = aflow_prototype_url
......
......@@ -70,6 +70,6 @@ based on nomad@fairdi's metainfo: