Commit 1446182e authored by Lauri Himanen
Browse files

Split the EncyclopediaNormalizer into meaningful submodules, added new module...

Split the EncyclopediaNormalizer into meaningful submodules, added new module atomutils.py at root level: contains utilities for atomistic data that are used across normalizers and possibly parsers.
parent daffd20a
Pipeline #71356 failed with stages
in 7 minutes and 40 seconds
......@@ -40,7 +40,7 @@ from .fhiaims import FhiAimsBaseNormalizer
from .normalizer import Normalizer
from .optimade import OptimadeNormalizer
from .system import SystemNormalizer
from .encyclopedia import EncyclopediaNormalizer
from .encyclopedia.encyclopedia import EncyclopediaNormalizer
normalizers: Iterable[Type[Normalizer]] = [
SystemNormalizer,
......
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -8,7 +8,7 @@ from nomad.parsing.backend import Section
from nomad.utils import RestrictedDict
def get_basis_set_settings(context, backend: LocalBackend, logger) -> RestrictedDict:
def get_basis_set(context, backend: LocalBackend, logger) -> RestrictedDict:
"""Decide which type of basis set settings are applicable to the entry and
return a corresponding settings as a RestrictedDict.
......@@ -22,19 +22,19 @@ def get_basis_set_settings(context, backend: LocalBackend, logger) -> Restricted
RestrictedDict. If no suitable basis set settings could be identified,
returns None.
"""
settings: SettingsBasisSet = None
settings: BasisSet = None
program_name = backend.get('program_name')
if program_name == "exciting":
settings = SettingsBasisSetExciting(context, backend, logger)
settings = BasisSetExciting(context, backend, logger)
elif program_name == "FHI-aims":
settings = SettingsBasisSetFHIAims(context, backend, logger)
settings = BasisSetFHIAims(context, backend, logger)
else:
return None
return settings.to_dict()
class SettingsBasisSet(ABC):
class BasisSet(ABC):
"""Abstract base class for basis set settings. The idea is to create
subclasses that inherit this class and hierarchically add new mandatory and
optional settings with the setup()-function.
......@@ -69,7 +69,7 @@ class SettingsBasisSet(ABC):
return mandatory, optional
class SettingsBasisSetFHIAims(SettingsBasisSet):
class BasisSetFHIAims(BasisSet):
"""Basis set settings for 'FHI-Aims' (code-dependent).
"""
def setup(self) -> Tuple:
......@@ -134,7 +134,7 @@ class SettingsBasisSetFHIAims(SettingsBasisSet):
yield k
class SettingsBasisSetExciting(SettingsBasisSet):
class BasisSetExciting(BasisSet):
"""Basis set settings for 'Exciting' (code-dependent).
"""
def setup(self) -> Tuple:
......
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class Context():
    """Plain value holder for the context of one Encyclopedia entry.

    The constructor stores every argument unchanged under an attribute of
    the same name. In addition, ``greatest_common_divisor`` is created and
    initialised to ``None``; nothing in this class ever fills it in.

    Args:
        material_type: Identifier of the material type of the entry.
        method_type: Identifier of the method type of the entry.
        calc_type: Identifier of the calculation type of the entry.
        representative_system: The system selected as representative.
        representative_method: The method selected as representative.
        representative_scc: The single configuration calculation selected
            as representative.
        representative_scc_idx: Index of the representative single
            configuration calculation.
    """
    def __init__(
            self,
            material_type: str,
            method_type: str,
            calc_type: str,
            representative_system,
            representative_method,
            representative_scc,
            representative_scc_idx,
    ):
        # Not known at construction time, starts out empty.
        self.greatest_common_divisor: int = None

        # Classification of the entry.
        self.material_type, self.method_type, self.calc_type = (
            material_type,
            method_type,
            calc_type,
        )

        # Representative sections picked for the entry.
        self.representative_system, self.representative_method = (
            representative_system,
            representative_method,
        )
        self.representative_scc, self.representative_scc_idx = (
            representative_scc,
            representative_scc_idx,
        )
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from nomad.normalizing.normalizer import (
Normalizer,
s_run,
s_scc,
s_system,
s_method,
s_frame_sequence,
r_frame_sequence_to_sampling,
s_sampling_method,
r_frame_sequence_local_frames,
)
from nomad.metainfo.encyclopedia import (
Encyclopedia,
Material,
Method,
Properties,
Calculation,
)
from nomad.parsing.backend import LocalBackend
from nomad.normalizing.encyclopedia.context import Context
from nomad.normalizing.encyclopedia.material import MaterialBulkNormalizer, Material2DNormalizer, Material1DNormalizer
from nomad.normalizing.encyclopedia.method import MethodDFTNormalizer, MethodGWNormalizer
from nomad.normalizing.encyclopedia.properties import PropertiesNormalizer
from nomad import config
# Multiplicative factor for converting an energy from joule to Rydberg
# (1 J is approximately 4.587425e17 Ry).
J_to_Ry = 4.587425e+17
class EncyclopediaNormalizer(Normalizer):
    """
    This normalizer emulates the functionality of the old Encyclopedia backend.
    The data used by the encyclopedia have been assigned under new metainfo
    within a new section called "Encyclopedia". In the future these separate
    metainfos could be absorbed into the existing metainfo hierarchy.
    """
    def __init__(self, backend: LocalBackend):
        """Store the backend that is read from and written to.

        Args:
            backend: The backend holding the parsed data of the entry.
        """
        super().__init__(backend)
        self.backend: LocalBackend = backend

    def calc_type(self, calc: Calculation) -> str:
        """Decides what type of calculation this is: single_point, md,
        geometry_optimization, etc.

        Args:
            calc: The Calculation section. Its ``calculation_type`` is set to
                the detected type, except on the early exits described below.

        Returns:
            One of the values of the ``Calculation.calculation_type`` enum.
            The ``unavailable`` value is returned when the type could not be
            decided. When a single frame sequence exists but its sampling
            method reference or its local frames are missing or empty, the
            function returns ``unavailable`` immediately without touching
            ``calc``.
        """
        calc_enums = Calculation.calculation_type.type
        calc_type = calc_enums.unavailable

        # A backend without these sections is treated as having none of them.
        try:
            sccs = self._backend[s_scc]
        except Exception:
            sccs = []
        try:
            frame_sequences = self._backend[s_frame_sequence]
        except Exception:
            frame_sequences = []

        n_scc = len(sccs)
        n_frame_seq = len(frame_sequences)

        # No sequences, only a few calculations
        if n_scc <= 3 and n_frame_seq == 0:
            program_name = self._backend["program_name"]
            if program_name == "elastic":
                # TODO move to taylor expansion as soon as data is correct in archive
                calc_type = calc_enums.elastic_constants
            else:
                calc_type = calc_enums.single_point

        # One sequence. Currently calculations with multiple sequences are
        # unsupported.
        elif n_frame_seq == 1:
            frame_seq = frame_sequences[0]

            # See if sampling_method is present
            try:
                i_sampling_method = frame_seq[r_frame_sequence_to_sampling]
            except KeyError:
                self.logger.info(
                    "Cannot determine encyclopedia run type because missing "
                    "value for frame_sequence_to_sampling_ref."
                )
                return calc_type

            # See if local frames are present
            try:
                frames = frame_seq[r_frame_sequence_local_frames]
            except KeyError:
                self.logger.info(
                    "section_frame_sequence_local_frames not found although a "
                    "frame_sequence exists."
                )
                return calc_type
            if len(frames) == 0:
                self.logger.info("No frames referenced in section_frame_sequence_local_frames.")
                return calc_type

            section_sampling_method = self._backend[s_sampling_method][i_sampling_method]
            sampling_method = section_sampling_method["sampling_method"]

            # The sampling methods are mutually exclusive. Any other value
            # leaves the type as unavailable.
            if sampling_method == "molecular_dynamics":
                calc_type = calc_enums.molecular_dynamics
            elif sampling_method == "geometry_optimization":
                calc_type = calc_enums.geometry_optimization
            elif sampling_method == "taylor_expansion":
                calc_type = calc_enums.phonon_calculation

        calc.calculation_type = calc_type
        return calc_type

    def material_type(self, material: Material) -> tuple:
        """Decides the material type based on the representative system.

        The index of the representative system is read from the ``tmp``
        storage of the first ``section_run``. One- and two-dimensional systems
        are accepted as such. A bulk system is only accepted when its first
        ``section_symmetry`` can be accessed.

        Args:
            material: The Material section. Its ``material_type`` is always
                set to the returned type.

        Returns:
            A tuple ``(system, material_type)``. ``system`` is the
            representative system section, or None if no index was available
            or the section lookup failed. ``material_type`` is the accepted
            system type, or ``config.services.unavailable_value`` otherwise.
        """
        # Try to fetch representative system
        system = None
        material_type = config.services.unavailable_value
        material_enums = Material.material_type.type
        system_idx = self._backend["section_run"][0].tmp["representative_system_idx"]
        if system_idx is not None:
            # Try to find system type information from backend for the selected system.
            try:
                system = self._backend[s_system][system_idx]
                stype = system["system_type"]
            except KeyError:
                pass
            else:
                if stype == material_enums.one_d or stype == material_enums.two_d:
                    material_type = stype
                # For bulk systems we also ensure that the symmetry information is available
                if stype == material_enums.bulk:
                    try:
                        system["section_symmetry"][0]
                    except (KeyError, IndexError):
                        self.logger.info("Symmetry information is not available for a bulk system. No Encylopedia entry created.")
                    else:
                        material_type = stype

        material.material_type = material_type
        return system, material_type

    def method_type(self, method: Method) -> tuple:
        """Decides the method type and selects the representative method.

        With exactly one method section, that section is used and its
        ``electronic_structure_method`` is the type. With several sections,
        the first one whose ``electronic_structure_method`` is "G0W0" or
        "scGW" is used with type "GW", and the search stops there. Otherwise
        each section that has ``section_method_to_method_refs`` is examined
        together with the methods it links to with kind "core_settings". The
        last ``electronic_structure_method`` found this way becomes the type,
        and the examined section becomes the representative method.

        Args:
            method: The Method section. Its ``method_type`` is always set to
                the returned type.

        Returns:
            A tuple ``(repr_method, method_id)``. ``repr_method`` is None and
            ``method_id`` is ``config.services.unavailable_value`` when
            nothing could be determined.
        """
        repr_method = None
        method_id = config.services.unavailable_value
        methods = self._backend[s_method]
        n_methods = len(methods)

        if n_methods == 1:
            repr_method = methods[0]
            method_id = repr_method.get("electronic_structure_method", config.services.unavailable_value)
        elif n_methods > 1:
            # The sections fetched above are reused instead of asking the
            # backend for them a second time.
            for sec_method in methods:
                # GW
                electronic_structure_method = sec_method.get("electronic_structure_method", None)
                if electronic_structure_method in {"G0W0", "scGW"}:
                    repr_method = sec_method
                    method_id = "GW"
                    break

                # Methods linked to each other through references. Get all
                # linked methods, try to get electronic_structure_method from
                # each.
                try:
                    refs = sec_method["section_method_to_method_refs"]
                except KeyError:
                    pass
                else:
                    linked_methods = [sec_method]
                    for ref in refs:
                        method_to_method_kind = ref["method_to_method_kind"]
                        method_to_method_ref = ref["method_to_method_ref"]
                        if method_to_method_kind == "core_settings":
                            linked_methods.append(methods[method_to_method_ref])

                    for i_method in linked_methods:
                        try:
                            electronic_structure_method = i_method["electronic_structure_method"]
                        except KeyError:
                            pass
                        else:
                            repr_method = sec_method
                            method_id = electronic_structure_method

        method.method_type = method_id
        return repr_method, method_id

    def fill(self, context: Context):
        """Runs the material, method and properties normalizers.

        The material and method normalizers are chosen from the types stored
        in the context. If a type has no matching normalizer, that step is
        skipped. The properties normalizer is always run.

        Args:
            context: The context describing the entry being normalized.
        """
        # Fill structure related metainfo
        struct: Any = None
        if context.material_type == Material.material_type.type.bulk:
            struct = MaterialBulkNormalizer(self.backend, self.logger)
        elif context.material_type == Material.material_type.type.two_d:
            struct = Material2DNormalizer(self.backend, self.logger)
        elif context.material_type == Material.material_type.type.one_d:
            struct = Material1DNormalizer(self.backend, self.logger)
        if struct is not None:
            struct.normalize(context)

        # Fill method related metainfo
        method = None
        if context.method_type == Method.method_type.type.DFT or context.method_type == Method.method_type.type.DFTU:
            method = MethodDFTNormalizer(self._backend, self.logger)
        elif context.method_type == Method.method_type.type.GW:
            method = MethodGWNormalizer(self._backend, self.logger)
        if method is not None:
            method.normalize(context)

        # Fill properties related metainfo
        properties = PropertiesNormalizer(self.backend, self.logger)
        properties.normalize(context)

    def normalize(self, logger=None) -> None:
        """The caller will automatically log if the normalizer succeeds or ends
        up with an exception.

        Creates the Encyclopedia section with its subsections, determines the
        calculation, material and method types, and stops with an info log
        entry as soon as one of them is unsupported. Otherwise the section is
        added to the backend and filled.

        Args:
            logger: Passed on to the ``normalize`` of the parent class.

        Raises:
            Exception: Any exception raised during the normalization is logged
                as a failure and then re-raised.
        """
        try:
            super().normalize(logger)

            # Initialise metainfo structure
            sec_enc = Encyclopedia()
            material = sec_enc.m_create(Material)
            method = sec_enc.m_create(Method)
            sec_enc.m_create(Properties)
            calc = sec_enc.m_create(Calculation)

            # Determine run type, stop if unknown
            # NOTE(review): calc_type() reports failure with the enum member
            # "unavailable"; this check assumes that it compares equal to
            # config.services.unavailable_value. Confirm.
            calc_type = self.calc_type(calc)
            if calc_type == config.services.unavailable_value:
                self.logger.info(
                    "Unsupported run type for encyclopedia, encyclopedia metainfo not created.",
                    enc_status="unsupported_calc_type",
                )
                return

            # Get the system type, stop if unknown
            material_enums = Material.material_type.type
            representative_system, material_type = self.material_type(material)
            supported_materials = (material_enums.bulk, material_enums.two_d, material_enums.one_d)
            if material_type not in supported_materials:
                self.logger.info(
                    "Unsupported system type for encyclopedia, encyclopedia metainfo not created.",
                    enc_status="unsupported_material_type",
                )
                return

            # Get the method type, stop if unknown
            representative_method, method_type = self.method_type(method)
            if method_type == config.services.unavailable_value:
                self.logger.info(
                    "Unsupported method type for encyclopedia, encyclopedia metainfo not created.",
                    enc_status="unsupported_method_type",
                )
                return

            # Get representative scc. The stored index may be None, exactly
            # like the representative system index that material_type()
            # guards against. Using None as an index would raise an error that
            # is not covered by the handler below and would abort the whole
            # normalization, so it is treated as "no representative scc".
            representative_scc = None
            representative_scc_idx = None
            try:
                representative_scc_idx = self._backend[s_run][0].tmp["representative_scc_idx"]
                if representative_scc_idx is not None:
                    representative_scc = self._backend[s_scc][representative_scc_idx]
            except (KeyError, IndexError):
                representative_scc = None
                representative_scc_idx = None

            # Create one context that holds all details
            context = Context(
                material_type=material_type,
                method_type=method_type,
                calc_type=calc_type,
                representative_system=representative_system,
                representative_method=representative_method,
                representative_scc=representative_scc,
                representative_scc_idx=representative_scc_idx,
            )

            # Put the encyclopedia section into backend
            self._backend.add_mi2_section(sec_enc)
            self.fill(context)
        except Exception:
            self.logger.error(
                "Failed to create an Encyclopedia entry due to an unhandlable exception.",
                enc_status="failure",
            )
            raise  # Reraise for the caller to log the exception as well
        else:
            # Only reached when the try block ran to its end. The early
            # returns for unsupported entries skip this branch.
            self.logger.info(
                "Successfully created metainfo for Encyclopedia.",
                enc_status="success",
            )
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
from abc import abstractmethod
from collections import OrderedDict
import numpy as np
from nomad.normalizing.normalizer import (
s_method,
s_scc,
)
from nomad.metainfo.encyclopedia import (
Encyclopedia,
Material,
Method,
)
from nomad.parsing.backend import Section
from nomad.normalizing.encyclopedia.basisset import get_basis_set
from nomad.normalizing.encyclopedia.context import Context
from nomad.utils import RestrictedDict
from nomad import config
# Multiplicative factor for converting an energy from joule to Rydberg
# (1 J is approximately 4.587425e17 Ry).
J_to_Ry = 4.587425e+17
class MethodNormalizer:
    """Common base for the normalizers that process method related
    information in the Encyclopedia.

    The class keeps the backend and the logger and provides the hashes that
    are shared between methods. Subclasses supply ``method_hash_dict``,
    ``group_parametervariation_hash_dict`` and ``normalize``.

    NOTE(review): those three are decorated with ``abstractmethod``, but this
    class does not derive from ``abc.ABC``. The decorators therefore do not
    prevent instantiation, and the default implementations return None.
    """
    def __init__(self, backend, logger):
        """Remember the backend to read from and the logger to report to."""
        self.logger = logger
        self.backend = backend

    def method_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section):
        """Compute the method hash and store it in ``method.method_hash``.

        The hash covers the program name and the subclass specific settings
        returned by ``method_hash_dict``. If the validation of the collected
        values fails with a KeyError or ValueError, an info message is logged
        and no hash is stored.
        """
        hash_input = RestrictedDict(
            mandatory_keys=["program_name", "subsettings"],
            forbidden_values=[None],
        )
        hash_input["program_name"] = self.backend["program_name"]

        # Settings that only the concrete subclass knows about.
        hash_input["subsettings"] = self.method_hash_dict(method, settings_basis_set, repr_method)

        # Only a completely filled dictionary is hashed.
        try:
            hash_input.check(recursive=True)
        except (KeyError, ValueError) as err:
            self.logger.info("Could not create method hash, missing required information: {}".format(str(err)))
            return
        method.method_hash = hash_input.hash()

    @abstractmethod
    def method_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
        """Return the subclass specific settings that enter the method hash."""
        pass

    def group_eos_hash(self, method: Method, material: Material, repr_method: Section):
        """Compute the equation of state group hash and store it in
        ``method.group_eos_hash``.

        The hash covers the upload id, the method hash and the formula of the
        material. If the validation of the collected values fails with a
        KeyError or ValueError, an info message is logged and no hash is
        stored. ``repr_method`` is accepted but not used.
        """
        eos_input = RestrictedDict(
            mandatory_keys=("upload_id", "method_hash", "formula"),
            forbidden_values=[None],
        )

        # Grouping is restricted to calculations of one upload.
        eos_input["upload_id"] = self.backend["section_entry_info"][0]["upload_id"]

        # The method has to be the same ...
        eos_input["method_hash"] = method.method_hash

        # ... and so has the formula (maybe even the symmetries).
        eos_input["formula"] = material.formula

        try:
            eos_input.check(recursive=True)
        except (KeyError, ValueError) as err:
            self.logger.info("Could not create EOS hash, missing required information: {}".format(str(err)))
            return
        method.group_eos_hash = eos_input.hash()

    def group_parametervariation_hash(self, method: Method, settings_basis_set: RestrictedDict, repr_system: Section, repr_method: Section):
        """Compute the parameter variation group hash and store it in
        ``method.group_parametervariation_hash``.

        The hash covers the upload id, the program name and version, a string
        representation of the geometry of ``repr_system`` and the subclass
        specific settings returned by
        ``group_parametervariation_hash_dict``. If the validation of the
        collected values fails with a KeyError or ValueError, an info message
        is logged and no hash is stored.
        """
        def scaled_repr(values):
            # Scale by 1e10 (conversion to Angstrom according to the original
            # comment) and print with six decimals on a single line.
            return np.array2string(
                values * 1e10,
                formatter={"float_kind": lambda x: "%.6f" % x},
            ).replace("\n", "")

        variation_input = RestrictedDict(
            mandatory_keys=[
                "upload_id",
                "program_name",
                "program_version",
                "settings_geometry",
                "subsettings",
            ],
            forbidden_values=[None],
        )

        # Grouping is restricted to calculations of one upload.
        variation_input["upload_id"] = self.backend["section_entry_info"][0]["upload_id"]

        # Code name and version have to agree.
        variation_input["program_name"] = self.backend["program_name"]
        variation_input["program_version"] = self.backend["program_version"]

        # The geometry is expected to stay fixed during a parameter
        # variation. Labels, positions and cell are turned into strings as
        # they are, which assumes that the order, translation and rotation of
        # the atoms do not change.
        geometry: OrderedDict = OrderedDict()
        geometry["atom_labels"] = ", ".join(repr_system["atom_labels"])
        geometry["atom_positions"] = scaled_repr(repr_system["atom_positions"])
        geometry["simulation_cell"] = scaled_repr(repr_system["lattice_vectors"])
        variation_input["settings_geometry"] = geometry

        # Settings that only the concrete subclass knows about.
        variation_input["subsettings"] = self.group_parametervariation_hash_dict(method, settings_basis_set, repr_method)

        try:
            variation_input.check(recursive=True)
        except (KeyError, ValueError) as err:
            self.logger.info("Could not create parameter variation hash, missing required information: {}".format(str(err)))
            return
        method.group_parametervariation_hash = variation_input.hash()

    @abstractmethod
    def group_parametervariation_hash_dict(self, method: Method, settings_basis_set: RestrictedDict, repr_method: Section) -> RestrictedDict:
        """Return the subclass specific settings that enter the parameter
        variation hash.
        """
        pass

    def group_e_min(self) -> None:
        """Placeholder, does nothing."""
        pass

    def group_type(self) -> None:
        """Placeholder, does nothing."""
        pass

    @abstractmethod
    def normalize(self, context: Context) -> None:
        """Fill the method related metainfo for the given context."""
        pass
class MethodDFTNormalizer(MethodNormalizer):
"""A base class that is used for processing method related information
in the Encylopedia.
"""
def core_electron_treatment(self, method: Method) -> None:
    """Store the core electron treatment that belongs to the program.

    The program name is read from the backend and looked up in a fixed
    table. When the name is None or not in the table,
    ``config.services.unavailable_value`` is stored instead.

    Args:
        method: The Method section whose ``core_electron_treatment`` is set.
    """
    unavailable = config.services.unavailable_value
    code_name = self.backend["program_name"]

    # Treatment by program name. The keys are matched exactly.
    treatments_by_code = {
        'VASP': 'pseudopotential',
        'FHI-aims': 'full all electron',
        'exciting': 'full all electron',
        'quantum espresso': 'pseudopotential'
    }
    if code_name is None:
        treatment = unavailable
    else:
        treatment = treatments_by_code.get(code_name, unavailable)
    method.core_electron_treatment = treatment