Commit be5fbee1 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Modified the metainfo enum class, fixed issues in encyclopedia normalization,...

Modified the metainfo enum class, fixed issues in encyclopedia normalization, fixed error in the retrieval of representative systems.
parent 64f6e911
Pipeline #66876 failed with stages
in 8 minutes
......@@ -209,7 +209,7 @@ Custom data types
.. autoclass:: DataType
:members:
.. autoclass:: Enum
.. autoclass:: MEnum
.. _metainfo-reflection:
......@@ -275,6 +275,6 @@ A more complex example
"""
from .metainfo import MSection, MCategory, Definition, Property, Quantity, SubSection, \
Section, Category, Package, Environment, Enum, Datetime, MProxy, MetainfoError, DeriveError, \
Section, Category, Package, Environment, MEnum, Datetime, MProxy, MetainfoError, DeriveError, \
MetainfoReferenceError, DataType, MData, MDataDict, Reference, MResource, m_package, \
units
from elasticsearch_dsl import InnerDoc
from nomad.metainfo import MSection, Section, SubSection, Quantity, Enum
from nomad.metainfo import MSection, Section, SubSection, Quantity, MEnum
class Material(MSection):
......@@ -19,7 +19,12 @@ class Material(MSection):
"""
)
system_type = Quantity(
type=Enum("bulk", "2D", "1D", "unavailable"),
type=MEnum(
bulk="bulk",
two_d="2D",
one_d="1D",
unavailable="unavailable"
),
description="""
"Character of physical system's geometry, e.g. bulk, surface... ",
"""
......@@ -36,7 +41,17 @@ class Calculation(MSection):
"""
)
run_type = Quantity(
type=Enum("single point", "geometry optimization", "molecular dynamics", "phonon calculation", "elastic constants", "QHA calculation", "GW calculation", "equation of state", "parameter variation", "unavailable"),
type=MEnum(
single_point="single point",
geometry_optimization="geometry optimization",
molecular_dynamics="molecular dynamics",
phonon_calculation="phonon calculation",
elastic_constants="elastic constants",
qha_calculation="QHA calculation",
qw_calculation="GW calculation",
equation_of_state="equation of state",
parameter_variation="parameter variation",
unavailable="unavailable"),
description="""
Defines the type of run identified for this entry.
"""
......
......@@ -9,7 +9,7 @@ import nomad_meta_info
from nomad import utils
from nomad.metainfo import Definition, Package, Category, Section, Quantity, SubSection, \
Environment, Enum, Reference, MSection, units
Environment, MEnum, Reference, MSection, units
T = TypeVar('T', bound=Definition)
......@@ -246,7 +246,7 @@ class LegacyMetainfoEnvironment:
elif isinstance(definition.type, Reference):
dtype_str = 'r'
result['referencedSections'] = [definition.type.target_section_def.name]
elif isinstance(definition.type, Enum):
elif isinstance(definition.type, MEnum):
dtype_str = 'C'
elif type(definition.type) == np.dtype:
dtype_str = definition.type.name[0]
......
......@@ -20,6 +20,7 @@ import inspect
import re
import json
import itertools
import numpy as np
import pint
import pint.unit
......@@ -56,14 +57,26 @@ class MetainfoReferenceError(MetainfoError):
# Metainfo quantity data types
class MEnum:
    """Allows to define str types with values limited to a pre-set list of possible values.

    Members can be given in three ways, which may be combined:

    - positional arguments: each argument is used as both the member name
      and the member value, e.g. ``MEnum('one', 'two')``
    - one single list in place of the positional arguments, e.g.
      ``MEnum(['one', 'two'])``
    - keyword arguments mapping a member name to its value, e.g.
      ``MEnum(two_d='2D')``

    Member values are readable as attributes (``MEnum(two_d='2D').two_d``
    returns ``'2D'``), the instance iterates over its values in definition
    order, and ``value in enum`` tests whether ``value`` is one of the
    allowed values.

    Raises:
        ValueError: If a positional value collides with a member name that
            has already been defined.
    """

    def __init__(self, *args, **kwargs):
        # Supports one big list in place of args
        if len(args) == 1 and isinstance(args[0], list):
            args = args[0]

        # If non-named arguments are given, the default is to have them placed
        # into a dictionary with their string value as both the enum name and
        # the value.
        for arg in args:
            if arg in kwargs:
                raise ValueError("Duplicate value '{}' provided for enum".format(arg))
            kwargs[arg] = arg

        self._values = set(kwargs.values())  # For allowing constant time member check
        self._map = kwargs

    def __getattr__(self, attr):
        # __getattr__ is only consulted after the regular lookup failed. The
        # map is read through __dict__ so that instances created without
        # __init__ (e.g. by copy or pickle) do not recurse endlessly while
        # looking for '_map' itself.
        try:
            return self.__dict__["_map"][attr]
        except KeyError:
            # The attribute protocol (hasattr, getattr with default, copy,
            # pickle) relies on AttributeError, not KeyError.
            raise AttributeError(
                "'{}' object has no member '{}'".format(type(self).__name__, attr)
            ) from None

    def __iter__(self):
        # Keeps list(enum) and 'for value in enum' working as they did when
        # the enum was a list subclass.
        return iter(self._map.values())

    def __contains__(self, value):
        return value in self._values
class MProxy():
......@@ -181,7 +194,7 @@ class _QuantityType(DataType):
- Python built-in primitives: int, float, bool, str
- numpy dtypes, e.g. f, int32
- a section definition to define references
- an Enum instance to use it's values as possible str values
- an MEnum instance to use its values as possible str values
- a custom datatype, i.e. instance of :class:`DataType`
- Any
"""
......@@ -190,10 +203,10 @@ class _QuantityType(DataType):
if value in [str, int, float, bool]:
return value
if isinstance(value, Enum):
for enum_value in value:
if isinstance(value, MEnum):
for enum_value in value._values:
if not isinstance(enum_value, str):
raise TypeError('Enum value %s is not a string.' % enum_value)
raise TypeError('MEnum value %s is not a string.' % enum_value)
return value
if type(value) == np.dtype:
......@@ -221,7 +234,7 @@ class _QuantityType(DataType):
if value is str or value is int or value is float or value is bool:
return dict(type_kind='python', type_data=value.__name__)
if isinstance(value, Enum):
if isinstance(value, MEnum):
return dict(type_kind='Enum', type_data=list(value))
if type(value) == np.dtype:
......@@ -789,8 +802,8 @@ class MSection(metaclass=MObjectMeta):
'The value %s for quantity %s does not follow %s' %
(value, quantity_def, quantity_def.type))
elif isinstance(quantity_def.type, Enum):
if value not in quantity_def.type:
elif isinstance(quantity_def.type, MEnum):
if value not in quantity_def.type._values:
raise TypeError(
'The value %s is not an enum value for quantity %s.' %
(value, quantity_def))
......@@ -1055,7 +1068,7 @@ class MSection(metaclass=MObjectMeta):
elif type(quantity.type) == np.dtype:
pass
elif isinstance(quantity.type, Enum):
elif isinstance(quantity.type, MEnum):
pass
elif quantity.type == Any:
......@@ -1273,7 +1286,7 @@ class MSection(metaclass=MObjectMeta):
if type(value) == np.ndarray:
value_shape = value.shape
if isinstance(value, list) and not isinstance(value, Enum):
if isinstance(value, list) and not isinstance(value, MEnum):
value_shape = [len(value)]
else:
value_shape = []
......@@ -1467,7 +1480,7 @@ class Quantity(Property):
The `type` can be one of:
- a built-in primitive Python type: ``int``, ``str``, ``bool``, ``float``
- an instance of :class:`Enum`, e.g. ``Enum('one', 'two', 'three')``
- an instance of :class:`MEnum`, e.g. ``MEnum('one', 'two', 'three')``
- a section to define references to other sections as quantity values
- a custom meta-info :class:`DataType`, see :ref:`metainfo-custom-types`
- a numpy `dtype`, e.g. ``np.dtype('float32')``
......
......@@ -2,7 +2,7 @@ from ase.data import chemical_symbols
from elasticsearch_dsl import Keyword, Integer, Float, InnerDoc, Nested
import numpy as np
from nomad.metainfo import MSection, Section, Quantity, SubSection, Enum, units
from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, units
def optimade_links(section: str):
......@@ -42,7 +42,8 @@ class Species(MSection):
''')
chemical_symbols = Quantity(
type=Enum(chemical_symbols + ['x', 'vacancy']), shape=['1..*'],
type=MEnum(chemical_symbols + ['x', 'vacancy']),
shape=['1..*'],
a_optimade=Optimade(entry=True), description='''
A list of strings of all chemical elements composing this species.
......@@ -99,7 +100,7 @@ class OptimadeEntry(MSection):
a_elastic=dict(type=InnerDoc))
elements = Quantity(
type=Enum(chemical_symbols), shape=['1..*'],
type=MEnum(chemical_symbols), shape=['1..*'],
links=optimade_links('h.6.2.1'),
a_elastic=dict(type=Keyword),
a_optimade=Optimade(query=True, entry=True),
......@@ -217,7 +218,7 @@ class OptimadeEntry(MSection):
# TODO assemblies
structure_features = Quantity(
type=Enum(['disorder', 'unknown_positions', 'assemblies']), shape=['1..*'],
type=MEnum(['disorder', 'unknown_positions', 'assemblies']), shape=['1..*'],
links=optimade_links('h.6.2.15'),
a_elastic=dict(type=Keyword),
a_optimade=Optimade(query=True, entry=True), description='''
......
......@@ -298,7 +298,9 @@ class EncyclopediaNormalizer(Normalizer):
"""Decides what type of calculation this is: single_point, md,
geometry_optimization, etc.
"""
run_type = config.services.unavailable_value
run_enums = Calculation.run_type.type
run_type = run_enums.unavailable
try:
sccs = self._backend[s_scc]
except Exception:
......@@ -316,9 +318,9 @@ class EncyclopediaNormalizer(Normalizer):
program_name = self._backend["program_name"]
if program_name == "elastic":
# TODO move to taylor expansion as soon as data is correct in archive
run_type = "elastic constants"
run_type = run_enums.elastic_constants
else:
run_type = "single point"
run_type = run_enums.single_point
# One sequence. Currently calculations with multiple sequences are
# unsupported.
elif n_frame_seq == 1:
......@@ -351,11 +353,11 @@ class EncyclopediaNormalizer(Normalizer):
sampling_method = section_sampling_method["sampling_method"]
if sampling_method == "molecular_dynamics":
run_type = "molecular dynamics"
run_type = run_enums.molecular_dynamics
if sampling_method == "geometry_optimization":
run_type = "geometry optimization"
run_type = run_enums.geometry_optimization
if sampling_method == "taylor_expansion":
run_type = "phonon calculation"
run_type = run_enums.phonon_calculation
calculation.run_type = run_type
return run_type
......@@ -419,23 +421,28 @@ class EncyclopediaNormalizer(Normalizer):
def get_system_type(self, material, calculation) -> tuple:
    """Selects the system section used for this entry and its system type.

    For geometry optimizations the system of the last frame of the first
    frame sequence is used, for molecular dynamics the system of the first
    frame, and for single point calculations the first system. For any
    other run type no system is selected.

    The resolved type is also stored in ``material.system_type``.

    Args:
        material: Section that receives the resolved ``system_type``.
        calculation: Section whose ``run_type`` decides which system is
            picked.

    Returns:
        Tuple ``(system, system_type)``. ``system`` is ``None`` when no
        system could be selected. ``system_type`` is the "unavailable"
        enum value unless the system reports bulk, 1D or "2D / surface".
    """
    run_enums = Calculation.run_type.type
    system_enums = Material.system_type.type

    # Start from the enum member (as get_run_type does) so that the value
    # assigned to material.system_type is always a valid enum value.
    system_type = system_enums.unavailable
    system = None

    run_type = calculation.run_type
    if run_type in {run_enums.geometry_optimization, run_enums.molecular_dynamics}:
        frame_seqs = self._backend[s_frame_sequence]
        frame_seq = frame_seqs[0]
        frames = frame_seq[r_frame_sequence_local_frames]
        systems = self._backend[s_system]
        if run_type == run_enums.geometry_optimization:
            system = systems[frames[-1]]
        else:
            system = systems[frames[0]]
    elif run_type == run_enums.single_point:
        system = self._backend[s_system][0]

    if system is None:
        # No system is selected for the remaining run types; subscripting
        # None would raise a TypeError that the KeyError handler below
        # does not cover.
        self.logger.info("System type information not available for encyclopedia")
    else:
        try:
            stype = system["system_type"]
        except KeyError:
            self.logger.info("System type information not available for encyclopedia")
        else:
            if stype == "2D / surface":
                system_type = system_enums.two_d
            elif stype == system_enums.bulk or stype == system_enums.one_d:
                system_type = stype

    material.system_type = system_type
    return system, system_type
......@@ -449,6 +456,7 @@ class EncyclopediaNormalizer(Normalizer):
def normalize(self, logger=None) -> None:
super().normalize(logger)
system_enums = Material.system_type.type
# Initialise metainfo structure
sec_enc = Encyclopedia()
......@@ -463,7 +471,7 @@ class EncyclopediaNormalizer(Normalizer):
# Get the system type, stop if unknown
system, system_type = self.get_system_type(material, calculation)
if system_type != "bulk" and system_type != "surface" and system_type != "2D":
if system_type != system_enums.bulk and system_type != system_enums.two_d and system_type != system_enums.one_d:
self.logger.info("unknown system type for encyclopedia")
return
......
......@@ -90,47 +90,50 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
pass
def __representative_systems(self):
# look for sccs in last frames
sccs = []
"""Used to tag systems that are representative for a calculation. In
practice, takes the first and two last frames of all
section_frame_sequences. If no section_frame_sequence exists, take
first and two last frames from all defined sccs.
"""
systems = []
sequences = []
# Get all frame sequences
try:
frame_seqs = self._backend.get_sections(s_frame_sequence)
except Exception:
frame_seqs = []
for frame_seq in frame_seqs:
try:
frames = self._backend.get_value(r_frame_sequence_local_frames, frame_seq)
except Exception:
frames = []
if len(frames) > 0:
sccs.append(frames[-1])
# no sccs from frames -> consider all sccs
if len(sccs) == 0:
else:
for frame_seq in frame_seqs:
try:
frames = self._backend.get_value(r_frame_sequence_local_frames, frame_seq)
except Exception:
pass
else:
sequences.append(frames)
# If no frames exist, consider all existing sccs
if len(sequences) == 0:
try:
sccs = self._backend.get_sections(s_scc)
except Exception:
sccs = []
try:
systems = [self._backend.get_value(r_scc_to_system, scc) for scc in sccs]
except Exception:
systems = []
# only take the first, and last two systems
if len(systems) == 0:
try:
systems = self._backend.get_sections(s_system)
except Exception:
systems = []
if len(systems) > 2:
systems = [systems[0], systems[-2], systems[-1]]
pass
else:
sequences.append(sccs)
# Take first and last two frames from each detected sequence
sccs = self._backend[s_scc]
for seq in sequences:
if len(seq) == 1:
indices = [0]
elif len(seq) > 2:
indices = [0, -2, -1]
for scc_idx in [seq[idx] for idx in indices]:
system_idx = sccs[scc_idx][r_scc_to_system]
systems.append(system_idx)
if len(systems) == 0:
self.logger.error('no "representative" section system found')
self.logger.info(
'chose "representative" systems for normalization',
number_of_systems=len(systems))
......
......@@ -247,7 +247,6 @@ class SystemNormalizer(SystemBasedNormalizer):
with utils.timer(
self.logger, 'system classification executed',
system_size=atoms.get_number_of_atoms()):
self.system_type_analysis(atoms)
# symmetry analysis
......@@ -291,6 +290,9 @@ class SystemNormalizer(SystemBasedNormalizer):
self.logger.error(
'matid project system classification failed', exc_info=e, error=str(e))
# from ase.visualize import view
# view(atoms)
#raise Exception(system_type)
self._backend.addValue('system_type', system_type)
def symmetry_analysis(self, atoms) -> None:
......
......@@ -51,10 +51,20 @@ def normalized_template_example(parsed_template_example) -> LocalBackend:
def _normalized_encyclopedia(filepath: str) -> Encyclopedia:
    """Returns the Encyclopedia section produced for the given data file.

    Calls ``parse_file`` with the ``"parsers/template"`` parser name and
    ``filepath``, passes the resulting backend through ``run_normalize`` and
    reads the section defined by ``Encyclopedia.m_def`` from it.
    """
    backend = parse_file(("parsers/template", filepath))
    backend = run_normalize(backend)
    return backend.get_mi2_section(Encyclopedia.m_def)


@pytest.fixture
def geometry_optimization() -> Encyclopedia:
    """Encyclopedia section for the fcc crystal structure test data."""
    return _normalized_encyclopedia("tests/data/normalizers/fcc_crystal_structure.json")


@pytest.fixture
def molecular_dynamics() -> Encyclopedia:
    """Encyclopedia section for the cp2k_md_nve test data."""
    return _normalized_encyclopedia("tests/data/normalizers/encyclopedia/cp2k_md_nve.json")
......@@ -13,7 +13,7 @@
# limitations under the License.
from nomad.metainfo.encyclopedia import Encyclopedia
from tests.normalizing.conftest import geometry_optimization # pylint: disable=unused-import
from tests.normalizing.conftest import geometry_optimization, molecular_dynamics # pylint: disable=unused-import
def test_geometry_optimization(geometry_optimization: Encyclopedia):
......@@ -23,6 +23,13 @@ def test_geometry_optimization(geometry_optimization: Encyclopedia):
assert run_type == "geometry optimization"
# def test_molecular_dynamics(molecular_dynamics: Encyclopedia):
# """Tests that molecular dynamics calculations are correctly processed.
# """
# run_type = molecular_dynamics.calculation.run_type
# assert run_type == "molecular dynamics"
def test_system_type(geometry_optimization: Encyclopedia):
"""Tests that geometry optimizations are correctly processed."
"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment