Commit 42845912 authored by Markus Scheidgen
Browse files

Continued to detail the metainfo implementation.

parent 2ace97ee
......@@ -36,110 +36,117 @@ class StructureEntry(MObject):
type=Enum(chemical_symbols), shape=['1..*'],
links=optimade_links('h.6.2.1'),
a_elastic=dict(type=Keyword),
a_optimade=Optimade(query=True, entry=True))
""" Names of the different elements present in the structure. """
a_optimade=Optimade(query=True, entry=True),
description='''
Names of the different elements present in the structure.
''')
nelements = Quantity(
type=int,
links=optimade_links('h.6.2.2'),
a_elastic=dict(type=Integer),
a_optimade=Optimade(query=True, entry=True))
""" Number of different elements in the structure as an integer. """
a_optimade=Optimade(query=True, entry=True),
description='''
Number of different elements in the structure as an integer.
''')
elements_ratios = Quantity(
type=float, shape=['nelements'],
links=optimade_links('h.6.2.3'),
a_elastic=dict(type=lambda: Nested(ElementRatio), mapping=ElementRatio.from_structure_entry),
a_optimade=Optimade(query=True, entry=True))
""" Relative proportions of different elements in the structure. """
a_optimade=Optimade(query=True, entry=True),
description='''
Relative proportions of different elements in the structure.
''')
chemical_formula_descriptive = Quantity(
type=str,
links=optimade_links('h.6.2.4'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_optimade=Optimade(query=True, entry=True))
"""
The chemical formula for a structure as a string in a form chosen by the API
implementation.
"""
a_optimade=Optimade(query=True, entry=True),
description='''
The chemical formula for a structure as a string in a form chosen by the API
implementation.
''')
chemical_formula_reduced = Quantity(
type=str,
links=optimade_links('h.6.2.5'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_optimade=Optimade(query=True, entry=True))
"""
The reduced chemical formula for a structure as a string with element symbols and
integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.
"""
a_optimade=Optimade(query=True, entry=True),
description='''
The reduced chemical formula for a structure as a string with element symbols and
integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.
''')
chemical_formula_hill = Quantity(
type=str,
links=optimade_links('h.6.2.6'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_optimade=Optimade(query=True, entry=False))
"""
The chemical formula for a structure in Hill form with element symbols followed by
integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.
"""
a_optimade=Optimade(query=True, entry=False),
description='''
The chemical formula for a structure in Hill form with element symbols followed by
integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.
''')
chemical_formula_anonymous = Quantity(
type=str,
links=optimade_links('h.6.2.7'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_optimade=Optimade(query=True, entry=True))
"""
The anonymous formula is the chemical_formula_reduced, but where the elements are
instead first ordered by their chemical proportion number, and then, in order left to
right, replaced by anonymous symbols A, B, C, ..., Z, Aa, Ba, ..., Za, Ab, Bb, ... and
so on.
"""
a_optimade=Optimade(query=True, entry=True),
description='''
The anonymous formula is the chemical_formula_reduced, but where the elements are
instead first ordered by their chemical proportion number, and then, in order left to
right, replaced by anonymous symbols A, B, C, ..., Z, Aa, Ba, ..., Za, Ab, Bb, ... and
so on.
''')
dimension_types = Quantity(
type=int, shape=[3],
links=optimade_links('h.6.2.8'),
a_elastic=dict(type=Integer, mapping=lambda a: sum(a.dimension_types)),
a_optimade=Optimade(query=True, entry=True))
"""
List of three integers. For each of the three directions indicated by the three lattice
vectors (see property lattice_vectors). This list indicates if the direction is
periodic (value 1) or non-periodic (value 0). Note: the elements in this list each
refer to the direction of the corresponding entry in lattice_vectors and not
the Cartesian x, y, z directions.
"""
a_optimade=Optimade(query=True, entry=True),
description='''
List of three integers. For each of the three directions indicated by the three lattice
vectors (see property lattice_vectors). This list indicates if the direction is
periodic (value 1) or non-periodic (value 0). Note: the elements in this list each
refer to the direction of the corresponding entry in lattice_vectors and not
the Cartesian x, y, z directions.
''')
lattice_vectors = Quantity(
type=float, shape=[3, 3], unit=units.angstrom,
links=optimade_links('h.6.2.9'),
a_optimade=Optimade(query=False, entry=True))
""" The three lattice vectors in Cartesian coordinates, in ångström (Å). """
a_optimade=Optimade(query=False, entry=True),
description='''
The three lattice vectors in Cartesian coordinates, in ångström (Å).
''')
cartesian_site_positions = Quantity(
type=float, shape=['nsites', 3], unit=units.angstrom,
links=optimade_links('h.6.2.10'),
a_optimade=Optimade(query=False, entry=True))
"""
Cartesian positions of each site. A site is an atom, a site potentially occupied by
an atom, or a placeholder for a virtual mixture of atoms (e.g., in a virtual crystal
approximation).
"""
a_optimade=Optimade(query=False, entry=True), description='''
Cartesian positions of each site. A site is an atom, a site potentially occupied by
an atom, or a placeholder for a virtual mixture of atoms (e.g., in a virtual crystal
approximation).
''')
nsites = Quantity(
type=int,
links=optimade_links('h.6.2.11'),
a_elastic=dict(type=Integer),
a_optimade=Optimade(query=True, entry=True))
""" An integer specifying the length of the cartesian_site_positions property. """
a_optimade=Optimade(query=True, entry=True), description='''
An integer specifying the length of the cartesian_site_positions property.
''')
species_at_sites = Quantity(
type=str, shape=['nsites'],
links=optimade_links('h.6.2.12'),
a_optimade=Optimade(query=False, entry=True))
"""
Name of the species at each site (where values for sites are specified with the same
order of the cartesian_site_positions property). The properties of the species are
found in the species property.
"""
a_optimade=Optimade(query=False, entry=True), description='''
Name of the species at each site (where values for sites are specified with the same
order of the cartesian_site_positions property). The properties of the species are
found in the species property.
''')
# TODO assemblies
......@@ -147,16 +154,15 @@ class StructureEntry(MObject):
type=Enum(['disorder', 'unknown_positions', 'assemblies']), shape=['1..*'],
links=optimade_links('h.6.2.15'),
a_elastic=dict(type=Keyword),
a_optimade=Optimade(query=True, entry=True))
"""
A list of strings that flag which special features are used by the structure.
a_optimade=Optimade(query=True, entry=True), description='''
A list of strings that flag which special features are used by the structure.
- disorder: This flag MUST be present if any one entry in the species list has a
chemical_symbols list that is longer than 1 element.
- unknown_positions: This flag MUST be present if at least one component of the
cartesian_site_positions list of lists has value null.
- assemblies: This flag MUST be present if the assemblies list is present.
"""
- disorder: This flag MUST be present if any one entry in the species list has a
chemical_symbols list that is longer than 1 element.
- unknown_positions: This flag MUST be present if at least one component of the
cartesian_site_positions list of lists has value null.
- assemblies: This flag MUST be present if the assemblies list is present.
''')
class Species(MObject):
......@@ -171,59 +177,56 @@ class Species(MObject):
links=optimade_links('h.6.2.13'))
name = Quantity(
type=str,
a_optimade=Optimade(entry=True))
""" The name of the species; the name value MUST be unique in the species list. """
type=str, a_optimade=Optimade(entry=True), description='''
The name of the species; the name value MUST be unique in the species list.
''')
chemical_symbols = Quantity(
type=Enum(chemical_symbols + ['x', 'vacancy']), shape=['1..*'],
a_optimade=Optimade(entry=True))
"""
A list of strings of all chemical elements composing this species.
a_optimade=Optimade(entry=True), description='''
A list of strings of all chemical elements composing this species.
It MUST be one of the following:
It MUST be one of the following:
- a valid chemical-element name, or
- the special value "X" to represent a non-chemical element, or
- the special value "vacancy" to represent that this site has a non-zero probability
of having a vacancy (the respective probability is indicated in the concentration
list, see below).
- a valid chemical-element name, or
- the special value "X" to represent a non-chemical element, or
- the special value "vacancy" to represent that this site has a non-zero probability
of having a vacancy (the respective probability is indicated in the concentration
list, see below).
If any one entry in the species list has a chemical_symbols list that is longer than 1
element, the correct flag MUST be set in the list structure_features (see
structure_features)
"""
If any one entry in the species list has a chemical_symbols list that is longer than 1
element, the correct flag MUST be set in the list structure_features (see
structure_features)
''')
concentration = Quantity(
type=float, shape=['1..*'],
a_optimade=Optimade(entry=True))
"""
A list of floats, with same length as chemical_symbols. The numbers represent the
relative concentration of the corresponding chemical symbol in this species. The
numbers SHOULD sum to one. Cases in which the numbers do not sum to one typically fall
only in the following two categories:
- Numerical errors when representing float numbers in fixed precision, e.g. for two
chemical symbols with concentrations 1/3 and 2/3, the concentration might look
something like [0.33333333333, 0.66666666666]. If the client is aware that the sum
is not one because of numerical precision, it can renormalize the values so that the
sum is exactly one.
- Experimental errors in the data present in the database. In this case, it is the
responsibility of the client to decide how to process the data.
Note that concentrations are uncorrelated between different sites (even of the same
species).
"""
a_optimade=Optimade(entry=True), description='''
A list of floats, with same length as chemical_symbols. The numbers represent the
relative concentration of the corresponding chemical symbol in this species. The
numbers SHOULD sum to one. Cases in which the numbers do not sum to one typically fall
only in the following two categories:
- Numerical errors when representing float numbers in fixed precision, e.g. for two
chemical symbols with concentrations 1/3 and 2/3, the concentration might look
something like [0.33333333333, 0.66666666666]. If the client is aware that the sum
is not one because of numerical precision, it can renormalize the values so that the
sum is exactly one.
- Experimental errors in the data present in the database. In this case, it is the
responsibility of the client to decide how to process the data.
Note that concentrations are uncorrelated between different sites (even of the same
species).
''')
mass = Quantity(type=float, unit=units.amu, a_optimade=dict(entry='optional'))
original_name = Quantity(type=str, a_optimade=dict(entry='optional'))
"""
Can be any valid Unicode string, and SHOULD contain (if specified) the name of the
species that is used internally in the source database.
original_name = Quantity(type=str, a_optimade=dict(entry='optional'), description='''
Can be any valid Unicode string, and SHOULD contain (if specified) the name of the
species that is used internally in the source database.
Note: With regards to "source database", we refer to the immediate source being
queried via the OPTiMaDe API implementation. The main use of this field is for source
databases that use species names, containing characters that are not allowed (see
description of the species_at_sites list).
"""
Note: With regards to "source database", we refer to the immediate source being
queried via the OPTiMaDe API implementation. The main use of this field is for source
databases that use species names, containing characters that are not allowed (see
description of the species_at_sites list).
''')
......@@ -36,10 +36,12 @@ Here is a simple example that demonstrates the definition of System related quan
system (a.k.a. geometry).
\"\"\"
m_section = Section(repeats=True, parent=Run.m_section)
m_section = Section(repeats=True, parent=Run)
n_atoms = Quantity(type=int)
\"\"\" Defines the number of atoms in the system. \"\"\"
n_atoms = Quantity(
type=int, description='''
Defines the number of atoms in the system.
''')
atom_labels = Quantity(type=Enum(ase.data.chemical_symbols), shape=['n_atoms'])
atom_positions = Quantity(type=float, shape=['n_atoms', 3], unit=Units.m)
......@@ -137,8 +139,11 @@ See the reference of classes :class:`Section` and :class:`Quantities` for detail
from typing import Type, TypeVar, Union, Tuple, Iterable, List, Any, Dict, cast
import sys
import inspect
import re
from pint.unit import _Unit
from pint import UnitRegistry
import inflection
__module__ = sys.modules[__name__]
MObjectBound = TypeVar('MObjectBound', bound='MObject')
......@@ -147,9 +152,57 @@ MObjectBound = TypeVar('MObjectBound', bound='MObject')
# Reflection
class Enum(list):
    """
    A list of allowed values that serves as the type of a str quantity.

    Using an instance of this class as a type limits the values of the respective
    str typed definition to the pre-set values contained in the list.
    """
class DataType:
    """
    Base class for custom data types that can be used in the meta-info.

    Most types are supported by the metainfo out of the box: the python build-in
    primitive types (int, bool, str, float, ...), references to sections, and enums.
    Custom data types cover the occasions where these are not sufficient.

    All methods of this base class are neutral: no check is performed and values are
    handed back unchanged.
    """

    def check_type(self, value):
        """ Accepts any value; this base implementation performs no check. """
        return None

    def normalize(self, value):
        """ Hands back the given value without modification. """
        return value

    def to_json_serializable(self, value):
        """ Hands back the given value as its own json serializable form. """
        return value

    def from_json_serializable(self, value):
        """ Hands back the given json serializable value without modification. """
        return value
class Dimension(DataType):
    """
    Data type for a single dimension, i.e. one item of a shape.

    The following values are accepted by :func:`check_type`:

    - an int
    - a str that is a valid Python identifier (presumably the name of an int typed
      quantity in the same section; the name is not resolved here)
    - a str with a range specification ``<lower>..<upper>``, where lower is an
      int number and upper is an int number or ``*``, e.g. ``'0..*'``, ``'1..3'``
    - a section definition, i.e. an instance of :class:`Section`
    - a class that has an ``m_section`` attribute
    """

    def check_type(self, value):
        """ Checks if the given value is a valid dimension.

        Raises:
            TypeError: If the value is none of the accepted kinds of dimensions.
        """
        if isinstance(value, int):
            return

        if isinstance(value, str):
            if value.isidentifier():
                return
            # The whole string has to be a range; bounds can have more than one digit.
            # (re.match with single \d rejected '10..20' and accepted '1..3foo'.)
            if re.fullmatch(r'\d+\.\.(\d+|\*)', value):
                return

        if isinstance(value, Section):
            return

        if isinstance(value, type) and hasattr(value, 'm_section'):
            return

        raise TypeError('%s is not a valid dimension' % str(value))
# TODO
# TODO class Unit(DataType)
# TODO class MetainfoType(DataType)
# TODO class Datetime(DataType)
class MObjectMeta(type):
def __new__(self, cls_name, bases, dct):
......@@ -204,11 +257,10 @@ class MObject(metaclass=MObjectMeta):
m_section: 'Section' = None
def __init__(self, m_section: 'Section' = None, m_parent: 'MObject' = None, **kwargs):
def __init__(self, m_section: 'Section' = None, m_parent: 'MObject' = None, _bs: bool = False, **kwargs):
self.m_section: 'Section' = m_section
self.m_parent: 'MObject' = m_parent
self.m_parent_index = -1
self.m_data = dict(**kwargs)
cls = self.__class__
if self.m_section is None:
......@@ -218,30 +270,53 @@ class MObject(metaclass=MObjectMeta):
assert self.m_section == cls.m_section, \
'Section class and section definition must match'
self.m_data = dict(**kwargs)
# TODO
# self.m_data = {}
# if _bs:
# self.m_data.update(**kwargs)
# else:
# self.m_update(**kwargs)
@classmethod
def __init_section_cls__(cls):
if not hasattr(__module__, 'Quantity') or not hasattr(__module__, 'Section'):
# no initialization during bootstrapping, will be done manually
# only works after bootstrapping, since functionality is still missing
if not all([hasattr(__module__, cls) for cls in ['Quantity', 'Section', 'sub_section']]):
return
# ensure that the m_section is defined
m_section = cls.m_section
if m_section is None and cls != MObject:
m_section = Section()
setattr(cls, 'm_section', m_section)
# transfer name and description to m_section
m_section.name = cls.__name__
if cls.__doc__ is not None:
m_section.description = inspect.cleandoc(cls.__doc__)
m_section.section_cls = cls
# add sub_section to parent section
if m_section.parent is not None:
sub_section_name = inflection.underscore(m_section.name)
setattr(m_section.parent.section_cls, sub_section_name, sub_section(m_section))
for name, attr in cls.__dict__.items():
# transfer names and descriptions for quantities
if isinstance(attr, Quantity):
attr.name = name
if attr.__doc__ is not None:
attr.description = inspect.cleandoc(attr.__doc__)
if attr.description is not None:
attr.description = inspect.cleandoc(attr.description)
attr.__doc__ = attr.description
# manual manipulation of m_data due to bootstrapping
m_section.m_data.setdefault('Quantity', []).append(attr)
# set names and parent on sub-sections
elif isinstance(attr, sub_section):
attr.section_def.parent = m_section
if attr.section_def.name is None:
attr.section_def.name = inflection.camelize(name)
@staticmethod
def m_type_check(definition: 'Quantity', value: Any, check_item: bool = False):
"""Checks if the value fits the given quantity in type and shape; raises
......@@ -265,7 +340,9 @@ class MObject(metaclass=MObjectMeta):
raise TypeError('The value is not a section of wrong section definition')
else:
raise Exception('Invalid quantity type: %s' % str(definition.type))
# TODO
# raise Exception('Invalid quantity type: %s' % str(definition.type))
pass
shape = None
try:
......@@ -289,7 +366,7 @@ class MObject(metaclass=MObjectMeta):
# TODO check dimension
def __resolve_section(self, definition: SectionDef) -> 'Section':
def _resolve_section(self, definition: SectionDef) -> 'Section':
"""Resolves and checks the given section definition. """
if isinstance(definition, str):
section = self.m_section.sub_sections[definition]
......@@ -319,9 +396,15 @@ class MObject(metaclass=MObjectMeta):
IndexError: If the given index is wrong, or if an index is given for a non
repeatable section
"""
section_def = self.__resolve_section(definition)
section_def = self._resolve_section(definition)
m_data_value = self.m_data[section_def.name]
m_data_value = self.m_data.get(section_def.name, None)
if m_data_value is None:
if section_def.repeats:
m_data_value = []
else:
m_data_value = None
if isinstance(m_data_value, list):
m_data_values = m_data_value
......@@ -338,7 +421,22 @@ class MObject(metaclass=MObjectMeta):
else:
return m_data_value
def m_create(self, definition: SectionDef, **kwargs) -> MObjectBound:
def m_add_sub_section(self, sub_section: MObjectBound) -> MObjectBound:
    """Stores the given section instance as a sub section in the data of this section.

    The section definition is taken from the ``m_section`` of the given instance.
    For a repeating definition the instance is appended to the list stored under
    the definition name and its ``m_parent_index`` is set to its position in that
    list. Otherwise the instance replaces whatever is stored under the definition
    name. The ``m_parent`` of the given instance is not modified here.

    Returns:
        The given section instance.
    """
    definition = sub_section.m_section

    if not definition.repeats:
        self.m_data[definition.name] = sub_section
        return sub_section

    siblings = self.m_data.setdefault(definition.name, [])
    sub_section.m_parent_index = len(siblings)
    siblings.append(sub_section)
    return sub_section
def m_create(self, definition: SectionDef, **kwargs) -> 'MObject':
"""Creates a subsection and adds it to this section
Args:
......@@ -352,20 +450,12 @@ class MObject(metaclass=MObjectMeta):
Raises:
KeyError: If the given section is not a subsection of this section.
"""
section_def: 'Section' = self.__resolve_section(definition)
section_def: 'Section' = self._resolve_section(definition)
section_cls = section_def.section_cls
section_instance = section_cls(m_section=section_def, m_parent=self, **kwargs)
if section_def.repeats:
m_data_sections = self.m_data.setdefault(section_def.name, [])
section_index = len(m_data_sections)
m_data_sections.append(section_instance)
section_instance.m_parent_index = section_index
else:
self.m_data[section_def.name] = section_instance
return cast(MObjectBound, section_instance)
return self.m_add_sub_section(section_instance)
def __resolve_quantity(self, definition: Union[str, 'Quantity']) -> 'Quantity':
"""Resolves and checks the given quantity definition. """
......@@ -401,6 +491,26 @@ class MObject(metaclass=MObjectMeta):
for value in values:
m_data_values.append(value)
def m_update(self, **kwargs):
    """ Updates all quantities and sub-sections with the given arguments.

    Each keyword is looked up in the attributes of the section definition. Values
    for sub-sections are added via :func:`m_add_sub_section`; a repeating
    sub-section requires a list of section instances, a non repeating sub-section
    a single section instance. All other values are set as attributes on this
    section.

    Raises:
        KeyError: If a keyword is not an attribute of this section.
        TypeError: If a sub-section repeats, but the given value is not a list.
    """
    for name, value in kwargs.items():
        attribute = self.m_section.attributes.get(name, None)
        if attribute is None:
            raise KeyError('%s is not an attribute of this section' % name)

        if not isinstance(attribute, Section):
            # everything that is not a sub-section goes through regular attribute access
            setattr(self, name, value)
            continue

        if not attribute.repeats:
            # the value itself is the single sub-section instance
            self.m_add_sub_section(value)
            continue

        if not isinstance(value, list):
            raise TypeError('Sub section %s repeats, but no list was given' % attribute.name)

        for item in value:
            self.m_add_sub_section(item)
def m_to_dict(self) -> Dict[str, Any]:
    """Placeholder for the export of this section as a json serializeable dictionary.

    There is no implementation yet; the method currently returns ``None``.
    """
    return None
......@@ -445,7 +555,13 @@ class MObject(metaclass=MObjectMeta):
# These placeholder are replaced, once the necessary classes are defined. This process
# is referred to as 'bootstrapping'.
class Quantity(MObject):
class Definition(MObject):
    # The attributes below are placeholders that are set to None for now. They are
    # replaced once the necessary classes are defined (bootstrapping).
    # No class docstring on purpose: it would be used as the section description.

    # name of the definition (for quantities: must be unique within a section)
    name: 'Quantity' = None

    # optional human readable description
    description: 'Quantity' = None

    # list of URLs to external resources that describe this definition
    links: 'Quantity' = None
class Quantity(Definition):
"""Used to define quantities that store a certain piece of (meta-)data.
Quantities are the basic building block with meta-info data. The Quantity class is
......@@ -457,51 +573,17 @@ class Quantity(MObject):
type and shape fit the set values.
"""
name: 'Quantity' = None
""" The name of the quantity. Must be unique within a section. """
description: 'Quantity' = None
""" An optional human readable description. """
links: 'Quantity' = None
""" A list of URLs to external resource that describe this definition. """
type: 'Quantity' = None
""" The type of the quantity.
Can be one of the following:
- a build-in Python type, e.g. ``int``, ``str``, ``any``
- an instance of :class:`Enum`, e.g. ``Enum(['one', 'two', 'three'])
- a instance of Section, i.e. a section definition. This will define a reference
- the Python typing ``Any`` to denote an arbitrary type
- a Python class, e.g. ``datetime``
In the NOMAD CoE meta-info this was basically the ``dTypeStr``.
"""
shape: 'Quantity' = None
""" The shape of the quantity that defines its dimensionality.
A shape is a list, where each item defines a dimension. Each dimension can be:
- an integer that defines the exact size of the dimension, e.g. ``[3]`` is the
shape of a spacial vector
- the name of an int typed quantity in the same section
- a range specification as string build from a lower bound (i.e. int number),
and an upper bound (int or ``*`` denoting arbitrary large), e.g. ``'0..*'``, ``'1..3'``
"""
unit: 'Quantity' = None
""" The optional physics unit for this quantity.
Units are given in `pint` units. Pint is a Python package that defines units and