Commit e7089e2d authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Continued to work on legacy metainfo integration.

parent 576c6779
Subproject commit 22f8a9063125da7bbde4f3f0153548005506e06b
Subproject commit 3c2cf8293c17a86ac91b852382e4856c17a30862
%% Cell type:markdown id: tags:
# NOMAD Metainfo 2.0 demonstration
You can find more complete documentation [here](https://labdev-nomad.esc.rzg.mpg.de/fairdi/nomad/testing/docs/metainfo.html)
%% Cell type:code id: tags:
``` python
from nomad.metainfo import MSection, SubSection, Quantity, Datetime, units
import numpy as np
import datetime
```
%% Cell type:markdown id: tags:
## Sections and quantities
To define sections and their quantities, we use Python classes and attributes. Quantities have *type*, *shape*, and *unit*.
%% Cell type:code id: tags:
``` python
class System(MSection):
""" The simulated system """
# Integer quantity that is derived: its value is computed as the length of ``atom_labels``.
number_of_atoms = Quantity(type=int, derived=lambda system: len(system.atom_labels))
# String labels; the single dimension refers to the ``number_of_atoms`` quantity by name.
atom_labels = Quantity(type=str, shape=['number_of_atoms'])
# float64 values with shape (number_of_atoms, 3) and unit meter.
atom_positions = Quantity(type=np.dtype(np.float64), shape=['number_of_atoms', 3], unit=units.m)
```
%% Cell type:markdown id: tags:
Such *section classes* can then be instantiated like regular Python classes. Accordingly, *section instances* are just regular Python objects, and section quantities can be read and set like regular Python object attributes.
%% Cell type:code id: tags:
``` python
system = System()
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom
```
%% Cell type:markdown id: tags:
Of course the metainfo is not just about dealing with physics data in Python. It's also about storing and managing data in various file formats and databases. Therefore, the created data can be serialized, e.g. to JSON. All *section
instances* have a set of additional `m_`-methods that provide additional functions. Note the unit conversion.
%% Cell type:code id: tags:
``` python
system.m_to_json()
```
%%%% Output: execute_result
'{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}'
%% Cell type:markdown id: tags:
## Sub-sections to form hierarchies of data
*Section instances* can be nested to form data hierarchies. To achieve this, we first have to create *section
definitions* that have sub-sections.
%% Cell type:code id: tags:
``` python
class Run(MSection):
# Datetime-typed quantity; the description text is attached to the quantity definition.
timestamp = Quantity(type=Datetime, description='The time that this run was conducted.')
# Repeating sub-section whose section definition is ``System``.
systems = SubSection(sub_section=System, repeats=True)
```
%% Cell type:markdown id: tags:
Now we can add *section instances* for `System` to *instances* of `Run`.
%% Cell type:code id: tags:
``` python
run = Run()
run.timestamp = datetime.datetime.now()
system = run.m_create(System)
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom
system = run.m_create(System)
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[5, 0, 0], [0, 0, 0], [2.5, 2, 0]]) * units.angstrom
run.m_to_json()
```
%%%% Output: execute_result
'{"timestamp": "2019-10-07T22:37:33.376139", "systems": [{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}, {"atom_labels": ["H", "H", "0"], "atom_positions": [[5e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [2.5e-10, 2e-10, 0.0]]}]}'
'{"timestamp": "2019-10-09T14:48:43.663363", "systems": [{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}, {"atom_labels": ["H", "H", "0"], "atom_positions": [[5e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [2.5e-10, 2e-10, 0.0]]}]}'
%% Cell type:markdown id: tags:
The whole data hierarchy can be navigated with regular Python object/attribute style programming and values can be
used for calculations as usual.
%% Cell type:code id: tags:
``` python
(run.systems[1].atom_positions - run.systems[0].atom_positions).to(units.angstrom)
```
%%%% Output: execute_result
$[[-1. 0. 0. ] [ 0. 0. 0. ] [-0.5 0. 0. ]] angstrom$
<Quantity([[-1. 0. 0. ]
[ 0. 0. 0. ]
[-0.5 0. 0. ]], 'angstrom')>
%% Cell type:markdown id: tags:
## Reflection, inspection, and code-completion
Since all definitions are available as *section classes*, Python already knows about all possible quantities. We can
use this in Python notebooks, via *tab* or the `?`-operator. Furthermore, you can access the *section definition* of all *section instances* with `m_def`. Since a *section definition* itself is just a piece of metainfo data, you can use it to programmatically explore the definition itself.
%% Cell type:code id: tags:
``` python
run.systems[0].m_def.quantities
```
%%%% Output: execute_result
[number_of_atoms:Quantity, atom_labels:Quantity, atom_positions:Quantity]
%% Cell type:code id: tags:
``` python
run.m_def.all_quantities['timestamp'].description
```
%%%% Output: execute_result
'The time that this run was conducted.'
%% Cell type:code id: tags:
``` python
System.atom_labels.shape
```
%%%% Output: execute_result
['number_of_atoms']
%% Cell type:code id: tags:
``` python
# Build a dtype from NumPy's 64-bit integer scalar type.
t = np.dtype(np.int64)
```
%% Cell type:code id: tags:
``` python
t.type
```
%%%% Output: execute_result
numpy.int64
%% Cell type:code id: tags:
``` python
```
......
......@@ -264,4 +264,4 @@ A more complex example
from .metainfo import MSection, MCategory, Definition, Property, Quantity, SubSection, \
Section, Category, Package, Enum, Datetime, MProxy, MetainfoError, DeriveError, \
MetainfoReferenceError, DataType, MData, MDataDict, m_package, units
MetainfoReferenceError, DataType, MData, MDataDict, Reference, m_package, units
This diff is collapsed.
from typing import Tuple, Dict, List, Type, TypeVar
from typing import Tuple, Dict, List, Type, TypeVar, Any
import os.path
import numpy as np
import json
from jinja2 import Environment, PackageLoader, select_autoescape
import textwrap
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
import nomad_meta_info
from nomad import utils
from nomad.metainfo import Definition, Package, Category, Section, Quantity, SubSection
from nomad.metainfo import Definition, Package, Category, Section, Quantity, SubSection, Reference, units
def load_legacy_metainfo(
......@@ -28,13 +32,12 @@ def load_legacy_metainfo(
return defs, packages
legacy_defs, legacy_packages = load_legacy_metainfo(['common.nomadmetainfo.json', 'public.nomadmetainfo.json'])
legacy_defs, legacy_packages = load_legacy_metainfo(['common.nomadmetainfo.json', 'public.nomadmetainfo.json', 'vasp.nomadmetainfo.json'])
logger = utils.get_logger(__name__)
all_defs: Dict[str, Definition] = dict()
T = TypeVar('T', bound=Definition)
dtype_strs = set()
def convert_package(legacy_definitions: List[InfoKindEl], **kwargs) -> Package:
......@@ -48,7 +51,8 @@ def convert_package(legacy_definitions: List[InfoKindEl], **kwargs) -> Package:
definition = package.all_definitions.get(legacy_name)
if definition is None:
definition = package.m_create(section_cls, name=legacy_name)
definition = package.m_create(
section_cls, name=legacy_name, description=legacy_def.description)
if is_new:
all_defs[legacy_def.name] = definition
......@@ -63,9 +67,38 @@ def convert_package(legacy_definitions: List[InfoKindEl], **kwargs) -> Package:
definition = flux_box(legacy_def.name, Section, is_new=True)
elif legacy_def.kindStr in ['type_dimension', 'type_document_content']:
definition = Quantity(name=legacy_def.name, type=int)
# map shape, map type
dtype_strs.add(legacy_def.dtypeStr)
definition = Quantity(
name=legacy_def.name, description=legacy_def.description)
referenced_sections = legacy_def.extra_args.get('referencedSections')
if referenced_sections is not None and len(referenced_sections) > 0:
if len(referenced_sections) == 1:
definition.type = Reference(flux_box(referenced_sections[0], Section))
else:
logger.error('Could not map non higher dimensional reference quantity %s.' % definition.name)
definition.type = np.dtype(int)
elif legacy_def.kindStr == 'type_dimension':
definition.type = int
elif legacy_def.dtypeStr == 'D':
definition.type = Any
elif legacy_def.dtypeStr == 'C':
definition.type = str
elif legacy_def.dtypeStr == 'r':
definition.type = int
elif legacy_def.dtypeStr == 'i64':
definition.type = np.dtype(np.int64)
else:
definition.type = np.dtype(legacy_def.dtypeStr)
legacy_shape = legacy_def.shape
if legacy_shape is None:
legacy_shape = []
definition.shape = legacy_shape
if legacy_def.units is not None:
definition.unit = units.parse_units(legacy_def.units)
else:
logger.error(
......@@ -82,8 +115,9 @@ def convert_package(legacy_definitions: List[InfoKindEl], **kwargs) -> Package:
if legacy_super_def.kindStr == 'type_section':
parent_def = flux_box(legacy_super_name, Section)
if isinstance(definition, Section):
parent_def.m_create(
sub_section = parent_def.m_create(
SubSection, name=legacy_def.name, sub_section=definition)
sub_section.repeats = legacy_def.repeats is not None and legacy_def.repeats
elif isinstance(definition, Quantity):
parent_def.m_add_sub_section(Section.quantities, definition)
......@@ -103,5 +137,50 @@ common_pkg = convert_package(
legacy_packages['common.nomadmetainfo.json'] + legacy_packages['public.nomadmetainfo.json'],
name='common')
# print(common_pkg.m_to_json(indent=2))
print(dtype_strs)
vasp_pkg = convert_package(legacy_packages['vasp.nomadmetainfo.json'], name='vasp')
for error in common_pkg.m_all_validate() + vasp_pkg.m_all_validate():
print(error)
json.dumps([common_pkg.m_to_dict(), vasp_pkg.m_to_dict()], indent=2)
def format_description(description, indent=0, width=90):
    """
    Format a description text for use inside generated Python source.

    The text is split at newlines into paragraphs and blank paragraphs are dropped.
    Each remaining paragraph is wrapped to ``width - indent * 4`` columns, every
    backslash is doubled, and the paragraphs are joined with one empty line
    between them.

    All lines are prefixed with ``indent * 4`` spaces, except the very first line
    of the result, which is returned without a prefix (presumably because the
    calling template already positions it -- verify against the template).

    Arguments:
        description: The raw description text.
        indent: The indentation level; one level corresponds to four spaces.
        width: The maximum line width, including the indentation.

    Returns:
        The formatted text; an empty string if there are no non-blank paragraphs.
    """
    prefix = ' ' * 4 * indent
    stripped = [paragraph.strip() for paragraph in description.split('\n')]
    # Drop blank paragraphs *before* numbering them, so that the first paragraph
    # that is actually emitted is the one treated as "first", even if the
    # description starts with blank lines.
    paragraphs = [paragraph for paragraph in stripped if paragraph != '']

    def format_paragraph(paragraph, first):
        lines = textwrap.wrap(text=paragraph, width=width - indent * 4)
        lines = [line.replace('\\', '\\\\') for line in lines]
        # Identify the first line by position, not by content: a later line may
        # start with the same text as the first line and still must be indented.
        return '\n'.join(
            line if first and number == 0 else prefix + line
            for number, line in enumerate(lines))

    return '\n\n'.join(
        format_paragraph(paragraph, index == 0)
        for index, paragraph in enumerate(paragraphs))
def format_type(mi_type):
    """
    Return the source-code expression that represents a quantity type.

    Arguments:
        mi_type: The type of a quantity definition: a numpy dtype, one of the
            Python builtins ``int``, ``float``, ``str``, ``bool``, a
            ``Reference``, or any other value.

    Returns:
        ``np.dtype(np.<dtype>)`` for numpy dtypes, the builtin's name for the
        supported Python builtins, ``MProxy('<name>')`` with the name of the
        target section definition for references, and ``str(mi_type)`` otherwise.
    """
    # isinstance and not an exact type comparison: depending on the numpy
    # version, dtype instances are instances of subclasses of np.dtype.
    if isinstance(mi_type, np.dtype):
        return 'np.dtype(np.%s)' % mi_type

    # Compare by identity so that no ``==`` is evaluated on arbitrary type objects.
    if any(mi_type is builtin for builtin in (int, float, str, bool)):
        return mi_type.__name__

    if isinstance(mi_type, Reference):
        return "MProxy('%s')" % mi_type.target_section_def.name

    return str(mi_type)
def format_unit(unit):
    """ Return the string form of the given unit wrapped in single quotes. """
    quoted_template = "'%s'"
    return quoted_template % unit
env = Environment(
loader=PackageLoader('nomad.metainfo', 'templates'),
autoescape=select_autoescape(['python']))
env.globals.update(
format_description=format_description,
format_type=format_type,
format_unit=format_unit)
with open(os.path.join(os.path.dirname(__file__), 'common.py'), 'wt') as f:
f.write(env.get_template('package.j2').render(pkg=common_pkg))
......@@ -218,7 +218,7 @@ class _QuantityType(DataType):
(value, quantity_def))
def serialize(self, section, quantity_def, value):
if value in [str, int, float, bool]:
if value is str or value is int or value is float or value is bool:
return dict(type_kind='python', type_data=value.__name__)
if isinstance(value, Enum):
......@@ -767,13 +767,20 @@ class MSection(metaclass=MObjectMeta):
(quantity_def, value))
value = value.to(quantity_def.unit).magnitude
if type(value) != np.ndarray:
if len(quantity_def.shape) > 0 and type(value) != np.ndarray:
try:
value = np.asarray(value)
except TypeError:
raise TypeError(
'Could not convert value %s of %s to a numpy array' %
(value, quantity_def))
elif type(value) != quantity_def.type.type:
try:
value = quantity_def.type.type(value)
except TypeError:
raise TypeError(
'Could not convert value %s of %s to a numpy scalar' %
(value, quantity_def))
return self.__check_np(quantity_def, value)
......@@ -785,10 +792,7 @@ class MSection(metaclass=MObjectMeta):
raise MetainfoError('The quantity %s is derived and cannot be set.' % quantity_def)
if type(quantity_def.type) == np.dtype:
if type(value) != np.ndarray:
value = self.__to_np(quantity_def, value)
value = self.__check_np(quantity_def, value)
value = self.__to_np(quantity_def, value)
else:
dimensions = len(quantity_def.shape)
......@@ -1387,9 +1391,9 @@ class Quantity(Property):
- a numpy `dtype`, e.g. ``np.dtype('float32')``
- ``typing.Any`` to support any value
If set to `dtype`, this quantity will use a numpy array to store values internally.
If a regular (nested) Python list is given, it will be automatically converted.
The given `dtype` will be used in the numpy array.
If set to `dtype`, this quantity will use a numpy array or scalar to store values
internally. If a regular (nested) Python list or Python scalar is given, it will
be automatically converted. The given `dtype` will be used in the numpy value.
To define a reference, either a `section class` or instance of :class:`Section`
can be given. See :ref:`metainfo-sections` for details. Instances of the given section
......@@ -1489,14 +1493,21 @@ class Quantity(Property):
if isinstance(dimension, str):
if dimension.isidentifier():
dim_quantity = self.m_parent.all_quantities.get(dimension, None)
assert dim_quantity is not None, 'Dimensions must be quantities of the same section.'
assert len(dim_quantity.shape) == 0 and dim_quantity.type == int, \
'Dimensions must be shapeless and int typed.'
assert dim_quantity is not None, \
'Dimensions (%s) must be quantities of the same section (%s).' % (
dimension, self.m_parent)
assert len(dim_quantity.shape) == 0 and \
dim_quantity.type in [int, np.int16, np.int32, np.int8, np.uint8], \
'Dimensions (%s) must be shapeless (%s) and int (%s) typed.' % (
dimension, dim_quantity.shape, dim_quantity.type)
def c_higher_shapes_require_dtype(self):
if len(self.shape) > 1:
assert type(self.type) == np.dtype, \
'Higher dimensional quantities need a dtype and will be treated as numpy arrays.'
'Higher dimensional quantities (%s) need a dtype and will be treated as ' \
'numpy arrays.' % self
class DirectQuantity(Quantity):
......
import numpy as np
import typing
from nomad.metainfo import MSection, Package, Quantity, SubSection, MProxy
m_package = Package(name='{{ pkg.name }}', description='{{ pkg.description }}')
{% for section in pkg.section_definitions %}
class {{ section.name }}(MSection):
{% if section.description is not none -%}
'''
{{ format_description(section.description, indent=1) }}
'''
{% endif %}
{%- for quantity in section.quantities %}
{{ quantity.name }} = Quantity(
type={{ format_type(quantity.type) }},
shape={{ quantity.shape }}
{%- if quantity.unit is not none -%},
unit={{ format_unit(quantity.unit) }}
{%- endif -%}
{%- if quantity.description is not none -%},
description='''
{{ format_description(quantity.description, indent=2) }}
'''{%- endif -%})
{% endfor -%}
{%- for sub_section in section.sub_sections %}
{{ sub_section.name }} = SubSection(
sub_section=MProxy('{{ sub_section.sub_section.name }}'),
repeats={{ sub_section.repeats }}
{%- if sub_section.description is not none -%},
description='''
{{ format_description(sub_section.description, indent=2) }}
'''{%- endif -%})
{% endfor -%}
{%- endfor %}
......@@ -117,6 +117,4 @@ class OptimadeNormalizer(SystemBasedNormalizer):
optimade = self.get_optimade_data(index)
self._backend.add_mi2_section(optimade)
except Exception as e:
import traceback
traceback.print_exc()
self.logger.warn('could not acquire optimade data', exc_info=e)
......@@ -291,11 +291,20 @@ class TestM1:
with assert_exception(TypeError):
System().atom_labels = 'label'
def test_np(self):
def test_np_array(self):
system = System()
system.atom_positions = [[1, 2, 3]]
assert isinstance(system.atom_positions, pint.quantity._Quantity)
def test_np_scalar(self):
# Checks that a quantity with a numpy dtype and no shape stores a numpy scalar.
class TestSection(MSection):
test_quantity = Quantity(type=np.dtype('int16'))
test_section = TestSection()
# A plain Python int is assigned ...
test_section.test_quantity = 12
assert test_section.test_quantity == 12
# ... and the stored value is expected to have the dtype's scalar type.
assert type(test_section.test_quantity) == np.int16
def test_unit_conversion(self):
system = System()
system.atom_positions = [[1, 2, 3]] * units.angstrom
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment