Commit 14ea3639 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Example metainfo notebook. Unit support on np arrays.

parent cbe5105f
.DS_Store
.pyenv/
.env/
.ipynb_checkpoints/
__pycache__
.mypy_cache
*.pyc
......
%% Cell type:markdown id: tags:
# NOMAD Metainfo 2.0 demonstration
You can find more complete documentation [here](https://labdev-nomad.esc.rzg.mpg.de/fairdi/nomad/testing/docs/metainfo.html)
%% Cell type:code id: tags:
``` python
from nomad.metainfo import MSection, SubSection, Quantity, Datetime, units
import numpy as np
import datetime
```
%% Cell type:markdown id: tags:
## Sections and quantities
To define sections and their quantities, we use Python classes and attributes. Quantities have *type*, *shape*, and *unit*.
%% Cell type:code id: tags:
``` python
class System(MSection):
""" The simulated system """
number_of_atoms = Quantity(type=int, derived=lambda system: len(system.atom_labels))
atom_labels = Quantity(type=str, shape=['number_of_atoms'])
atom_positions = Quantity(type=np.dtype(np.float64), shape=['number_of_atoms', 3], unit=units.m)
```
%% Cell type:markdown id: tags:
Such *section classes* can then be instantiated like regular Python classes. Accordingly, *section instances* are just regular Python objects, and section quantities can be read and set like regular Python object attributes.
%% Cell type:code id: tags:
``` python
system = System()
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom
```
%% Cell type:markdown id: tags:
Of course, the metainfo is not just about dealing with physics data in Python. It's also about storing and managing data in various file formats and databases. Therefore, the created data can be serialized, e.g. to JSON. All *section
instances* have a set of additional `m_`-methods that provide further functionality. Note the unit conversion.
%% Cell type:code id: tags:
``` python
system.m_to_json()
```
%%%% Output: execute_result
'{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}'
%% Cell type:markdown id: tags:
## Sub-sections to form hierarchies of data
*Section instances* can be nested to form data hierarchies. To achieve this, we first have to create *section
definitions* that have sub-sections.
%% Cell type:code id: tags:
``` python
class Run(MSection):
timestamp = Quantity(type=Datetime, description='The time that this run was conducted.')
systems = SubSection(sub_section=System, repeats=True)
```
%% Cell type:markdown id: tags:
Now we can add *section instances* for `System` to *instances* of `Run`.
%% Cell type:code id: tags:
``` python
run = Run()
run.timestamp = datetime.datetime.now()
system = run.m_create(System)
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom
system = run.m_create(System)
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[5, 0, 0], [0, 0, 0], [2.5, 2, 0]]) * units.angstrom
run.m_to_json()
```
%%%% Output: execute_result
'{"timestamp": "2019-10-06T13:06:57.593988", "systems": [{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}, {"atom_labels": ["H", "H", "0"], "atom_positions": [[5e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [2.5e-10, 2e-10, 0.0]]}]}'
%% Cell type:markdown id: tags:
The whole data hierarchy can be navigated with regular Python object/attribute style programming and values can be
used for calculations as usual.
%% Cell type:code id: tags:
``` python
(run.systems[1].atom_positions - run.systems[0].atom_positions).to(units.angstrom)
```
%%%% Output: execute_result
$[[-1. 0. 0. ] [ 0. 0. 0. ] [-0.5 0. 0. ]] angstrom$
<Quantity([[-1. 0. 0. ]
[ 0. 0. 0. ]
[-0.5 0. 0. ]], 'angstrom')>
%% Cell type:markdown id: tags:
## Reflection, inspection, and code-completion
Since all definitions are available as *section classes*, Python already knows about all possible quantities. We can
use this in Python notebooks, via *tab* or the `?`-operator. Furthermore, you can access the *section definition* of all *section instances* with `m_def`. Since a *section definition* itself is just a piece of metainfo data, you can use it to programmatically explore the definition itself.
%% Cell type:code id: tags:
``` python
run.systems[0].m_def.quantities
```
%%%% Output: execute_result
[number_of_atoms:Quantity, atom_labels:Quantity, atom_positions:Quantity]
%% Cell type:code id: tags:
``` python
run.m_def.all_quantities['timestamp'].description
```
%%%% Output: execute_result
'The time that this run was conducted.'
%% Cell type:code id: tags:
``` python
System.atom_labels.shape
```
%%%% Output: execute_result
['number_of_atoms']
......@@ -21,8 +21,9 @@ import re
import json
import itertools
import numpy as np
from pint.unit import _Unit
from pint import UnitRegistry
import pint
import pint.unit
import pint.quantity
import aniso8601
from datetime import datetime
import pytz
......@@ -131,7 +132,7 @@ class __Unit(DataType):
if isinstance(value, str):
value = units.parse_units(value)
elif not isinstance(value, _Unit):
elif not isinstance(value, pint.unit._Unit):
raise TypeError('Units must be given as str or pint Unit instances.')
return value
......@@ -143,7 +144,7 @@ class __Unit(DataType):
return units.parse_units(value)
units = UnitRegistry()
units = pint.UnitRegistry()
""" The default pint unit registry that should be used to give units to quantity definitions. """
......@@ -723,6 +724,24 @@ class MSection(metaclass=MObjectMeta):
return self.m_def.all_quantities[quantity_def.synonym_for]
return quantity_def
def __to_np(self, quantity_def: 'Quantity', value):
    """
    Turns the given value into a numpy array for the given quantity definition.

    A pint quantity is first converted to the unit of the quantity definition
    and reduced to its magnitude. Anything that is not already a plain
    ``np.ndarray`` is then passed through ``np.asarray``. The resulting array is
    handed to ``__check_np`` and the result of that call is returned.

    Raises:
        MetainfoError: If the value carries a unit, but the quantity definition
            has none.
        TypeError: If numpy cannot convert the value to an array.
    """
    if isinstance(value, pint.quantity._Quantity):
        target_unit = quantity_def.unit
        if target_unit is None:
            raise MetainfoError(
                'The quantity %s has not a unit, but value %s has.' %
                (quantity_def, value))

        # only the bare magnitude (in the definition's unit) is kept
        value = value.to(target_unit).magnitude

    already_array = type(value) is np.ndarray
    if not already_array:
        try:
            value = np.asarray(value)
        except TypeError:
            raise TypeError(
                'Could not convert value %s of %s to a numpy array' %
                (value, quantity_def))

    return self.__check_np(quantity_def, value)
def m_set(self, quantity_def: 'Quantity', value: Any) -> None:
""" Set the given value for the given quantity. """
quantity_def = self.__resolve_synonym(quantity_def)
......@@ -732,12 +751,7 @@ class MSection(metaclass=MObjectMeta):
if type(quantity_def.type) == np.dtype:
if type(value) != np.ndarray:
try:
value = np.asarray(value)
except TypeError:
raise TypeError(
'Could not convert value %s of %s to a numpy array' %
(value, quantity_def))
value = self.__to_np(quantity_def, value)
value = self.__check_np(quantity_def, value)
......@@ -791,6 +805,10 @@ class MSection(metaclass=MObjectMeta):
'Only numpy arrays and dtypes can be used for higher dimensional '
'quantities.')
elif type(quantity_def.type) == np.dtype:
if quantity_def.unit is not None:
value = value * quantity_def.unit
return value
def m_is_set(self, quantity_def: 'Quantity') -> bool:
......@@ -894,7 +912,7 @@ class MSection(metaclass=MObjectMeta):
# quantities
for name, quantity in self.m_def.all_quantities.items():
if quantity.virtual:
if quantity.virtual or not self.m_is_set(quantity):
continue
if self.m_is_set(quantity) and quantity.derived is None:
......@@ -932,7 +950,7 @@ class MSection(metaclass=MObjectMeta):
'Do not know how to serialize data with type %s for quantity %s' %
(quantity.type, quantity))
value = getattr(self, name)
value = self.m_data.dct[name]
if type(quantity.type) == np.dtype:
serializable_value = value.tolist()
......
......@@ -14,8 +14,9 @@
import pytest
import numpy as np
import pint.quantity
from nomad.metainfo.metainfo import MSection, MCategory, Section, Quantity, Definition, Package, DeriveError
from nomad.metainfo.metainfo import MSection, MCategory, Section, Quantity, Definition, Package, DeriveError, units
from nomad.metainfo.example import Run, VaspRun, System, SystemHash, Parsing, m_package as example_package
......@@ -255,13 +256,19 @@ class TestM1:
def test_np(self):
system = System()
system.atom_positions = [[1, 2, 3]]
assert type(system.atom_positions) == np.ndarray
assert isinstance(system.atom_positions, pint.quantity._Quantity)
def test_unit_conversion(self):
    """
    Sets positions given in angstrom and checks that the value read back is
    reported in meter and is numerically smaller than 0.1 meter.
    """
    positions_in_angstrom = [[1, 2, 3]] * units.angstrom

    system = System()
    system.atom_positions = positions_in_angstrom

    stored_positions = system.atom_positions
    assert stored_positions.units == units.meter
    assert stored_positions[0][0] < 0.1 * units.meter
def test_synonym(self):
system = System()
system.lattice_vectors = [[1.2e-10, 0, 0], [0, 1.2e-10, 0], [0, 0, 1.2e-10]]
assert type(system.lattice_vectors) == np.ndarray
assert type(system.unit_cell) == np.ndarray
assert isinstance(system.lattice_vectors, pint.quantity._Quantity)
assert isinstance(system.unit_cell, pint.quantity._Quantity)
assert np.array_equal(system.unit_cell, system.lattice_vectors)
@pytest.fixture(scope='function')
......@@ -283,7 +290,7 @@ class TestM1:
assert system.m_def == System.m_def
assert system.n_atoms == 3
assert system.atom_labels == ['H', 'H', 'O']
assert type(system.atom_positions) == np.ndarray
assert isinstance(system.atom_positions, pint.quantity._Quantity)
def test_to_dict(self, example_data):
dct = example_data.m_to_dict()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment