From 14ea3639d506160ee4397b9fc6448cfb5fc22eea Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Sun, 6 Oct 2019 15:34:28 +0200
Subject: [PATCH] Example metainfo notebook. Unit support on np arrays.

---
 .gitignore                 |   1 +
 examples/metainfo.ipynb    | 277 +++++++++++++++++++++++++++++++++++++
 nomad/metainfo/metainfo.py |  42 ++++--
 tests/test_metainfo.py     |  17 ++-
 4 files changed, 320 insertions(+), 17 deletions(-)
 create mode 100644 examples/metainfo.ipynb

diff --git a/.gitignore b/.gitignore
index aad8c4db6c..50f4e1e8dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 .DS_Store
 .pyenv/
 .env/
+.ipynb_checkpoints/
 __pycache__
 .mypy_cache
 *.pyc
diff --git a/examples/metainfo.ipynb b/examples/metainfo.ipynb
new file mode 100644
index 0000000000..22dd3b9f79
--- /dev/null
+++ b/examples/metainfo.ipynb
@@ -0,0 +1,277 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# NOMAD Metainfo 2.0 demonstration\n",
+    "\n",
+    "You can find more complete documentation [here](https://labdev-nomad.esc.rzg.mpg.de/fairdi/nomad/testing/docs/metainfo.html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nomad.metainfo import MSection, SubSection, Quantity, Datetime, units\n",
+    "import numpy as np\n",
+    "import datetime"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Sections and quantities\n",
+    "\n",
+    "To define sections and their quantities, we use Python classes and attributes. Quantities have *type*, *shape*, and *unit*."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class System(MSection):\n",
+    "    \"\"\" The simulated system \"\"\"\n",
+    "    number_of_atoms = Quantity(type=int, derived=lambda system: len(system.atom_labels))\n",
+    "    atom_labels = Quantity(type=str, shape=['number_of_atoms'])\n",
+    "    atom_positions = Quantity(type=np.dtype(np.float64), shape=['number_of_atoms', 3], unit=units.m)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Such *section classes* can then be instantiated like regular Python classes. Accordingly, *section instances* are just regular Python objects, and section quantities can be read and set like regular Python object attributes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "system = System()\n",
+    "system.atom_labels = ['H', 'H', '0']\n",
+    "system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Of course the metainfo is not just about dealing with physics data in Python. It's also about storing and managing data in various file formats and databases. Therefore, the created data can be serialized, e.g. to JSON. All *section \n",
+    "instances* have a set of additional `m_`-methods that provide additional functionality. Note the unit conversion."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'{\"atom_labels\": [\"H\", \"H\", \"0\"], \"atom_positions\": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "system.m_to_json()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Sub-sections to form hierarchies of data\n",
+    "\n",
+    "*Section instances* can be nested to form data hierarchies. To achieve this, we first have to create *section \n",
+    "definitions* that have sub-sections."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Run(MSection):\n",
+    "    timestamp = Quantity(type=Datetime, description='The time that this run was conducted.')\n",
+    "    systems = SubSection(sub_section=System, repeats=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we can add *section instances* for `System` to *instances* of `Run`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'{\"timestamp\": \"2019-10-06T13:06:57.593988\", \"systems\": [{\"atom_labels\": [\"H\", \"H\", \"0\"], \"atom_positions\": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}, {\"atom_labels\": [\"H\", \"H\", \"0\"], \"atom_positions\": [[5e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [2.5e-10, 2e-10, 0.0]]}]}'"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "run = Run()\n",
+    "run.timestamp = datetime.datetime.now()\n",
+    "\n",
+    "system = run.m_create(System)\n",
+    "system.atom_labels = ['H', 'H', '0']\n",
+    "system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom\n",
+    "\n",
+    "system = run.m_create(System)\n",
+    "system.atom_labels = ['H', 'H', '0']\n",
+    "system.atom_positions = np.array([[5, 0, 0], [0, 0, 0], [2.5, 2, 0]]) * units.angstrom\n",
+    "run.m_to_json()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The whole data hierarchy can be navigated with regular Python object/attribute style programming and values can be\n",
+    "used for calculations as usual."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "[[-1.   0.   0. ] [ 0.   0.   0. ] [-0.5  0.   0. ]] angstrom"
+      ],
+      "text/latex": [
+       "$[[-1.   0.   0. ] [ 0.   0.   0. ] [-0.5  0.   0. ]] angstrom$"
+      ],
+      "text/plain": [
+       "<Quantity([[-1.   0.   0. ]\n",
+       " [ 0.   0.   0. ]\n",
+       " [-0.5  0.   0. ]], 'angstrom')>"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(run.systems[1].atom_positions - run.systems[0].atom_positions).to(units.angstrom)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Reflection, inspection, and code-completion\n",
+    "\n",
+    "Since all definitions are available as *section classes*, Python already knows about all possible quantities. We can \n",
+    "use this in Python notebooks, via *tab* or the `?`-operator. Furthermore, you can access the *section definition* of all *section instances* with `m_def`. Since a *section definition* itself is just a piece of metainfo data, you can use it to programmatically explore the definition itself."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[number_of_atoms:Quantity, atom_labels:Quantity, atom_positions:Quantity]"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "run.systems[0].m_def.quantities"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'The time that this run was conducted.'"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "run.m_def.all_quantities['timestamp'].description"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['number_of_atoms']"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "System.atom_labels.shape"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": ".venv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py
index c9575ac9c2..05bbb362fd 100644
--- a/nomad/metainfo/metainfo.py
+++ b/nomad/metainfo/metainfo.py
@@ -21,8 +21,9 @@ import re
 import json
 import itertools
 import numpy as np
-from pint.unit import _Unit
-from pint import UnitRegistry
+import pint
+import pint.unit
+import pint.quantity
 import aniso8601
 from datetime import datetime
 import pytz
@@ -131,7 +132,7 @@ class __Unit(DataType):
         if isinstance(value, str):
             value = units.parse_units(value)
 
-        elif not isinstance(value, _Unit):
+        elif not isinstance(value, pint.unit._Unit):
             raise TypeError('Units must be given as str or pint Unit instances.')
 
         return value
@@ -143,7 +144,7 @@ class __Unit(DataType):
         return units.parse_units(value)
 
 
-units = UnitRegistry()
+units = pint.UnitRegistry()
 """ The default pint unit registry that should be used to give units to quantity definitions. """
 
 
@@ -723,6 +724,24 @@ class MSection(metaclass=MObjectMeta):
             return self.m_def.all_quantities[quantity_def.synonym_for]
         return quantity_def
 
+    def __to_np(self, quantity_def: 'Quantity', value):
+        if isinstance(value, pint.quantity._Quantity):
+            if quantity_def.unit is None:
+                raise MetainfoError(
+                    'The quantity %s has not a unit, but value %s has.' %
+                    (quantity_def, value))
+            value = value.to(quantity_def.unit).magnitude
+
+        if type(value) != np.ndarray:
+            try:
+                value = np.asarray(value)
+            except TypeError:
+                raise TypeError(
+                    'Could not convert value %s of %s to a numpy array' %
+                    (value, quantity_def))
+
+        return self.__check_np(quantity_def, value)
+
     def m_set(self, quantity_def: 'Quantity', value: Any) -> None:
         """ Set the given value for the given quantity. """
         quantity_def = self.__resolve_synonym(quantity_def)
@@ -732,12 +751,7 @@ class MSection(metaclass=MObjectMeta):
 
         if type(quantity_def.type) == np.dtype:
             if type(value) != np.ndarray:
-                try:
-                    value = np.asarray(value)
-                except TypeError:
-                    raise TypeError(
-                        'Could not convert value %s of %s to a numpy array' %
-                        (value, quantity_def))
+                value = self.__to_np(quantity_def, value)
 
             value = self.__check_np(quantity_def, value)
 
@@ -791,6 +805,10 @@ class MSection(metaclass=MObjectMeta):
                     'Only numpy arrays and dtypes can be used for higher dimensional '
                     'quantities.')
 
+        elif type(quantity_def.type) == np.dtype:
+            if quantity_def.unit is not None:
+                value = value * quantity_def.unit
+
         return value
 
     def m_is_set(self, quantity_def: 'Quantity') -> bool:
@@ -894,7 +912,7 @@ class MSection(metaclass=MObjectMeta):
 
             # quantities
             for name, quantity in self.m_def.all_quantities.items():
-                if quantity.virtual:
+                if quantity.virtual or not self.m_is_set(quantity):
                     continue
 
                 if self.m_is_set(quantity) and quantity.derived is None:
@@ -932,7 +950,7 @@ class MSection(metaclass=MObjectMeta):
                             'Do not know how to serialize data with type %s for quantity %s' %
                             (quantity.type, quantity))
 
-                    value = getattr(self, name)
+                    value = self.m_data.dct[name]
 
                     if type(quantity.type) == np.dtype:
                         serializable_value = value.tolist()
diff --git a/tests/test_metainfo.py b/tests/test_metainfo.py
index c82524d489..061cada6bf 100644
--- a/tests/test_metainfo.py
+++ b/tests/test_metainfo.py
@@ -14,8 +14,9 @@
 
 import pytest
 import numpy as np
+import pint.quantity
 
-from nomad.metainfo.metainfo import MSection, MCategory, Section, Quantity, Definition, Package, DeriveError
+from nomad.metainfo.metainfo import MSection, MCategory, Section, Quantity, Definition, Package, DeriveError, units
 from nomad.metainfo.example import Run, VaspRun, System, SystemHash, Parsing, m_package as example_package
 
 
@@ -255,13 +256,19 @@ class TestM1:
     def test_np(self):
         system = System()
         system.atom_positions = [[1, 2, 3]]
-        assert type(system.atom_positions) == np.ndarray
+        assert isinstance(system.atom_positions, pint.quantity._Quantity)
+
+    def test_unit_conversion(self):
+        system = System()
+        system.atom_positions = [[1, 2, 3]] * units.angstrom
+        assert system.atom_positions.units == units.meter
+        assert system.atom_positions[0][0] < 0.1 * units.meter
 
     def test_synonym(self):
         system = System()
         system.lattice_vectors = [[1.2e-10, 0, 0], [0, 1.2e-10, 0], [0, 0, 1.2e-10]]
-        assert type(system.lattice_vectors) == np.ndarray
-        assert type(system.unit_cell) == np.ndarray
+        assert isinstance(system.lattice_vectors, pint.quantity._Quantity)
+        assert isinstance(system.unit_cell, pint.quantity._Quantity)
         assert np.array_equal(system.unit_cell, system.lattice_vectors)
 
     @pytest.fixture(scope='function')
@@ -283,7 +290,7 @@ class TestM1:
         assert system.m_def == System.m_def
         assert system.n_atoms == 3
         assert system.atom_labels == ['H', 'H', 'O']
-        assert type(system.atom_positions) == np.ndarray
+        assert isinstance(system.atom_positions, pint.quantity._Quantity)
 
     def test_to_dict(self, example_data):
         dct = example_data.m_to_dict()
-- 
GitLab