Commit dccc9f88 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Fixed system normalizer #206, #204.

parent 8079d55b
Subproject commit 43d77d198acfc3137d1c4d08a4601248fbf8548d
Subproject commit 22f8a9063125da7bbde4f3f0153548005506e06b
%% Cell type:markdown id: tags:
# NOMAD Metainfo 2.0 demonstration
You can find more complete documentation [here](https://labdev-nomad.esc.rzg.mpg.de/fairdi/nomad/testing/docs/metainfo.html)
%% Cell type:code id: tags:
``` python
from nomad.metainfo import MSection, SubSection, Quantity, Datetime, units
import numpy as np
import datetime
```
%% Cell type:markdown id: tags:
## Sections and quantities
To define sections and their quantities, we use Python classes and attributes. Quantities have *type*, *shape*, and *unit*.
%% Cell type:code id: tags:
``` python
class System(MSection):
""" The simulated system """
number_of_atoms = Quantity(type=int, derived=lambda system: len(system.atom_labels))
atom_labels = Quantity(type=str, shape=['number_of_atoms'])
atom_positions = Quantity(type=np.dtype(np.float64), shape=['number_of_atoms', 3], unit=units.m)
```
%% Cell type:markdown id: tags:
Such *section classes* can then be instantiated like regular Python classes. Accordingly, *section instances* are just regular Python objects, and section quantities can be read and set like regular Python object attributes.
%% Cell type:code id: tags:
``` python
system = System()
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom
```
%% Cell type:markdown id: tags:
Of course, the metainfo is not just about dealing with physics data in Python. It's also about storing and managing data in various file formats and databases. Therefore, the created data can be serialized, e.g. to JSON. All *section
instances* have a set of additional `m_`-methods that provide further functionality. Note the unit conversion.
%% Cell type:code id: tags:
``` python
system.m_to_json()
```
%%%% Output: execute_result
'{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}'
%% Cell type:markdown id: tags:
## Sub-sections to form hierarchies of data
*Section instances* can be nested to form data hierarchies. To achieve this, we first have to create *section
definitions* that have sub-sections.
%% Cell type:code id: tags:
``` python
class Run(MSection):
timestamp = Quantity(type=Datetime, description='The time that this run was conducted.')
systems = SubSection(sub_section=System, repeats=True)
```
%% Cell type:markdown id: tags:
Now we can add *section instances* for `System` to *instances* of `Run`.
%% Cell type:code id: tags:
``` python
run = Run()
run.timestamp = datetime.datetime.now()
system = run.m_create(System)
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[6, 0, 0], [0, 0, 0], [3, 2, 0]]) * units.angstrom
system = run.m_create(System)
system.atom_labels = ['H', 'H', '0']
system.atom_positions = np.array([[5, 0, 0], [0, 0, 0], [2.5, 2, 0]]) * units.angstrom
run.m_to_json()
```
%%%% Output: execute_result
'{"timestamp": "2019-10-06T13:06:57.593988", "systems": [{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}, {"atom_labels": ["H", "H", "0"], "atom_positions": [[5e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [2.5e-10, 2e-10, 0.0]]}]}'
'{"timestamp": "2019-10-07T22:37:33.376139", "systems": [{"atom_labels": ["H", "H", "0"], "atom_positions": [[6e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [3e-10, 2e-10, 0.0]]}, {"atom_labels": ["H", "H", "0"], "atom_positions": [[5e-10, 0.0, 0.0], [0.0, 0.0, 0.0], [2.5e-10, 2e-10, 0.0]]}]}'
%% Cell type:markdown id: tags:
The whole data hierarchy can be navigated with regular Python object/attribute style programming and values can be
used for calculations as usual.
%% Cell type:code id: tags:
``` python
(run.systems[1].atom_positions - run.systems[0].atom_positions).to(units.angstrom)
```
%%%% Output: execute_result
$[[-1. 0. 0. ] [ 0. 0. 0. ] [-0.5 0. 0. ]] angstrom$
<Quantity([[-1. 0. 0. ]
[ 0. 0. 0. ]
[-0.5 0. 0. ]], 'angstrom')>
%% Cell type:markdown id: tags:
## Reflection, inspection, and code-completion
Since all definitions are available as *section classes*, Python already knows about all possible quantities. We can
use this in Python notebooks, via *tab* or the `?`-operator. Furthermore, you can access the *section definition* of all *section instances* with `m_def`. Since a *section definition* itself is just a piece of metainfo data, you can use it to programmatically explore the definition itself.
%% Cell type:code id: tags:
``` python
run.systems[0].m_def.quantities
```
%%%% Output: execute_result
[number_of_atoms:Quantity, atom_labels:Quantity, atom_positions:Quantity]
%% Cell type:code id: tags:
``` python
run.m_def.all_quantities['timestamp'].description
```
%%%% Output: execute_result
'The time that this run was conducted.'
%% Cell type:code id: tags:
``` python
System.atom_labels.shape
```
%%%% Output: execute_result
['number_of_atoms']
......
......@@ -183,7 +183,6 @@ mail = NomadConfig(
)
normalize = NomadConfig(
all_systems=False,
system_classification_with_clusters_threshold=50
)
......
......@@ -383,6 +383,13 @@ def get_optional_backend_value(backend, key, section, unavailable_value=None, lo
val = None # Initialize to None, so we can compare section values.
# Loop over the sections with the name section in the backend.
for section_index in backend.get_sections(section):
if section == 'section_system':
try:
if not backend.get_value('is_representative', section_index):
continue
except KeyError:
continue
try:
new_val = backend.get_value(key, section_index)
except KeyError:
......
......@@ -51,17 +51,16 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
"""
A normalizer base class for normalizers that only touch a section_system.
The normalizer is either run on all section systems or only for systems that are
linked to a section_single_configuration_calculation. Also if there are multiple sccs,
the normalizer is only run for the last frame belonging to a frame sequence.
The normalizer is run on all section systems in a run. However, some systems,
selected by heuristic, are more `representative systems` for the run. Sub-classes
might opt to do additional work for the `representative systems`.
Arguments:
all_sections: apply normalizer to all section_system instances or only the
last single config calc of the last frame sequence
Args:
only_representatives: Will only normalize the `representative` systems.
"""
def __init__(self, backend: AbstractParserBackend, all_sections=True) -> None:
super().__init__(backend=backend)
self._all_sections = all_sections
def __init__(self, backend: AbstractParserBackend, only_representatives: bool = False):
super().__init__(backend)
self.only_representatives = only_representatives
@property
def quantities(self) -> List[str]:
......@@ -74,75 +73,78 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
'configuration_periodic_dimensions'
]
def _normalize_system(self, g_index):
def _normalize_system(self, g_index, is_representative):
context = '/section_run/0/section_system/%d' % g_index
self._backend.openContext(context)
try:
self.normalize_system(g_index)
self.normalize_system(g_index, is_representative)
finally:
self._backend.closeContext(context)
@abstractmethod
def normalize_system(self, section_system_index: int) -> None:
def normalize_system(self, section_system_index: int, is_representative: bool) -> None:
""" Normalize the given section. """
pass
def normalize(self, logger=None) -> None:
super().normalize(logger)
def __representative_systems(self):
# look for sccs in last frames
sccs = []
try:
frame_seqs = self._backend.get_sections(s_frame_sequence)
except Exception:
frame_seqs = []
if self._all_sections:
try:
systems = self._backend.get_sections(s_system)
except Exception:
systems = []
else:
# look for sccs in last frames
sccs = []
for frame_seq in frame_seqs:
try:
frame_seqs = self._backend.get_sections(s_frame_sequence)
frames = self._backend.get_value(r_frame_sequence_local_frames, frame_seq)
except Exception:
frame_seqs = []
frames = []
for frame_seq in frame_seqs:
try:
frames = self._backend.get_value(r_frame_sequence_local_frames, frame_seq)
except Exception:
frames = []
if len(frames) > 0:
sccs.append(frames[-1])
if len(frames) > 0:
sccs.append(frames[-1])
# no sccs from frames -> consider all sccs
if len(sccs) == 0:
try:
sccs = self._backend.get_sections(s_scc)
except Exception:
sccs = []
# no sccs from frames -> consider all sccs
if len(sccs) == 0:
try:
sccs = self._backend.get_sections(s_scc)
except Exception:
sccs = []
try:
systems = [self._backend.get_value(r_scc_to_system, scc) for scc in sccs]
except Exception:
systems = []
# only take the first, and last two systems
if len(systems) == 0:
try:
systems = [self._backend.get_value(r_scc_to_system, scc) for scc in sccs]
systems = self._backend.get_sections(s_system)
except Exception:
systems = []
# only take the first, and last two systems
if len(systems) == 0:
try:
systems = self._backend.get_sections(s_system)
except Exception:
systems = []
if len(systems) > 2:
systems = [systems[0], systems[-2], systems[-1]]
if len(systems) > 2:
systems = [systems[0], systems[-2], systems[-1]]
if len(systems) == 0:
self.logger.error('no section system found')
self.logger.error('no "representative" section system found')
self.logger.info(
'chose "representative" systems for normalization',
number_of_systems=len(systems))
return set(systems)
def normalize(self, logger=None) -> None:
super().normalize(logger)
self.logger.info('chose systems for normalization', number_of_systems=len(systems))
representative_systems = self.__representative_systems()
all_systems = self._backend.get_sections(s_system)
selected_systems = representative_systems if self.only_representatives else all_systems
for g_index in systems:
for g_index in selected_systems:
try:
self._normalize_system(g_index)
self._normalize_system(g_index, g_index in representative_systems)
except KeyError as e:
self.logger.error(
'Could not read all input data', normalizer=self.__class__.__name__,
......
......@@ -15,7 +15,6 @@
from typing import Any, Dict
import numpy as np
from nomad import config
from nomad.normalizing.normalizer import SystemBasedNormalizer
from nomad.metainfo import units
from nomad.metainfo.optimade import OptimadeEntry
......@@ -28,7 +27,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
It assumes that the :class:`SystemNormalizer` was run before.
"""
def __init__(self, backend):
super().__init__(backend, all_sections=config.normalize.all_systems)
super().__init__(backend, only_representatives=True)
def get_optimade_data(self, index) -> OptimadeEntry:
"""
......@@ -92,7 +91,10 @@ class OptimadeNormalizer(SystemBasedNormalizer):
return optimade
def normalize_system(self, index):
def normalize_system(self, index, is_representative):
if not is_representative:
return False
try:
optimade = self.get_optimade_data(index)
self._backend.add_mi2_section(optimade)
......
......@@ -48,8 +48,6 @@ class SystemNormalizer(SystemBasedNormalizer):
This normalizer performs all system (atoms, cells, etc.) related normalizations
of the legacy NOMAD-coe *stats* normalizer.
"""
def __init__(self, backend):
super().__init__(backend, all_sections=config.normalize.all_systems)
@staticmethod
def atom_label_to_num(atom_label):
......@@ -63,7 +61,7 @@ class SystemNormalizer(SystemBasedNormalizer):
return 0
def normalize_system(self, index) -> None:
def normalize_system(self, index, is_representative) -> None:
"""
The 'main' method of this :class:`SystemBasedNormalizer`.
Normalizes the section with the given `index`.
......@@ -188,25 +186,28 @@ class SystemNormalizer(SystemBasedNormalizer):
configuration_id = utils.hash(json.dumps(configuration).encode('utf-8'))
set_value('configuration_raw_gid', configuration_id)
# system type analysis
if atom_positions is not None:
with utils.timer(
self.logger, 'system classification executed',
system_size=atoms.get_number_of_atoms()):
if is_representative:
self._backend.addValue('is_representative', is_representative)
self.system_type_analysis(atoms)
# system type analysis
if atom_positions is not None:
with utils.timer(
self.logger, 'system classification executed',
system_size=atoms.get_number_of_atoms()):
# symmetry analysis
if atom_positions is not None and (lattice_vectors is not None or not any(pbc)):
with utils.timer(
self.logger, 'symmetry analysis executed',
system_size=atoms.get_number_of_atoms()):
self.system_type_analysis(atoms)
self.symmetry_analysis(atoms)
# symmetry analysis
if atom_positions is not None and (lattice_vectors is not None or not any(pbc)):
with utils.timer(
self.logger, 'symmetry analysis executed',
system_size=atoms.get_number_of_atoms()):
self.symmetry_analysis(atoms)
def system_type_analysis(self, atoms) -> None:
"""
Determine the dimensioality and hence the system type of the system with
Determine the dimensionality and hence the system type of the system with
Matid. Write the system type to the backend.
"""
system_type = config.services.unavailable_value
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment