Commit ebf07aa0 authored by Markus Scheidgen
Browse files

Merged latest metainfo development.

parent 11058e5f
Pipeline #61959 passed with stages
in 24 minutes and 8 seconds
Subproject commit ccbf641ab7a0930c5f18507147f6c5b51f4e7444
Subproject commit d73611bc1b16ea71daa3d0fd24ee275c78853557
Subproject commit 039ed6cb532b26926f8e0d7dc2027403e965e67c
Subproject commit dc59aff14e69a6818fbefeb7b1d504348438e26b
from typing import Union, Callable, cast
import os.path
import ujson
import json
import click
import sys
from nomad import config, utils, files
from nomad.parsing import LocalBackend, parser_dict, match_parser, MatchingParser
from nomad.parsing import LocalBackend, parser_dict, match_parser, MatchingParser, MetainfoBackend
from nomad.metainfo.legacy import LegacyMetainfoEnvironment
from nomad.normalizing import normalizers
from nomad.datamodel import CalcWithMetadata
......@@ -14,7 +15,9 @@ from .cli import cli
def parse(
mainfile: str, upload_files: Union[str, files.StagingUploadFiles],
parser_name: str = None, strict: bool = True, logger=None) -> LocalBackend:
parser_name: str = None,
backend_factory: Callable = None,
strict: bool = True, logger=None) -> LocalBackend:
Run the given parser on the downloaded calculation. If no parser is given,
do parser matching and use the respective parser.
......@@ -34,6 +37,8 @@ def parse(
assert parser is not None, 'there is no parser matching %s' % mainfile
logger = logger.bind( # type: ignore'identified parser')
if hasattr(parser, 'backend_factory'):
setattr(parser, 'backend_factory', backend_factory)
if isinstance(upload_files, str):
mainfile_path = os.path.join(upload_files, mainfile)
......@@ -96,10 +101,19 @@ def normalize_all(parser_backend: LocalBackend = None, logger=None) -> LocalBack
@click.option('--skip-normalizers', is_flag=True, default=False, help='Do not run the normalizer.')
@click.option('--not-strict', is_flag=True, help='Do also match artificial parsers.')
@click.option('--parser', help='Skip matching and use the provided parser')
def _parse(mainfile, show_backend, show_metadata, skip_normalizers, not_strict, parser):
@click.option('--metainfo', is_flag=True, help='Use the new metainfo instead of the legacy metainfo.')
def _parse(mainfile, show_backend, show_metadata, skip_normalizers, not_strict, parser, metainfo):
kwargs = dict(strict=not not_strict, parser_name=parser)
backend = parse(mainfile, '.', strict=not not_strict, parser_name=parser)
if metainfo:
def backend_factory(env, logger):
return MetainfoBackend(LegacyMetainfoEnvironment(env), logger=logger)
backend = parse(mainfile, '.', **kwargs)
if not skip_normalizers:
......@@ -109,4 +123,4 @@ def _parse(mainfile, show_backend, show_metadata, skip_normalizers, not_strict,
if show_metadata:
metadata = CalcWithMetadata()
ujson.dump(metadata.to_dict(), sys.stdout, indent=4)
json.dump(metadata.to_dict(), sys.stdout, indent=4)
......@@ -16,6 +16,7 @@ from typing import Iterable, List, Dict, Type, Tuple, Callable, Any
import datetime
from elasticsearch_dsl import Keyword
from import Mapping
import numpy as np
from nomad import utils, config
from nomad.metainfo import MSection
......@@ -429,4 +430,7 @@ def get_optional_backend_value(backend, key, section, unavailable_value=None, lo
'The values for %s where not available in any %s' % (key, section))
return unavailable_value if unavailable_value is not None else
if isinstance(val, np.generic):
return val.item()
return val
......@@ -158,7 +158,7 @@ class DFTCalcWithMetadata(CalcWithMetadata):
n_total_energies = 0
n_geometries = 0
for meta_info, event, value in backend._delegate.results.traverse():
for meta_info, event, value in backend.traverse():
if event == ParserEvent.add_value or event == ParserEvent.add_array_value:
......@@ -105,7 +105,7 @@ class EMSEntryWithMetadata(CalcWithMetadata):
quantities = set()
for meta_info, _, _ in backend._delegate.results.traverse(root_section='section_experiment'):
for meta_info, _, _ in backend.traverse(root_section='section_experiment'):
self.quantities = list(quantities)
......@@ -196,6 +196,13 @@ Packages
.. _metainfo-custom-types:
.. autoclass:: Environment
Custom data types
......@@ -254,6 +261,11 @@ object when a respective quantity is accessed.
.. autoclass:: MProxy
.. autoclass:: MResource
A more complex example
......@@ -263,5 +275,6 @@ A more complex example
from .metainfo import MSection, MCategory, Definition, Property, Quantity, SubSection, \
Section, Category, Package, Enum, Datetime, MProxy, MetainfoError, DeriveError, \
MetainfoReferenceError, DataType, MData, MDataDict, Reference, m_package, units
Section, Category, Package, Environment, Enum, Datetime, MProxy, MetainfoError, DeriveError, \
MetainfoReferenceError, DataType, MData, MDataDict, Reference, MResource, m_package, \
This diff is collapsed.
This diff is collapsed.
......@@ -337,16 +337,20 @@ Datetime = _Datetime()
class MObjectMeta(type):
def __new__(self, cls_name, bases, dct):
do_init = dct.get('do_init', None)
if do_init is not None:
do_init = True
cls = super().__new__(self, cls_name, bases, dct)
init = getattr(cls, '__init_cls__')
if init is not None and not is_bootstrapping:
if init is not None and do_init and not is_bootstrapping:
return cls
Content = Tuple['MSection', int, 'SubSection', 'MSection']
SectionDef = Union[str, 'Section', 'SubSection', Type[MSectionBound]]
""" Type for section definition references.
......@@ -492,6 +496,49 @@ class MDataDict(MData):
return len(self.dct[sub_section_name])
class MResource():
"""Represents a collection of related metainfo data, i.e. a set of :class:`MSection` instances.
MResource allows to keep related objects together and resolve sections of certain
section definitions.
def __init__(self):
self.__data: Dict['Section', List['MSection']] = dict()
self.contents: List['MSection'] = []
def create(self, section_cls: Type[MSectionBound], *args, **kwargs) -> MSectionBound:
""" Create an instance of the given section class and adds it to this resource. """
result = section_cls(*args, **kwargs)
return cast(MSectionBound, result)
def add(self, section):
section.m_resource = self
self.__data.setdefault(section.m_def, []).append(section)
if section.m_parent is None:
def remove(self, section):
assert section.m_resource == self, 'Can only remove section from the resource that contains it.'
section.m_resource = None
if section.m_parent is not None:
def all(self, section_cls: Type[MSectionBound]) -> List[MSectionBound]:
""" Returns all instances of the given section class in this resource. """
return cast(List[MSectionBound], self.__data.get(section_cls.m_def, []))
def unload(self):
""" Breaks all references among the contain metainfo sections to allow GC. """
for collections in self.__data.values():
for section in collections:
section.m_parent = None
# TODO break actual references via quantities
class MSection(metaclass=MObjectMeta):
"""Base class for all section instances on all meta-info levels.
......@@ -523,16 +570,21 @@ class MSection(metaclass=MObjectMeta):
the quantity values and sub-section. It should only be read directly
(and never manipulated).
m_resource: The :class:`MResource` that contains and manages this section.
m_def: 'Section' = None
def __init__(self, m_def: 'Section' = None, m_data: MData = None, **kwargs):
def __init__(
self, m_def: 'Section' = None, m_data: MData = None,
m_resource: MResource = None, **kwargs):
self.m_def: 'Section' = m_def
self.m_parent: 'MSection' = None
self.m_parent_sub_section: 'SubSection' = None
self.m_parent_index = -1
self.m_resource = m_resource
# get missing m_def from class
cls = self.__class__
......@@ -746,6 +798,12 @@ class MSection(metaclass=MObjectMeta):
elif quantity_def.type == Any:
elif quantity_def.type == str and type(value) == np.str_:
return str(value)
elif quantity_def.type == bool and type(value) == np.bool_:
return bool(value)
if type(value) != quantity_def.type:
raise TypeError(
......@@ -829,6 +887,9 @@ class MSection(metaclass=MObjectMeta):
value = self.m_data.m_get(self, quantity_def)
if value is None:
return value
if isinstance(quantity_def.type, DataType) and quantity_def.type.get_normalize != DataType.get_normalize:
dimensions = len(quantity_def.shape)
if dimensions == 0:
......@@ -871,6 +932,10 @@ class MSection(metaclass=MObjectMeta):
sub_section.m_parent = self
sub_section.m_parent_sub_section = sub_section_def
sub_section.m_parent_index = parent_index
if sub_section.m_resource is not None:
if self.m_resource is not None:
self.m_data.m_add_sub_section(self, sub_section_def, sub_section)
......@@ -1098,26 +1163,26 @@ class MSection(metaclass=MObjectMeta):
""" Returns the data of this section as a json string. """
return json.dumps(self.m_to_dict(), **kwargs)
def m_all_contents(self) -> Iterable[Content]:
def m_all_contents(self) -> Iterable['MSection']:
""" Returns an iterable over all sub and sub subs sections. """
for content in self.m_contents():
for sub_content in content[0].m_all_contents():
for sub_content in content.m_all_contents():
yield sub_content
yield content
def m_contents(self) -> Iterable[Content]:
def m_contents(self) -> Iterable['MSection']:
""" Returns an iterable over all direct subs sections. """
for sub_section_def in self.m_def.all_sub_sections.values():
if sub_section_def.repeats:
index = 0
for sub_section in self.m_get_sub_sections(sub_section_def):
yield sub_section, index, sub_section_def, self
yield sub_section
index += 1
sub_section = self.m_get_sub_section(sub_section_def, -1)
yield sub_section, -1, sub_section_def, self
yield sub_section
def m_path(self, quantity_def: 'Quantity' = None) -> str:
""" Returns the path of this section or the given quantity within the section hierarchy. """
......@@ -1141,6 +1206,10 @@ class MSection(metaclass=MObjectMeta):
return self.m_parent.m_root(cls)
def m_parent_as(self, cls: Type[MSectionBound] = None) -> MSectionBound:
""" Returns the parent section, typed as the given section class.

``cls`` is not inspected here: ``m_parent`` is passed through ``cast`` unchanged, so
no type check or conversion is visible in this method.
"""
return cast(MSectionBound, self.m_parent)
def m_resolve(self, path: str, cls: Type[MSectionBound] = None) -> MSectionBound:
""" Resolves the given path using this section as context. """
......@@ -1243,7 +1312,7 @@ class MSection(metaclass=MObjectMeta):
def m_all_validate(self):
""" Evaluates all constraints in the whole section hierarchy, incl. this section. """
errors: List[str] = []
for section, _, _, _ in itertools.chain([(self, None, None, None)], self.m_all_contents()):
for section in itertools.chain([self], self.m_all_contents()):
for error in section.m_validate():
......@@ -1653,6 +1722,10 @@ class Section(Definition):
A helper attribute that gives all sub-section definition including inherited ones
as a dictionary that maps section classes (i.e. Python class objects) to
lists of :class:`SubSection`.
A helper attribute that gives all sub-section definitions that this section
is used in.
section_cls: Type[MSection] = None
......@@ -1673,6 +1746,7 @@ class Section(Definition):
self.all_quantities: Dict[str, Quantity] = dict()
self.all_sub_sections: Dict[str, SubSection] = dict()
self.all_sub_sections_by_section: Dict['Section', List['SubSection']] = dict()
self.parent_section_sub_section_defs: List['SubSection'] = list()
def on_add_sub_section(self, sub_section_def, sub_section):
if sub_section_def == Section.quantities:
......@@ -1685,6 +1759,9 @@ class Section(Definition):
sub_section.sub_section, []).append(sub_section)
if isinstance(sub_section, SubSection):
def on_set(self, quantity_def, value):
if quantity_def == Section.base_sections:
for base_section in value:
......@@ -1707,7 +1784,8 @@ class Section(Definition):
for def_list in [self.quantities, self.sub_sections]:
for definition in def_list:
assert not in names, 'All names in a section must be unique.'
assert not in names, 'All names in a section must be unique. ' \
'Name %s of %s in %s already exists in %s.' % (, definition, definition.m_parent, self)
......@@ -1842,3 +1920,48 @@ Section.__init_cls__()
class Environment(MSection):
""" Environments allow to manage many metainfo packages and quickly access all definitions.
Environments provide a name-table for large-sets of metainfo definitions that span
multiple packages. It provides various functions to resolve metainfo definitions by
their names, legacy names, and qualified names.
packages: Packages in this environment.
packages = SubSection(sub_section=Package, repeats=True)
def __init__(self, *args, **kwargs):
""" Forwards all arguments to the base class and creates an empty name table. """
super().__init__(*args, **kwargs)
# name table read by resolve_definitions: maps a name to the definitions stored under it
self.all_definitions_by_name: Dict[str, List[Definition]] = dict()
def resolve_definitions( # type: ignore
self, name: str, cls: Type[MSectionBound] = Definition) -> List[MSectionBound]:
""" Returns all definitions stored under the given name that are instances of ``cls``.

The lookup uses ``all_definitions_by_name``; an unknown name yields an empty list.
"""
return [
cast(MSectionBound, definition)
for definition in self.all_definitions_by_name.get(name, [])
if isinstance(definition, cls)]
def resolve_definition( # type: ignore
self, name, cls: Type[MSectionBound] = Definition) -> MSectionBound:
""" Returns the one definition with the given name that is an instance of ``cls``.

Raises:
KeyError: If no definition matches, or if more than one definition matches.
"""
defs = self.resolve_definitions(name, cls)
if len(defs) == 1:
return defs[0]
elif len(defs) > 1:
# ambiguous: several definitions share this name and type
raise KeyError('Could not uniquely identify %s' % name)
# no candidate at all
raise KeyError('Could not resolve %s' % name)
def on_add_sub_section(self, sub_section_def: SubSection, sub_section: MSection):
if sub_section_def == Environment.packages:
package = sub_section.m_as(Package)
for definition in package.m_all_contents():
if isinstance(definition, Definition):
definitions = self.all_definitions_by_name.setdefault(, [])
......@@ -17,6 +17,7 @@ import numpy as np
import re
from string import ascii_uppercase
import pint.quantity
from nomad.normalizing.normalizer import SystemBasedNormalizer
from nomad.metainfo import units
......@@ -42,7 +43,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
optimade = OptimadeEntry()
def get_value(key: str, default: Any = None, numpy: bool = False) -> Any:
def get_value(key: str, default: Any = None, numpy: bool = False, unit=None) -> Any:
value = self._backend.get_value(key, index)
if type(value) == np.ndarray and not numpy:
......@@ -50,6 +51,12 @@ class OptimadeNormalizer(SystemBasedNormalizer):
if isinstance(value, list) and numpy:
return np.array(value)
if numpy and unit is not None:
if isinstance(value, pint.quantity._Quantity):
value =
value = value * unit
return value
except KeyError:
return default
......@@ -88,8 +95,8 @@ class OptimadeNormalizer(SystemBasedNormalizer):
# sites
optimade.nsites = len(nomad_species)
optimade.species_at_sites = nomad_species
optimade.lattice_vectors = (get_value('lattice_vectors', numpy=True) * units.m).to(units.angstrom).magnitude
optimade.cartesian_site_positions = (get_value('atom_positions', numpy=True) * units.m).to(units.angstrom).magnitude
optimade.lattice_vectors = get_value('lattice_vectors', numpy=True, unit=units.angstrom).magnitude
optimade.cartesian_site_positions = get_value('atom_positions', numpy=True, unit=units.angstrom).magnitude
optimade.dimension_types = [
1 if value else 0
for value in get_value('configuration_periodic_dimensions')]
This diff is collapsed.
......@@ -80,6 +80,7 @@ import os.path
from nomad import files, config
from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState
from nomad.parsing.metainfo import MetainfoBackend
from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser, BrokenParser, MissingParser, MatchingParser
from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser, EmptyParser
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment