From 2130cfcd7eff81b3dda692ea430745b668c718bf Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus@dhcp-45-58.physik.hu-berlin.de> Date: Fri, 4 Oct 2019 10:53:25 +0200 Subject: [PATCH] Added basic reference serialization and resolution. --- nomad/metainfo/example.py | 20 ++++- nomad/metainfo/metainfo.py | 160 +++++++++++++++++++++++++++++-------- tests/test_metainfo.py | 13 ++- 3 files changed, 157 insertions(+), 36 deletions(-) diff --git a/nomad/metainfo/example.py b/nomad/metainfo/example.py index 25d66830e2..e399e541be 100644 --- a/nomad/metainfo/example.py +++ b/nomad/metainfo/example.py @@ -46,14 +46,22 @@ class System(MSection): description='A vector of booleans indicating in which dimensions the unit cell is repeated.') +class SCC(MSection): + + energy_total = Quantity(type=float, default=0.0, unit=units.J) + + system = Quantity(type=System.m_def, description='The system that this calculation is based on.') + + class Run(MSection): """ All data that belongs to a single code run. """ code_name = Quantity(type=str, description='The name of the code that was run.') code_version = Quantity(type=str, description='The version of the code that was run.') - systems = SubSection(sub_section=System.m_def, repeats=True) parsing = SubSection(sub_section=Parsing.m_def) + systems = SubSection(sub_section=System.m_def, repeats=True) + sccs = SubSection(sub_section=SCC.m_def, repeats=True) class VaspRun(Run): @@ -92,11 +100,21 @@ if __name__ == '__main__': system = run.m_create(System) system.atom_labels = ['H', 'H', 'O'] + calc = run.m_create(SCC) + calc.energy_total = 1.23e-10 + calc.system = system + # Or to read data from existing metainfo data: print(system.atom_labels) print(system.n_atoms) # To serialize the data: + serializable = run.m_to_dict() + # or print(run.m_to_json(indent=2)) + # To deserialize data + run = Run.m_from_dict(serializable) + print(run.sccs[0].system) + # print(m_package.m_to_json(indent=2)) # type: ignore, pylint: disable=undefined-variable diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 691f7ba128..1897225df7 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -172,6 +172,13 @@ class Enum(list): pass +class MProxy(): + """ A placeholder object that acts as reference to a value that is not yet resolved. """ + + def __init__(self, url: str): + self.url = url + + class DataType: """ Allows to define custom data types that can be used in the meta-info. @@ -219,6 +226,7 @@ class Reference(DataType): is actually a reference (or references, depending on shape) to a section of the given definition. """ + # TODO not used yet def __init__(self, section: 'Section'): self.section = section @@ -464,6 +472,9 @@ class MDataTypeAndShapeChecks(MDataDelegating): return quantity_def.type.type_check(section, value) elif isinstance(quantity_def.type, Section): + if isinstance(value, MProxy): + return value + if not isinstance(value, MSection): raise TypeError( 'The value %s for reference quantity %s is not a section instance.' % @@ -705,7 +716,14 @@ class MSection(metaclass=MObjectMeta): except Exception as e: raise DeriveError('Could not derive value for %s: %s' % (quantity_def, str(e))) - return self.m_data.m_get(self, quantity_def) + value = self.m_data.m_get(self, quantity_def) + + if isinstance(quantity_def.type, Section): + if isinstance(value, MProxy): + value = self.m_resolve(value.url) + self.m_data.m_set(self, quantity_def, value) + + return value def m_is_set(self, quantity_def: 'Quantity') -> bool: quantity_def = self.__resolve_synonym(quantity_def) @@ -750,32 +768,36 @@ class MSection(metaclass=MObjectMeta): section_def = section_cls.m_def sub_section_def = self.m_def.all_sub_sections_by_section.get(section_def, None) if sub_section_def is None: - raise TypeError('There is not sub section for %s in %s.' % (section_def, self)) + raise TypeError('There is no sub section to hold a %s in %s.' % (section_def, self.m_def)) sub_section = section_cls(**kwargs) self.m_add_sub_section(sub_section_def, sub_section) return cast(MSectionBound, sub_section) - def m_update(self, **kwargs): + def m_update(self, safe: bool = True, **kwargs): """ Updates all quantities and sub-sections with the given arguments. """ - for name, value in kwargs.items(): - prop = self.m_def.all_properties.get(name, None) - if prop is None: - raise KeyError('%s is not an attribute of this section %s' % (name, self)) - - if isinstance(prop, SubSection): - if prop.repeats: - if isinstance(value, List): - for item in value: - self.m_add_sub_section(prop, item) + if safe: + for name, value in kwargs.items(): + prop = self.m_def.all_properties.get(name, None) + if prop is None: + raise KeyError('%s is not an attribute of this section %s' % (name, self)) + + if isinstance(prop, SubSection): + if prop.repeats: + if isinstance(value, List): + for item in value: + self.m_add_sub_section(prop, item) + else: + raise TypeError('Sub section %s repeats, but no list was given' % prop.name) else: - raise TypeError('Sub section %s repeats, but no list was given' % prop.name) + self.m_add_sub_section(prop, item) + else: - self.m_add_sub_section(prop, item) + self.m_set(prop, value) - else: - self.m_set(prop, value) + else: + self.m_data.m_data.dct.update(**kwargs) # type: ignore def m_as(self, section_cls: Type[MSectionBound]) -> MSectionBound: """ 'Casts' this section to the given extending sections. """ @@ -806,19 +828,15 @@ class MSection(metaclass=MObjectMeta): for name, quantity in self.m_def.all_quantities.items(): if self.m_is_set(quantity) and quantity.derived is None: - to_json_serializable: Callable[[Any], Any] = str + serialize: Callable[[Any], Any] = str if isinstance(quantity.type, DataType): - - def serialize(v): - quantity.type.to_json_serializable(self, v) - - to_json_serializable = serialize + serialize = lambda v: quantity.type.to_json_serializable(self, v) elif isinstance(quantity.type, Section): - # TODO - to_json_serializable = str + serialize = lambda s: s.m_path() + elif quantity.type in [str, int, float, bool]: - to_json_serializable = quantity.type + serialize = quantity.type else: # TODO @@ -831,9 +849,9 @@ class MSection(metaclass=MObjectMeta): else: if len(quantity.shape) == 0: - serializable_value = to_json_serializable(value) + serializable_value = serialize(value) elif len(quantity.shape) == 1: - serializable_value = [to_json_serializable(i) for i in value] + serializable_value = [serialize(i) for i in value] else: raise NotImplementedError('Higher shapes (%s) not supported: %s' % (quantity.shape, quantity)) @@ -845,7 +863,7 @@ class MSection(metaclass=MObjectMeta): def m_from_dict(cls: Type[MSectionBound], dct: Dict[str, Any]) -> MSectionBound: """ Creates a section from the given data dictionary. - This is the 'oposite' of :func:`m_to_dict`. It takes a deserialized dict, e.g + This is the 'opposite' of :func:`m_to_dict`. It takes a deserialised dict, e.g loaded from JSON, and turns it into a proper section, i.e. instance of the given section class. """ @@ -867,8 +885,14 @@ class MSection(metaclass=MObjectMeta): else: yield name, sub_section_def.sub_section.section_cls.m_from_dict(sub_section_value) - for key, value in dct.items(): - yield key, value + for name, quantity_def in section_def.all_quantities.items(): + if name in dct: + quantity_value = dct[name] + + if isinstance(quantity_def.type, Section): + quantity_value = MProxy(quantity_value) + + yield name, quantity_value dct = {key: value for key, value in items()} section_instance = cast(MSectionBound, section_def.section_cls()) @@ -900,6 +924,78 @@ class MSection(metaclass=MObjectMeta): sub_section = self.m_get_sub_section(sub_section_def, -1) yield sub_section, -1, sub_section_def, self + def m_path(self, quantity_def: 'Quantity' = None) -> str: + """ Returns the path of this section or the given quantity within the section hierarchy. """ + if self.m_parent is None: + return '/' + + if self.m_parent_index == -1: + segment = self.m_parent_sub_section.name + else: + segment = '%s/%d' % (self.m_parent_sub_section.name, self.m_parent_index) + + if quantity_def is not None: + segment = '%s/%s' % (segment, quantity_def.name) + + return '%s/%s' % (self.m_parent.m_path().rstrip('/'), segment) + + def m_root(self, cls: Type[MSectionBound] = None) -> MSectionBound: + if self.m_parent is None: + return cast(MSectionBound, self) + else: + return self.m_parent.m_root(cls) + + def m_resolve(self, path: str, cls: Type[MSectionBound] = None) -> MSectionBound: + """ Resolves the given path using this section as context. """ + + if path.startswith('/'): + context: 'MSection' = self.m_root() + else: + context = self + + path_stack = path.strip('/').split('/') + path_stack.reverse() + while len(path_stack) > 1: + prop_name = path_stack.pop() + prop_def = context.m_def.all_properties.get(prop_name, None) + + if prop_def is None: + raise ReferenceError( + 'Could not resolve %s, property %s does not exist in %s' % + (path, prop_name, context.m_def)) + + if isinstance(prop_def, SubSection): + if prop_def.repeats: + try: + index = int(path_stack.pop()) + except ValueError: + raise ReferenceError( + 'Could not resolve %s, %s repeats but there is no index in the path' % + (path, prop_name)) + + try: + context = context.m_get_sub_section(prop_def, index) + except Exception: + raise ReferenceError( + 'Could not resolve %s, there is no sub section for %s at %d' % + (path, prop_name, index)) + + else: + context = context.m_get_sub_section(prop_def, -1) + if context is None: + raise ReferenceError( + 'Could not resolve %s, there is no sub section for %s' % + (path, prop_name)) + + elif isinstance(prop_def, Quantity): + if len(path_stack) > 0: + raise ReferenceError( + 'Could not resolve %s, %s is not a sub section' % (path, prop_name)) + + return context.m_get(prop_def) + + return cast(MSectionBound, context) + def __repr__(self): m_section_name = self.m_def.name # name_quantity_def = self.m_def.all_quantities.get('name', None) @@ -1288,7 +1384,7 @@ SubSection.sub_section = Quantity( ''') Quantity.m_def.section_cls = Quantity -Quantity.type = Quantity( +Quantity.type = DirectQuantity( type=Union[type, Enum, Section, np.dtype], name='type', description=''' The type of the quantity. diff --git a/tests/test_metainfo.py b/tests/test_metainfo.py index 6a4f8d1c55..91fbba79c3 100644 --- a/tests/test_metainfo.py +++ b/tests/test_metainfo.py @@ -107,13 +107,13 @@ class TestM2: assert Run.m_def.all_quantities['code_name'] == Run.__dict__['code_name'] def test_sub_sections(self): - assert len(Run.m_def.sub_sections) == 2 + assert len(Run.m_def.sub_sections) == 3 assert Run.m_def.all_sub_sections['systems'] in Run.m_def.sub_sections assert Run.m_def.all_sub_sections['systems'].sub_section == System.m_def assert Run.m_def.all_sub_sections_by_section[System.m_def].sub_section == System.m_def def test_properties(self): - assert len(Run.m_def.all_properties) == 5 + assert len(Run.m_def.all_properties) == 6 def test_get_quantity_def(self): assert System.n_atoms == System.m_def.all_properties['n_atoms'] @@ -141,7 +141,7 @@ class TestM2: def test_package(self): assert example_package.name == 'nomad.metainfo.example' assert example_package.description == 'An example metainfo package.' - assert example_package.m_sub_section_count(Package.section_definitions) == 4 + assert example_package.m_sub_section_count(Package.section_definitions) == 5 assert example_package.m_sub_section_count(Package.category_definitions) == 1 def test_base_sections(self): @@ -302,3 +302,10 @@ class TestM1: run = Run() run.m_as(VaspRun).x_vasp_raw_format = 'outcar' assert run.m_as(VaspRun).x_vasp_raw_format == 'outcar' + + def test_resolve(self): + run = Run() + system = run.m_create(System) + + assert run.m_resolve('/systems/0') == system + assert system.m_resolve('/systems/0') == system -- GitLab