diff --git a/gui/tests/artifacts.js b/gui/tests/artifacts.js index b142bacc40731b5aff2cef5dd0c389ac4d792fbf..2860bc5fc74917192b7b1d82707eb95958f6fca9 100644 --- a/gui/tests/artifacts.js +++ b/gui/tests/artifacts.js @@ -71691,6 +71691,17 @@ window.nomadArtifacts = { "type_data": "bool" }, "default": false + }, + { + "m_def": "nomad.metainfo.metainfo.Quantity", + "m_parent_index": 2, + "m_parent_sub_section": "quantities", + "name": "key_quantity", + "type": { + "type_kind": "python", + "type_data": "str" + }, + "default": null } ] }, diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 3962b36cb798e1083753f434776fdf310894d44b..a7a61edfca9412f1bd8473bc112c95e35fbd7174 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -16,6 +16,7 @@ # limitations under the License. # from __future__ import annotations + import base64 from copy import deepcopy import importlib @@ -26,14 +27,15 @@ import re import sys from collections.abc import Iterable as IterableABC from functools import reduce -from pydantic import parse_obj_as, ValidationError, BaseModel, Field from typing import ( Any, Callable as TypingCallable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union, cast, ClassVar) + import docstring_parser import jmespath import numpy as np import pandas as pd import pint +from pydantic import parse_obj_as, ValidationError, BaseModel, Field from nomad.config import process from nomad.metainfo.util import ( @@ -915,8 +917,8 @@ class Context: pass def create_reference( - self, section: MSection, quantity_def: Quantity, value: MSection, - global_reference: bool = False + self, section: MSection, quantity_def: Quantity, value: MSection, + global_reference: bool = False ) -> str: ''' Returns a reference for the given target section (value) based on the given context. @@ -1240,11 +1242,35 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas return super().__setattr__(name, value) + @property + def m_key(self): + if (subsection := self.m_parent_sub_section) is None: + return None + + if subsection.key_quantity is not None: + key_quantity = subsection.key_quantity + elif (label := subsection.more.get('label_quantity')) is not None: + key_quantity = label + else: + return None + + quantity_def = self.m_def.all_quantities.get(key_quantity) + if quantity_def.type != str: + raise TypeError(f'Key quantity {key_quantity} must be of type str.') + + if self.m_is_set(quantity_def): + return self.m_get(quantity_def) + + return None + def __getattr__(self, name): # The existence of __getattr__ will make mypy and pylint ignore 'missing' dynamic # attributes and functions and wrong types of those. # Ideally we have a plugin for both that add the correct type info + if name == 'm_key': + return self.get(name) + if name in self.m_def.all_aliases: return getattr(self, self.m_def.all_aliases[name].name) @@ -1842,7 +1868,9 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas categories: List[Union[Category, Type['MCategory']]] = None, include: TypingCallable[[Definition, MSection], bool] = None, exclude: TypingCallable[[Definition, MSection], bool] = None, - transform: TypingCallable[[Definition, MSection, Any, str], Any] = None) -> Dict[str, Any]: + transform: TypingCallable[[Definition, MSection, Any, str], Any] = None, + subsection_as_dict: bool = False, + ) -> Dict[str, Any]: ''' Returns the data of this section as a (json serializable) dictionary. @@ -1889,6 +1917,8 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas might have to ensure that the result is JSON-serializable. By default, values are serialized to JSON according to the quantity type. + subsection_as_dict: If true, try to serialize subsections as dictionaries. + Only possible when the keys are unique. Otherwise, serialize as list. ''' if isinstance(self, Definition) and not with_out_meta: with_meta = True @@ -1899,7 +1929,9 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas include_derived=include_derived, resolve_references=resolve_references, exclude=exclude, - transform=transform) + transform=transform, + subsection_as_dict=subsection_as_dict + ) assert not (include is not None and exclude is not None), 'You can only include or exclude, not both.' @@ -2197,9 +2229,19 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if sub_section_def.repeats: if self.m_sub_section_count(sub_section_def) > 0: is_set = True - yield name, [ - None if item is None else item.m_to_dict(**kwargs) - for item in self.m_get_sub_sections(sub_section_def)] + subsections = self.m_get_sub_sections(sub_section_def) + subsection_keys: list = [item.m_key for item in subsections if item and item.m_key] + has_dup: bool = 0 < len(subsection_keys) != len(set(subsection_keys)) + if not has_dup and subsection_as_dict: + serialised_dict: dict = {} + for index, item in enumerate(subsections): + if item is None: + continue + item_key = item.m_key if item.m_key else index + serialised_dict[item_key] = item.m_to_dict(**kwargs) + yield name, serialised_dict + else: + yield name, [None if item is None else item.m_to_dict(**kwargs) for item in subsections] else: sub_section = self.m_get_sub_section(sub_section_def, -1) if sub_section is not None: @@ -2265,9 +2307,12 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas sub_section_value = dct.get(name) sub_section_cls = sub_section_def.sub_section.section_cls if sub_section_def.repeats: - for sub_section_dct in sub_section_value: - sub_section = None if sub_section_dct is None else sub_section_cls.m_from_dict( - sub_section_dct, m_parent=self, m_context=m_context) + for sub_section_dct in sub_section_value if isinstance( + sub_section_value, list) else sub_section_value.values(): + sub_section = None + if sub_section_dct is not None: + sub_section = sub_section_cls.m_from_dict( + sub_section_dct, m_parent=self, m_context=m_context) section.m_add_sub_section(sub_section_def, sub_section) else: sub_section = sub_section_cls.m_from_dict( @@ -3542,6 +3587,7 @@ class SubSection(Property): sub_section: Quantity = _placeholder_quantity repeats: Quantity = _placeholder_quantity + key_quantity: Quantity = _placeholder_quantity def __get__(self, obj, type=None): # the class attribute case @@ -4339,6 +4385,7 @@ Section.has_variable_names = has_variable_names Section.path = section_path SubSection.repeats = Quantity(type=bool, name='repeats', default=False) +SubSection.key_quantity = Quantity(type=str, name='key_quantity', default=None) SubSection.sub_section = Quantity( type=SectionReference, name='sub_section', diff --git a/nomad/metainfo/util.py b/nomad/metainfo/util.py index 9be07a7bbd053c078d11062ec1cfc62f9cbc15f6..3952c512b1abde895d6db854326f1790a99d07cb 100644 --- a/nomad/metainfo/util.py +++ b/nomad/metainfo/util.py @@ -343,6 +343,15 @@ class MSubSectionList(list): def __setitem__(self, key, value): raise NotImplementedError('You can only append subsections.') + def __getitem__(self, item): + if isinstance(item, str): + for sub_section in self: + if sub_section.m_key == item: + return sub_section + raise KeyError(f'No subsection keyed {item} found.') + + return super().__getitem__(item) + def __delitem__(self, key): old_value = self[key] list.__delitem__(self, key) diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index 9a610b26bc461224852899e2830c66e3556eba31..bd0035aac4d7582abe7fb6bfbf24cd87dbcf171b 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -859,3 +859,49 @@ class TestEnvironment: sub_section_system = env.resolve_definition('systems', SubSection) assert sub_section_system.m_def == SubSection.m_def assert sub_section_system.name == 'systems' + + +@pytest.mark.parametrize('as_dict', [True, False]) +@pytest.mark.parametrize('add_key', [True, False]) +@pytest.mark.parametrize('str_type', [True, False]) +@pytest.mark.parametrize('dup_key', [True, False]) +def test_serialise_as_dict(as_dict, add_key, str_type, dup_key): + class TestSection(MSection): + q = Quantity(type=str if str_type else int) + + class TestContainer(MSection): + s = SubSection(sub_section=TestSection, repeats=True, key_quantity='q' if add_key else None) + + container = TestContainer() + + def __key(_i): + if str_type: + return 'abc' if dup_key else f'abc{_i}' + + return 0 if dup_key else _i + + for i in range(3): + section = TestSection(q=__key(i)) + container.m_add_sub_section(TestContainer.s, section) + + kwarg = {'subsection_as_dict': as_dict} + if not str_type and add_key: + with pytest.raises(TypeError): + for i in range(3): + _ = container.s[i].m_key + else: + for i in range(3): + assert container.s[i].q == __key(i) + if add_key and not dup_key: + assert container.s[f'abc{i}'].q == f'abc{i}' + else: + with pytest.raises(KeyError): + _ = container.s[f'abc{i}'].q + + json_dict = container.m_to_dict(**kwarg) + + if not as_dict or add_key and dup_key: + assert isinstance(json_dict['s'], list) + else: + assert isinstance(json_dict['s'], dict) + assert json_dict == TestContainer.m_from_dict(json_dict).m_to_dict(**kwarg)