diff --git a/gui/tests/artifacts.js b/gui/tests/artifacts.js index 6962daea15e51f88b37e5025f41b919fda5cde7d..f32f40faf3bd87e3946624cdede9acb8f41d049c 100644 --- a/gui/tests/artifacts.js +++ b/gui/tests/artifacts.js @@ -6598,8 +6598,7 @@ window.nomadArtifacts = { ], "constraints": [ "dimensions", - "has_type", - "higher_shapes_require_dtype" + "has_type" ] }, { @@ -60799,8 +60798,7 @@ window.nomadArtifacts = { ], "constraints": [ "dimensions", - "has_type", - "higher_shapes_require_dtype" + "has_type" ], "quantities": [ { diff --git a/nomad/datamodel/data.py b/nomad/datamodel/data.py index a3c149c8a82682a7c15c348dfc8d99e3cce52e62..fb35e9d3f687fbb94fb84180c5842a6dfba158de 100644 --- a/nomad/datamodel/data.py +++ b/nomad/datamodel/data.py @@ -30,7 +30,6 @@ from nomad.metainfo.metainfo import ( MCategory, MSection, Quantity, - MProxy, Capitalized, Section, Datetime, @@ -199,13 +198,24 @@ class UserReference(Reference): return {'type_kind': 'User', 'type_data': 'User'} def _normalize_impl(self, section, value): - # todo: need data validation + if isinstance(value, User): + return value + if isinstance(value, str): - return MProxy(value, m_proxy_section=section, m_proxy_type=self._proxy_type) - return value + try: + return User.get(value) + except Exception as _exc: # noqa + return value + + raise ValueError(f'Cannot normalize {value}.') def _serialize_impl(self, section, value): - return value.user_id + if isinstance(value, str): + return value + if isinstance(value, User): + return value.user_id + + raise ValueError(f'Cannot serialize {value}.') class AuthorReference(Reference): @@ -216,12 +226,23 @@ class AuthorReference(Reference): return {'type_kind': 'Author', 'type_data': 'Author'} def _normalize_impl(self, section, value): - # todo: need data validation - if isinstance(value, (str, dict)): - return MProxy(value, m_proxy_section=section, m_proxy_type=self._proxy_type) - return value + if isinstance(value, Author): + return value + + if isinstance(value, dict): + return Author.m_from_dict(value) + + if isinstance(value, str): + try: + return User.get(value) + except Exception as _exc: # noqa + return value + + raise ValueError(f'Cannot normalize {value}.') def _serialize_impl(self, section, value): + if isinstance(value, str): + return value if isinstance(value, User): return value.user_id if isinstance(value, Author): diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 7621971c6ca412fea23cac18c98aa90adfa1548f..69b1e773fe58bce72988874d54a6c4dffa5031ec 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -40,13 +40,11 @@ from ..metainfo import ( Bytes, Package, Definition, - MProxy, MSection, MCategory, Section, SubSection, Quantity, - Reference, MEnum, Datetime, JSON, @@ -189,14 +187,19 @@ class DatasetReference(Reference): super().__init__(Dataset.m_def) def _normalize_impl(self, section, value): - # todo: need data validation + if isinstance(value, Dataset): + return value + if isinstance(value, str): - return MProxy(value, m_proxy_section=section, m_proxy_type=self._proxy_type) - return value + if (target := Dataset.m_def.a_mongo.get(dataset_id=value)) is not None: + return target + return value + + raise ValueError(f'Cannot normalize {value}.') def _serialize_impl(self, section, value): - if isinstance(value, MProxy): - return value.m_proxy_value + if isinstance(value, str): + return value return value.dataset_id diff --git a/nomad/datamodel/metainfo/annotations.py b/nomad/datamodel/metainfo/annotations.py index 60fc442795d8011ad20d53ed8eecd03dce602b37..8de3a066ff907e1ae62cd13112fca7be97cad442 100644 --- a/nomad/datamodel/metainfo/annotations.py +++ b/nomad/datamodel/metainfo/annotations.py @@ -24,7 +24,7 @@ import re from pydantic.main import BaseModel from nomad.utils import strip -from nomad.metainfo import AnnotationModel, MEnum, MTypes, Datetime, Reference, Quantity +from nomad.metainfo import AnnotationModel, MEnum, Datetime, Reference, Quantity from .plot import PlotlyError from ..data import Query from ...metainfo.data_type import Datatype diff --git a/nomad/metainfo/__init__.py b/nomad/metainfo/__init__.py index 49e83565cb7a8c53dc3b11d1bf60ba2f43366ca4..2beac67794baf7e43f3aab686dd68d5a867dba44 100644 --- a/nomad/metainfo/__init__.py +++ b/nomad/metainfo/__init__.py @@ -30,7 +30,6 @@ including JSON, (HDF5), mongodb, and elastic search. """ from .metainfo import ( - MTypes, MSectionBound, MSection, MCategory, diff --git a/nomad/metainfo/data_type.py b/nomad/metainfo/data_type.py index facd79daf6b4dfdefb3e60038c22139359c6730d..d6a05aa5f51bb8e5d6fedced0c11182a2ec36378 100644 --- a/nomad/metainfo/data_type.py +++ b/nomad/metainfo/data_type.py @@ -20,6 +20,7 @@ from __future__ import annotations import builtins import importlib import re +import typing from base64 import b64decode, b64encode from datetime import datetime, date from functools import reduce @@ -165,6 +166,19 @@ class Datatype: The given value is the actual value stored in the corresponding section. This method shall return an object that is JSON serializable. + + Optional keyword arguments: + section: the section object that the value belongs to + transform: a function that transforms the value, this function will apply to each element of the value + if the value is an array, or a nested array. + + The function shall have the following signature: + ```python + def transform(value, path): + pass + ``` + The value is the actual value, or the element in the array. + The path shall be None if the value is a scalar, or a list of indices if the value is an array. """ raise NotImplementedError() @@ -351,13 +365,24 @@ class Primitive(Datatype): """ This handles both scalar and array like values. """ + + transform: typing.Callable | None = kwargs.get('transform', None) + + def _convert(v, p=None): + if isinstance(v, list): + return [ + _convert(x, [i] if p is None else p + [i]) for i, x in enumerate(v) + ] + + return v if transform is None else transform(v, p) + if isinstance(value, np.ndarray): - return value.tolist() + return _convert(value.tolist()) if isinstance(value, np.generic): - return value.item() + return _convert(value.item()) - return value + return _convert(value) class Number(Primitive): @@ -738,11 +763,17 @@ class NonPrimitive(Datatype): Transparently return the given value. """ - def _convert(v): + transform: typing.Callable | None = kwargs.get('transform', None) + + def _convert(v, p=None): if isinstance(v, list): - return [_convert(x) for x in v] + return [ + _convert(x, [i] if p is None else p + [i]) for i, x in enumerate(v) + ] + + intermediate = self._serialize_impl(v, **kwargs) - return self._serialize_impl(v, **kwargs) + return intermediate if transform is None else transform(intermediate, p) return _convert(value) @@ -894,7 +925,7 @@ class Unit(NonPrimitive): else: raise TypeError('Units must be given as str or pint.Unit instances.') - _check_dimensionality(self._definition, unit_obj) + check_dimensionality(self._definition, unit_obj) return unit_obj @@ -1400,7 +1431,7 @@ def _normalize_complex(value, complex_type, to_unit: str | ureg.Unit | None): raise ValueError(f'Cannot convert {value} to complex number.') -def _check_dimensionality(quantity_def, unit: pint.Unit | None) -> None: +def check_dimensionality(quantity_def, unit: pint.Unit | None) -> None: if quantity_def is None or unit is None: return @@ -1422,33 +1453,5 @@ def _check_dimensionality(quantity_def, unit: pint.Unit | None) -> None: raise TypeError(f'Dimensionality {dimensionality} is not met by unit {unit}.') -def _split_python_definition(definition_with_id: str) -> tuple[list, str | None]: - """ - Split a Python type name into names and an optional ID. - - Example: - my_package.my_section ==> (['my_package', 'my_section'], None) - my_package.my_section@my_id ==> (['my_package', 'my_section'], 'my_id') - my_package/section_definitions/0 ==> (['my_package', 'section_definitions/0'], None) - """ - - def __split(name: str): - # The definition name must contain at least one dot which comes from the module name. - # The actual definition could be either a path (e.g., my_package/section_definitions/0) - # or a name (e.g., my_section). - # If it is a path (e.g., a.b.c/section_definitions/0), after splitting at '.', the last segment - # (c/section_definitions/0) contains the package name (c). It needs to be relocated. - segments: list = name.split('.') - if '/' in segments[-1]: - segments.extend(segments.pop().split('/', 1)) - return segments - - if '@' not in definition_with_id: - return __split(definition_with_id), None - - definition_names, definition_id = definition_with_id.split('@') - return __split(definition_names), definition_id - - if __name__ == '__main__': pass diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index 598915b9f7f2e861ba15d0cf529635a38069e5f5..5181fe56b16563008eb4aa19eabb79e1d0e932c6 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -285,7 +285,7 @@ class DocumentType: suggestion_value = value section_path = section.m_path()[len(root.m_path()) :] name = elasticsearch_annotation.property_name - if path: + if not isinstance(quantity.type, Datatype): suggestion_path = f'{section_path}/{path}/{name}' else: suggestion_path = f'{section_path}/{name}' diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 92d0de6e32e9467a97b3f65cff44142f4a3b6655..39d5e63c7b1f7eb4a4f50274ee85f9d92d0f6db4 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -40,6 +40,7 @@ from typing import ( cast, ClassVar, ) +from urllib.parse import urlsplit, urlunsplit import docstring_parser import jmespath @@ -65,6 +66,7 @@ from nomad.metainfo.data_type import ( File as FileType, HDF5Reference as HDF5ReferenceType, Any as AnyType, + check_dimensionality, ) from nomad.metainfo.util import ( Annotation, @@ -72,10 +74,7 @@ from nomad.metainfo.util import ( MEnum, MQuantity, MSubSectionList, - MTypes, - ReferenceURL, SectionAnnotation, - check_dimensionality, convert_to, default_hash, dict_to_named_list, @@ -184,7 +183,7 @@ class MProxy: def __init__( self, - m_proxy_value: Union[str, int, dict], + m_proxy_value: str | int, m_proxy_section: MSection = None, m_proxy_context: Context = None, m_proxy_type: Reference = None, @@ -248,26 +247,17 @@ class MProxy: return context_section.m_resolve(fragment_with_id) def _resolve(self): - from nomad.datamodel.datamodel import Dataset, DatasetReference - from nomad.datamodel.data import UserReference, AuthorReference, User, Author - - if isinstance(self.m_proxy_type, DatasetReference): - return Dataset.m_def.a_mongo.get(dataset_id=self.m_proxy_value) - if isinstance(self.m_proxy_type, UserReference): - return User.get(user_id=self.m_proxy_value) - if isinstance(self.m_proxy_type, AuthorReference): - if isinstance(self.m_proxy_value, str): - return User.get(user_id=self.m_proxy_value) - if isinstance(self.m_proxy_value, dict): - return Author.m_from_dict(self.m_proxy_value) - - raise MetainfoReferenceError() - - url = ReferenceURL(self.m_proxy_value) + url_parts = urlsplit( + self.m_proxy_value + if '#' in self.m_proxy_value + else f'#{self.m_proxy_value}' + ) + archive_url: str = str(urlunsplit(url_parts[:4] + ('',))) + fragment = url_parts.fragment context_section = self.m_proxy_section if context_section is not None: context_section = context_section.m_root() - if url.archive_url or '@' in url.fragment: + if archive_url or '@' in fragment: context = self.m_proxy_context if context is None: context = context_section.m_context @@ -275,21 +265,19 @@ class MProxy: raise MetainfoReferenceError( 'Proxy with archive url, but no context to resolve it.' ) - if '@' in url.fragment: + if '@' in fragment: # It's a reference to a section definition - definition, definition_id = f'{url.archive_url}#{url.fragment}'.split( - '@' - ) + definition, definition_id = f'{archive_url}#{fragment}'.split('@') return context.resolve_section_definition( definition, definition_id ).m_def - context_section = context.resolve_archive_url(url.archive_url) + context_section = context.resolve_archive_url(archive_url) - if isinstance(context_section, Package) and 'definitions' in url.fragment: - url.fragment = url.fragment.replace('/definitions', '') + if isinstance(context_section, Package) and 'definitions' in fragment: + fragment = fragment.replace('/definitions', '') - return self._resolve_fragment(context_section, url.fragment) + return self._resolve_fragment(context_section, fragment) def m_proxy_resolve(self): if not self.m_proxy_resolved: @@ -448,7 +436,9 @@ class QuantityType(Datatype): if isinstance(value, Datatype): return value.serialize_self() if isinstance(value, Reference): - return value.serialize_self(kwargs.get('section')) + transform = kwargs.get('transform') + serialized = value.serialize_self(kwargs.get('section')) + return transform(serialized) if transform is not None else serialized raise MetainfoError(f'Type {value} is not a valid quantity type.') @@ -567,12 +557,19 @@ class Reference: value, ) - def serialize(self, section, value): - def _convert(_v): - if isinstance(_v, list): - return [_convert(v) for v in _v] + def serialize(self, value, *, section, transform=None): + def _convert(v, p=None): + if isinstance(v, list): + return [ + _convert(x, [i] if p is None else p + [i]) for i, x in enumerate(v) + ] + + if isinstance(v, MProxy) and v.m_proxy_resolved is None: + intermediate = v.m_serialize_proxy_value() + else: + intermediate = self._serialize_impl(section, v) - return self._serialize_impl(section, _v) + return intermediate if transform is None else transform(intermediate, p) return _convert(value) @@ -1769,171 +1766,93 @@ class MSection( ) def serialize_quantity(quantity, is_set, is_derived, path, target_value=None): - quantity_type = quantity.type - - if resolve_references and isinstance(quantity_type, QuantityReference): - quantity_type = quantity_type.target_quantity_def.type - - serialize: TypingCallable[[Any], Any] - - # define serialization functions for all valid data types - is_reference = False - if isinstance(quantity_type, Reference): - is_reference = True - - def serialize_reference(value, path_override): - if resolve_references: - assert not isinstance(quantity_type, QuantityReference) - value = value.m_resolved() - ref_kwargs = dict(kwargs) - if kwargs['transform']: - ref_kwargs['transform'] = lambda q, s, v, p: kwargs[ - 'transform' - ](q, s, v, path_override) - return value.m_to_dict(**ref_kwargs) - - type_with_def = quantity_type.attach_definition(quantity) - - if isinstance(value, MProxy): - if value.m_proxy_resolved is not None: - return type_with_def.serialize(self, value) - - return value.m_serialize_proxy_value() - - return type_with_def.serialize(self, value) - - serialize = serialize_reference - - elif isinstance(quantity_type, Datatype): - serialize = None - else: - raise MetainfoError( - f'Do not know how to serialize data with type {quantity_type} for quantity {quantity}' - ) - - quantity_type = quantity.type - if resolve_references and isinstance(quantity_type, QuantityReference): - serialize_before_reference_resolution = serialize - - def serialize_reference_v2(value: Any): - resolved = value.m_resolved() - target_name = quantity_type.target_quantity_def.name - try: - # should not use the following line alone - # to account for derived quantities - value = resolved.__dict__[target_name] - except KeyError: - # should not use the following line directly as - # it returns `pint.Quantity` for quantities with units - # here we want to get the value of the quantity stored in memory - value = getattr(resolved, target_name) - - if isinstance(quantity_type.target_quantity_def.type, Datatype): - return quantity_type.target_quantity_def.type.serialize(value) - - return serialize_before_reference_resolution(value) - - serialize = serialize_reference_v2 - # get the value to be serialized - # explicitly assigning the target value overrides the value from the section + # explicitly assigned the target value overrides the value from the section if target_value is None: if is_set: target_value = self.__dict__[quantity.name] elif is_derived: try: target_value = quantity.derived(self) - except Exception: + except Exception: # noqa target_value = quantity.default else: target_value = quantity.default - if transform is not None: - serialize_before_transform = serialize + def _transform_wrapper(_value, _stack=None): + _path = path + if _stack is not None: + _path += '/' + '/'.join(str(i) for i in _stack) + return ( + _value + if transform is None + else transform(quantity, self, _value, _path) + ) - def serialize_and_transform(value: Any, path_override=None): - if not is_reference: - return transform( - quantity, - self, - serialize_before_transform(value), - path_override, - ) + quantity_type = quantity.type - return transform( - quantity, - self, - serialize_before_transform(value, path_override), - path_override, - ) + if isinstance(quantity_type, Datatype) or not resolve_references: + return quantity_type.serialize( + target_value, section=self, transform=_transform_wrapper + ) - serialize = serialize_and_transform - - if isinstance(quantity_type, Datatype): - intermediate_value = quantity_type.serialize(target_value, section=self) - if transform is None: - return intermediate_value - if isinstance(quantity_type, Number) or len(quantity.shape) == 0: - return transform( - quantity, - self, - intermediate_value, - None, - ) + # need to resolve references + if isinstance(quantity_type, QuantityReference): + target_definition = quantity_type.target_quantity_def + target_name = target_definition.name + target_type = target_definition.type - if len(quantity.shape) == 1: - return [ - transform( - quantity, - self, - x, - None, - ) - for x in intermediate_value - ] + def _serialize_resolved(v, p=None): + if isinstance(v, list): + return [ + _serialize_resolved(x, [i] if p is None else p + [i]) + for i, x in enumerate(v) + ] - raise NotImplementedError('nOtSupporteD') + resolved_section = v.m_resolved() + try: + # should not use the following line alone + # to account for derived quantities + resolved_value = resolved_section.__dict__[target_name] + except KeyError: + # should not use the following line directly as + # it returns `pint.Quantity` for quantities with units + # here we want to get the value of the quantity stored in memory + resolved_value = getattr(resolved_section, target_name) - # serialization starts here - if len(quantity.shape) == 0: - return ( - serialize(target_value, path) - if is_reference - else serialize(target_value) - ) + return target_type.serialize( + resolved_value, + section=resolved_section, + transform=_transform_wrapper, + ) - if len(quantity.shape) == 1: - if not is_reference: - return [serialize(item) for item in target_value] + return _serialize_resolved(target_value) - return [ - serialize(item, f'{path}/{index}') - for index, item in enumerate(target_value) - ] + # other references + def _serialize_section(v, p): + if isinstance(v, list): + return [_serialize_section(x, f'{p}/{i}') for i, x in enumerate(v)] - raise NotImplementedError( - f'Higher shapes ({quantity.shape}) not supported: {quantity}' - ) + ref_kwargs = {k: v for k, v in kwargs.items() if k != 'transform'} + if transform: + + def _new_transform(_q, _s, _v, _): + return transform(_q, _s, _v, p) - def serialize_attribute(attribute: Attribute, value: Any) -> Any: - if isinstance(attribute.type, Datatype): - return attribute.type.serialize(value) + ref_kwargs['transform'] = _new_transform - if isinstance(attribute.type, Reference): - return attribute.type.attach_definition(None).serialize(self, value) + return v.m_resolved().m_to_dict(**ref_kwargs) - raise MetainfoError() + return _serialize_section(target_value, path) - def collect_attributes(attr_map: dict, all_attr: dict): + def serialize_attributes(attr_map: dict, all_attr: dict): result: dict = {} for attr_key, attr_value in attr_map.items(): attr_def = resolve_variadic_name(all_attr, attr_key) - result[attr_key] = serialize_attribute(attr_def, attr_value) + result[attr_key] = attr_def.type.serialize(attr_value, section=self) return result - def serialize_full_quantity( - quantity_def: Quantity, values: Dict[str, MQuantity] - ): + def serialize_full(quantity_def: Quantity, values: dict[str, MQuantity]): result: dict = {} for m_quantity in values.values(): m_result: dict = { @@ -1946,10 +1865,9 @@ class MSection( if m_quantity.original_unit: m_result['m_original_unit'] = str(m_quantity.original_unit) if m_quantity.attributes: - a_result: dict = collect_attributes( + if a_result := serialize_attributes( m_quantity.attributes, quantity_def.all_attributes - ) - if a_result: + ): m_result['m_attributes'] = a_result result[m_quantity.name] = m_result @@ -1958,51 +1876,52 @@ class MSection( def serialize_annotation(annotation): if isinstance(annotation, Annotation): return annotation.m_to_dict() - elif isinstance(annotation, Dict): - try: - json.dumps(annotation) - return annotation - except Exception: - return str(annotation) - else: + + if not isinstance(annotation, dict): + return str(annotation) + + try: + json.dumps(annotation) + return annotation + except Exception: # noqa return str(annotation) def items() -> Iterable[Tuple[str, Any]]: # metadata - if with_meta: + if ( + with_meta + or with_root_def + or ( + self.m_parent + and self.m_parent_sub_section.sub_section != self.m_def + ) + ): yield 'm_def', self.m_def.definition_reference(self) if with_def_id: yield 'm_def_id', self.m_def.definition_id + + if with_meta: if self.m_parent_index != -1: yield 'm_parent_index', self.m_parent_index if self.m_parent_sub_section is not None: yield 'm_parent_sub_section', self.m_parent_sub_section.name - elif with_root_def: - yield 'm_def', self.m_def.definition_reference(self) - if with_def_id: - yield 'm_def_id', self.m_def.definition_id - elif self.m_parent and self.m_parent_sub_section.sub_section != self.m_def: - # The subsection definition's section def is different from our - # own section def. We are probably a specialized derived section - # from the base section that was used in the subsection def. To allow - # clients to recognize the concrete section def, we force the export - # of the section def. - yield 'm_def', self.m_def.definition_reference(self) - if with_def_id: - yield 'm_def_id', self.m_def.definition_id - - annotations = {} - for annotation_name, annotation in self.m_annotations.items(): - if isinstance(annotation, list): - annotation_value = [ - serialize_annotation(item) for item in annotation + if len(self.m_annotations) > 0: + m_annotations: dict = { + k: [ + serialize_annotation(item) + for item in (v if isinstance(v, list) else [v]) ] - else: - annotation_value = [serialize_annotation(annotation)] - annotations[annotation_name] = annotation_value - if len(annotations) > 0: - yield 'm_annotations', annotations + for k, v in self.m_annotations.items() + } + yield 'm_annotations', m_annotations + + # section attributes + if attributes := self.__dict__.get('m_attributes', {}): + yield ( + 'm_attributes', + serialize_attributes(attributes, self.m_def.all_attributes), + ) # quantities sec_path = self.m_path() @@ -2017,55 +1936,34 @@ class MSection( yield name, serialize_quantity(quantity, False, True, path) continue - is_set = self.m_is_set(quantity) - if not is_set: - if not include_defaults or not quantity.m_is_set( - Quantity.default - ): - continue + if not (is_set := self.m_is_set(quantity)) and ( + not include_defaults or not quantity.m_is_set(Quantity.default) + ): + continue - if not quantity.use_full_storage: - yield name, serialize_quantity(quantity, is_set, False, path) + if quantity.use_full_storage: + yield name, serialize_full(quantity, self.__dict__[name]) else: - yield ( - name, - serialize_full_quantity( - quantity, self.__dict__[quantity.name] - ), - ) + yield name, serialize_quantity(quantity, is_set, False, path) except ValueError as e: raise ValueError(f'Value error ({str(e)}) for {quantity}') - # section attributes - if 'm_attributes' in self.__dict__: - yield ( - 'm_attributes', - collect_attributes( - self.__dict__['m_attributes'], self.m_def.all_attributes - ), - ) - # subsections for name, sub_section_def in self.m_def.all_sub_sections.items(): if exclude(sub_section_def, self): continue - is_set = False if sub_section_def.repeats: if self.m_sub_section_count(sub_section_def) > 0: - is_set = True subsections = self.m_get_sub_sections(sub_section_def) if subsection_as_dict: - subsection_keys: list = [ + all_keys: list = [ item.m_key for item in subsections if item and item.m_key ] - has_dup: bool = ( - 0 < len(subsection_keys) != len(set(subsection_keys)) - ) - if not has_dup: + if not (0 < len(all_keys) != len(set(all_keys))): serialised_dict: dict = {} for index, item in enumerate(subsections): if item is None: @@ -2073,92 +1971,74 @@ class MSection( item_key = item.m_key if item.m_key else index serialised_dict[item_key] = item.m_to_dict(**kwargs) yield name, serialised_dict - else: - yield ( - name, - [ - None - if item is None - else item.m_to_dict(**kwargs) - for item in subsections - ], - ) - else: - yield ( - name, - [ - None if item is None else item.m_to_dict(**kwargs) - for item in subsections - ], - ) - else: - sub_section = self.m_get_sub_section(sub_section_def, -1) - if sub_section is not None: - is_set = True - yield name, sub_section.m_to_dict(**kwargs) - - # attributes are disabled for subsections - # if is_set: - # yield from collect_attributes(sub_section_def.all_attributes) + continue + + serialised_list: list = [ + None if item is None else item.m_to_dict(**kwargs) + for item in subsections + ] + yield name, serialised_list + elif ( + sub_section := self.m_get_sub_section(sub_section_def, -1) + ) is not None: + yield name, sub_section.m_to_dict(**kwargs) return {key: value for key, value in items()} - def m_update_from_dict(self, dct: Dict[str, Any]) -> None: + def m_update_from_dict(self, data: dict) -> None: """ Updates this section with the serialized data from the given dict, e.g. data produced by :func:`m_to_dict`. """ - section_def = self.m_def - section = self m_context = self.m_context if self.m_context else self - if 'definitions' in dct: - definition_def = section_def.all_aliases['definitions'] + if 'definitions' in data: + definition_def = self.m_def.all_aliases['definitions'] definition_cls = definition_def.sub_section.section_cls definition_section = definition_cls.m_from_dict( - dct['definitions'], m_parent=self, m_context=m_context + data['definitions'], m_parent=self, m_context=m_context ) - section.m_add_sub_section(definition_def, definition_section) + self.m_add_sub_section(definition_def, definition_section) - for name, property_def in section_def.all_aliases.items(): - if name not in dct or name == 'definitions': + for name, property_def in self.m_def.all_aliases.items(): + if name not in data or name == 'definitions': continue + target_value = data.get(name) + if isinstance(property_def, SubSection): sub_section_def = property_def - sub_section_value = dct.get(name) sub_section_cls = sub_section_def.sub_section.section_cls + + def _append(value=None): + sub_section = None + if value is not None: + sub_section = sub_section_cls.m_from_dict( + value, m_parent=self, m_context=m_context + ) + self.m_add_sub_section(sub_section_def, sub_section) + if sub_section_def.repeats: for sub_section_dct in ( - sub_section_value - if isinstance(sub_section_value, list) - else sub_section_value.values() + target_value + if isinstance(target_value, list) + else target_value.values() ): - sub_section = None - if sub_section_dct is not None: - sub_section = sub_section_cls.m_from_dict( - sub_section_dct, m_parent=self, m_context=m_context - ) - section.m_add_sub_section(sub_section_def, sub_section) + _append(sub_section_dct) else: - sub_section = sub_section_cls.m_from_dict( - sub_section_value, m_parent=self, m_context=m_context - ) - section.m_add_sub_section(sub_section_def, sub_section) + _append(target_value) - if isinstance(property_def, Quantity): + elif isinstance(property_def, Quantity): quantity_def = property_def - quantity_value = dct[name] if quantity_def.virtual: - # We silently ignore this, similar to how we ignore additional values. continue if quantity_def.use_full_storage: - if not isinstance(quantity_value, dict): + if not isinstance(target_value, dict): raise MetainfoError('Full storage quantity must be a dict') - for each_name, each_quantity in quantity_value.items(): + for each_name, each_quantity in target_value.items(): m_quantity = MQuantity(each_name, each_quantity['m_value']) if 'm_unit' in each_quantity: m_quantity.unit = units.parse_units(each_quantity['m_unit']) @@ -2169,16 +2049,15 @@ class MSection( if 'm_attributes' in each_quantity: m_quantity.attributes = each_quantity['m_attributes'] - section.m_set(quantity_def, m_quantity) + self.m_set(quantity_def, m_quantity) else: # todo: setting None has different implications - section.__dict__[property_def.name] = quantity_def.type.normalize( - quantity_value, section=section + self.__dict__[property_def.name] = quantity_def.type.normalize( + target_value, section=self ) - if 'm_attributes' in dct: - for attr_key, attr_value in dct['m_attributes'].items(): - section.m_set_section_attribute(attr_key, attr_value) + for attr_key, attr_value in data.get('m_attributes', {}).items(): + self.m_set_section_attribute(attr_key, attr_value) @classmethod def m_from_dict( @@ -3234,19 +3113,6 @@ class Quantity(Property): if self.derived is not None: self.virtual = True # type: ignore - # replace the quantity implementation with an optimized version for the most - # primitive quantities if applicable - is_primitive = not self.derived and not self.use_full_storage - is_primitive = is_primitive and len(self.shape) <= 1 - is_primitive = is_primitive and self.type in [str, bool, float, int] - is_primitive = is_primitive and self.type not in MTypes.num_numpy - if is_primitive: - self._default = self.default - self._name = self.name - self._type = self.type - self._list = len(self.shape) == 1 - self.__class__ = PrimitiveQuantity - check_dimensionality(self, self.unit) def __get__(self, obj, cls): @@ -3367,13 +3233,6 @@ class Quantity(Property): f'and int ({dim_quantity.type}) typed.' ) - @constraint(warning=True) - def higher_shapes_require_dtype(self): - if len(self.shape) > 1: - assert ( - self.type in MTypes.numpy - ), f'Higher dimensional quantities ({self}) need a dtype and will be treated as numpy arrays.' - def _hash_seed(self) -> str: """ Generate a unique representation for this quantity. @@ -3459,73 +3318,6 @@ class DirectQuantity(Quantity): obj.__dict__[self._name] = ensure_complete_type(value, obj) -class PrimitiveQuantity(Quantity): - """An optimized replacement for Quantity suitable for primitive properties.""" - - def __get__(self, obj, cls): - try: - value = obj.__dict__[self._name] - except KeyError: - value = self._default - except AttributeError: - return self - if value is not None and self.unit is not None and self.type in MTypes.num: - return value * self.unit # type: ignore - return value - - def __set__(self, obj, value): - obj.m_mod_count += 1 - - if value is None: - obj.__dict__.pop(self.name, None) - return - - # Handle pint quantities. Conversion is done automatically between - # units. Notice that currently converting from float to int or vice - # versa is not allowed for primitive types. - if isinstance(value, pint.Quantity): - if self.unit is None: - if value.units.dimensionless: - value = value.magnitude - else: - raise TypeError( - f'The quantity {self} does not have a unit, but value {value} has.' - ) - elif self.type in MTypes.int: - raise TypeError( - f'Cannot save data with unit conversion into the quantity {self} ' - 'with integer data type due to possible precision loss.' - ) - else: - value = value.to(self.unit).magnitude - - if self._list: - if not isinstance(value, list): - if hasattr(value, 'tolist'): - value = value.tolist() - else: - raise TypeError( - f'The value {value} for quantity {self} has no shape {self.shape}' - ) - - if any(v is not None and type(v) is not self._type for v in value): - raise TypeError( - f'The value {value} with type {type(value)} for quantity {self} is not of type {self.type}' - ) - - elif type(value) is not self._type: - raise TypeError( - f'The value {value} with type {type(value)} for quantity {self} is not of type {self.type}' - ) - - try: - obj.__dict__[self._name] = value - except AttributeError: - raise KeyError( - 'Cannot overwrite quantity definition. Only values can be set.' - ) - - class SubSection(Property): """ Like quantities, subsections are defined in a `section class` as attributes diff --git a/nomad/metainfo/util.py b/nomad/metainfo/util.py index a511368462c5f8e9e9edb006e5f66b9aa711bc71..32db44e213af8ae86e63764d3cc75d723deee085 100644 --- a/nomad/metainfo/util.py +++ b/nomad/metainfo/util.py @@ -18,12 +18,9 @@ import hashlib import re -from dataclasses import dataclass from difflib import SequenceMatcher from typing import Any, Dict, Optional, Tuple, Union -from urllib.parse import SplitResult, urlsplit, urlunsplit -import numpy as np import pint from nomad.metainfo.data_type import Enum @@ -32,51 +29,6 @@ from nomad.units import ureg __hash_method = 'sha1' # choose from hashlib.algorithms_guaranteed -@dataclass(frozen=True) -class MTypes: - # todo: account for bytes which cannot be naturally serialized to JSON - primitive = { - str: lambda v: None if v is None else str(v), - int: lambda v: None if v is None else int(v), - float: lambda v: None if v is None else float(v), - complex: lambda v: None if v is None else complex(v), - bool: lambda v: None if v is None else bool(v), - np.bool_: lambda v: None if v is None else bool(v), - } - - primitive_name = {v.__name__: v for v in primitive} | { - 'string': str, - 'boolean': bool, - } - - int_numpy = { - np.int8, - np.int16, - np.int32, - np.int64, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - } - int_python = {int} - int = int_python | int_numpy - float_numpy = {np.float16, np.float32, np.float64} - complex_numpy = {np.complex64, np.complex128} - float_python = {float} - complex_python = {complex} - float = float_python | float_numpy - complex = complex_python | complex_numpy - num_numpy = int_numpy | float_numpy | complex_numpy - num_python = int_python | float_python | complex_python - num = num_python | num_numpy - str_numpy = {np.str_} - bool_numpy = {np.bool_} - bool = {bool, np.bool_} - numpy = num_numpy | str_numpy | bool_numpy - str = {str} | str_numpy - - MEnum = Enum # type: ignore @@ -217,22 +169,6 @@ class MSubSectionList(list): self.section._on_remove_sub_section(self.sub_section_def, old_value) -@dataclass -class ReferenceURL: - fragment: str - archive_url: str - url_parts: SplitResult - - def __init__(self, url: str): - if '#' not in url: - url = f'#{url}' - - self.url_parts = urlsplit(url) - archive_url = urlunsplit(self.url_parts[0:4] + ('',)) - self.archive_url = None if archive_url is None else archive_url - self.fragment = self.url_parts.fragment - - class Annotation: """Base class for annotations.""" @@ -454,28 +390,6 @@ def split_python_definition(definition_with_id: str) -> Tuple[list, Optional[str return __split(definition_names), definition_id -def check_dimensionality(quantity_def, unit: Optional[pint.Unit]) -> None: - if quantity_def is None or unit is None: - return - - dimensionality = getattr(quantity_def, 'dimensionality', None) - - if dimensionality is None: # not set, do not validate - return - - if dimensionality in ('dimensionless', '1') and unit.dimensionless: # dimensionless - return - - if dimensionality == 'transformation': - # todo: check transformation dimensionality - return - - if ureg.Quantity(1 * unit).check(dimensionality): # dimensional - return - - raise TypeError(f'Dimensionality {dimensionality} is not met by unit {unit}') - - def dict_to_named_list(data) -> list: if not isinstance(data, dict): return data diff --git a/tests/metainfo/__init__.py b/tests/metainfo/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6735eb66d8d60ac789ce9903604609990ece734a 100644 --- a/tests/metainfo/__init__.py +++ b/tests/metainfo/__init__.py @@ -0,0 +1,48 @@ +from dataclasses import dataclass + +import numpy as np + + +@dataclass(frozen=True) +class MTypes: + # todo: account for bytes which cannot be naturally serialized to JSON + primitive = { + str: lambda v: None if v is None else str(v), + int: lambda v: None if v is None else int(v), + float: lambda v: None if v is None else float(v), + complex: lambda v: None if v is None else complex(v), + bool: lambda v: None if v is None else bool(v), + np.bool_: lambda v: None if v is None else bool(v), + } + + primitive_name = {v.__name__: v for v in primitive} | { + 'string': str, + 'boolean': bool, + } + + int_numpy = { + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + } + int_python = {int} + int = int_python | int_numpy + float_numpy = {np.float16, np.float32, np.float64} + complex_numpy = {np.complex64, np.complex128} + float_python = {float} + complex_python = {complex} + float = float_python | float_numpy + complex = complex_python | complex_numpy + num_numpy = int_numpy | float_numpy | complex_numpy + num_python = int_python | float_python | complex_python + num = num_python | num_numpy + str_numpy = {np.str_} + bool_numpy = {np.bool_} + bool = {bool, np.bool_} + numpy = num_numpy | str_numpy | bool_numpy + str = {str} | str_numpy diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index fb6fbc97813228a05a1a1d6388ff57705eee637f..8cdac1efb5ce63ca0bb31df6c8ed91ab20a8ea27 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -41,7 +41,6 @@ from nomad.metainfo.metainfo import ( Context, DefinitionAnnotation, derived, - MTypes, ) from nomad.metainfo.example import ( Run, @@ -56,6 +55,7 @@ from nomad import utils from nomad.units import ureg from tests import utils as test_utils +from tests.metainfo import MTypes def assert_section_def(section_def: Section): diff --git a/tests/metainfo/test_quantities.py b/tests/metainfo/test_quantities.py index 3e57b2386d8f05126fd0ec38ee04e47671b4595e..feed120d44403ba91164ef537ccdcc832f3bd000 100644 --- a/tests/metainfo/test_quantities.py +++ b/tests/metainfo/test_quantities.py @@ -30,12 +30,12 @@ from nomad.metainfo.metainfo import ( Dimension, JSON, MSection, - MTypes, Quantity, URL, Unit, units, ) +from tests.metainfo import MTypes @pytest.mark.parametrize( diff --git a/tests/metainfo/test_references.py b/tests/metainfo/test_references.py index e050719d9aeddd8518485139c9ac8e8a5f829d94..c9f20fcc985f1ecdc725f3abbe401a6e92fc1e53 100644 --- a/tests/metainfo/test_references.py +++ b/tests/metainfo/test_references.py @@ -420,16 +420,6 @@ def test_user_author(def_type, value, expected_name): # test assignment section.quantity = value - quantity = section.quantity - resolved_quantity = quantity.m_resolved() - - assert quantity.m_proxy_value == value - assert ( - quantity.m_proxy_type.target_section_def.name - == def_type().target_section_def.name - ) - assert quantity.m_proxy_section == section - assert resolved_quantity.name == expected_name # test serialization serialized_section = section.m_to_dict() @@ -438,11 +428,3 @@ def test_user_author(def_type, value, expected_name): # test deserialization deserialized_section = UserAuthorSection().m_from_dict(serialized_section) deserialized_quantity = deserialized_section.quantity - resolved_deserialized_quantity = deserialized_quantity.m_resolved() - - assert deserialized_quantity.m_proxy_value == value - assert ( - deserialized_quantity.m_proxy_type.target_section_def.name - == def_type().target_section_def.name - ) - assert resolved_deserialized_quantity.name == expected_name diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 0a674c08b5ca37e4d96b1c9c2f889de8b341f200..408ac84a21defca93ce0ca011e28e809310006c0 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -1160,7 +1160,7 @@ def test_read_metadata_from_file(proc_infra, user1, user2, tmp): assert entry_metadata.comment == comment[i] assert entry_metadata.references == references[i] assert entry_metadata.external_id == external_ids[i] - coauthors = [a.m_proxy_resolve() for a in entry_metadata.coauthors] + coauthors = entry_metadata.coauthors assert len(coauthors) == len(expected_coauthors) for j in range(len(coauthors)): assert coauthors[j].user_id == expected_coauthors[j].user_id