diff --git a/nomad/cli/admin/admin.py b/nomad/cli/admin/admin.py index 3fa3595070e64b23426b0e85ee4a6833c154eed7..d54a502f6260dba776f8cd3aa1f7d52b956fa632 100644 --- a/nomad/cli/admin/admin.py +++ b/nomad/cli/admin/admin.py @@ -234,6 +234,9 @@ def index_materials(threads, code, dry, in_place, n, source): from nomad.search import material_document from nomad.datamodel.material import Material, Calculation, Method, Properties, IdealizedStructure, Energies, Workflow, Bulk + def include_es(quantity, section): + return hasattr(quantity, 'a_search') or hasattr(quantity, 'a_elastic') + def create_entry(material, calc, in_place): """Creates an ES update operation that inserts the full material info if entry does not exists, otherwise only adds the calculation into the @@ -245,11 +248,11 @@ def index_materials(threads, code, dry, in_place, n, source): entry['_id'] = material.material_id entry['_type'] = 'doc' entry['_source'] = { - "upsert": material.m_to_dict(include_defaults=False, partial="es"), + "upsert": material.m_to_dict(include_defaults=False, include=include_es), "doc_as_upsert": False, "script": { "params": { - "calc": calc.m_to_dict(include_defaults=False, partial="es") + "calc": calc.m_to_dict(include_defaults=False, include=include_es) }, } } diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index 328473991824e5a5249a401a038c3b6c9db6a733..677b8d4a913bccaef6bcd423d0d90944022ba509 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -186,7 +186,7 @@ class DocumentType(): return root.m_to_dict( with_meta=False, include_defaults=True, include_derived=True, resolve_references=True, - partial=lambda property_, section: property_ in self.indexed_properties) + exclude=lambda property_, section: property_ not in self.indexed_properties) def create_mapping(self, section_def: Section): ''' diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 
5ea829467c5bcbed7aed8d6969404bd6c2bc0d56..f4bebc3de736c3f13de0de206efcabd884867854 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -1187,15 +1187,25 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas include_derived: bool = False, resolve_references: bool = False, categories: List[Union['Category', Type['MCategory']]] = None, - partial: TypingCallable[['Definition', 'MSection'], bool] = None) -> Dict[str, Any]: + include: TypingCallable[['Definition', 'MSection'], bool] = None, + exclude: TypingCallable[['Definition', 'MSection'], bool] = None, + transform: TypingCallable[['Definition', 'MSection', Any], Any] = None) -> Dict[str, Any]: ''' - Returns the data of this section as a json serializeable dictionary. + Returns the data of this section as a (JSON serializable) dictionary. + + With its default configuration, it is the inverse of :func:`MSection.m_from_dict`. + + There are a lot of ways to customize the behavior, e.g. to generate JSON for + databases, search engines, etc. Arguments: with_meta: Include information about the section definition and the sections position in its parent. include_defaults: Include default values of unset quantities. include_derived: Include values of derived quantities. + resolve_references: + Treat references as the sections and values they represent. References + must not form cycles; this is not checked and cycles cause endless recursion. categories: A list of category classes or category definitions that is used to filter the included quantities and sub sections. Only applied to properties of this section, not on sub-sections. Is overwritten @@ -1212,35 +1222,60 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas Partial is applied recursively on sub-sections. Overrides categories. + include: A function that determines if a property (quantity or sub-section) will + be included in the results. 
It takes the property definition and the current + section as arguments. The function returns true for including and false for + excluding the property. Include is applied recursively on sub-sections. + Overrides categories. + exclude: A function that determines if a property (quantity or sub-section) will + be excluded from the results. It takes the property definition and the current + section as arguments. The function returns true for excluding and false for + including the property. Exclude is applied recursively on sub-sections. + Overrides categories. + transform: A function that determines serialized quantity values. It takes the + quantity definition, current section, and the default serialized + value as arguments. Depending on where this is used, you might have to ensure + that the result is JSON-serializable. By default, values are serialized to + JSON according to the quantity type. ''' - # determine partial for sub-sections and partial based on categories - if partial is not None: - if partial == "es": - partial = lambda d, s: hasattr(d, "a_search") or hasattr(d, "a_search") - if partial == "mongo": - partial = lambda d, s: hasattr(d, "a_mongo") - child_partial = partial - else: + + kwargs: Dict[str, Any] = dict( + with_meta=with_meta, + include_defaults=include_defaults, + include_derived=include_derived, + resolve_references=resolve_references, + exclude=exclude, + transform=transform) + + assert not (include is not None and exclude is not None), 'You can only include or exclude, not both.' 
+ + if include is not None: + def exclude(*args, **kwargs): # pylint: disable=function-redefined + return not include(*args, **kwargs) + + kwargs['exclude'] = exclude + + elif exclude is None: if categories is None: - partial = lambda *args, **kwargs: True - child_partial = lambda *args, **kwargs: True + def exclude(prop, section): # pylint: disable=function-redefined + return False + + kwargs['exclude'] = exclude else: - category_defs: List[Category] = None - if categories is not None: - category_defs = [] - for category in categories: - if issubclass(category, MCategory): # type: ignore - category_defs.append(category.m_def) # type: ignore - elif isinstance(category, Category): - category_defs.append(category) - else: - raise TypeError('%s is not a category' % category) + category_defs: List[Category] = [] + for category in categories: + if issubclass(category, MCategory): # type: ignore + category_defs.append(category.m_def) # type: ignore + elif isinstance(category, Category): + category_defs.append(category) + else: + raise TypeError('%s is not a category' % category) - partial = lambda definition, *args, **kwargs: any( - definition in category.get_all_definitions() - for category in category_defs) - child_partial = lambda *args, **kwargs: True + def exclude(prop, section): # pylint: disable=function-redefined + return not any( + prop in category.get_all_definitions() + for category in category_defs) def serialize_quantity(quantity, is_set, is_derived): quantity_type = quantity.type @@ -1254,12 +1289,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas if resolve_references: assert not isinstance(quantity_type, QuantityReference) value = value.m_resolved() - return value.m_to_dict( - with_meta=with_meta, - include_defaults=include_defaults, - include_derived=include_derived, - resolve_references=resolve_references, - partial=child_partial) + return value.m_to_dict(**kwargs) elif isinstance(value, MProxy): if 
value.m_proxy_resolved is not None: @@ -1318,16 +1348,16 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas 'Do not know how to serialize data with type %s for quantity %s' % (quantity_type, quantity)) + serialize_value = serialize + quantity_type = quantity.type if resolve_references and isinstance(quantity_type, QuantityReference): - serialize_value = serialize - - def _serialize(value: Any): + def serialize_reference(value: Any): value = getattr(value.m_resolved(), quantity_type.target_quantity_def.name) return serialize_value(value) - serialize = _serialize + serialize = serialize_reference if is_set: value = self.__dict__[quantity.name] @@ -1336,6 +1366,12 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas else: value = quantity.default + if transform is not None: + def serialize_and_transform(value: Any, serialize_before_transform=serialize): + return transform(quantity, self, serialize_before_transform(value)) + + serialize = serialize_and_transform + if isinstance(quantity_type, np.dtype): return serialize(value) elif len(quantity.shape) == 0: @@ -1356,7 +1392,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas # quantities for name, quantity in self.m_def.all_quantities.items(): - if not partial(quantity, self): + if exclude(quantity, self): continue try: @@ -1379,27 +1415,18 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas # sub sections for name, sub_section_def in self.m_def.all_sub_sections.items(): - if not partial(sub_section_def, self): + if exclude(sub_section_def, self): continue if sub_section_def.repeats: if self.m_sub_section_count(sub_section_def) > 0: yield name, [ - None if item is None else item.m_to_dict( - with_meta=with_meta, - include_defaults=include_defaults, - include_derived=include_derived, - partial=child_partial) + None if item is None else item.m_to_dict(**kwargs) for item in self.m_get_sub_sections(sub_section_def)] else: sub_section = 
self.m_get_sub_section(sub_section_def, -1) if sub_section is not None: - yield name, sub_section.m_to_dict( - with_meta=with_meta, - include_defaults=include_defaults, - include_derived=include_derived, - resolve_references=resolve_references, - partial=child_partial) + yield name, sub_section.m_to_dict(**kwargs) return {key: value for key, value in items()} diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index 8fcf168aa58fed8ae658377892a6fcefb1acc4d3..6b1ae26131f8daa17b0f6a78cac53af8b64ad818 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -468,36 +468,6 @@ class TestM1: assert system.atom_labels == ['H', 'H', 'O'] assert isinstance(system.atom_positions, pint.quantity._Quantity) - def test_to_dict(self, example_data): - dct = example_data.m_to_dict() - new_example_data = Run.m_from_dict(dct) - - self.assert_example_data(new_example_data) - - def test_to_dict_category_filter(self, example_data: Run): - system = example_data.systems[0] - system.system_type = 'bulk' - dct = system.m_to_dict(categories=[SystemHash]) - assert 'atom_labels' in dct - assert 'n_atoms' not in dct # derived - assert 'system_type' not in dct # not system hash - - def test_to_dict_defaults(self, example_data): - dct = example_data.m_to_dict() - assert 'nomad_version' not in dct['parsing'] - assert 'n_atoms' not in dct['systems'][0] - - dct = example_data.m_to_dict(include_defaults=True) - assert 'nomad_version' in dct['parsing'] - assert 'n_atoms' not in dct['systems'][0] - - def test_to_dict_resolve_references(self, example_data): - scc = example_data.m_create(SCC) - scc.system = example_data.systems[0] - - data = scc.m_to_dict(resolve_references=True) - assert data['system'] == example_data.systems[0].m_to_dict() - def test_derived(self): system = System() diff --git a/tests/metainfo/test_to_dict.py b/tests/metainfo/test_to_dict.py new file mode 100644 index 
0000000000000000000000000000000000000000..8b20ce5e03c9a7a7331aa703de9e429e5bac8238 --- /dev/null +++ b/tests/metainfo/test_to_dict.py @@ -0,0 +1,135 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +import numpy as np + +from nomad.metainfo import ( + MSection, MCategory, Quantity, SubSection) + +# resolve_references are tested in .test_references +# type specific serialization is tested in .test_quantities + + +class Category(MCategory): + pass + + +class Abstract(MSection): + scalar = Quantity(type=str, categories=[Category]) + many = Quantity(type=str, shape=['*']) + matrix = Quantity(type=np.dtype(np.float64), shape=['3', '3']) + + +class Child(Abstract): + pass + + +class Root(Abstract): + quantity = Quantity() + default = Quantity(type=str, default='test_value') + derived = Quantity(type=str, derived=lambda *args, **kwargs: 'test_value') + + child = SubSection(sub_section=Child.m_def, categories=[Category]) + children = SubSection(sub_section=Child.m_def, repeats=True, categories=[Category]) + + +values = dict( + scalar='test_value', + many=['test_value_1', 'test_value_2'], + matrix=[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]) + +expected_child = dict(**values) +expected_root = dict( + child=expected_child, + children=[expected_child, expected_child], + **values) + + +@pytest.fixture +def example(): + root = Root(**values) 
+ root.m_create(Child, Root.child, **values) + for _ in range(0, 2): + root.m_create(Child, Root.children, **values) + + return root + + +def test_plain(example): + assert example.m_to_dict() == expected_root + + +def test_with_meta(example): + assert example.m_to_dict(with_meta=True) == dict( + m_def='Root', + child=dict(m_def='Child', m_parent_sub_section='child', **expected_child), + children=[ + dict(m_def='Child', m_parent_sub_section='children', m_parent_index=0, **expected_child), + dict(m_def='Child', m_parent_sub_section='children', m_parent_index=1, **expected_child)], + **values) + + +def test_include_defaults(example): + assert example.m_to_dict(include_defaults=True) == dict( + default='test_value', **expected_root) + + +def test_derived(example): + assert example.m_to_dict(include_derived=True) == dict( + derived='test_value', **expected_root) + + +@pytest.mark.parametrize('include', [True, False]) +def test_exclude_include(example, include: bool): + def filter_function(prop, section): + if isinstance(prop, Quantity) and section.m_def == Root.m_def: + return not include + + if prop == Root.children: + return not include + + return include + + if include: + kwargs = dict(include=filter_function) + else: + kwargs = dict(exclude=filter_function) + + assert example.m_to_dict(**kwargs) == dict( + child=expected_child) + + +def test_categories(example): + root = dict(**expected_root) + del(root['many']) + del(root['matrix']) + + assert example.m_to_dict(categories=[Category]) == root + + +def test_transform(example): + def transform(quantity, section, value): + if quantity == Abstract.scalar and section.m_def == Root.m_def: + return 'other_value' + + return value + + root = dict(**expected_root) + root.update(scalar='other_value') + assert example.m_to_dict(transform=transform) == root