diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index 53e37e650a27514c1bc7e2df250eae5d9b72ae08..328473991824e5a5249a401a038c3b6c9db6a733 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -59,13 +59,13 @@ Here is a small metainfo example: type=Datetime, a_elasticsearch=Elasticsearch()) - results = SubSection(sub_section=Results.m_def) + results = SubSection(sub_section=Results.m_def, a_elasticsearch=Elasticsearch()) class Results(MSection): - material = SubSection(sub_section=Material.m_def) - properties = SubSection(sub_section=Properties.m_def) + material = SubSection(sub_section=Material.m_def, a_elasticsearch=Elasticsearch()) + properties = SubSection(sub_section=Properties.m_def, a_elasticsearch=Elasticsearch()) class Material(MSection): @@ -143,6 +143,8 @@ required from the metainfo are: - the ``results.material`` sub-section has a ``material_id`` - the ``results.material`` sub-section has no property called ``entries`` +This extension resolves references during indexing and treats referenced +sections as if they were direct sub-sections. .. autofunction:: index_entry .. 
autofunction:: index_entries @@ -162,7 +164,7 @@ from nomad import config from .metainfo import ( Section, Quantity, MSection, MEnum, Datetime, Reference, DefinitionAnnotation, - Definition) + Definition, MetainfoError, QuantityReference) class DocumentType(): @@ -183,6 +185,7 @@ class DocumentType(): ''' return root.m_to_dict( with_meta=False, include_defaults=True, include_derived=True, + resolve_references=True, partial=lambda property_, section: property_ in self.indexed_properties) def create_mapping(self, section_def: Section): @@ -193,23 +196,36 @@ class DocumentType(): ''' mappings: Dict[str, Any] = {} - for quanity_def in section_def.all_quantities.values(): - elasticsearch_annotations = quanity_def.m_get_annotations(Elasticsearch, as_list=True) + for quantity_def in section_def.all_quantities.values(): + elasticsearch_annotations = quantity_def.m_get_annotations(Elasticsearch, as_list=True) for elasticsearch_annotation in elasticsearch_annotations: - if self != entry_type and elasticsearch_annotation.doc_type != self: + is_section_reference = isinstance(quantity_def.type, Reference) and not isinstance(quantity_def.type, QuantityReference) + if not is_section_reference and self != entry_type and elasticsearch_annotation.doc_type != self: continue - mapping = mappings.setdefault(elasticsearch_annotation.property_name, {}) - fields = elasticsearch_annotation.fields - if len(fields) > 0: - mapping.setdefault('fields', {}).update(**fields) + if is_section_reference: + # Treat referenced sections as sub-sections + assert quantity_def.type.target_section_def is not None + assert quantity_def.is_scalar + reference_mapping = self.create_mapping(cast(Section, quantity_def.type.target_section_def)) + if len(reference_mapping['properties']) > 0: + mappings[quantity_def.name] = reference_mapping else: - mapping.update(**elasticsearch_annotation.mapping) + mapping = mappings.setdefault(elasticsearch_annotation.property_name, {}) + fields = 
elasticsearch_annotation.fields + if len(fields) > 0: + mapping.setdefault('fields', {}).update(**fields) - self.indexed_properties.add(quanity_def) + else: + mapping.update(**elasticsearch_annotation.mapping) + + self.indexed_properties.add(quantity_def) for sub_section_def in section_def.all_sub_sections.values(): + if sub_section_def.m_get_annotations(Elasticsearch) is None: + continue + assert not sub_section_def.repeats, 'elasticsearch fields in repeating sub sections are not supported' sub_section_mapping = self.create_mapping(sub_section_def.sub_section) if len(sub_section_mapping['properties']) > 0: @@ -313,13 +329,7 @@ class Elasticsearch(DefinitionAnnotation): self._field = field self.doc_type = doc_type - @property - def mapping(self): - if self._mapping is not None: - return self._mapping - - quantity = cast(Quantity, self.definition) - + def _compute_mapping(self, quantity: Quantity): if quantity.type == str: return dict(type='keyword') elif quantity.type in [float, np.float64] and quantity.is_scalar: @@ -334,14 +344,23 @@ class Elasticsearch(DefinitionAnnotation): return dict(type='boolean') elif quantity.type == Datetime: return dict(type='date') + elif isinstance(quantity.type, QuantityReference): + return self._compute_mapping(quantity.type.target_quantity_def) elif isinstance(quantity.type, Reference): - raise NotImplementedError('References are not yet implemented') + raise MetainfoError('References cannot be indexed.') elif isinstance(quantity.type, MEnum): return dict(type='keyword') else: raise NotImplementedError( 'Quantity type %s for quantity %s is not supported.' 
% (quantity.type, quantity)) + @property + def mapping(self): + if self._mapping is not None: + return self._mapping + + return self._compute_mapping(cast(Quantity, self.definition)) + @property def fields(self): if self._field is None: diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 2ca048a7693481583ee7e485de05def0f61d41c1..5ea829467c5bcbed7aed8d6969404bd6c2bc0d56 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -1185,6 +1185,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas self, with_meta: bool = False, include_defaults: bool = False, include_derived: bool = False, + resolve_references: bool = False, categories: List[Union['Category', Type['MCategory']]] = None, partial: TypingCallable[['Definition', 'MSection'], bool] = None) -> Dict[str, Any]: ''' @@ -1245,10 +1246,22 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas quantity_type = quantity.type serialize: TypingCallable[[Any], Any] = str - if isinstance(quantity_type, Reference): + if resolve_references and isinstance(quantity_type, QuantityReference): + quantity_type = quantity_type.target_quantity_def.type + if isinstance(quantity_type, Reference): def reference_serialize(value): - if isinstance(value, MProxy): + if resolve_references: + assert not isinstance(quantity_type, QuantityReference) + value = value.m_resolved() + return value.m_to_dict( + with_meta=with_meta, + include_defaults=include_defaults, + include_derived=include_derived, + resolve_references=resolve_references, + partial=child_partial) + + elif isinstance(value, MProxy): if value.m_proxy_resolved is not None: return quantity_type.serialize(self, quantity, value) else: @@ -1305,6 +1318,17 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas 'Do not know how to serialize data with type %s for quantity %s' % (quantity_type, quantity)) + quantity_type = quantity.type + if resolve_references 
and isinstance(quantity_type, QuantityReference): + serialize_value = serialize + + def _serialize(value: Any): + value = getattr(value.m_resolved(), quantity_type.target_quantity_def.name) + + return serialize_value(value) + + serialize = _serialize + if is_set: value = self.__dict__[quantity.name] elif is_derived: @@ -1374,6 +1398,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas with_meta=with_meta, include_defaults=include_defaults, include_derived=include_derived, + resolve_references=resolve_references, partial=child_partial) return {key: value for key, value in items()} diff --git a/tests/metainfo/test_elasticsearch_extension.py b/tests/metainfo/test_elasticsearch_extension.py index 16b6b4a5ab926d1fb1f7ca8dd624f0bb413a609f..3642d2aba95035088390706660160fd10d2d782b 100644 --- a/tests/metainfo/test_elasticsearch_extension.py +++ b/tests/metainfo/test_elasticsearch_extension.py @@ -18,6 +18,7 @@ from typing import List import pytest +import numpy as np from nomad import config from nomad.metainfo import MSection, Quantity, SubSection, Datetime @@ -45,6 +46,21 @@ class Material(MSection): a_elasticsearch=(Elasticsearch(material_type))) +class Data(MSection): + n_points = Quantity( + type=int, + derived=lambda data: len(data.points[0]) if data.points is not None else 0, + a_elasticsearch=Elasticsearch(material_entry_type)) + + points = Quantity(type=np.dtype(np.float64), shape=['*', '*']) + + n_series = Quantity( + type=int, + derived=lambda data: len(data.series) if data.series is not None else 0) + + series = Quantity(type=np.dtype(np.float64), shape=['*', '*']) + + class Properties(MSection): available_properties = Quantity( @@ -55,11 +71,15 @@ class Properties(MSection): type=float, unit='J', a_elasticsearch=Elasticsearch(material_entry_type)) + data = Quantity(type=Data, a_elasticsearch=Elasticsearch()) + + n_series = Quantity(type=Data.n_series, a_elasticsearch=Elasticsearch()) + class Results(MSection): - material = 
SubSection(sub_section=Material.m_def) - properties = SubSection(sub_section=Properties.m_def) + material = SubSection(sub_section=Material.m_def, a_elasticsearch=Elasticsearch()) + properties = SubSection(sub_section=Properties.m_def, a_elasticsearch=Elasticsearch()) class Entry(MSection): @@ -72,7 +92,8 @@ class Entry(MSection): type=Datetime, a_elasticsearch=Elasticsearch()) - results = SubSection(sub_section=Results.m_def) + results = SubSection(sub_section=Results.m_def, a_elasticsearch=Elasticsearch()) + data = SubSection(sub_section=Data.m_def) def assert_mapping(mapping: dict, path: str, es_type: str, field: str = None): @@ -181,6 +202,8 @@ def test_mappings(indices): assert_mapping(entry_mapping, 'results.material.formula', 'text', 'text') assert_mapping(entry_mapping, 'results.properties.available_properties', 'keyword') assert_mapping(entry_mapping, 'results.properties.band_gap', 'double') + assert_mapping(entry_mapping, 'results.properties.data.n_points', 'integer') + assert_mapping(entry_mapping, 'results.properties.n_series', 'integer') assert_mapping(material_mapping, 'material_id', 'keyword') assert_mapping(material_mapping, 'formula', 'keyword') @@ -189,6 +212,55 @@ def test_mappings(indices): assert_mapping(material_mapping, 'entries.entry_id', 'keyword') assert_mapping(material_mapping, 'entries.upload_time', None) assert_mapping(material_mapping, 'entries.results.properties.available_properties', 'keyword') + assert_mapping(material_mapping, 'entries.results.properties.data.n_points', 'integer') + + +def test_index_docs(indices): + entry = Entry(entry_id='test_entry_id') + data = entry.m_create(Data, points=[[0.1, 0.2], [1.1, 1.2]]) + results = entry.m_create(Results) + results.m_create( + Material, + material_id='test_material_id', + formula='H20', springer_labels=['water']) + results.m_create( + Properties, + data=data, n_series=data, band_gap=1e-12, available_properties=['data', 'band_grap']) + + entry_doc = 
entry_type.create_index_doc(entry) + material_entry_doc = material_entry_type.create_index_doc(entry) + + assert entry_doc == { + 'entry_id': 'test_entry_id', + 'results': { + 'material': { + 'material_id': 'test_material_id', + 'formula': 'H20', + 'springer_labels': ['water'] + }, + 'properties': { + 'available_properties': ['data', 'band_grap'], + 'band_gap': 1e-12, + 'data': { + 'n_points': 2 + }, + 'n_series': 0 + } + } + } + + assert material_entry_doc == { + 'entry_id': 'test_entry_id', + 'results': { + 'properties': { + 'available_properties': ['data', 'band_grap'], + 'band_gap': 1e-12, + 'data': { + 'n_points': 2 + }, + } + } + } def test_index_entry(elastic_client, indices, example_entry): diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index 473d991b36c6a532178ab735de91b57e63c60c53..8fcf168aa58fed8ae658377892a6fcefb1acc4d3 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -491,6 +491,13 @@ class TestM1: assert 'nomad_version' in dct['parsing'] assert 'n_atoms' not in dct['systems'][0] + def test_to_dict_resolve_references(self, example_data): + scc = example_data.m_create(SCC) + scc.system = example_data.systems[0] + + data = scc.m_to_dict(resolve_references=True) + assert data['system'] == example_data.systems[0].m_to_dict() + def test_derived(self): system = System() diff --git a/tests/metainfo/test_references.py b/tests/metainfo/test_references.py index 31eb6c2fae200b4dbb544ad32ba0c1eb52c7beb4..4c70cc07f86dae0ce6feec821fab122506d6c1d2 100644 --- a/tests/metainfo/test_references.py +++ b/tests/metainfo/test_references.py @@ -90,6 +90,10 @@ def assert_data(example_data): assert example_data.referencing.quantity_reference == 'test_value' assert example_data.referencing.m_to_dict()['quantity_reference'] == '/referenced/str_quantity' + assert example_data.referencing.m_is_set(Referencing.section_reference) + assert example_data.referencing.m_is_set(Referencing.section_reference_list) + 
assert example_data.referencing.m_is_set(Referencing.quantity_reference) + assert_properties(example_data) example_data_serialized = example_data.m_to_dict(with_meta=True) @@ -132,3 +136,33 @@ def test_quantity_proxy(example_data): assert example_data.referencing.quantity_reference == 'test_value' assert_data(example_data) + + +def test_resolve_references(example_data): + assert example_data.m_to_dict(resolve_references=True) == { + 'referenced': { + 'str_quantity': 'test_value' + }, + 'referenceds': [ + { + 'str_quantity': 'test_value' + }, + { + 'str_quantity': 'test_value' + } + ], + 'referencing': { + 'section_reference': { + 'str_quantity': 'test_value' + }, + 'section_reference_list': [ + { + 'str_quantity': 'test_value' + }, + { + 'str_quantity': 'test_value' + } + ], + 'quantity_reference': 'test_value' + } + }