From 29b75b20583df2e1d479f3c70dd6503b9edf6fef Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Thu, 5 May 2022 09:34:37 +0200 Subject: [PATCH] Proper generation of m_def for referenced custom schema definitions. #826 --- nomad/client/processing.py | 4 +-- nomad/datamodel/context.py | 3 +- nomad/metainfo/metainfo.py | 25 ++++++++----- nomad/parsing/parser.py | 2 ++ nomad/parsing/parsers.py | 13 ++++--- tests/data/metainfo/inter-entry.archive.json | 10 ++++++ tests/data/metainfo/intra-entry.archive.json | 38 ++++++++++++++++++++ tests/data/metainfo/schema.archive.json | 30 ++++++++++++++++ tests/metainfo/test_metainfo.py | 4 +++ tests/metainfo/test_references.py | 15 ++++++++ 10 files changed, 129 insertions(+), 15 deletions(-) create mode 100644 tests/data/metainfo/inter-entry.archive.json create mode 100644 tests/data/metainfo/intra-entry.archive.json create mode 100644 tests/data/metainfo/schema.archive.json diff --git a/nomad/client/processing.py b/nomad/client/processing.py index dea9c55b91..2b40eb33cb 100644 --- a/nomad/client/processing.py +++ b/nomad/client/processing.py @@ -32,7 +32,7 @@ def parse( backend_factory: typing.Callable = None, strict: bool = True, logger=None, - server_context: bool = False) -> typing.List[datamodel.EntryArchive]: + **kwargs) -> typing.List[datamodel.EntryArchive]: ''' Run the given parser on the provided mainfile. If parser_name is given, we only try to match this parser, otherwise we try to match all parsers. @@ -57,7 +57,7 @@ def parse( if hasattr(parser, 'backend_factory'): setattr(parser, 'backend_factory', backend_factory) - entry_archives = parsers.run_parser(mainfile_path, parser, mainfile_keys, logger, server_context) + entry_archives = parsers.run_parser(mainfile_path, parser, mainfile_keys, logger, **kwargs) logger.info('ran parser') return entry_archives diff --git a/nomad/datamodel/context.py b/nomad/datamodel/context.py index 267fc543c6..813dfd4721 100644 --- a/nomad/datamodel/context.py +++ b/nomad/datamodel/context.py @@ -26,7 +26,6 @@ import requests from nomad import utils, config from nomad.metainfo import Context as MetainfoContext, MSection, Quantity, MetainfoReferenceError from nomad.datamodel import EntryArchive -from nomad.parsing.parser import ArchiveParser class Context(MetainfoContext): @@ -256,11 +255,13 @@ class ClientContext(Context): return EntryArchive.m_from_dict(response.json()['data']['archive'], m_context=self) def load_raw_file(self, path: str, upload_id: str, installation_url: str) -> MSection: + # TODO currently upload_id might be None if upload_id is None: # try to find a local file, useful when the context is used for local parsing file_path = os.path.join(self.local_dir, path) if os.path.exists(file_path): + from nomad.parsing.parser import ArchiveParser with open(file_path, 'rt') as f: archive = EntryArchive(m_context=self) ArchiveParser().parse_file(file_path, f, archive) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 10d978b354..e932687818 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -1779,7 +1779,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas def m_def_reference(): qualified_name = self.m_def.qualified_name() - if qualified_name.startswith('*.'): + if qualified_name.startswith('entry_id:'): # This is not from a python module, use archive reference instead return self.m_def.m_root().m_context.create_reference(self, None, self.m_def) @@ -2554,14 +2554,11 @@ class Definition(MSection): return super().m_is_set(quantity_def) def qualified_name(self): - names = [] - current = self - while current is not None and current.m_follows(Definition.m_def): - name = current.name - names.append(name if name else '*') - current = current.m_parent + name = self.name if self.name else '*' + if self.m_parent and self.m_parent.m_follows(Definition.m_def): + return f'{self.m_parent.qualified_name()}.{name}' - return '.'.join(reversed(names)) + return name def on_set(self, quantity_def, value): if quantity_def == Definition.categories: @@ -3335,6 +3332,7 @@ class Package(Definition): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.errors, self.warnings = [], [] + self.archive = None def __init_metainfo__(self): super().__init_metainfo__() @@ -3396,6 +3394,17 @@ class Package(Definition): return super(Package, cls).m_from_dict(data, **kwargs) + def qualified_name(self): + if self.archive: + # If the package was defined within a regular uploaded archive file, we + # use its id, which is a globally unique identifier for the package. + if self.archive.metadata and self.archive.metadata.entry_id: + return f'entry_id:{self.archive.metadata.entry_id}' + else: + return f'entry_id:*' + + return super().qualified_name() + class Category(Definition): ''' Categories allow to organize metainfo definitions (not metainfo data like sections do) diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index 64bfd900c3..fbe0d530ae 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -335,6 +335,8 @@ class ArchiveParser(MatchingParser): for warning in warnings: logger.warn('Validation warning', details=warning) + archive.definitions.archive = archive + del archive_data['definitions'] archive.m_update_from_dict(archive_data) diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 8fc243fe35..48cc7a3259 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -17,7 +17,7 @@ # import os.path -from typing import Tuple, List, Dict +from typing import Tuple, List, Dict, Any from nomad import config from nomad.datamodel import EntryArchive, EntryMetadata, results @@ -143,7 +143,7 @@ class ParserContext(Context): def run_parser( mainfile_path: str, parser: Parser, mainfile_keys: List[str] = None, - logger=None, server_context: bool = False) -> List[EntryArchive]: + logger=None, server_context: bool = False, metadata: Dict[str, Any] = None) -> List[EntryArchive]: ''' Parses a file, given the path, the parser, and mainfile_keys, as returned by :func:`match_parser`, and returns the resulting EntryArchive objects. Parsers that have @@ -156,8 +156,13 @@ def run_parser( entry_archive = EntryArchive(m_context=ParserContext(directory)) else: entry_archive = EntryArchive(m_context=ClientContext(local_dir=directory)) - metadata = entry_archive.m_create(EntryMetadata) - metadata.mainfile = mainfile_path + + if metadata is not None: + entry_archive.metadata = EntryMetadata(**metadata) + else: + entry_archive.metadata = EntryMetadata() + entry_archive.metadata.mainfile = mainfile_path + entry_archives = [entry_archive] if mainfile_keys: child_archives = {} diff --git a/tests/data/metainfo/inter-entry.archive.json b/tests/data/metainfo/inter-entry.archive.json new file mode 100644 index 0000000000..f48b8ce6cc --- /dev/null +++ b/tests/data/metainfo/inter-entry.archive.json @@ -0,0 +1,10 @@ +{ + "data": { + "m_def": "../upload/raw/schema.archive.json#/definitions/section_definitions/0", + "my_quantity": "test_value", + "datetime_list": [ + "2022-04-01", + "2022-04-02" + ] + } +} \ No newline at end of file diff --git a/tests/data/metainfo/intra-entry.archive.json b/tests/data/metainfo/intra-entry.archive.json new file mode 100644 index 0000000000..149753668c --- /dev/null +++ b/tests/data/metainfo/intra-entry.archive.json @@ -0,0 +1,38 @@ +{ + "definitions": { + "name": "test_package_name", + "section_definitions": [ + { + "name": "MySection", + "base_sections": [ + "nomad.datamodel.data.EntryData" + ], + "quantities": [ + { + "name": "my_quantity", + "type": { + "type_kind": "python", + "type_data": "str" + } + }, + { + "name": "datetime_list", + "type": { + "type_kind": "custom", + "type_data": "nomad.metainfo.metainfo.Datetime" + }, + "shape": ["*"] + } + ] + } + ] + }, + "data": { + "m_def": "#/definitions/section_definitions/0", + "my_quantity": "test_value", + "datetime_list": [ + "2022-04-01", + "2022-04-02" + ] + } +} \ No newline at end of file diff --git a/tests/data/metainfo/schema.archive.json b/tests/data/metainfo/schema.archive.json new file mode 100644 index 0000000000..26dd78e0ac --- /dev/null +++ b/tests/data/metainfo/schema.archive.json @@ -0,0 +1,30 @@ +{ + "definitions": { + "name": "test_package_name", + "section_definitions": [ + { + "name": "MySection", + "base_sections": [ + "nomad.datamodel.data.EntryData" + ], + "quantities": [ + { + "name": "my_quantity", + "type": { + "type_kind": "python", + "type_data": "str" + } + }, + { + "name": "datetime_list", + "type": { + "type_kind": "custom", + "type_data": "nomad.metainfo.metainfo.Datetime" + }, + "shape": ["*"] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index 439555968b..c091cb6002 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -268,6 +268,10 @@ class TestM2: pkg.init_metainfo() assert len(pkg.warnings) > 0 + # TODO + @pytest.mark.skip(reason=( + 'We disabled the constraint that is tested here, because some Nexus definitions ' + 'are violating it.')) def test_higher_shapes_require_dtype(self): class TestSection(MSection): # pylint: disable=unused-variable test = Quantity(type=int, shape=[3, 3]) diff --git a/tests/metainfo/test_references.py b/tests/metainfo/test_references.py index 71c1d00bc1..2097a052d8 100644 --- a/tests/metainfo/test_references.py +++ b/tests/metainfo/test_references.py @@ -18,6 +18,7 @@ from typing import cast import pytest +import os.path from nomad.metainfo import ( MSection, Quantity, Section, SubSection, MProxy, Reference, QuantityReference, File, @@ -323,3 +324,17 @@ def test_def_reference(): assert result.m_to_dict() == { 'test_quantity': 'TestValue' } + + +@pytest.mark.parametrize('mainfile', [ + 'intra-entry', 'inter-entry' +]) +def test_parse_with_references(mainfile): + from nomad.client import parse, normalize_all + entry_archive = parse( + os.path.join(os.path.dirname(__file__), f'../data/metainfo/{mainfile}.archive.json'), + metadata=dict(entry_id='test_entry_id'))[0] + normalize_all(entry_archive) + + m_def = entry_archive.m_to_dict()['data']['m_def'] + assert '#/definitions/' in m_def -- GitLab