diff --git a/nomad/client/processing.py b/nomad/client/processing.py index dea9c55b9168f6bb2fa44bd4ab03ad2572cae585..2b40eb33cbbc60e0f13f1053249648899ce41baf 100644 --- a/nomad/client/processing.py +++ b/nomad/client/processing.py @@ -32,7 +32,7 @@ def parse( backend_factory: typing.Callable = None, strict: bool = True, logger=None, - server_context: bool = False) -> typing.List[datamodel.EntryArchive]: + **kwargs) -> typing.List[datamodel.EntryArchive]: ''' Run the given parser on the provided mainfile. If parser_name is given, we only try to match this parser, otherwise we try to match all parsers. @@ -57,7 +57,7 @@ def parse( if hasattr(parser, 'backend_factory'): setattr(parser, 'backend_factory', backend_factory) - entry_archives = parsers.run_parser(mainfile_path, parser, mainfile_keys, logger, server_context) + entry_archives = parsers.run_parser(mainfile_path, parser, mainfile_keys, logger, **kwargs) logger.info('ran parser') return entry_archives diff --git a/nomad/datamodel/context.py b/nomad/datamodel/context.py index 267fc543c6a74e12159b45d2e6b488ea5421fbe1..813dfd47210d65ca1cc1fdeab850dec8aa444791 100644 --- a/nomad/datamodel/context.py +++ b/nomad/datamodel/context.py @@ -26,7 +26,6 @@ import requests from nomad import utils, config from nomad.metainfo import Context as MetainfoContext, MSection, Quantity, MetainfoReferenceError from nomad.datamodel import EntryArchive -from nomad.parsing.parser import ArchiveParser class Context(MetainfoContext): @@ -256,11 +255,13 @@ class ClientContext(Context): return EntryArchive.m_from_dict(response.json()['data']['archive'], m_context=self) def load_raw_file(self, path: str, upload_id: str, installation_url: str) -> MSection: + # TODO currently upload_id might be None if upload_id is None: # try to find a local file, useful when the context is used for local parsing file_path = os.path.join(self.local_dir, path) if os.path.exists(file_path): + from nomad.parsing.parser import ArchiveParser with open(file_path, 'rt') as f: archive = EntryArchive(m_context=self) ArchiveParser().parse_file(file_path, f, archive) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 10d978b3549a80aed2e984e8fd2d7d3f6cc0f2b4..e932687818cb506da5f05ef0dfd8c595c77827a1 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -1779,7 +1779,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas def m_def_reference(): qualified_name = self.m_def.qualified_name() - if qualified_name.startswith('*.'): + if qualified_name.startswith('entry_id:'): # This is not from a python module, use archive reference instead return self.m_def.m_root().m_context.create_reference(self, None, self.m_def) @@ -2554,14 +2554,11 @@ class Definition(MSection): return super().m_is_set(quantity_def) def qualified_name(self): - names = [] - current = self - while current is not None and current.m_follows(Definition.m_def): - name = current.name - names.append(name if name else '*') - current = current.m_parent + name = self.name if self.name else '*' + if self.m_parent and self.m_parent.m_follows(Definition.m_def): + return f'{self.m_parent.qualified_name()}.{name}' - return '.'.join(reversed(names)) + return name def on_set(self, quantity_def, value): if quantity_def == Definition.categories: @@ -3335,6 +3332,7 @@ class Package(Definition): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.errors, self.warnings = [], [] + self.archive = None def __init_metainfo__(self): super().__init_metainfo__() @@ -3396,6 +3394,17 @@ class Package(Definition): return super(Package, cls).m_from_dict(data, **kwargs) + def qualified_name(self): + if self.archive: + # If the package was defined within a regular uploaded archive file, we + # use its id, which is a globally unique identifier for the package. + if self.archive.metadata and self.archive.metadata.entry_id: + return f'entry_id:{self.archive.metadata.entry_id}' + else: + return f'entry_id:*' + + return super().qualified_name() + class Category(Definition): ''' Categories allow to organize metainfo definitions (not metainfo data like sections do) diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index 64bfd900c3effdfca5e85afad61f1b2fc95fe9d7..fbe0d530ae5fc767beb1f87991e77301d5db45c2 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -335,6 +335,8 @@ class ArchiveParser(MatchingParser): for warning in warnings: logger.warn('Validation warning', details=warning) + archive.definitions.archive = archive + del archive_data['definitions'] archive.m_update_from_dict(archive_data) diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 8fc243fe3538066a2f0e5bd50919ff4f4b998269..48cc7a3259cbdadc3bbf3bdcc6e525cab783949e 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -17,7 +17,7 @@ # import os.path -from typing import Tuple, List, Dict +from typing import Tuple, List, Dict, Any from nomad import config from nomad.datamodel import EntryArchive, EntryMetadata, results @@ -143,7 +143,7 @@ class ParserContext(Context): def run_parser( mainfile_path: str, parser: Parser, mainfile_keys: List[str] = None, - logger=None, server_context: bool = False) -> List[EntryArchive]: + logger=None, server_context: bool = False, metadata: Dict[str, Any] = None) -> List[EntryArchive]: ''' Parses a file, given the path, the parser, and mainfile_keys, as returned by :func:`match_parser`, and returns the resulting EntryArchive objects. Parsers that have @@ -156,8 +156,13 @@ def run_parser( entry_archive = EntryArchive(m_context=ParserContext(directory)) else: entry_archive = EntryArchive(m_context=ClientContext(local_dir=directory)) - metadata = entry_archive.m_create(EntryMetadata) - metadata.mainfile = mainfile_path + + if metadata is not None: + entry_archive.metadata = EntryMetadata(**metadata) + else: + entry_archive.metadata = EntryMetadata() + entry_archive.metadata.mainfile = mainfile_path + entry_archives = [entry_archive] if mainfile_keys: child_archives = {} diff --git a/tests/data/metainfo/inter-entry.archive.json b/tests/data/metainfo/inter-entry.archive.json new file mode 100644 index 0000000000000000000000000000000000000000..f48b8ce6cc7a8b0b96301f44c34faebdd9a13631 --- /dev/null +++ b/tests/data/metainfo/inter-entry.archive.json @@ -0,0 +1,10 @@ +{ + "data": { + "m_def": "../upload/raw/schema.archive.json#/definitions/section_definitions/0", + "my_quantity": "test_value", + "datetime_list": [ + "2022-04-01", + "2022-04-02" + ] + } +} \ No newline at end of file diff --git a/tests/data/metainfo/intra-entry.archive.json b/tests/data/metainfo/intra-entry.archive.json new file mode 100644 index 0000000000000000000000000000000000000000..149753668cab26380a3ed296a8bac2acea0090a9 --- /dev/null +++ b/tests/data/metainfo/intra-entry.archive.json @@ -0,0 +1,38 @@ +{ + "definitions": { + "name": "test_package_name", + "section_definitions": [ + { + "name": "MySection", + "base_sections": [ + "nomad.datamodel.data.EntryData" + ], + "quantities": [ + { + "name": "my_quantity", + "type": { + "type_kind": "python", + "type_data": "str" + } + }, + { + "name": "datetime_list", + "type": { + "type_kind": "custom", + "type_data": "nomad.metainfo.metainfo.Datetime" + }, + "shape": ["*"] + } + ] + } + ] + }, + "data": { + "m_def": "#/definitions/section_definitions/0", + "my_quantity": "test_value", + "datetime_list": [ + "2022-04-01", + "2022-04-02" + ] + } +} \ No newline at end of file diff --git a/tests/data/metainfo/schema.archive.json b/tests/data/metainfo/schema.archive.json new file mode 100644 index 0000000000000000000000000000000000000000..26dd78e0ac53647ebf660c4d69ad3271e2a81c70 --- /dev/null +++ b/tests/data/metainfo/schema.archive.json @@ -0,0 +1,30 @@ +{ + "definitions": { + "name": "test_package_name", + "section_definitions": [ + { + "name": "MySection", + "base_sections": [ + "nomad.datamodel.data.EntryData" + ], + "quantities": [ + { + "name": "my_quantity", + "type": { + "type_kind": "python", + "type_data": "str" + } + }, + { + "name": "datetime_list", + "type": { + "type_kind": "custom", + "type_data": "nomad.metainfo.metainfo.Datetime" + }, + "shape": ["*"] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/tests/metainfo/test_metainfo.py b/tests/metainfo/test_metainfo.py index 439555968bc19958bb066505d97ba3aa7b5b0be1..c091cb6002f7e0f7ddac255a2dc98d5dd5daa9b0 100644 --- a/tests/metainfo/test_metainfo.py +++ b/tests/metainfo/test_metainfo.py @@ -268,6 +268,10 @@ class TestM2: pkg.init_metainfo() assert len(pkg.warnings) > 0 + # TODO + @pytest.mark.skip(reason=( + 'We disabled the constraint that is tested here, because some Nexus definitions ' + 'are violating it.')) def test_higher_shapes_require_dtype(self): class TestSection(MSection): # pylint: disable=unused-variable test = Quantity(type=int, shape=[3, 3]) diff --git a/tests/metainfo/test_references.py b/tests/metainfo/test_references.py index 71c1d00bc1e14c3b9f54da854f8aed5d902e4ae3..2097a052d8d309a72606d5e6b33d2242bcdd3dcd 100644 --- a/tests/metainfo/test_references.py +++ b/tests/metainfo/test_references.py @@ -18,6 +18,7 @@ from typing import cast import pytest +import os.path from nomad.metainfo import ( MSection, Quantity, Section, SubSection, MProxy, Reference, QuantityReference, File, @@ -323,3 +324,17 @@ def test_def_reference(): assert result.m_to_dict() == { 'test_quantity': 'TestValue' } + + +@pytest.mark.parametrize('mainfile', [ + 'intra-entry', 'inter-entry' +]) +def test_parse_with_references(mainfile): + from nomad.client import parse, normalize_all + entry_archive = parse( + os.path.join(os.path.dirname(__file__), f'../data/metainfo/{mainfile}.archive.json'), + metadata=dict(entry_id='test_entry_id'))[0] + normalize_all(entry_archive) + + m_def = entry_archive.m_to_dict()['data']['m_def'] + assert '#/definitions/' in m_def