diff --git a/nomad/graph/graph_reader.py b/nomad/graph/graph_reader.py
index 84ee4b7f169926fddb6321f01e24312b37421bdb..cdf50b86c418e9fcf42a95180b478da5a1f127c3 100644
--- a/nomad/graph/graph_reader.py
+++ b/nomad/graph/graph_reader.py
@@ -58,6 +58,7 @@ from nomad.archive import ArchiveDict, ArchiveList, to_json
 from nomad.archive.storage_v2 import ArchiveDict as ArchiveDictNew
 from nomad.archive.storage_v2 import ArchiveList as ArchiveListNew
 from nomad.datamodel import Dataset, EntryArchive, ServerContext, User
+from nomad.datamodel.metainfo.plot import PlotlyFigure
 from nomad.datamodel.util import parse_path
 from nomad.files import RawPathInfo, UploadFiles
 from nomad.graph.lazy_wrapper import (
@@ -259,7 +260,7 @@ class GraphNode:
             raise ArchiveError(f'Circular reference detected: {reference_url}.')
 
         try:
-            target = await self.__goto_path(self.archive_root, path_stack)
+            target = await _goto_path(self.archive_root, path_stack)
         except (KeyError, IndexError):
             raise ArchiveError(f'Archive {self.entry_id} does not contain {reference}.')
 
@@ -317,7 +318,7 @@ class GraphNode:
 
         try:
             # now go to the target path
-            other_target = await self.__goto_path(other_archive_root, path_stack)
+            other_target = await _goto_path(other_archive_root, path_stack)
         except (KeyError, IndexError):
             raise ArchiveError(f'Archive {other_entry_id} does not contain {path}.')
 
@@ -352,16 +353,14 @@ class GraphNode:
             result_root=self.ref_result_root,
         )
 
-    @staticmethod
-    async def __goto_path(target_root: ArchiveDict | dict, path_stack: list) -> Any:
-        """
-        Go to the specified path in the data.
-        """
-        for key in path_stack:
-            target_root = await goto_child(
-                target_root, int(key) if key.isdigit() else key
-            )
-        return target_root
+
+async def _goto_path(target_root: GenericDict, path_stack: list) -> Any:
+    """
+    Go to the specified path in the data.
+    """
+    for key in path_stack:
+        target_root = await goto_child(target_root, int(key) if key.isdigit() else key)
+    return target_root
 
 
 async def _if_exists(target_root: dict, path_stack: list) -> bool:
@@ -424,7 +423,7 @@ def _convert_ref_to_path_string(ref: str, upload_id: str = None) -> str:
 
 
 def _to_response_config(config: RequestConfig, exclude: list = None, **kwargs):
-    response_config = config.dict(exclude_unset=True, exclude_none=True)
+    response_config = config.model_dump(exclude_unset=True, exclude_none=True)
 
     for item in ('include', 'exclude'):
         if isinstance(x := response_config.pop(item, None), frozenset):
@@ -989,7 +988,7 @@ class GeneralReader:
     @staticmethod
     async def _overwrite_upload(item: Upload):
         plain_dict = orjson.loads(
-            upload_to_pydantic(item, include_total_count=False).json()
+            upload_to_pydantic(item, include_total_count=False).model_dump_json()
         )
         plain_dict.pop('entries', None)
         cached_item = CachedUpload(item)
@@ -1026,7 +1025,7 @@ class GeneralReader:
 
     @staticmethod
     def _overwrite_entry(item: Entry):
-        plain_dict = orjson.loads(entry_to_pydantic(item).json())
+        plain_dict = orjson.loads(entry_to_pydantic(item).model_dump_json())
         plain_dict.pop('entry_metadata', None)
         if mainfile := plain_dict.pop('mainfile', None):
             plain_dict['mainfile_path'] = mainfile
@@ -1334,7 +1333,7 @@ class MongoReader(GeneralReader):
 
         if config.query:
            assert isinstance(config.query, Metadata)
-            search_query = config.query.dict(exclude_none=True)  # type: ignore
+            search_query = config.query.model_dump(exclude_none=True)  # type: ignore
 
             if config.query.owner:
                 search_params['owner'] = config.query.owner
@@ -1391,7 +1390,9 @@ class MongoReader(GeneralReader):
                 references |= Q(references__regex=item)
             mongo_query &= references
 
-        return config.query.dict(exclude_unset=True), self.entries.filter(mongo_query)
+        return config.query.model_dump(exclude_unset=True), self.entries.filter(
+            mongo_query
+        )
 
     async def _query_uploads(self, config: RequestConfig):
         if not config.query:
@@ -1424,7 +1425,9 @@
         elif config.query.is_owned is False:
             mongo_query &= Q(main_author__ne=self.auth_user_id)
 
-        return config.query.dict(exclude_unset=True), self.uploads.filter(mongo_query)
+        return config.query.model_dump(exclude_unset=True), self.uploads.filter(
+            mongo_query
+        )
 
     async def _query_datasets(self, config: RequestConfig):
         if not config.query:
@@ -1448,7 +1451,9 @@
                 dataset_name=re.compile(rf'^{config.query.prefix}.*$', re.IGNORECASE)
             )
 
-        return config.query.dict(exclude_unset=True), self.datasets.filter(mongo_query)
+        return config.query.model_dump(exclude_unset=True), self.datasets.filter(
+            mongo_query
+        )
 
     @staticmethod
     async def _query_groups(config: RequestConfig):
@@ -1457,7 +1462,7 @@
             if isinstance(config.query, UserGroupQuery)
             else UserGroupQuery()
         )
-        return query.dict(exclude_unset=True), MongoUserGroup.get_by_query(query)
+        return query.model_dump(exclude_unset=True), MongoUserGroup.get_by_query(query)
 
     async def _normalise(
         self, mongo_result, config: RequestConfig, transformer: Callable
@@ -1473,7 +1478,7 @@
         elif isinstance(config.pagination, Pagination):
             pagination_response = PaginationResponse(
                 total=mongo_result.count() if mongo_result else 0,
-                **config.pagination.dict(),
+                **config.pagination.model_dump(),
             )
 
         if transformer is None:
@@ -1732,7 +1737,7 @@
                 filtered, child_config, transformer
             )
             if pagination is not None:
-                pagination_dict = pagination.dict()
+                pagination_dict = pagination.model_dump()
                 if pagination_dict.get('order_by', None) == 'mainfile':
                     pagination_dict['order_by'] = 'mainfile_path'
                 await _populate_result(
@@ -2403,7 +2408,7 @@ class FileSystemReader(GeneralReader):
         if config.pagination is not None:
             assert isinstance(config.pagination, RawDirPagination)
             start: int = config.pagination.get_simple_index()
-            pagination: dict = config.pagination.dict(exclude_none=True)
+            pagination: dict = config.pagination.model_dump(exclude_none=True)
         else:
             start = 0
             pagination = dict(page=1, page_size=10, order='asc')
@@ -2710,9 +2715,9 @@ class ArchiveReader(ArchiveLikeReader):
 
             if isinstance(value, RequestConfig):
                 # this is a leaf, resolve it according to the config
-                await self._resolve(
-                    child(current_path=child_path, archive=child_archive), value
-                )
+                child_node = child(current_path=child_path, archive=child_archive)
+                await self._resolve_figure(child_node, node, value)
+                await self._resolve(child_node, value)
             elif isinstance(value, dict):
                 # this is a nested query, keep walking down the tree
                 async def __walk(__path, __archive):
@@ -2736,6 +2741,48 @@
                 # should never reach here
                 raise ConfigError(f'Invalid required config: {value}.')
 
+    @staticmethod
+    async def _resolve_figure(
+        child: GraphNode, parent: GraphNode, config: RequestConfig
+    ):
+        """
+        Ensure a Figure object is properly resolved.
+        """
+        if not isinstance(
+            child.definition, SubSection
+        ) or not child.definition.sub_section.m_follows(
+            PlotlyFigure.m_def, self_as_definition=True
+        ):
+            return
+
+        if config.directive is not DirectiveType.resolved:
+            return
+
+        async def _visit(_node):
+            if isinstance(_node, GenericDict):
+                for v in _node.values():
+                    await _visit(v)
+            elif isinstance(_node, GenericList):
+                if len(_node) > 0 and isinstance(_node[0], int | float):
+                    # looks like a data array
+                    return
+                for v in _node:
+                    await _visit(v)
+            elif isinstance(_node, str):
+                _node = [x for x in _node.lstrip('#').split('/') if x]
+                if _node and await _if_exists(parent.archive, _node):
+                    await _populate_result(
+                        parent.result_root,
+                        parent.current_path + _node,
+                        await _goto_path(parent.archive, _node),
+                    )
+
+        if not isinstance(child.archive, GenericList):
+            return await _visit(child.archive)
+
+        for i in _normalise_index(config.index, len(child.archive)):
+            await _visit(child.archive[i])
+
     async def _resolve(
         self,
         node: GraphNode,
@@ -2852,6 +2899,7 @@
                     node.result_root, child_node.current_path, child_node.archive
                 )
             else:
+                await self._resolve_figure(child_node, node, child_config)
                 await self._resolve(child_node, child_config)
 
     async def _check_definition(
@@ -3418,7 +3466,7 @@
         # we use the class member to cache the response
        # it will be written to the result tree later
        # we do not direct perform writing here to avoid turning all methods async
-        self._pagination_response = default_pagination.dict()
+        self._pagination_response = default_pagination.model_dump()
         self._pagination_response['total'] = total
 
         return all_keys
diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py
index 4ab86f86ffb5ec5fd9ffe45d74b15a00d786b8b8..c6deb3a24fe1d294f17925b924f322a23da80ea3 100644
--- a/nomad/metainfo/metainfo.py
+++ b/nomad/metainfo/metainfo.py
@@ -1776,13 +1776,24 @@ class MSection(metaclass=MObjectMeta):
         # todo: mypy bug https://github.com/python/mypy/issues/14458
         return cast(cls, self)  # type: ignore
 
-    def m_follows(self, definition: Section) -> bool:
+    def m_follows(
+        self, definition: Section, *, self_as_definition: bool = False
+    ) -> bool:
         """
         Determines if this section's definition is or is derived from the given definition.
+
+        Arguments:
+            definition: The definition to check against.
+            self_as_definition: If True, the current section is considered as a definition
+                instead of a data section. This is useful when directly checking the definitions
+                without the need to create a dummy data section.
         """
         if not isinstance(definition, Section):
             raise TypeError(f'{definition} is not an instance of class Section.')
-        return definition in itertools.chain(self.m_def.all_base_sections, [self.m_def])
+
+        target = self if self_as_definition else self.m_def
+
+        return definition in itertools.chain(target.all_base_sections, [target])
 
     def m_to_dict(
         self,
diff --git a/nomad/utils/exampledata.py b/nomad/utils/exampledata.py
index 4603a39b65b32d6c9e5afa016c34e1ed1275981d..8778de74a5b5885a0af8f3ee04024cded81273a3 100644
--- a/nomad/utils/exampledata.py
+++ b/nomad/utils/exampledata.py
@@ -328,7 +328,11 @@ class ExampleData:
         if archive is not None:
             entry_archive.m_update(**archive)
 
-        if entry_archive.results.material.material_id is None:
+        if (
+            entry_archive.results
+            and entry_archive.results.material
+            and entry_archive.results.material.material_id is None
+        ):
             entry_archive.results.material.material_id = material_id
 
         self.archives[entry_id] = entry_archive
diff --git a/tests/graph/test_graph_reader.py b/tests/graph/test_graph_reader.py
index ad033aec9c837fda3d1e9f9784d4b8a750f62803..946d196ba39c17d8c585d7240d6366c3e5bc9cf8 100644
--- a/tests/graph/test_graph_reader.py
+++ b/tests/graph/test_graph_reader.py
@@ -34,7 +34,7 @@ from nomad.graph.graph_reader import (
 )
 from nomad.graph.lazy_wrapper import LazyWrapper
 from nomad.utils.exampledata import ExampleData
-from tests.normalizing.conftest import simulationworkflowschema
+from tests.normalizing.conftest import run_processing, simulationworkflowschema
 from tests.utils import ListWithSortKey
 
 
@@ -4034,3 +4034,113 @@ def json_dict():
             'results': {'calculation_result_ref': '/run/0/calculation/1'},
         },
     }
+
+
+@pytest.fixture(scope='function')
+def example_data_with_figure(proc_infra, user1):
+    data = ExampleData(main_author=user1)
+
+    data.create_upload(
+        upload_id='id_published_with_ref', upload_name='name_published', published=False
+    )
+
+    directory = 'tests/data/datamodel/metainfo/plotly'
+    mainfile = 'plotly.schema.archive.yaml'
+    data.create_entry(
+        upload_id='id_published_with_ref',
+        entry_id='id_plotly',
+        entry_archive=run_processing(directory, mainfile),
+    )
+
+    for archive in data.archives.values():
+        archive.metadata.apply_archive_metadata(archive)
+
+    data.save(with_files=True, with_es=True, with_mongo=True)
+
+    yield data
+
+    data.delete()
+
+
+@pytest.mark.parametrize(
+    'query,result',
+    [
+        # plain get default quantities
+        # the references are not resolved
+        pytest.param(
+            {
+                Token.ARCHIVE: {
+                    'data': {
+                        'figures[0]': {
+                            'm_request': {
+                                'directive': 'plain',
+                            },
+                        }
+                    }
+                },
+            },
+            {
+                'archive': {
+                    'data': {
+                        'figures': [
+                            {
+                                'label': 'graph object 1',
+                                'figure': {
+                                    'data': {'x': '#xArr', 'y': '#xArr'},
+                                    'layout': {
+                                        'title': {'text': 'Plot in section level'},
+                                        'xaxis': {'title': {'text': 'x data'}},
+                                        'yaxis': {'title': {'text': 'y data'}},
+                                    },
+                                },
+                            }
+                        ]
+                    }
+                }
+            },
+            id='plain-read',
+        ),
+        pytest.param(
+            {
+                Token.ARCHIVE: {
+                    'data': {
+                        'figures[0]': {
+                            'm_request': {
+                                'directive': 'resolved',
+                            },
+                        }
+                    }
+                },
+            },
+            {
+                'archive': {
+                    'data': {
+                        'xArr': [1.1, 2.0, 3.0, 4.0, 5.0],
+                        'figures': [
+                            {
+                                'label': 'graph object 1',
+                                'figure': {
+                                    'data': {'x': '#xArr', 'y': '#xArr'},
+                                    'layout': {
+                                        'title': {'text': 'Plot in section level'},
+                                        'xaxis': {'title': {'text': 'x data'}},
+                                        'yaxis': {'title': {'text': 'y data'}},
+                                    },
+                                },
+                            }
+                        ],
+                    }
+                }
+            },
+            id='read-resolved',
+        ),
+    ],
+)
+def test_figure_resolution(user1, example_data_with_figure, query, result):
+    def __entry_print(required, *, result=None):
+        with EntryReader(required, user=user1) as reader:
+            response = reader.sync_read('id_plotly')
+        if result:
+            assert_dict(response, result)
+
+    __entry_print(query, result=result)
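
Note (not part of the patch above): a minimal sketch of how the new self_as_definition keyword of MSection.m_follows is intended to be used. The BaseFigure and MyFigure section names below are invented for illustration; only MSection and m_def come from nomad.metainfo.

    from nomad.metainfo import MSection


    class BaseFigure(MSection):
        pass


    class MyFigure(BaseFigure):
        pass


    # data-section usage (unchanged behaviour): the check runs against the
    # definition of the instance, i.e. MyFigure.m_def and its base sections
    assert MyFigure().m_follows(BaseFigure.m_def)

    # definition-level usage (new): MyFigure.m_def is itself a Section object,
    # so with self_as_definition=True the check runs directly against its own
    # base sections, without creating a dummy data section first; this is what
    # _resolve_figure in graph_reader.py relies on when comparing a SubSection
    # target against PlotlyFigure.m_def
    assert MyFigure.m_def.m_follows(BaseFigure.m_def, self_as_definition=True)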