diff --git a/README.md b/README.md index 9be6726e08f8f776ccb9443659b98f25fb55d122..726bf268719163c240dc7b5f9ce4ad5c11416f66 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,9 @@ contributing, and API reference. Omitted versions are plain bugfix releases with only minor changes and fixes. +### v0.10.6 +- support for NOMAD fields in optimade + ### v0.10.4 - new "basic" parser to cover codes without proper parser - removed old nomad-coe parser dependencies diff --git a/nomad/app/optimade/common.py b/nomad/app/optimade/common.py new file mode 100644 index 0000000000000000000000000000000000000000..224c98ea6291654d9121694861300c54c7012d6d --- /dev/null +++ b/nomad/app/optimade/common.py @@ -0,0 +1,40 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def provider_specific_fields(quantities=None) -> Generator[Tuple[str, 'Search'], None, None]:
    '''
    Yields the NOMAD search quantities that are exposed as provider specific
    optimade fields.

    Quantities whose qualified name has no ``.`` are plain metadata and are always
    included. Of the remaining quantities only those below ``dft`` or
    ``encyclopedia`` are kept, and quantities nested in an ``optimade`` section
    (e.g. ``dft.optimade.elements``) are dropped because they are already part of
    optimade.

    Arguments:
        quantities: Optional iterable of objects that provide a ``qualified_name``
            attribute. If omitted, all values of ``search_quantities`` are used.

    Returns:
        A generator of ``(name, search_quantity)`` tuples, where ``name`` is the
        qualified name with every ``.`` replaced by ``_``. The name carries no
        provider prefix.
    '''
    if quantities is None:
        quantities = search_quantities.values()

    for search_quantity in quantities:
        nmd_name = search_quantity.qualified_name
        path = nmd_name.split('.')

        # A single segment means plain metadata, which needs no further filtering.
        if len(path) > 1:
            if path[0] not in ('dft', 'encyclopedia'):
                # other domains fields that do not make sense in the optimade context
                continue
            if len(path) > 2 and path[1] == 'optimade':
                # these are already in optimade
                continue

        yield nmd_name.replace('.', '_'), search_quantity
# numpy's float64 dtype compares equal to the builtin ``float``; it is mapped to
# ``float`` explicitly when the pydantic types are chosen below.
float64 = np.dtype('float64')


class StructureResourceAttributesByAlias(StructureResourceAttributes):
    '''
    Structure attributes whose ``dict`` representation always uses the field
    aliases instead of the python attribute names.
    '''

    def dict(self, *args, **kwargs):
        # Overrides whatever the caller passed for by_alias.
        kwargs['by_alias'] = True
        return super().dict(*args, **kwargs)


def create_nomad_structure_resource_attributes_cls():
    '''
    Creates a pydantic model that extends the optimade structure attributes with
    one optional field per NOMAD provider specific search quantity.

    Each field is stored under the attribute name ``nmd_<name>`` and exposed under
    the alias ``_nmd_<name>``. Quantities of a reference type are left out.

    Returns:
        The model class ``NomadStructureResourceAttributes``, derived from
        :class:`StructureResourceAttributesByAlias`.

    Raises:
        NotImplementedError: If a search quantity has a type that cannot be
            mapped to a pydantic type.
    '''
    fields: Dict[str, Tuple[type, OptimadeField]] = {}

    for name, search_quantity in provider_specific_fields():
        quantity = search_quantity.definition

        pydantic_type: type
        if not quantity.is_scalar:
            pydantic_type = list
        elif quantity.type in (str, int, float, bool):
            # The float64 dtype also passes the membership test above, because
            # it compares equal to ``float``.
            pydantic_type = float if quantity.type == float64 else quantity.type
        elif quantity.type == metainfo.Datetime:
            pydantic_type = datetime
        elif isinstance(quantity.type, metainfo.MEnum):
            pydantic_type = str
        elif isinstance(quantity.type, metainfo.Reference):
            continue
        else:
            # This runs at import time; name the quantity so that the failure
            # can be diagnosed.
            raise NotImplementedError(
                f'Search quantity type not supported in optimade API: '
                f'{name} ({quantity.type})')

        # NOTE(review): the attribute name has no leading underscore, presumably
        # because pydantic does not treat underscore-prefixed names as fields;
        # the public name is provided through the alias instead.
        fields[f'nmd_{name}'] = Optional[pydantic_type], OptimadeField(
            None,
            alias=f'_nmd_{name}',
            sortable=False,
            description=quantity.description or 'Not available. Will be added soon.',
            support=SupportLevel.OPTIONAL,
            queryable=SupportLevel.OPTIONAL)

    return create_model(
        'NomadStructureResourceAttributes',
        __base__=StructureResourceAttributesByAlias,
        **fields)


NomadStructureResourceAttributes = create_nomad_structure_resource_attributes_cls()


class NomadStructureResource(StructureResource):
    ''' The optimade structure resource with the NOMAD specific attributes. '''
    attributes: NomadStructureResourceAttributes  # type: ignore


# Replace the registered schema of the structures info endpoint with the
# schema of the NOMAD resource.
ENTRY_INFO_SCHEMAS['structures'] = NomadStructureResource.schema
def test_nmd_properties_info(client):
    ''' The structures info endpoint lists the NOMAD provider specific properties. '''
    response = client.get('/optimade/info/structures')
    assert response.status_code == 200

    properties = response.json()['data']['properties']
    for expected in ('_nmd_dft_system', '_nmd_encyclopedia_material_formula', '_nmd_atoms'):
        assert expected in properties
def test_nmd_properties_include_all(client, example_structures):
    ''' Requesting all provider specific fields at once returns a valid structure. '''
    requested_fields = [f'_nmd_{name}' for name, _ in provider_specific_fields()]
    rv = client.get(
        f'/optimade/structures/test_calc_id_1?response_fields={",".join(requested_fields)}')
    assert rv.status_code == 200

    payload = rv.json()
    assert payload.get('data') is not None
    attributes = payload['data'].get('attributes')
    assert attributes is not None

    assert attributes.get('_nmd_atoms') == ['H', 'O']
    for key in ('_nmd_dft_system', '_nmd_encyclopedia_material_formula'):
        assert key in attributes