diff --git a/nomad/app/optimade/__init__.py b/nomad/app/optimade/__init__.py
index f1f98b9f8274fe5ff19594ec78acbdc8bf0d51a9..00dec54515ca649fca61e449f88d60cacf17155f 100644
--- a/nomad/app/optimade/__init__.py
+++ b/nomad/app/optimade/__init__.py
@@ -2,6 +2,10 @@
 import os
 import sys
 import importlib
+import warnings
+
+warnings.filterwarnings('ignore', message=r'v0\.17 of the `optimade` package.*')
+
 
 # patch optimade python tools config (patched module most be outside this module to force import before optimade)
 os.environ['OPTIMADE_CONFIG_FILE'] = os.path.join(os.path.dirname(__file__), 'optimade_config.json')
@@ -13,7 +17,25 @@ sys.modules['optimade.server.logger'] = importlib.import_module('nomad.app.optim
 
 from nomad import config, utils  # nopep8
 from optimade.server.config import CONFIG  # nopep8
 CONFIG.root_path = '%s/optimade' % config.services.api_base_path
-CONFIG.base_url = config.api_url(api='optimade')
+CONFIG.base_url = '%s://%s' % (
+    'https' if config.services.https else 'http',
+    config.services.api_host.strip('/'))
+
+
+from .common import provider_specific_fields, create_provider_field  # nopep8
+
+
+CONFIG.provider_fields = dict(
+    structures=[
+        create_provider_field(name, quantity.annotation.definition)
+        for name, quantity in provider_specific_fields().items()
+    ] + [
+        dict(name='archive_url', description='', type='string', sortable=False),
+        dict(name='entry_page_url', description='', type='string', sortable=False),
+        dict(name='raw_file_download_url', description='', type='string', sortable=False)
+    ]
+)
+
 from optimade.server import main as optimade  # nopep8
 from optimade.server.routers import structures  # nopep8
@@ -62,5 +84,13 @@ def general_exception(request, exc, status_code=500, **kwargs):
 
 setattr(exception_handlers, 'general_exception', general_exception)
 
+
+@optimade.app.on_event('startup')
+async def startup_event():
+    from optimade.server.warnings import OptimadeWarning
+    import warnings
+
+    warnings.filterwarnings('ignore', category=OptimadeWarning)
+
 # "export" the app object
 optimade_app = optimade.app
diff --git a/nomad/app/optimade/common.py b/nomad/app/optimade/common.py
index 66d8f97eb69ff5f63297603f9a7893abef050582..cbc55710665aad1600d43b39c22ee9895c4cf510 100644
--- a/nomad/app/optimade/common.py
+++ b/nomad/app/optimade/common.py
@@ -17,14 +17,44 @@
 #
 from typing import Dict, cast
+import numpy as np
 
-from nomad.metainfo.metainfo import Quantity, Reference
+from nomad.metainfo.metainfo import Quantity, Reference, Datetime, MEnum
 from nomad.metainfo.elasticsearch_extension import SearchQuantity, entry_type
 
 
 _provider_specific_fields: Dict[str, SearchQuantity] = None
 
 
+def create_provider_field(name, definition):
+    type = None
+    if not definition.is_scalar:
+        type = 'list'
+    elif definition.type == str or isinstance(definition.type, MEnum):
+        type = 'string'
+    elif definition.type == bool:
+        type = 'boolean'
+    elif definition.type == Datetime:
+        type = 'timestamp'
+    elif isinstance(definition.type, np.dtype) or definition.type == float:
+        type = 'float'
+    elif definition.type == int:
+        type = 'integer'
+    else:
+        raise NotImplementedError(
+            f'Optimade provider field with NOMAD type {definition.type} not implemented.')
+
+    description = definition.description
+    if not description:
+        description = 'no description available'
+
+    return dict(
+        name=name,
+        description=description,
+        type=type,
+        sortable=False)
+
+
 def provider_specific_fields() -> Dict[str, SearchQuantity]:
     global _provider_specific_fields
diff --git a/nomad/app/optimade/elasticsearch.py b/nomad/app/optimade/elasticsearch.py
index 2215d63db91b93e86a2d08fcac85271272add21c..1e5e4ce695c5e3a46ffdcb9545c10309d59af24b 100644
--- a/nomad/app/optimade/elasticsearch.py
+++ b/nomad/app/optimade/elasticsearch.py
@@ -1,23 +1,17 @@
-from typing import Optional, Tuple, List, Union, Dict, Set, Any
+from typing import List, Dict, Set, Any
 from elasticsearch_dsl import Q
-from fastapi import HTTPException
-from pydantic import create_model
-from datetime import datetime
-import numpy as np
 
 from optimade.filterparser import LarkParser
 from optimade.server.entry_collections import EntryCollection
-from optimade.server.query_params import EntryListingQueryParams, SingleEntryQueryParams
 from optimade.server.exceptions import BadRequest
 from optimade.server.mappers import StructureMapper
-from optimade.models import StructureResource, StructureResourceAttributes
-from optimade.models.utils import OptimadeField, SupportLevel
-from optimade.server.schemas import ENTRY_INFO_SCHEMAS
+from optimade.server.mappers.entries import classproperty
+from optimade.models import StructureResource
 
 from nomad.units import ureg
 from nomad.search import search
 from nomad.app.v1.models import MetadataPagination, MetadataRequired
-from nomad import datamodel, files, utils, metainfo, config
+from nomad import datamodel, files, utils, config
 from nomad.normalizing.optimade import (
     optimade_chemical_formula_reduced, optimade_chemical_formula_anonymous,
     optimade_chemical_formula_hill)
@@ -27,94 +21,31 @@ from .common import provider_specific_fields
 
 logger = utils.get_logger(__name__)
 
 
-float64 = np.dtype('float64')
-int64 = np.dtype('int64')
-int32 = np.dtype(np.int32)
-
-
-class StructureResourceAttributesByAlias(StructureResourceAttributes):
-    nmd_entry_page_url: Optional[str] = OptimadeField(
-        None,
-        alias='_nmd_entry_page_url',
-        description='The url for the NOMAD gui entry page for this structure.',
-        support=SupportLevel.OPTIONAL)
-
-    nmd_raw_file_download_url: Optional[str] = OptimadeField(
-        None,
-        alias='_nmd_raw_file_download_url',
-        description='The url to download all entry raw files as .zip file.',
-        support=SupportLevel.OPTIONAL)
-
-    nmd_archive_url: Optional[str] = OptimadeField(
-        None,
-        alias='_nmd_archive_url',
-        description='The url to the NOMAD archive json of this structure.',
-        support=SupportLevel.OPTIONAL)
-
-    def dict(self, *args, **kwargs):
-        kwargs['by_alias'] = True
-        return super().dict(*args, **kwargs)
-
-
-def create_nomad_structure_resource_attributes_cls():
-    fields: Dict[str, Tuple[type, OptimadeField]] = {}
-
-    for name, search_quantity in provider_specific_fields().items():
-        quantity = search_quantity.definition
-
-        pydantic_type: type
-        if not quantity.is_scalar:
-            pydantic_type = list
-        elif quantity.type == int32:
-            pydantic_type = int
-        elif quantity.type in [str, int, float, bool]:
-            if quantity.type == float64:
-                pydantic_type = float
-            elif quantity.type == int64:
-                pydantic_type = int
-            else:
-                pydantic_type = quantity.type
-        elif quantity.type == metainfo.Datetime:
-            pydantic_type = datetime
-        elif isinstance(quantity.type, metainfo.MEnum):
-            pydantic_type = str
-        elif isinstance(quantity.type, metainfo.Reference):
-            continue
-        else:
-            raise NotImplementedError('Search quantity type not support in optimade API')
-
-        field = Optional[pydantic_type], OptimadeField(
-            None,
-            alias=f'_nmd_{name}',
-            sortable=False,
-            description=quantity.description if quantity.description else 'Not available. Will be added soon.',
-            support=SupportLevel.OPTIONAL,
-            queryable=SupportLevel.OPTIONAL)
-
-        fields[f'nmd_{name}'] = field
-
-    return create_model(
-        'NomadStructureResourceAttributes',
-        __base__=StructureResourceAttributesByAlias,
-        **fields)
-
-NomadStructureResourceAttributes = create_nomad_structure_resource_attributes_cls()
+class NomadStructureMapper(StructureMapper):
+    @classmethod
+    def deserialize(cls, results):
+        # We are not doing this here, but will do it in the overwritten StructureCollection
+        # find method below
+        return results
 
-class NomadStructureResource(StructureResource):
-    attributes: NomadStructureResourceAttributes  # type: ignore
+    @classproperty
+    def ALL_ATTRIBUTES(cls) -> Set[str]:  # pylint: disable=no-self-argument
+        result = getattr(cls, '_ALL_ATTRIBUTES', None)
+        if result is None:
+            result = StructureMapper.ALL_ATTRIBUTES  # pylint: disable=no-member
+            cls._ALL_ATTRIBUTES = result
 
-
-ENTRY_INFO_SCHEMAS['structures'] = NomadStructureResource.schema
+        return result
 
 
 class StructureCollection(EntryCollection):
 
     def __init__(self):
         super().__init__(
-            resource_cls=NomadStructureResource,
-            resource_mapper=StructureMapper,
+            resource_cls=StructureResource,
+            resource_mapper=NomadStructureMapper,
             transformer=get_transformer(without_prefix=False))
 
         self.parser = LarkParser(version=(1, 0, 0), variant="default")
@@ -137,36 +68,29 @@ class StructureCollection(EntryCollection):
 
         # This seams solely mongodb specific
         raise NotImplementedError()
 
-    def find(
-            self,
-            params: Union[EntryListingQueryParams, SingleEntryQueryParams]) \
-            -> Tuple[List[StructureResource], int, bool, set]:
-
-        criteria = self.handle_query_params(params)
-        single_entry = isinstance(params, SingleEntryQueryParams)
-        response_fields = criteria.pop("fields")
-
-        results, data_returned, more_data_available = self._run_db_query(
-            criteria, single_entry=isinstance(params, SingleEntryQueryParams)
-        )
+    def find(self, params):
 
-        results = self._es_to_optimade_results(results, response_fields=response_fields)
-
-        if single_entry:
-            results = results[0] if results else None
+        (
+            results,
+            data_returned,
+            more_data_available,
+            exclude_fields,
+            include_fields
+        ) = super().find(params)
 
-            if data_returned > 1:
-                raise HTTPException(
-                    status_code=404,
-                    detail=f'Instead of a single entry, {data_returned} entries were found')
+        if isinstance(results, list):
+            results = self._es_to_optimade_results(results, response_fields=include_fields)
+        else:
+            results = self._es_to_optimade_result(results, response_fields=include_fields)
 
-        exclude_fields = self.all_fields - response_fields
+        results = StructureMapper.deserialize(results)
 
         return (
             results,
             data_returned,
             more_data_available,
             exclude_fields,
+            include_fields
         )
 
     def _check_aliases(self, aliases):
@@ -175,7 +99,10 @@ class StructureCollection(EntryCollection):
 
     def _es_to_optimade_result(
             self, es_result: dict, response_fields: Set[str],
-            upload_files_cache: Dict[str, files.UploadFiles]) -> StructureResource:
+            upload_files_cache: Dict[str, files.UploadFiles] = None) -> StructureResource:
+
+        if upload_files_cache is None:
+            upload_files_cache = {}
 
         entry_id, upload_id = es_result['entry_id'], es_result['upload_id']
         upload_files = upload_files_cache.get(upload_id)
@@ -195,22 +122,19 @@ class StructureCollection(EntryCollection):
                 return None
 
            entry_archive_reader = archive_reader[entry_id]
-            archive = datamodel.EntryArchive(
-                metadata=datamodel.EntryMetadata.m_from_dict(
-                    entry_archive_reader['metadata'].to_dict())
-            )
+            archive = {
+                'metadata': entry_archive_reader['metadata'].to_dict()}
 
            # Lazy load results if only if results provider specfic field is requested
            def get_results():
-                if not archive.results:
-                    archive.results = datamodel.Results.m_from_dict(
-                        entry_archive_reader['results'].to_dict())
-                return archive.results
+                if 'results' not in archive:
+                    archive['results'] = entry_archive_reader['results'].to_dict()
 
-            attrs = archive.metadata.optimade.m_to_dict()
+            attrs = archive['metadata'].get('optimade', {})
            attrs['immutable_id'] = entry_id
-            attrs['last_modified'] = archive.metadata.upload_create_time
+            attrs['id'] = entry_id
+            attrs['last_modified'] = archive['metadata']['upload_create_time']
 
            # TODO this should be removed, once all data is reprocessed with the right normalization
            attrs['chemical_formula_reduced'] = optimade_chemical_formula_reduced(
@@ -233,15 +157,15 @@ class StructureCollection(EntryCollection):
                    continue
 
                if request_field == '_nmd_archive_url':
-                    attrs[request_field] = config.api_url() + f'/archive/{upload_id}/{entry_id}'
+                    attrs[request_field[5:]] = config.api_url() + f'/archive/{upload_id}/{entry_id}'
                    continue
 
                if request_field == '_nmd_entry_page_url':
-                    attrs[request_field] = config.gui_url(f'entry/id/{upload_id}/{entry_id}')
+                    attrs[request_field[5:]] = config.gui_url(f'entry/id/{upload_id}/{entry_id}')
                    continue
 
                if request_field == '_nmd_raw_file_download_url':
-                    attrs[request_field] = config.api_url() + f'/raw/calc/{upload_id}/{entry_id}'
+                    attrs[request_field[5:]] = config.api_url() + f'/raw/calc/{upload_id}/{entry_id}'
                    continue
 
                search_quantity = provider_specific_fields().get(request_field[5:])
@@ -261,7 +185,7 @@ class StructureCollection(EntryCollection):
                                value = None
                                break
                            section = section[0]
-                        value = getattr(section, segment)
+                        value = section[segment]
                        section = value
 
                    # Empty values are not stored and only the magnitude of
@@ -269,18 +193,14 @@ class StructureCollection(EntryCollection):
                    if value is not None:
                        if isinstance(value, ureg.Quantity):
                            value = value.magnitude
-                        attrs[request_field] = value
+                        attrs[request_field[5:]] = value
                except Exception:
                    # TODO there a few things that can go wrong. Most notable the search
                    # quantity might have a path with repeated sections. This won't be
                    # handled right now.
                    pass
 
-        return self.resource_cls(
-            type='structures',
-            id=entry_id,
-            attributes=attrs,
-            relationships=None)
+        return attrs
 
     def _es_to_optimade_results(self, es_results: List[dict], response_fields: Set[str]):
         upload_files_cache: Dict[str, files.UploadFiles] = {}
diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py
index 4ab273ee8389962435935269207bbfecbbaf359f..1a58b605faa3d45996e97732a90b8f3e3bb6102c 100644
--- a/nomad/app/optimade/filterparser.py
+++ b/nomad/app/optimade/filterparser.py
@@ -22,12 +22,12 @@ from cachetools import cached
 
 from optimade.filterparser import LarkParser
 from optimade.filtertransformers.elasticsearch import (
-    Quantity, ElasticTransformer as OPTElasticTransformer, _cmp_operators)
+    ElasticsearchQuantity as Quantity, ElasticTransformer as OPTElasticTransformer)
 
 from .common import provider_specific_fields
 
 
-_parser = LarkParser(version=(0, 10, 1))
+_parser = LarkParser(version=(1, 0, 1))
 
 
 class FilterException(Exception):
@@ -40,16 +40,16 @@ def _get_transformer(without_prefix):
     from nomad.datamodel import OptimadeEntry
     quantities: Dict[str, Quantity] = {
         q.name: Quantity(
-            q.name, es_field='optimade.%s' % q.name,
+            q.name, backend_field='optimade.%s' % q.name,
             elastic_mapping_type=q.a_elasticsearch.mapping['type'])
         for q in OptimadeEntry.m_def.all_quantities.values()
         if 'elasticsearch' in q.m_annotations}
 
-    quantities['id'] = Quantity('id', es_field='entry_id', elastic_mapping_type='keyword')
-    quantities['immutable_id'] = Quantity('immutable_id', es_field='entry_id', elastic_mapping_type='keyword')
+    quantities['id'] = Quantity('id', backend_field='entry_id', elastic_mapping_type='keyword')
+    quantities['immutable_id'] = Quantity('immutable_id', backend_field='entry_id', elastic_mapping_type='keyword')
     quantities['last_modified'] = Quantity(
-        'last_modified', es_field='upload_create_time', elastic_mapping_type='date')
+        'last_modified', backend_field='upload_create_time', elastic_mapping_type='date')
 
     quantities['elements'].length_quantity = quantities['nelements']
     quantities['elements'].nested_quantity = quantities['elements_ratios']
@@ -64,10 +64,10 @@ def _get_transformer(without_prefix):
         if name not in quantities:
             quantities[name] = Quantity(
                 name,
-                es_field=search_quantity.search_field,
+                backend_field=search_quantity.search_field,
                 elastic_mapping_type=search_quantity.mapping['type'])
 
-    return ElasticTransformer(quantities=quantities.values())
+    return ElasticTransformer(quantities=quantities)
 
 
 def parse_filter(filter_str: str, without_prefix=False) -> Q:
@@ -104,8 +104,8 @@ class ElasticTransformer(OPTElasticTransformer):
         operator, and value
         """
         field = self._field(quantity, nested=nested)
-        if op in _cmp_operators:
-            return Q("range", **{field: {_cmp_operators[op]: value}})
+        if op in self.operator_map:
+            return Q("range", **{field: {self.operator_map[op]: value}})
 
         if quantity.elastic_mapping_type == 'text':
             query_type = "match"
@@ -136,7 +136,7 @@ class ElasticTransformer(OPTElasticTransformer):
                 raise Exception('HAS ONLY is not supported by %s' % quantity.name)
 
             has_all = super()._has_query_op(quantities, 'HAS ALL', predicate_zip_list)
-            has_length = Q('term', **{quantity.length_quantity.es_field: len(predicate_zip_list)})
+            has_length = Q('term', **{quantity.length_quantity.backend_field: len(predicate_zip_list)})
             return has_all & has_length
 
         else:
diff --git a/nomad/config.py b/nomad/config.py
index c4299b1e5bdadad1c3896657f379b86e7cd9a956..d1fdbb802d997f3c051864a1710f17e0028c24cb 100644
--- a/nomad/config.py
+++ b/nomad/config.py
@@ -56,6 +56,7 @@ except ImportError:
 
 warnings.filterwarnings('ignore', message='numpy.dtype size changed')
 warnings.filterwarnings('ignore', message='numpy.ufunc size changed')
+warnings.filterwarnings('ignore', category=DeprecationWarning)
 
 
 class NomadConfig(dict):
diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py
index 17476003e751658140760aeb56acb6ecef4746ba..be6696ce4640e927c5bc893d11a66108b8acbf48 100644
--- a/nomad/normalizing/optimade.py
+++ b/nomad/normalizing/optimade.py
@@ -66,7 +66,7 @@ def optimade_chemical_formula_reduced(formula: str):
         return formula
 
     try:
-        ase_formula = ase.formula.Formula(formula).count()
+        ase_formula = ase.formula.Formula(formula).reduce()[0].count()
         result_formula = ''
         for element in sorted(ase_formula.keys()):
             result_formula += element
diff --git a/requirements.txt b/requirements.txt
index 78e4cdc6cdc136a0afabbe8223fd87d2278324a6..9936308ecd6a0204349f6b49dff0f32d6e2a224d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,13 +14,13 @@ docstring-parser==0.12
 Pint==0.17
 orjson==3.6.0
 click==7.1.2
-requests==2.26.0
+requests==2.27.1
 pytz==2021.1
 aniso8601==7.0.0
 ase==3.19.0
 python-keycloak==0.26.1
 elasticsearch-dsl==7.4.0
-pydantic==1.8.2
+pydantic==1.9.1
 jmespath==0.10.0
 httpx==0.22.0
 memoization==0.4.0
@@ -46,13 +46,13 @@ asr==0.4.1
 bitarray==2.3.5
 
 # [infrastructure]
-optimade[mongo]==0.14.0
+optimade[mongo]==0.18.0
 structlog==20.1.0
 elasticsearch==7.17.1
 msgpack==0.5.6
 celery[redis]==4.4.7
 mongoengine==0.19.1
-pymongo==3.11.4
+pymongo==3.12.1
 Werkzeug==0.16.1
 itsdangerous==1.1.0
 passlib==1.7.4
@@ -80,7 +80,7 @@ python-json-logger==2.0.2
 recommonmark==0.7.1
 jinja2==2.11.3
 rdflib==5.0.0
-fastapi==0.63.0
+fastapi==0.65.3
 uvicorn[standard]==0.13.4
 python-multipart==0.0.5
 validators==0.18.2
diff --git a/tests/app/test_optimade.py b/tests/app/test_optimade.py
index c329dded451f82bdee1228544747695943e374df..56b8dd275c59dd98ac27bf1504687640ffa57566 100644
--- a/tests/app/test_optimade.py
+++ b/tests/app/test_optimade.py
@@ -306,7 +306,7 @@ def test_nmd_properties(client, example_structures):
     assert attr.get('_nmd_results_material_elements') == ['H', 'O']
     assert '_nmd_results_material_structural_type' in attr
-    assert '_nmd_doesnotexist' not in attr
+    assert attr['_nmd_doesnotexist'] is None
     assert '_nmd_archive_url' in attr