diff --git a/gui/src/components/api.js b/gui/src/components/api.js index 0a9382f28cc0df91618ffd219535e124758cd215..aa381d0e609da2216570446aa4920c4c01f39ad0 100644 --- a/gui/src/components/api.js +++ b/gui/src/components/api.js @@ -282,7 +282,7 @@ class Api { const result = await this.axios.post( `${searchTarget}/query`, { - exclude: ['atoms', 'only_atoms', 'files', 'dft.quantities', 'dft.optimade', 'dft.labels', 'dft.geometries'], + exclude: ['atoms', 'only_atoms', 'files', 'quantities', 'dft.quantities', 'optimade', 'dft.labels', 'dft.geometries'], ...search }, auth diff --git a/gui/src/components/domainData.js b/gui/src/components/domainData.js index 012e7783a3813b520934f2193bcb0fc29f005dd0..674b556794b4a985642d03ceb5f9194e7a10abfd 100644 --- a/gui/src/components/domainData.js +++ b/gui/src/components/domainData.js @@ -91,8 +91,8 @@ export const domainData = ({ mainfile: {}, calc_hash: {}, formula: {}, - 'dft.optimade': {}, - 'dft.quantities': {}, + optimade: {}, + quantities: {}, 'dft.spacegroup': {}, 'dft.spacegroup_symbol': {}, 'dft.labels': {}, diff --git a/nomad/app/flask/api/common.py b/nomad/app/flask/api/common.py index 31eeb0a6ffd3f5c3e3cea8a06b1470b33bb6223f..115fc8c930a645bdcdd7f3990e1704137d9ffafb 100644 --- a/nomad/app/flask/api/common.py +++ b/nomad/app/flask/api/common.py @@ -179,7 +179,7 @@ def add_search_parameters(request_parser): 'until_time', type=lambda x: rfc3339DateTime.parse(x), help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)') request_parser.add_argument( - 'dft.optimade', type=str, + 'optimade', type=str, help='A search query in the optimade filter language.') request_parser.add_argument( 'query', type=str, @@ -229,7 +229,7 @@ def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str # optimade try: - optimade = args.get('dft.optimade', None) + optimade = args.get('optimade', None) if optimade is not None: q = filterparser.parse_filter( optimade, nomad_properties=domain, without_prefix=True) @@ -418,7 +418,7 @@ def _filter_api_query(query): if key in search.search_quantities and (key != 'domain' or value != config.meta.default_domain) } - for key in ['dft.optimade']: + for key in ['optimade']: if key in query: result[key] = query[key] diff --git a/nomad/app/optimade/common.py b/nomad/app/optimade/common.py index 1b422cfbb25b325034b567aa88ae6f5f061e64b7..66d8f97eb69ff5f63297603f9a7893abef050582 100644 --- a/nomad/app/optimade/common.py +++ b/nomad/app/optimade/common.py @@ -15,27 +15,44 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import Tuple, Generator, cast + +from typing import Dict, cast from nomad.metainfo.metainfo import Quantity, Reference -from nomad.metainfo.search_extension import Search -from nomad.search.v0 import search_quantities +from nomad.metainfo.elasticsearch_extension import SearchQuantity, entry_type + + +_provider_specific_fields: Dict[str, SearchQuantity] = None + +def provider_specific_fields() -> Dict[str, SearchQuantity]: + global _provider_specific_fields -def provider_specific_fields() -> Generator[Tuple[str, Search], None, None]: - for search_quantity in search_quantities.values(): + if _provider_specific_fields is not None: + return _provider_specific_fields + + _provider_specific_fields = {} + + if len(entry_type.quantities) == 0: + # TODO this is necessary, because the mappings are only created after the + # ES index with initialized in infrastructure. But this is called during + # optimade import. Detangle mapping creation from index creation! + from nomad.datamodel.datamodel import EntryArchive + entry_type.create_mapping(EntryArchive.m_def) + + for qualified_name, search_quantity in entry_type.quantities.items(): quantity = cast(Quantity, search_quantity.definition) if isinstance(quantity.type, Reference): # we can't yet support those continue - nmd_name = search_quantity.qualified_name + nmd_name = qualified_name nmd_name_split = nmd_name.split('.') if len(nmd_name_split) == 1: # plain metadata pass - elif not nmd_name_split[0] in ['dft', 'encyclopedia']: + elif not nmd_name_split[0] in ['results']: # other domains fields that do not make sense in the optimade context continue elif len(nmd_name_split) > 2 and nmd_name_split[1] == 'optimade': @@ -43,4 +60,6 @@ def provider_specific_fields() -> Generator[Tuple[str, Search], None, None]: continue opt_name = nmd_name.replace('.', '_') - yield opt_name, search_quantity + _provider_specific_fields[opt_name] = search_quantity + + return _provider_specific_fields diff --git a/nomad/app/optimade/elasticsearch.py b/nomad/app/optimade/elasticsearch.py index a96da7fcb68786e81f72e62909ad6be3ae0c60a0..377c2cce1d698913ef5f3944627f38eeeafec491 100644 --- a/nomad/app/optimade/elasticsearch.py +++ b/nomad/app/optimade/elasticsearch.py @@ -1,7 +1,7 @@ from typing import Optional, Tuple, List, Union, Dict, Set, Any +from elasticsearch_dsl import Q from fastapi import HTTPException from pydantic import create_model -from elasticsearch_dsl import Search, Q from datetime import datetime import numpy as np @@ -14,7 +14,8 @@ from optimade.models import StructureResource, StructureResourceAttributes from optimade.models.utils import OptimadeField, SupportLevel from optimade.server.schemas import ENTRY_INFO_SCHEMAS -from nomad.search import v0 as search +from nomad.search.v1 import search +from nomad.app.v1.models import MetadataPagination, MetadataRequired from nomad import datamodel, files, utils, metainfo, config from nomad.normalizing.optimade import ( optimade_chemical_formula_reduced, optimade_chemical_formula_anonymous, @@ -26,6 +27,8 @@ from .common import provider_specific_fields logger = utils.get_logger(__name__) float64 = np.dtype('float64') +int64 = np.dtype('int64') +int32 = np.dtype(np.int32) class StructureResourceAttributesByAlias(StructureResourceAttributes): @@ -55,14 +58,21 @@ class StructureResourceAttributesByAlias(StructureResourceAttributes): def create_nomad_structure_resource_attributes_cls(): fields: Dict[str, Tuple[type, OptimadeField]] = {} - for name, search_quantity in provider_specific_fields(): + for name, search_quantity in provider_specific_fields().items(): quantity = search_quantity.definition pydantic_type: type if not quantity.is_scalar: pydantic_type = list + elif quantity.type == int32: + pydantic_type = int elif quantity.type in [str, int, float, bool]: - pydantic_type = quantity.type if quantity.type != float64 else float + if quantity.type == float64: + pydantic_type = float + elif quantity.type == int64: + pydantic_type = int + else: + pydantic_type = quantity.type elif quantity.type == metainfo.Datetime: pydantic_type = datetime elif isinstance(quantity.type, metainfo.MEnum): @@ -112,18 +122,15 @@ class StructureCollection(EntryCollection): self._check_aliases(self.resource_mapper.all_aliases()) self._check_aliases(self.resource_mapper.all_length_aliases()) - self.client = Search - - def _base_search_request(self): - request = search.SearchRequest().owner('public', None) - request.search_parameter('processed', True) - # TODO use the elastic annotations when done - request.query(Q('exists', field='dft.optimade.elements')) - return request + def _base_search_query(self) -> Q: + return Q('exists', field='optimade.elements') & Q('term', processed=True) def __len__(self) -> int: # TODO cache - return self._base_search_request().execute()['total'] + return search( + owner='public', + query=self._base_search_query(), + pagination=MetadataPagination(page_size=0)).pagination.total def count(self, **kwargs) -> int: # This seams solely mongodb specific @@ -169,7 +176,7 @@ class StructureCollection(EntryCollection): response_fields: Set[str], upload_files_cache: Dict[str, files.UploadFiles]) -> StructureResource: - calc_id, upload_id = es_result['calc_id'], es_result['upload_id'] + entry_id, upload_id = es_result['entry_id'], es_result['upload_id'] upload_files = upload_files_cache.get(upload_id) if upload_files is None: @@ -181,18 +188,28 @@ class StructureCollection(EntryCollection): upload_files_cache[upload_id] = upload_files try: - archive = upload_files.read_archive(calc_id) + archive_reader = upload_files.read_archive(entry_id) except KeyError: - logger.error('missing archive entry', upload_id=upload_id, calc_id=calc_id) + logger.error('missing archive entry', upload_id=upload_id, entry_id=entry_id) return None - metadata = archive[calc_id]['metadata'].to_dict() - entry = datamodel.EntryMetadata.m_from_dict(metadata) + entry_archive_reader = archive_reader[entry_id] + archive = datamodel.EntryArchive( + metadata=datamodel.EntryMetadata.m_from_dict( + entry_archive_reader['metadata'].to_dict()) + ) + + # Lazy load results if only if results provider specfic field is requested + def get_results(): + if not archive.results: + archive.results = datamodel.Results.m_from_dict( + entry_archive_reader['results'].to_dict()) + return archive.results - attrs = entry.dft.optimade.m_to_dict() + attrs = archive.metadata.optimade.m_to_dict() - attrs['immutable_id'] = calc_id - attrs['last_modified'] = entry.last_processing if entry.last_processing is not None else entry.upload_time + attrs['immutable_id'] = entry_id + attrs['last_modified'] = archive.metadata.last_processing if archive.metadata.last_processing is not None else archive.metadata.upload_time # TODO this should be removed, once all data is reprocessed with the right normalization attrs['chemical_formula_reduced'] = optimade_chemical_formula_reduced( @@ -215,29 +232,42 @@ class StructureCollection(EntryCollection): continue if request_field == '_nmd_archive_url': - attrs[request_field] = config.api_url() + f'/archive/{upload_id}/{calc_id}' + attrs[request_field] = config.api_url() + f'/archive/{upload_id}/{entry_id}' continue if request_field == '_nmd_entry_page_url': - attrs[request_field] = config.gui_url(f'entry/id/{upload_id}/{calc_id}') + attrs[request_field] = config.gui_url(f'entry/id/{upload_id}/{entry_id}') continue if request_field == '_nmd_raw_file_download_url': - attrs[request_field] = config.api_url() + f'/raw/calc/{upload_id}/{calc_id}' + attrs[request_field] = config.api_url() + f'/raw/calc/{upload_id}/{entry_id}' + continue + + search_quantity = provider_specific_fields().get(request_field[5:]) + if search_quantity is None: + # if unknown properties where provided, we will ignore them as per + # optimade spec continue try: - if request_field.startswith('_nmd_dft_'): - attrs[request_field] = getattr(entry.dft, request_field[9:]) - else: - attrs[request_field] = getattr(entry, request_field[5:]) - except AttributeError: - # if unknown properties where provided, we will ignore them + path = search_quantity.qualified_name.split('.') + if path[0] == 'results': + get_results() + section = archive + for segment in path: + value = getattr(section, segment) + section = value + + attrs[request_field] = value + except Exception: + # TODO there a few things that can go wrong. Most notable the search + # quantity might have a path with repeated sections. This won't be + # handles right now. pass return self.resource_cls( type='structures', - id=entry.calc_id, + id=entry_id, attributes=attrs, relationships=None) @@ -266,19 +296,26 @@ class StructureCollection(EntryCollection): if not sort_quantity_a_optimade.sortable: raise BadRequest(detail='Unable to sort on field %s' % sort) - search_request = self._base_search_request().include('calc_id', 'upload_id') + search_query = self._base_search_query() + + filter = criteria.get('filter') + if filter: + search_query &= filter - if criteria.get("filter", False): - search_request.query(criteria["filter"]) + es_response = search( + owner='public', + query=search_query, + required=MetadataRequired(include=['entry_id', 'upload_id']), + pagination=MetadataPagination( + page_size=criteria['limit'], + page_offset=criteria.get('skip', 0), + order='asc' if order == 1 else 'desc', + order_by=f'optimade.{sort}' + )) - es_response = search_request.execute_paginated( - page_offset=criteria.get('skip', 0), - per_page=criteria['limit'], - order=order, - order_by='dft.optimade.%s' % sort) - results = es_response['results'] + results = es_response.data - data_returned = es_response['pagination']['total'] + data_returned = es_response.pagination.total more_data_available = data_returned >= criteria.get('skip', 0) + criteria['limit'] return results, data_returned, more_data_available diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py index b0e43155ae05bd8f53c8bb8ac7062323fc1f6f00..24300051468a3f7320b3af28e15e145e327b4479 100644 --- a/nomad/app/optimade/filterparser.py +++ b/nomad/app/optimade/filterparser.py @@ -17,12 +17,12 @@ # from typing import Dict -from elasticsearch_dsl import Q, Date +from elasticsearch_dsl import Q from cachetools import cached from optimade.filterparser import LarkParser from optimade.filtertransformers.elasticsearch import ( - Quantity, ElasticTransformer as OPTElasticTransformer) + Quantity, ElasticTransformer as OPTElasticTransformer, _cmp_operators) from optimade.models import CHEMICAL_SYMBOLS, ATOMIC_NUMBERS from .common import provider_specific_fields @@ -41,24 +41,24 @@ def _get_transformer(nomad_properties, without_prefix): from nomad.datamodel import OptimadeEntry quantities: Dict[str, Quantity] = { q.name: Quantity( - q.name, es_field='dft.optimade.%s' % q.name, - elastic_mapping_type=q.a_search.mapping.__class__) + q.name, es_field='optimade.%s' % q.name, + elastic_mapping_type=q.a_elasticsearch.mapping['type']) for q in OptimadeEntry.m_def.all_quantities.values() - if 'search' in q.m_annotations} + if 'elasticsearch' in q.m_annotations} - quantities['id'] = Quantity('id', es_field='calc_id') - quantities['immutable_id'] = Quantity('immutable_id', es_field='calc_id') + quantities['id'] = Quantity('id', es_field='entry_id', elastic_mapping_type='keyword') + quantities['immutable_id'] = Quantity('immutable_id', es_field='entry_id', elastic_mapping_type='keyword') quantities['last_modified'] = Quantity( - 'last_modified', es_field='upload_time', elastic_mapping_type=Date) + 'last_modified', es_field='upload_time', elastic_mapping_type='date') quantities['elements'].length_quantity = quantities['nelements'] - quantities['elements'].has_only_quantity = Quantity(name='only_atoms') + quantities['elements'].has_only_quantity = Quantity(name='only_atoms', elastic_mapping_type='keyword') quantities['elements'].nested_quantity = quantities['elements_ratios'] quantities['elements_ratios'].nested_quantity = quantities['elements_ratios'] if nomad_properties is not None: - for name, search_quantity in provider_specific_fields(): + for name, search_quantity in provider_specific_fields().items(): names = ['_nmd_' + name] if without_prefix: names.append(name) @@ -68,7 +68,7 @@ def _get_transformer(nomad_properties, without_prefix): quantities[name] = Quantity( name, es_field=search_quantity.search_field, - elastic_mapping_type=search_quantity.mapping.__class__) + elastic_mapping_type=search_quantity.mapping['type']) return ElasticTransformer(quantities=quantities.values()) @@ -101,6 +101,30 @@ def parse_filter(filter_str: str, nomad_properties='dft', without_prefix=False) class ElasticTransformer(OPTElasticTransformer): + def _query_op(self, quantity, op, value, nested=None): + """ + Return a range, match, or term query for the given quantity, comparison + operator, and value + """ + field = self._field(quantity, nested=nested) + if op in _cmp_operators: + return Q("range", **{field: {_cmp_operators[op]: value}}) + + if quantity.elastic_mapping_type == 'text': + query_type = "match" + elif quantity.elastic_mapping_type in ['keyword', 'integer', 'float', 'bool']: + query_type = "term" + else: + raise NotImplementedError("Quantity has unsupported ES field type") + + if op in ["=", ""]: + return Q(query_type, **{field: value}) + + if op == "!=": + return ~Q( # pylint: disable=invalid-unary-operand-type + query_type, **{field: value} + ) + def _has_query_op(self, quantities, op, predicate_zip_list): # We override this to add 'HAS ONLY' support. if op == 'HAS ONLY': diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 359c203091efb8e11bef680c6006437a6a805bc5..dfbf2488bfa12ef6f0509af667aefced655f3d85 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -34,6 +34,7 @@ from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry from .dft import DFTMetadata from .ems import EMSMetadata from .qcms import QCMSMetadata +from .optimade import OptimadeEntry # This is usually defined automatically when the first metainfo definition is evaluated, but # due to the next imports requireing the m_package already, this would be too late. @@ -665,6 +666,11 @@ class EntryMetadata(metainfo.MSection): description='The number of atoms in the entry\'s material', a_search=Search()) + optimade = metainfo.SubSection( + sub_section=OptimadeEntry, + description='Metadata used for the optimade API.', + a_elasticsearch=Elasticsearch(entry_type)) + n_quantities = metainfo.Quantity( type=int, default=0, description='Number of metainfo quantities parsed from the entry.') diff --git a/nomad/datamodel/dft.py b/nomad/datamodel/dft.py index 25f13a382ca4259fc3855cf9f7b3f7ef477681ce..d9375bb032419437b56d95b1d0dc796683bfb52f 100644 --- a/nomad/datamodel/dft.py +++ b/nomad/datamodel/dft.py @@ -26,7 +26,6 @@ from nomad import config, utils from nomad.metainfo import MSection, Section, Quantity, MEnum, SubSection from nomad.metainfo.search_extension import Search -from .optimade import OptimadeEntry from .metainfo.workflow import Workflow from .metainfo.common import FastAccess from .metainfo.simulation.run import Run @@ -265,11 +264,6 @@ class DFTMetadata(MSection): many_and='append', statistic_size=20, statistic_order='_count')) - optimade = SubSection( - sub_section=OptimadeEntry, - description='Metadata used for the optimade API.', - a_search=Search()) - workflow = Quantity(type=Workflow, a_search=Search()) def code_name_from_parser(self): diff --git a/nomad/datamodel/optimade.py b/nomad/datamodel/optimade.py index b42b5e0b9a7baf9127d34b515cbca518b8bf5148..c75da98610f492c2dca32896049753f125e04de6 100644 --- a/nomad/datamodel/optimade.py +++ b/nomad/datamodel/optimade.py @@ -17,31 +17,42 @@ # from ase.data import chemical_symbols -from elasticsearch_dsl import Keyword, Float, InnerDoc, Nested, Integer import numpy as np from nomad.units import ureg from nomad.metainfo import MSection, Section, Quantity, SubSection, MEnum, DefinitionAnnotation -from nomad.metainfo.search_extension import Search +from nomad.metainfo.elasticsearch_extension import Elasticsearch -# TODO move the module - def optimade_links(section: str): return [ 'https://github.com/Materials-Consortia/OPTiMaDe/blob/develop/optimade.md#%s' % section] -class ElementRatio(InnerDoc): - elements = Keyword() - elements_ratios = Float() +class ElementRatio: @staticmethod def from_structure_entry(entry: 'OptimadeEntry'): return [ - ElementRatio(elements=entry.elements[i], elements_ratios=entry.elements_ratios[i]) - for i in range(0, entry.nelements)] + { + 'elements': entry.elements[i], + 'elements_ratios': entry.elements_ratios[i] + } + for i in range(0, entry.nelements) + ] + + mapping = { + 'type': 'nested', + 'properties': { + 'elements': { + 'type': 'keyword' + }, + 'elements_ratios': { + 'type': 'float' + } + } + } class Optimade(DefinitionAnnotation): @@ -124,7 +135,7 @@ class OptimadeEntry(MSection): elements = Quantity( type=MEnum(chemical_symbols), shape=['1..*'], links=optimade_links('h.6.2.1'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=False, type='list'), description=''' Names of the different elements present in the structure. @@ -133,7 +144,7 @@ class OptimadeEntry(MSection): nelements = Quantity( type=int, default=0, links=optimade_links('h.6.2.2'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=True, type='integer'), description=''' Number of different elements in the structure as an integer. @@ -142,7 +153,8 @@ class OptimadeEntry(MSection): elements_ratios = Quantity( type=float, shape=['nelements'], links=optimade_links('h.6.2.3'), - a_search=Search(mapping=Nested(ElementRatio), value=ElementRatio.from_structure_entry), + a_elasticsearch=Elasticsearch( + mapping=ElementRatio.mapping, value=ElementRatio.from_structure_entry), a_optimade=Optimade(query=True, entry=True, sortable=False, type='list'), description=''' Relative proportions of different elements in the structure. @@ -151,7 +163,7 @@ class OptimadeEntry(MSection): chemical_formula_descriptive = Quantity( type=str, links=optimade_links('h.6.2.4'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=True, type='string'), description=''' The chemical formula for a structure as a string in a form chosen by the API @@ -161,7 +173,7 @@ class OptimadeEntry(MSection): chemical_formula_reduced = Quantity( type=str, links=optimade_links('h.6.2.5'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=True, type='string'), description=''' The reduced chemical formula for a structure as a string with element symbols and @@ -171,7 +183,7 @@ class OptimadeEntry(MSection): chemical_formula_hill = Quantity( type=str, links=optimade_links('h.6.2.6'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=False, sortable=True, type='string'), description=''' The chemical formula for a structure in Hill form with element symbols followed by @@ -181,7 +193,7 @@ class OptimadeEntry(MSection): chemical_formula_anonymous = Quantity( type=str, links=optimade_links('h.6.2.7'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=True, type='string'), description=''' The anonymous formula is the chemical_formula_reduced, but where the elements are @@ -205,7 +217,7 @@ class OptimadeEntry(MSection): nperiodic_dimensions = Quantity( type=int, derived=lambda a: sum(a.dimension_types), links=optimade_links('h.6.2.8'), - a_search=Search(mapping=Integer()), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=True, type='integer'), description=''' An integer specifying the number of periodic dimensions in the structure, equivalent @@ -232,7 +244,7 @@ class OptimadeEntry(MSection): nsites = Quantity( type=int, default=0, links=optimade_links('h.6.2.11'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=True, type='integer'), description=''' An integer specifying the length of the cartesian_site_positions property. ''') @@ -240,7 +252,7 @@ class OptimadeEntry(MSection): species_at_sites = Quantity( type=str, shape=['nsites'], links=optimade_links('h.6.2.12'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=False, entry=True, sortable=False, type='list'), description=''' Name of the species at each site (where values for sites are specified with the same order of the cartesian_site_positions property). The properties of the species are @@ -252,7 +264,7 @@ class OptimadeEntry(MSection): structure_features = Quantity( type=MEnum(['disorder', 'unknown_positions', 'assemblies']), shape=['1..*'], default=[], links=optimade_links('h.6.2.15'), - a_search=Search(), + a_elasticsearch=Elasticsearch(), a_optimade=Optimade(query=True, entry=True, sortable=False, type='list'), description=''' A list of strings that flag which special features are used by the structure. diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index 60c3de57b2febf2df6900b47a1fbac473c238900..39dc8816007848ae96e44e50827004df8779f795 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -164,7 +164,7 @@ import re from nomad import config, utils from .metainfo import ( - Section, Quantity, MSection, MEnum, Datetime, Reference, DefinitionAnnotation, + MSectionBound, Section, Quantity, MSection, MEnum, Datetime, Reference, DefinitionAnnotation, Definition, QuantityReference) @@ -536,7 +536,7 @@ class Elasticsearch(DefinitionAnnotation): mapping: Union[str, Dict[str, Any]] = None, field: str = None, es_field: str = None, - value: Callable[[MSection], Any] = None, + value: Callable[[MSectionBound], Any] = None, index: bool = True, values: List[str] = None, default_aggregation_size: int = None, diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py index 81644b50ca11977bd5595eb94114402875bb674c..5a575763a6db0d6c1f0185f1f8f683ae02092cf8 100644 --- a/nomad/normalizing/optimade.py +++ b/nomad/normalizing/optimade.py @@ -27,24 +27,25 @@ from collections import OrderedDict from nomad.normalizing.normalizer import SystemBasedNormalizer from nomad.units import ureg -from nomad.datamodel import OptimadeEntry, Species, DFTMetadata, EntryMetadata +from nomad.datamodel import OptimadeEntry, Species, EntryMetadata from nomad.datamodel.metainfo.simulation.system import Atoms, System species_re = re.compile(r'^([A-Z][a-z]?)(\d*)$') +# TODO this should be the default and not necessary def transform_to_v1(entry: EntryMetadata) -> EntryMetadata: ''' Transformation function to use during re-indexing of entries with outdated optimade format. Fixes formulas and periodic dimensions, removed entries with X in formula. ''' - optimade = entry.dft.optimade if entry.dft is not None else None + optimade = entry.optimade if optimade is None: return entry if optimade.chemical_formula_reduced is None or 'X' in optimade.chemical_formula_reduced: - entry.dft.m_remove_sub_section(DFTMetadata.optimade, -1) + entry.m_remove_sub_section(EntryMetadata.optimade, -1) return entry optimade.chemical_formula_reduced = optimade_chemical_formula_reduced(optimade.chemical_formula_reduced) @@ -133,9 +134,7 @@ class OptimadeNormalizer(SystemBasedNormalizer): if self.entry_archive.metadata is None: self.entry_archive.m_create(EntryMetadata) - if self.entry_archive.metadata.dft is None: - self.entry_archive.metadata.m_create(DFTMetadata) - optimade = self.entry_archive.metadata.dft.m_create(OptimadeEntry) + optimade = self.entry_archive.metadata.m_create(OptimadeEntry) def get_value(quantity_def, default: Any = None, numpy: bool = False, unit=None, source: Any = None) -> Any: try: diff --git a/nomad/normalizing/results.py b/nomad/normalizing/results.py index 21564ec775cf8f6c4a33e66437ade48f14603639..baed8bdb2735291899bb88828134f83c97c78acc 100644 --- a/nomad/normalizing/results.py +++ b/nomad/normalizing/results.py @@ -200,7 +200,7 @@ class ResultsNormalizer(Normalizer): except Exception: encyclopedia = None try: - optimade = self.entry_archive.metadata.dft.optimade + optimade = self.entry_archive.metadata.optimade except Exception: optimade = None diff --git a/ops/tests/loadtest_search.py b/ops/tests/loadtest_search.py index 1bdfa1f74447898bf3b387aacd058901d1ff2d54..cc64b10e50910f73962fd65f918836ecb0fbbbb5 100644 --- a/ops/tests/loadtest_search.py +++ b/ops/tests/loadtest_search.py @@ -50,12 +50,12 @@ chemical_symbols = [ # These are the API requests from the search UI with various tabs and statistics query_params = [ - 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&atoms=Co&statistics=atoms&exclude=atoms,only_atoms,dft.files,dft.quantities,dft.optimade,dft.labels,dft.geometries', - 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.labels_springer_compound_class&statistics=dft.system&statistics=dft.crystal_system&statistics=dft.compound_type&exclude=atoms,only_atoms,dft.files,dft.quantities,dft.optimade,dft.labels,dft.geometries', - 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.code_name&statistics=dft.basis_set&statistics=dft.xc_functional&exclude=atoms,only_atoms,dft.files,dft.quantities,dft.optimade,dft.labels,dft.geometries', - 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.searchable_quantities&statistics=dft.labels_springer_classification&statistics=dft.workflow.workflow_type&exclude=atoms,only_atoms,dft.files,dft.quantities,dft.optimade,dft.labels,dft.geometries', - 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.searchable_quantities&statistics=dft.labels_springer_classification&statistics=dft.workflow.workflow_type&exclude=atoms,only_atoms,dft.files,dft.quantities,dft.optimade,dft.labels,dft.geometries&datasets_grouped=true', - 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&metrics=dft.calculations&statistics=atoms&exclude=atoms,only_atoms,dft.files,dft.quantities,dft.optimade,dft.labels,dft.geometries&datasets_grouped=true' + 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&atoms=Co&statistics=atoms&exclude=atoms,only_atoms,dft.files,quantities,optimade,dft.labels,dft.geometries', + 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.labels_springer_compound_class&statistics=dft.system&statistics=dft.crystal_system&statistics=dft.compound_type&exclude=atoms,only_atoms,dft.files,quantities,optimade,dft.labels,dft.geometries', + 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.code_name&statistics=dft.basis_set&statistics=dft.xc_functional&exclude=atoms,only_atoms,dft.files,quantities,optimade,dft.labels,dft.geometries', + 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.searchable_quantities&statistics=dft.labels_springer_classification&statistics=dft.workflow.workflow_type&exclude=atoms,only_atoms,dft.files,quantities,optimade,dft.labels,dft.geometries', + 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=dft.searchable_quantities&statistics=dft.labels_springer_classification&statistics=dft.workflow.workflow_type&exclude=atoms,only_atoms,dft.files,quantities,optimade,dft.labels,dft.geometries&datasets_grouped=true', + 'page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&metrics=dft.calculations&statistics=atoms&exclude=atoms,only_atoms,dft.files,quantities,optimade,dft.labels,dft.geometries&datasets_grouped=true' ] diff --git a/ops/tests/ping.py b/ops/tests/ping.py index 73fc1762c8fda87cd4bd734a781af0650ad831a1..6ea7e16e9a3b29f2eec1a9d994e26f42e563a2b9 100644 --- a/ops/tests/ping.py +++ b/ops/tests/ping.py @@ -35,7 +35,7 @@ while True: response = requests.get('%s%s%s%s' % ( base_url, '/repo/', - '?page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=atoms&exclude=atoms,only_atoms,dft.files,dft.quantities,dft.optimade,dft.labels,dft.geometries', + '?page=1&per_page=10&order_by=upload_time&order=-1&domain=dft&owner=public&statistics=atoms&exclude=atoms,only_atoms,dft.files,quantities,optimade,dft.labels,dft.geometries', atoms)) end = time.time() print('PING – %s – %f - %s' % (response.status_code, end - start, datetime.now())) diff --git a/tests/app/flask/test_api.py b/tests/app/flask/test_api.py index 9cede476daba83939a30c2d2a20954f8c40b5448..1ee1b1fa3e70854657f62ed3a5f9ff19a39fa9d4 100644 --- a/tests/app/flask/test_api.py +++ b/tests/app/flask/test_api.py @@ -1152,7 +1152,7 @@ class TestRepo(): assert value in statistics['dft.system'] def test_search_exclude(self, api, example_elastic_calcs, no_warn): - rv = api.get('/repo/?exclude=atoms,only_atoms,dft.optimade,dft.quantities') + rv = api.get('/repo/?exclude=atoms,only_atoms,optimade,dft.quantities') assert rv.status_code == 200 result = utils.flat(json.loads(rv.data)['results'][0]) assert 'atoms' not in result @@ -1425,21 +1425,21 @@ class TestRepo(): def test_optimade(self, api, non_empty_processed, test_user_auth): rv = api.get( - '/repo/?%s' % urlencode({'owner': 'all', 'dft.optimade': 'nelements >= 1'}), + '/repo/?%s' % urlencode({'owner': 'all', 'optimade': 'nelements >= 1'}), headers=test_user_auth) assert rv.status_code == 200 data = json.loads(rv.data) assert data['pagination']['total'] > 0 rv = api.get( - '/repo/?%s' % urlencode({'owner': 'all', 'dft.optimade': 'nelements = 23'}), + '/repo/?%s' % urlencode({'owner': 'all', 'optimade': 'nelements = 23'}), headers=test_user_auth) assert rv.status_code == 200 data = json.loads(rv.data) assert data['pagination']['total'] == 0 rv = api.get( - '/repo/?%s' % urlencode({'owner': 'all', 'dft.optimade': 'this is not optimade'}), + '/repo/?%s' % urlencode({'owner': 'all', 'optimade': 'this is not optimade'}), headers=test_user_auth) assert rv.status_code == 400 diff --git a/tests/app/test_optimade.py b/tests/app/test_optimade.py index ed6d229ee23f0c75b9538bbbf6a7dd760d85a90b..dd46f92fe89e22c0579725df4a322535359d8069 100644 --- a/tests/app/test_optimade.py +++ b/tests/app/test_optimade.py @@ -33,10 +33,10 @@ def test_get_entry(published: Upload): calc_id = list(published.calcs)[0].calc_id with published.upload_files.read_archive(calc_id) as archive: data = archive[calc_id] - assert data['metadata']['dft']['optimade'] is not None + assert data['metadata']['optimade'] is not None search_result = search.SearchRequest().search_parameter('calc_id', calc_id).execute_paginated()['results'][0] - assert 'dft.optimade.chemical_formula_hill' in utils.flat(search_result) + assert 'optimade.chemical_formula_hill' in utils.flat(search_result) def test_no_optimade(mongo, elastic, raw_files, client): @@ -129,8 +129,8 @@ def example_structures(elastic_infra, mongo_infra, raw_files_infra): ('elements HAS ONY "H", "O"', -1), ('last_modified >= "2009-02-01T20:07:00Z"', 3), ('species_at_sites HAS "C"', 1), - ('_nmd_dft_system = "molecule / cluster"', 3), - ('_nmd_encyclopedia_material_formula = "H20"', 0) + ('_nmd_results_material_structural_type = "molecule / cluster"', 3), + ('_nmd_results_material_chemical_formula_reduced = "H20"', 0) ]) def test_optimade_parser(example_structures, query, results): if results >= 0: @@ -282,37 +282,38 @@ def test_structure_endpoint(client, example_structures): assert len(attr.get('dimension_types')) == 3 -def test_nmd_properties_info(client): +def test_nmd_properties_info(client, example_structures): rv = client.get('/optimade/info/structures') assert rv.status_code == 200 data = rv.json() - assert '_nmd_dft_system' in data['data']['properties'] - assert '_nmd_encyclopedia_material_formula' in data['data']['properties'] - assert '_nmd_atoms' in data['data']['properties'] + assert '_nmd_results_material_structural_type' in data['data']['properties'] + assert '_nmd_results_material_chemical_formula_reduced' in data['data']['properties'] + assert '_nmd_results_material_elements' in data['data']['properties'] assert '_nmd_archive_url' in data['data']['properties'] def test_nmd_properties(client, example_structures): - rv = client.get('/optimade/structures/%s' % 'test_calc_id_1?response_fields=_nmd_atoms,_nmd_dft_system,_nmd_doesnotexist,_nmd_archive_url') + rv = client.get('/optimade/structures/%s' % 'test_calc_id_1?response_fields=_nmd_results_material_elements,_nmd_results_material_structural_type,_nmd_doesnotexist,_nmd_archive_url') assert rv.status_code == 200 data = rv.json() assert data.get('data') is not None attr = data['data'].get('attributes') assert attr is not None - assert attr.get('_nmd_atoms') == ['H', 'O'] - assert '_nmd_dft_system' in attr + + assert attr.get('_nmd_results_material_elements') == ['H', 'O'] + assert '_nmd_results_material_structural_type' in attr assert '_nmd_doesnotexist' not in attr assert '_nmd_archive_url' in attr def test_nmd_properties_include_all(client, example_structures): - all_fields = [f'_nmd_{name}' for name, _ in provider_specific_fields()] + all_fields = [f'_nmd_{name}' for name in provider_specific_fields()] rv = client.get(f'/optimade/structures/test_calc_id_1?response_fields={",".join(all_fields)}') assert rv.status_code == 200 data = rv.json() assert data.get('data') is not None attr = data['data'].get('attributes') assert attr is not None - assert attr.get('_nmd_atoms') == ['H', 'O'] - assert '_nmd_dft_system' in attr - assert '_nmd_encyclopedia_material_formula' in attr + assert attr.get('_nmd_results_material_elements') == ['H', 'O'] + assert '_nmd_results_material_structural_type' in attr + assert '_nmd_results_material_chemical_formula_reduced' in attr diff --git a/tests/metainfo/test_elasticsearch_extension.py b/tests/metainfo/test_elasticsearch_extension.py index 52e0ab5b1b78594aa8879ae7e00944e8a34e983f..788a98db20a97047488ffe7c1e2296d2b9b30caa 100644 --- a/tests/metainfo/test_elasticsearch_extension.py +++ b/tests/metainfo/test_elasticsearch_extension.py @@ -106,7 +106,7 @@ class Entry(MSection): mainfile = Quantity( type=str, - a_elasticsearch=Elasticsearch(index=False, value=lambda _: 'other_mainfile')) + a_elasticsearch=Elasticsearch(index=False, value=lambda _: 'other_mainfile')) # type: ignore files = Quantity( type=str, shape=['*'], diff --git a/tests/search/test_v0.py b/tests/search/test_v0.py index 12b2eb4f067e1a9b1726342beedcb50d9ce9c792..01babe6e0be26d609b3e961e2cd747e77f942fb7 100644 --- a/tests/search/test_v0.py +++ b/tests/search/test_v0.py @@ -50,7 +50,6 @@ def test_index_normalized_calc(elastic, normalized: datamodel.EntryArchive): assert 'calc_id' in entry assert 'atoms' in entry assert 'dft.code_name' in entry - assert 'dft.optimade.elements_ratios' in entry def test_index_normalized_calc_with_metadata( diff --git a/tests/utils.py b/tests/utils.py index fb0b06c4cfbefd63d25c95b8a5a78fe48ebc4235..4fcd7174faefba9a1141fa5c82f763687fdc759e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -429,7 +429,8 @@ class ExampleData: entry_metadata.apply_domain_metadata(archive) if not optimade: - entry_metadata.dft.optimade = None + entry_metadata.optimade = None + entry_metadata.quantities.remove('metadata.optimade') if metadata is not None: kwargs = metadata