diff --git a/.gitmodules b/.gitmodules index f3a3b8e1a796eec12e652857174b79d34101b007..aa9080e391ad325cfe299006099772d3486acd5d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -138,9 +138,6 @@ path = dependencies/parsers/onetep url = https://github.com/nomad-coe/nomad-parser-onetep.git branch = master -[submodule "dependencies/optimade-python-tools"] - path = dependencies/optimade-python-tools - url = https://github.com/markus1978/optimade-python-tools.git [submodule "dependencies/parsers/namd"] path = dependencies/parsers/namd url = https://github.com/nomad-coe/nomad-parser-namd.git diff --git a/dependencies/optimade-python-tools b/dependencies/optimade-python-tools deleted file mode 160000 index b5731ab61f5ef0d019426523b8b21ad4c82596a2..0000000000000000000000000000000000000000 --- a/dependencies/optimade-python-tools +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b5731ab61f5ef0d019426523b8b21ad4c82596a2 diff --git a/nomad/app/flask/api/materialtransformer.py b/nomad/app/flask/api/materialtransformer.py index 579427bd521cff38f0f770b646f17954062ecdbe..9992a171684f34a43bac4f26111241ab5ab9f67a 100644 --- a/nomad/app/flask/api/materialtransformer.py +++ b/nomad/app/flask/api/materialtransformer.py @@ -1,9 +1,11 @@ from typing import Callable from lark import v_args from elasticsearch_dsl import Q, Text, Keyword, Integer, Field, Boolean -from optimade.filtertransformers.elasticsearch import Quantity, ElasticTransformer +from optimade.filtertransformers.elasticsearch import Quantity from nomad.atomutils import get_hill_decomposition +from nomad.app.optimade.filterparser import ElasticTransformer + _cmp_operators = {">": "gt", ">=": "gte", "<": "lt", "<=": "lte"} _rev_cmp_operators = {">": "<", ">=": "<=", "<": ">", "<=": "=>"} @@ -63,7 +65,8 @@ class MQuantity(Quantity): class MElasticTransformer(ElasticTransformer): - """A specialized Optimade/Lark transformer for handling material queries. + ''' + A specialized Optimade/Lark transformer for handling material queries. Provides mostly the same functionality as optimade.filtertransformers.elasticsearch.ElasticTransformer, but has additions that make nested queries and parameter conversions possible. @@ -73,31 +76,7 @@ class MElasticTransformer(ElasticTransformer): Arguments: quantities: A list of :class:`MQuantity`s that describe how optimade (and other) quantities are mapped to the elasticsearch index. - """ - - def _query_op(self, quantity, op, value, nested=None): - """ - Return a range, match, or term query for the given quantity, comparison - operator, and value - """ - field = self._field(quantity, nested=nested) - if op in _cmp_operators: - return Q("range", **{field: {_cmp_operators[op]: value}}) - - if quantity.elastic_mapping_type == Text: - query_type = "match" - elif quantity.elastic_mapping_type in [Keyword, Integer, Boolean]: - query_type = "term" - else: - raise NotImplementedError("Quantity has unsupported ES field type") - - if op in ["=", ""]: - return Q(query_type, **{field: value}) - - if op == "!=": - return ~Q( # pylint: disable=invalid-unary-operand-type - query_type, **{field: value} - ) + ''' def _has_query_op(self, quantities, op, predicate_zip_list): """ diff --git a/nomad/app/optimade/__init__.py b/nomad/app/optimade/__init__.py index b98541bb9b634ac457eb948f406b166add012e35..7428af554da46d6cceb3fc8f1f6388b2d7664de9 100644 --- a/nomad/app/optimade/__init__.py +++ b/nomad/app/optimade/__init__.py @@ -37,8 +37,10 @@ for name, collection in ENTRY_COLLECTIONS.items(): # patch the structure collection with out elasticsearch implementation from .elasticsearch import ElasticsearchStructureCollection # nopep8 +# from optimade.server.entry_collections.elasticsearch import ElasticCollection from .filterparser import parse_filter # nopep8 + structures.structures_coll = ElasticsearchStructureCollection() optimade.add_major_version_base_url(optimade.app) diff --git a/nomad/app/optimade/elasticsearch.py b/nomad/app/optimade/elasticsearch.py index 19210e9eceab76de60cee69e217d3dea202ad42f..baf0b834c99e1960bad8279ec91b22723106765e 100644 --- a/nomad/app/optimade/elasticsearch.py +++ b/nomad/app/optimade/elasticsearch.py @@ -24,7 +24,6 @@ class ElasticsearchStructureCollection(EntryCollection): def __init__(self): super().__init__( - collection=config.elastic.index_name, resource_cls=StructureResource, resource_mapper=StructureMapper, transformer=get_transformer(nomad_properties='dft', without_prefix=False)) @@ -107,7 +106,7 @@ class ElasticsearchStructureCollection(EntryCollection): detail=f'Instead of a single entry, {nresults_now} entries were found') results = results[0] if results else None - return results, data_returned, more_data_available, all_fields - fields + return results, data_returned, more_data_available, self.all_fields - fields def _check_aliases(self, aliases): pass @@ -137,15 +136,10 @@ class ElasticsearchStructureCollection(EntryCollection): metadata = archive[calc_id]['section_metadata'].to_dict() entry = datamodel.EntryMetadata.m_from_dict(metadata) - def include(key): - return response_fields is None or (key in response_fields) or not key.startswith('_') - attrs = entry.dft.optimade.m_to_dict() - if include('immutable_id'): - attrs['immutable_id'] = calc_id - if include('last_modified'): - attrs['last_modified'] = entry.last_processing if entry.last_processing is not None else entry.upload_time + attrs['immutable_id'] = calc_id + attrs['last_modified'] = entry.last_processing if entry.last_processing is not None else entry.upload_time # TODO this should be removed, once all data is reprocessed with the right normalization attrs['chemical_formula_reduced'] = optimade_chemical_formula_reduced( @@ -162,8 +156,6 @@ class ElasticsearchStructureCollection(EntryCollection): elif isinstance(dimension_types, list): attrs['nperiodic_dimensions'] = sum(dimension_types) - attrs = {key: value for key, value in attrs.items() if include(key)} - if response_fields is not None: for request_field in response_fields: if not request_field.startswith('_nmd_'): @@ -198,3 +190,14 @@ class ElasticsearchStructureCollection(EntryCollection): upload_files.close() return optimade_results + + def _run_db_query(self, *args, **kwargs): + # We overwrite all the methods that use this, so that this should never be called. + # We just need to implement it, because its marked as @abstractmethod. + raise NotImplementedError() + + def insert(self, *args, **kwargs): + # This is used to insert test records during OPT tests. This should never be necessary + # on our implementation. We just need to implement it, because its marked as + # @abstractmethod. + raise NotImplementedError() diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py index 88173ce150dd6576b134867dc3481030889c5893..f233403361f5c1e44c3917fa0f8496be7cfa5b68 100644 --- a/nomad/app/optimade/filterparser.py +++ b/nomad/app/optimade/filterparser.py @@ -21,7 +21,9 @@ from elasticsearch_dsl import Q from cachetools import cached from optimade.filterparser import LarkParser -from optimade.filtertransformers.elasticsearch import ElasticTransformer, Quantity +from optimade.filtertransformers.elasticsearch import ( + Quantity, ElasticTransformer as OPTElasticTransformer) +from optimade.models import CHEMICAL_SYMBOLS, ATOMIC_NUMBERS from nomad.search import search_quantities @@ -101,3 +103,53 @@ def parse_filter(filter_str: str, nomad_properties='dft', without_prefix=False) raise FilterException('Semantic error: %s' % str(e)) return query + + +class ElasticTransformer(OPTElasticTransformer): + def _has_query_op(self, quantities, op, predicate_zip_list): + # We override this to add 'HAS ONLY' support. + if op == 'HAS ONLY': + # HAS ONLY comes with heavy limitations, because there is no such thing + # in elastic search. Only supported for elements, where we can construct + # an anonymous 'formula' based on elements sorted by order number and + # can do a = comparision to check if all elements are contained + if len(quantities) > 1: + raise Exception('HAS ONLY is not supported with zip') + quantity = quantities[0] + + if quantity.has_only_quantity is None: + raise Exception('HAS ONLY is not supported by %s' % quantity.name) + + def values(): + for predicates in predicate_zip_list: + if len(predicates) != 1: + raise Exception('Tuples not supported in HAS ONLY') + op, value = predicates[0] + if op != '=': + raise Exception('Predicated not supported in HAS ONLY') + if not isinstance(value, str): + raise Exception('Only strings supported in HAS ONLY') + yield value + + try: + order_numbers = list([ATOMIC_NUMBERS[element] for element in values()]) + order_numbers.sort() + value = ''.join( + [CHEMICAL_SYMBOLS[number - 1] for number in order_numbers] + ) + except KeyError: + raise NotImplementedError('HAS ONLY is only supported for chemical symbols') + + return Q('term', **{quantity.has_only_quantity.name: value}) + + else: + return super()._has_query_op(quantities, op, predicate_zip_list) + + def property_zip_addon(self, args): + return args + + def value_zip(self, args): + return self.value_list(args) + + def value_zip_list(self, args): + return args diff --git a/requirements.txt b/requirements.txt index 78597e8858339a14cf450a2555a09377a5bb2b78..d69c6c3c10d7ec710f719f51791e88bcb9e88c3b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,7 +43,7 @@ lxml xarray # [infrastructure] -optimade +optimade==0.14.0 structlog==20.1.0 elasticsearch==6.4.0 msgpack<0.6.0