Commit 76190167 authored by Markus Scheidgen
Browse files

Continued implementation of optimade filter transformer.

parent a05a1c44
Subproject commit 3c2874e4cedae2e8743984b11a0d9b0375a008af
Subproject commit 47394ea099e95d8ee19f1f1a0b0f6d26aea33036
......@@ -12,13 +12,13 @@ def optimade_links(section: str):
class ElementRatio(InnerDoc):
    '''
    Inner search document that pairs one element with its ratio.

    The fields carry the same names as the ``elements`` and ``elements_ratios``
    quantities; the pre-rename ``element``/``ratio`` fields are dropped in favour
    of them.
    '''
    elements = Keyword()
    elements_ratios = Float()

    @staticmethod
    def from_structure_entry(entry: 'OptimadeStructureEntry'):
        '''
        Create one :class:`ElementRatio` per element of the given entry.

        Arguments:
            entry: The structure entry; its ``elements`` and ``elements_ratios``
                are read index-wise for the first ``entry.nelements`` positions.

        Returns: A list of :class:`ElementRatio` documents.
        '''
        return [
            ElementRatio(elements=entry.elements[i], elements_ratios=entry.elements_ratios[i])
            for i in range(0, entry.nelements)]
......@@ -63,7 +63,7 @@ class OptimadeStructureEntry(MObject):
chemical_formula_descriptive = Quantity(
type=str,
links=optimade_links('h.6.2.4'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_elastic=dict(type=Text),
a_optimade=Optimade(query=True, entry=True),
description='''
The chemical formula for a structure as a string in a form chosen by the API
......@@ -73,7 +73,7 @@ class OptimadeStructureEntry(MObject):
chemical_formula_reduced = Quantity(
type=str,
links=optimade_links('h.6.2.5'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_elastic=dict(type=Text),
a_optimade=Optimade(query=True, entry=True),
description='''
The reduced chemical formula for a structure as a string with element symbols and
......@@ -83,7 +83,7 @@ class OptimadeStructureEntry(MObject):
chemical_formula_hill = Quantity(
type=str,
links=optimade_links('h.6.2.6'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_elastic=dict(type=Text),
a_optimade=Optimade(query=True, entry=False),
description='''
The chemical formula for a structure in Hill form with element symbols followed by
......@@ -93,7 +93,7 @@ class OptimadeStructureEntry(MObject):
chemical_formula_anonymous = Quantity(
type=str,
links=optimade_links('h.6.2.7'),
a_elastic=dict(type=Text, other_types=dict(keyword=Keyword)),
a_elastic=dict(type=Text),
a_optimade=Optimade(query=True, entry=True),
description='''
The anonymous formula is the chemical_formula_reduced, but where the elements are
......
import json
from optimade.filterparser import LarkParser
from lark import Transformer
from elasticsearch_dsl import Q, Text, Keyword, Integer
import ase.data
from nomad.processing import Upload
from nomad.search import SearchRequest
from nomad.metainfo.optimade import OptimadeStructureEntry
from nomad.metainfo import Quantity
def test_get_entry(published: Upload):
......@@ -20,49 +23,181 @@ def test_get_entry(published: Upload):
class ESTransformer(Transformer):
# Filter comparison operators mapped to the keys used in ES ``range`` queries.
cmp_operators = {'>': 'gt', '>=': 'gte', '<': 'lt', '<=': 'lte'}
# HAS qualifiers mapped to the ES ``bool`` query clause that combines the parts.
has_operators = {'ALL': 'must', 'ANY': 'should'}
# Quantities that support LENGTH, mapped to the quantity that is queried instead.
length_quantities = {
    'elements': 'nelements',
    'elements_ratios': 'nelements',
    'dimension_types': 'dimension_types'}
def _field(self, quantity, nested=None):
    '''
    Return the search field name for a quantity.

    Arguments:
        quantity: The quantity (or its name); it is formatted with ``%s``.
        nested: Optional name of the nested path that prefixes the quantity.

    Returns: ``optimade.<quantity>`` or ``optimade.<nested>.<quantity>``.
    '''
    if nested is None:
        return 'optimade.%s' % quantity

    nested_name = '%s.%s' % (nested, quantity)
    return 'optimade.%s' % nested_name
def _order_terms(self, l, r):
    '''
    Order the two operands of a comparison as ``(quantity, value)``.

    Arguments:
        l: The left operand, either a :class:`Quantity` or a literal value.
        r: The right operand, either a :class:`Quantity` or a literal value.

    Returns: The tuple ``(quantity, value)``, regardless of the operand order.

    Raises:
        Exception: If both operands are quantities or both are values.
    '''
    l_is_quantity = isinstance(l, Quantity)
    r_is_quantity = isinstance(r, Quantity)

    if l_is_quantity and r_is_quantity:
        raise Exception('Cannot compare two quantities: %s, %s' % (l.name, r.name))
    if l_is_quantity:
        return l, r
    if r_is_quantity:
        return r, l

    # Report both operands; the message used to show the left one twice.
    raise Exception('Cannot compare two values: %s, %s' % (str(l), str(r)))
def __default__(self, tree, children, *args, **kwargs):
    '''
    Pass the first child through unchanged.

    Presumably the fallback that the lark ``Transformer`` base class calls for
    rules without a dedicated method; verify against the lark version in use.
    '''
    first_child = children[0]
    return first_child
def and_expr(self, args):
    '''
    Combine the operands of an AND expression.

    Arguments:
        args: Either a single operand, or the left and right operands.

    Returns: The single operand unchanged, or ``left & right``.
    '''
    if len(args) == 1:
        return args[0]

    # The stale ``return dict(op='AND', ...)`` made this combination unreachable.
    l, r = args
    return l & r
def or_expr(self, args):
    '''
    Combine the operands of an OR expression.

    Arguments:
        args: Either a single operand, or the left and right operands.

    Returns: The single operand unchanged, or ``left | right``.
    '''
    if len(args) == 1:
        return args[0]

    # The stale ``return dict(op='OR', ...)`` made this combination unreachable.
    l, r = args
    return l | r
# Handles NOT expressions: a single operand is returned as is, otherwise the
# operand is unpacked from ``args`` and negated with ``~``.
# NOTE(review): indentation is lost in this view. If the guard below belongs to
# this method as written, a single operand is never negated, and ``o, = args``
# is only reached when ``len(args) != 1``, where the unpacking raises
# ValueError. Confirm against the grammar what ``args`` contains.
def not_expr(self, args):
if len(args) == 1:
return args[0]
o, = args
return ~o
def _query(self, quantity, o, value, nested=None):
    '''
    Create the ES query for a single ``<quantity> <operator> <value>`` term.

    Arguments:
        quantity: The quantity definition to query.
        o: The operator; a key of ``cmp_operators``, ``'='``, ``''`` or ``'!='``.
        value: The value to compare with.
        nested: Optional nested path, passed on to :func:`_field`.

    Returns: A ``range`` query for comparison operators, otherwise a ``match``
        (``Text`` fields) or ``term`` (``Keyword``/``Integer`` fields) query,
        negated for ``'!='``.

    Raises:
        Exception: If the quantity has no ``elastic`` annotation or the operator
            is unknown.
        NotImplementedError: If the ES field type is not supported.
    '''
    field = self._field(quantity, nested=nested)
    if o in ESTransformer.cmp_operators:
        return Q('range', **{field: {ESTransformer.cmp_operators[o]: value}})

    elastic_annotation = quantity.m_annotations.get('elastic', None)
    if elastic_annotation is None:
        # Fail with a clear message instead of subscripting ``None`` below.
        raise Exception('%s is not supported in queries' % quantity.name)

    es_type = elastic_annotation['type']
    if es_type == Text:
        query_type = 'match'
    elif es_type in [Keyword, Integer]:
        query_type = 'term'
    else:
        raise NotImplementedError('Quantity has unsupported ES field type')

    # A stale ``return dict(op='NOT', ...)`` used to sit here; it referenced the
    # undefined name ``args`` and cut off the operator handling below.
    if o in ['=', '']:
        return Q(query_type, **{field: value})

    if o == '!=':
        return ~Q(query_type, **{field: value})  # pylint: disable=invalid-unary-operand-type

    raise Exception('Unknown operator %s' % o)
def cmp_op(self, args):
    '''
    Create the query for a comparison between a quantity and a value.

    Arguments:
        args: The left operand, the operator, and the right operand.

    Returns: The result of :func:`_query` for the ordered operands.
    '''
    l, o, r = args
    field, value = self._order_terms(l, r)

    if field is r and field is not l:
        # The quantity was the right operand, e.g. ``3 < nelements``. The operands
        # are swapped for the query, so the operator has to be mirrored as well.
        mirrored = {'>': '<', '>=': '<=', '<': '>', '<=': '>='}
        o = mirrored.get(o, o)

    return self._query(field, o, value)
def list_op(self, args):
    '''
    Describe a HAS expression as a plain dictionary.

    Arguments:
        args: Either ``[subject, qualifier, operand]`` or ``[subject, operand]``.

    Returns: A dict with ``op='HAS'`` and the two operands under ``ops``; the
        ``qualifier`` key is only present for three arguments.
    '''
    if len(args) != 3:
        return {'op': 'HAS', 'ops': [args[0], args[1]]}

    return {'op': 'HAS', 'qualifier': args[1], 'ops': [args[0], args[2]]}
def has_op(self, args):
    '''
    Create the query for a HAS expression with a single predicate tuple.

    Arguments:
        args: The quantity tuple and the predicate tuple.

    Returns: The result of :func:`_has_query` for both.
    '''
    quantity_tuple, predicate_tuple = args
    return self._has_query(quantity_tuple, predicate_tuple)
def _has_query(self, quantities, predicates):
    '''
    Create the query for one tuple of quantities and the matching predicates.

    Arguments:
        quantities: The quantity definitions of the left-hand tuple.
        predicates: One ``(operator, value)`` pair per quantity.

    Returns: The plain :func:`_query` result for a single quantity, otherwise a
        ``nested`` query on ``elements_ratios`` where all predicates must match.

    Raises:
        Exception: If the tuple lengths differ, or a tuple uses quantities other
            than ``elements`` and ``elements_ratios``.
    '''
    if len(quantities) != len(predicates):
        # Render names and predicates as text first: joining the quantity
        # objects and predicate pairs directly raises a TypeError instead.
        quantities_str = ':'.join(str(quantity.name) for quantity in quantities)
        predicates_str = ':'.join(str(o) + str(value) for o, value in predicates)
        raise Exception(
            'Tuple length does not match: %s <o> %s ' % (quantities_str, predicates_str))

    if len(quantities) == 1:
        o, value = predicates[0]
        return self._query(quantities[0], o, value)

    if any(quantity.name not in ['elements', 'elements_ratios'] for quantity in quantities):
        raise Exception('Expression with tuples are only supported for elements and elements_ratios')

    queries = [
        self._query(field, o, value, nested='elements_ratios')
        for field, (o, value) in zip(quantities, predicates)]

    return Q('nested', path='elements_ratios', query=dict(bool=dict(must=queries)))
def has_list_op(self, args):
    '''
    Create the query for a HAS ALL / HAS ANY expression.

    Arguments:
        args: The quantity tuple, the qualifier, and the list of predicate tuples.

    Returns: A ``bool`` query that combines one :func:`_has_query` result per
        predicate tuple under the clause configured in ``has_operators``.

    Raises:
        Exception: If the qualifier is not a key of ``has_operators``.
    '''
    quantities, qualifier, predicates_list = args

    # The partial queries are built before the qualifier is checked.
    partial_queries = []
    for predicates in predicates_list:
        partial_queries.append(self._has_query(quantities, predicates))

    if qualifier not in ESTransformer.has_operators:
        raise Exception('Unknown operator %s' % qualifier)

    clause = ESTransformer.has_operators[qualifier]
    return Q('bool', **{clause: partial_queries})
def has_only_op(self, args):
    '''
    Create the query for an ``elements HAS ONLY ...`` expression.

    The element symbols are sorted by atomic number and concatenated; the result
    is matched as a ``term`` on ``only_atoms``.

    Arguments:
        args: The quantity and the list of predicate tuples, each of which has to
            hold a single operator-less string.

    Returns: The ``term`` query on ``only_atoms``.

    Raises:
        Exception: If the quantity is not ``elements``, a predicate is a tuple,
            has an operator, is not a string, or is not a chemical symbol.
    '''
    quantity, lst = args

    if quantity.name != 'elements':
        raise Exception('HAS ONLY is only supported for elements')

    # Validate all predicates before any symbol is looked up.
    symbols = []
    for predicates in lst:
        if len(predicates) != 1:
            raise Exception('Tuples not supported in HAS ONLY')
        op, value = predicates[0]
        if op != '':
            raise Exception('Predicates not supported in HAS ONLY')
        if not isinstance(value, str):
            raise Exception('Only strings supported in HAS ONLY')
        symbols.append(value)

    try:
        order_numbers = sorted(ase.data.atomic_numbers[symbol] for symbol in symbols)
    except KeyError as e:
        raise Exception('Not a chemical symbol: %s' % str(e)) from e

    value = ''.join(ase.data.chemical_symbols[number] for number in order_numbers)
    return Q('term', only_atoms=value)
def length(self, args):
    '''
    Resolve a LENGTH expression to the quantity that is queried instead.

    Arguments:
        args: A single-element sequence with the quantity LENGTH is applied to.

    Returns: The quantity definition of :class:`OptimadeStructureEntry` that
        ``length_quantities`` configures for the given quantity.

    Raises:
        Exception: If LENGTH is not configured for the quantity.
    '''
    (quantity,) = args

    if quantity.name not in ESTransformer.length_quantities:
        raise Exception('LENGTH is not supported for %s' % quantity.name)

    length_quantity_name = ESTransformer.length_quantities[quantity.name]
    return OptimadeStructureEntry.m_section.quantities[length_quantity_name]
def known_op(self, args):
    '''
    Create the query for ``IS KNOWN`` and ``IS UNKNOWN`` expressions.

    Arguments:
        args: The quantity and the qualifier (``'KNOWN'`` or ``'UNKNOWN'``).

    Returns: An ``exists`` query on the quantity's field, negated for ``UNKNOWN``.

    Raises:
        NotImplementedError: For any other qualifier.
    '''
    # The stale ``return dict(op='KNOWN', ...)`` made everything below unreachable.
    quantity, qualifier = args
    query = Q('exists', field=self._field(quantity))

    if qualifier == 'KNOWN':
        return query
    elif qualifier == 'UNKNOWN':
        return ~query  # pylint: disable=invalid-unary-operand-type

    raise NotImplementedError
def _wildcard_query(self, quantity, wildcard):
    '''
    Create a ``wildcard`` query on the field of the given quantity.

    Arguments:
        quantity: The quantity whose field is searched.
        wildcard: The wildcard pattern to match.
    '''
    field = self._field(quantity)
    return Q('wildcard', **{field: {'value': wildcard}})
def contains_op(self, args):
    '''
    Create the wildcard query for a CONTAINS expression.

    Arguments:
        args: The quantity and the value that has to occur anywhere.
    '''
    quantity, value = args
    pattern = '*%s*' % value
    return self._wildcard_query(quantity, pattern)
def starts_op(self, args):
    '''
    Create the wildcard query for a STARTS (WITH) expression.

    Arguments:
        args: The quantity and the value that has to occur at the start.
    '''
    quantity, value = args
    pattern = '%s*' % value
    return self._wildcard_query(quantity, pattern)
def ends_op(self, args):
    '''
    Create the wildcard query for an ENDS (WITH) expression.

    Arguments:
        args: The quantity and the value that has to occur at the end.
    '''
    quantity, value = args
    pattern = '*%s' % value
    return self._wildcard_query(quantity, pattern)
# Returns the given children as a new list (presumably the callback for the
# grammar's ``list`` rule; verify against the grammar).
# NOTE: as a method of ESTransformer, the name ``list`` inside the body still
# resolves to the builtin, because class-level names are not visible from within
# method bodies. Moving this function to module level would make it call itself.
def list(self, args):
return list(args)
def quantity_tuple(self, args):
    '''
    Return the quantities of a quantity tuple as a list.

    The stale, body-less ``def tuple(self, args):`` header that preceded this
    method is removed.
    '''
    return list(args)
def predicate_tuple(self, args):
    '''Return the predicates of a predicate tuple as a new list.'''
    return [*args]
def predicate(self, args):
    '''
    Normalize a predicate to an ``(operator, value)`` pair.

    Arguments:
        args: Either ``[value]`` or ``[operator, value]``.

    Returns: ``('', value)`` if no operator is given, ``(operator, value)``
        otherwise. This is the shape that the HAS handlers unpack.
    '''
    # The stale ``return args[0]`` / ``return dict(pred=..., op=...)`` lines are
    # removed; they returned a different shape and left a dangling ``else``.
    if len(args) == 1:
        return '', args[0]
    else:
        return args[0], args[1]
def quantity(self, args):
quantity_name = args[0]
......@@ -71,27 +206,34 @@ class ESTransformer(Transformer):
if quantity_def is None:
raise Exception('%s is not a known quantity' % quantity_name)
return quantity_def.name
elastic_annotation = quantity_def.m_annotations.get('elastic', None)
if elastic_annotation is None:
raise Exception('%s is not supported in queries' % quantity_name)
def literal(self, args):
literal = args[0]
return quantity_def
try:
int(literal)
except Exception:
pass
def int_literal(self, args):
    '''Convert the first child of an integer literal to ``int``.'''
    token = args[0]
    return int(token)
try:
float(literal)
except Exception:
pass
def float_literal(self, args):
    '''Convert the first child of a float literal to ``float``.'''
    token = args[0]
    return float(token)
return literal.strip('"')
def string_literal(self, args):
    '''Return the first child of a string literal without surrounding ``"``.'''
    token = args[0]
    return token.strip('"')
def test_optimade_parser(published: Upload):
    '''
    Parse a filter expression, transform it to an ES query, and run the search.

    The stale pre-commit lines (the fixture-less def header, the one-line filter,
    and the JSON printing of the transformer result) are removed; they were
    shadowed by, or contradicted, the statements kept here.
    '''
    p = LarkParser(version=(0, 10, 0))
    tree = p.parse('''
        LENGTH elements > 2 AND
        elements:elements_ratios HAS ALL "H":>0.66,"H":<0.67 AND
        elements:elements_ratios:elements_ratios HAS ALL "O":>0.33:<0.34 AND
        (chemical_formula_reduced IS UNKNOWN OR chemical_formula_reduced CONTAINS "H2") AND
        elements HAS ONLY "O", "H" AND
        LENGTH dimension_types = 0
    ''')
    transformer = ESTransformer()
    query = transformer.transform(tree)

    result = SearchRequest(query=query).execute_paginated()
    print(result)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment