From c728cf4b77146e9209d199d6d5ea7f89e26c7613 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus@dhcp-46-238.physik.hu-berlin.de>
Date: Fri, 27 Sep 2019 10:48:30 +0200
Subject: [PATCH] Completed optimade filterparser and search tests.

---
 dependencies/optimade-python-tools |   2 +-
 nomad/app/optimade/__init__.py     |   2 +
 nomad/app/optimade/filterparser.py | 261 ++++++++++++++++++++++
 nomad/metainfo/optimade.py         |  10 +-
 nomad/normalizing/optimade.py      |   3 +-
 tests/app/test_optimade.py         | 335 ++++++++++-------------------
 tests/conftest.py                  |  11 +
 tests/test_parsing.py              |  11 -
 8 files changed, 393 insertions(+), 242 deletions(-)
 create mode 100644 nomad/app/optimade/filterparser.py

diff --git a/dependencies/optimade-python-tools b/dependencies/optimade-python-tools
index 47394ea099..58d6b6ea63 160000
--- a/dependencies/optimade-python-tools
+++ b/dependencies/optimade-python-tools
@@ -1 +1 @@
-Subproject commit 47394ea099e95d8ee19f1f1a0b0f6d26aea33036
+Subproject commit 58d6b6ea63a4758f719c73466a749eae2cd012e6
diff --git a/nomad/app/optimade/__init__.py b/nomad/app/optimade/__init__.py
index 945faf3e73..84d670088c 100644
--- a/nomad/app/optimade/__init__.py
+++ b/nomad/app/optimade/__init__.py
@@ -15,6 +15,8 @@
 from flask import Blueprint
 from flask_restplus import Api
 
+from .filterparser import parse_filter
+
 """
 The optimade implementation of NOMAD.
 """
diff --git a/nomad/app/optimade/filterparser.py b/nomad/app/optimade/filterparser.py
new file mode 100644
index 0000000000..50e18cbb46
--- /dev/null
+++ b/nomad/app/optimade/filterparser.py
@@ -0,0 +1,261 @@
+# Copyright 2018 Markus Scheidgen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from optimade.filterparser import LarkParser
+import lark
+from elasticsearch_dsl import Q, Text, Keyword, Integer
+import ase.data
+
+from nomad.metainfo.optimade import OptimadeStructureEntry
+from nomad.metainfo import Quantity
+
+
+class FilterException(Exception):
+    """ Raised on parsing a filter expression with syntactic or semantic errors. """
+    pass
+
+
+_cmp_operators = {'>': 'gt', '>=': 'gte', '<': 'lt', '<=': 'lte'}  # filter operator -> key used in the 'range' query
+_rev_cmp_operators = {'>': '<', '>=': '<=', '<': '>', '<=': '>='}  # operator to use once the operands are swapped
+_has_operators = {'ALL': 'must', 'ANY': 'should'}  # HAS qualifier -> clause of the 'bool' query
+_length_quantities = {'elements': 'nelements', 'elements_ratios': 'nelements', 'dimension_types': 'dimension_types'}
+
+
+class Transformer(lark.Transformer):
+    """ Transformer for the Lark parser generator used for the filterparser.
+
+    It translates the parse tree into an elastic search query.
+    """
+
+    def _field(self, quantity, nested=None):
+        optimade_field_name = quantity.name
+        if nested is not None:
+            optimade_field_name = '%s.%s' % (nested, optimade_field_name)
+        return 'optimade.%s' % optimade_field_name
+
+    def _order_terms(self, l, o, r):  # returns (quantity, operator, value), quantity always first
+        if isinstance(l, Quantity):
+            if isinstance(r, Quantity):
+                raise Exception('Cannot compare two quantities: %s, %s' % (l.name, r.name))
+
+            return l, o, r
+        else:
+            if isinstance(r, Quantity):
+                o = _rev_cmp_operators.get(o, o)  # operands get swapped, so mirror the operator
+                return r, o, l
+
+            raise Exception('Cannot compare two values: %s, %s' % (str(l), str(r)))
+
+    def _query(self, quantity, o, value, nested=None):
+        field = self._field(quantity, nested=nested)
+        if o in _cmp_operators:
+            return Q('range', **{field: {_cmp_operators[o]: value}})
+
+        elastic_annotation = quantity.m_annotations.get('elastic', None)
+        if elastic_annotation['type'] == Text:
+            query_type = 'match'
+        elif elastic_annotation['type'] in [Keyword, Integer]:
+            query_type = 'term'
+        else:
+            raise NotImplementedError('Quantity has unsupported ES field type')
+
+        if o in ['=', '']:
+            return Q(query_type, **{field: value})
+
+        if o == '!=':
+            return ~Q(query_type, **{field: value})  # pylint: disable=invalid-unary-operand-type
+
+        raise Exception('Unknown operator %s' % o)
+
+    def _has_query(self, quantities, predicates):
+        if len(quantities) != len(predicates):
+            raise Exception(
+                'Tuple length does not match: %s <o> %s ' %
+                (':'.join(q.name for q in quantities), ':'.join(str(p) for p in predicates)))
+
+        if len(quantities) == 1:  # single quantity: each predicate is an (operator, value) pair
+            o, value = predicates[0]
+            return self._query(quantities[0], o, value)
+
+        if any(quantity.name not in ['elements', 'elements_ratios'] for quantity in quantities):
+            raise Exception('Expression with tuples are only supported for elements and elements_ratios')
+
+        queries = [
+            self._query(field, o, value, nested='elements_ratios')
+            for field, (o, value) in zip(quantities, predicates)]
+
+        return Q('nested', path='optimade.elements_ratios', query=dict(bool=dict(must=queries)))
+
+    def _wildcard_query(self, quantity, wildcard):
+        return Q('wildcard', **{self._field(quantity): wildcard})
+
+    def __default__(self, tree, children, *args, **kwargs):
+        """ Default behavior for rules that only replace one symbol with another """
+        return children[0]
+
+    def and_expr(self, args):
+        if len(args) == 1:
+            return args[0]
+        l, r = args
+        return l & r
+
+    def or_expr(self, args):
+        if len(args) == 1:
+            return args[0]
+        l, r = args
+        return l | r
+
+    def not_expr(self, args):
+        o, = args
+        return ~o
+
+    def cmp_op(self, args):
+        l, o, r = args
+        field, o, value = self._order_terms(l, o, r)
+        return self._query(field, o, value)
+
+    def has_op(self, args):
+        quantities, predicates = args
+        return self._has_query(quantities, predicates)
+
+    def has_list_op(self, args):
+        quantities, o, predicates_list = args
+        queries = [
+            self._has_query(quantities, predicates)
+            for predicates in predicates_list]
+
+        if o in _has_operators:
+            return Q('bool', **{_has_operators[o]: queries})
+
+        raise Exception('Unknown operator %s' % o)
+
+    def has_only_op(self, args):
+        quantity, lst = args
+
+        if quantity.name != 'elements':
+            raise Exception('HAS ONLY is only supported for elements')
+
+        def values():
+            for predicates in lst:
+                if len(predicates) != 1:
+                    raise Exception('Tuples not supported in HAS ONLY')
+                op, value = predicates[0]
+                if op != '':
+                    raise Exception('Predicated not supported in HAS ONLY')
+                if not isinstance(value, str):
+                    raise Exception('Only strings supported in HAS ONLY')
+                yield value
+
+        try:
+            order_numbers = list([ase.data.atomic_numbers[element] for element in values()])
+            order_numbers.sort()
+            value = ''.join([ase.data.chemical_symbols[number] for number in order_numbers])
+        except KeyError as e:
+            raise Exception('Not a chemical symbol: %s' % str(e))
+
+        return Q('term', only_atoms=value)
+
+    def length(self, args):
+        quantity, = args
+        if quantity.name not in _length_quantities:
+            raise Exception('LENGTH is not supported for %s' % quantity.name)
+
+        return OptimadeStructureEntry.m_section.quantities[_length_quantities[quantity.name]]
+
+    def known_op(self, args):
+        quantity, qualifier = args
+        query = Q('exists', field=self._field(quantity))
+        if qualifier == 'KNOWN':
+            return query
+        elif qualifier == 'UNKNOWN':
+            return ~query  # pylint: disable=invalid-unary-operand-type
+
+        raise NotImplementedError
+
+    def contains_op(self, args):
+        quantity, value = args
+        return self._wildcard_query(quantity, '*%s*' % value)
+
+    def starts_op(self, args):
+        quantity, value = args
+        return self._wildcard_query(quantity, '%s*' % value)
+
+    def ends_op(self, args):
+        quantity, value = args
+        return self._wildcard_query(quantity, '*%s' % value)
+
+    def list(self, args):
+        return list(args)
+
+    def quantity_tuple(self, args):
+        return list(args)
+
+    def predicate_tuple(self, args):
+        return list(args)
+
+    def predicate(self, args):
+        if len(args) == 1:
+            return '', args[0]
+        else:
+            return args[0], args[1]
+
+    def quantity(self, args):
+        quantity_name = args[0]
+        quantity_def = OptimadeStructureEntry.m_section.quantities.get(quantity_name, None)
+
+        if quantity_def is None:
+            raise Exception('%s is not a known quantity' % quantity_name)
+
+        elastic_annotation = quantity_def.m_annotations.get('elastic', None)
+        if elastic_annotation is None:
+            raise Exception('%s is not supported in queries' % quantity_name)
+
+        return quantity_def
+
+    def int_literal(self, args):
+        return int(args[0])
+
+    def float_literal(self, args):
+        return float(args[0])
+
+    def string_literal(self, args):
+        return args[0].strip('"')
+
+
+_parser = LarkParser(version=(0, 10, 0))
+_transformer = Transformer()
+
+
+def parse_filter(filter_str: str) -> Q:
+    """ Parses the given optimade filter str and returns a suitable elastic search query.
+
+    Arguments:
+        filter_str: Can be direct user input with no prior processing.
+
+    Raises:
+        FilterException: If the given str cannot be parsed, or if there are any semantic
+            errors in the given expression.
+    """
+
+    try:
+        parse_tree = _parser.parse(filter_str)
+    except Exception as e:
+        raise FilterException('Syntax error: %s' % str(e))
+
+    try:
+        query = _transformer.transform(parse_tree)
+    except Exception as e:
+        raise FilterException('Semantic error: %s' % str(e))
+
+    return query
diff --git a/nomad/metainfo/optimade.py b/nomad/metainfo/optimade.py
index d928bed510..3d849f60c3 100644
--- a/nomad/metainfo/optimade.py
+++ b/nomad/metainfo/optimade.py
@@ -1,5 +1,5 @@
 from ase.data import chemical_symbols
-from elasticsearch_dsl import Keyword, Integer, Float, Text, InnerDoc, Nested
+from elasticsearch_dsl import Keyword, Integer, Float, InnerDoc, Nested
 import numpy as np
 
 from nomad.metainfo import MObject, Section, Quantity, Enum, units
@@ -63,7 +63,7 @@ class OptimadeStructureEntry(MObject):
     chemical_formula_descriptive = Quantity(
         type=str,
         links=optimade_links('h.6.2.4'),
-        a_elastic=dict(type=Text),
+        a_elastic=dict(type=Keyword),
         a_optimade=Optimade(query=True, entry=True),
         description='''
             The chemical formula for a structure as a string in a form chosen by the API
@@ -73,7 +73,7 @@ class OptimadeStructureEntry(MObject):
     chemical_formula_reduced = Quantity(
         type=str,
         links=optimade_links('h.6.2.5'),
-        a_elastic=dict(type=Text),
+        a_elastic=dict(type=Keyword),
         a_optimade=Optimade(query=True, entry=True),
         description='''
             The reduced chemical formula for a structure as a string with element symbols and
@@ -83,7 +83,7 @@ class OptimadeStructureEntry(MObject):
     chemical_formula_hill = Quantity(
         type=str,
         links=optimade_links('h.6.2.6'),
-        a_elastic=dict(type=Text),
+        a_elastic=dict(type=Keyword),
         a_optimade=Optimade(query=True, entry=False),
         description='''
             The chemical formula for a structure in Hill form with element symbols followed by
@@ -93,7 +93,7 @@ class OptimadeStructureEntry(MObject):
     chemical_formula_anonymous = Quantity(
         type=str,
         links=optimade_links('h.6.2.7'),
-        a_elastic=dict(type=Text),
+        a_elastic=dict(type=Keyword),
         a_optimade=Optimade(query=True, entry=True),
         description='''
             The anonymous formula is the chemical_formula_reduced, but where the elements are
diff --git a/nomad/normalizing/optimade.py b/nomad/normalizing/optimade.py
index ae41bc9c09..7594b2bd73 100644
--- a/nomad/normalizing/optimade.py
+++ b/nomad/normalizing/optimade.py
@@ -56,6 +56,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
 
         # elements
         atoms = normalized_atom_labels(nomad_species)
+        atom_count = len(atoms)
         atom_counts: Dict[str, int] = {}
         for atom in atoms:
             current = atom_counts.setdefault(atom, 0)
@@ -66,7 +67,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
         optimade.elements.sort()
         optimade.nelements = len(optimade.elements)
         optimade.elements_ratios = [
-            atom_counts[element] / optimade.nelements
+            atom_counts[element] / atom_count
             for element in optimade.elements]
 
         # formulas
diff --git a/tests/app/test_optimade.py b/tests/app/test_optimade.py
index b183e06c9c..0f551ec733 100644
--- a/tests/app/test_optimade.py
+++ b/tests/app/test_optimade.py
@@ -1,13 +1,31 @@
+# Copyright 2018 Markus Scheidgen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
 import json
-from optimade.filterparser import LarkParser
-from lark import Transformer
-from elasticsearch_dsl import Q, Text, Keyword, Integer
-import ase.data
+import numpy as np
+import pytest
 
 from nomad.processing import Upload
-from nomad.search import SearchRequest
-from nomad.metainfo.optimade import OptimadeStructureEntry
-from nomad.metainfo import Quantity
+from nomad import search
+from nomad.parsing import LocalBackend
+from nomad.datamodel import CalcWithMetadata
+
+from nomad.app.optimade import parse_filter
+
+from tests.test_normalizing import run_normalize
+from tests.conftest import clear_elastic
 
 
 def test_get_entry(published: Upload):
@@ -17,223 +35,92 @@ def test_get_entry(published: Upload):
         data = json.load(f)
 
     assert 'OptimadeStructureEntry' in data
-    search_result = SearchRequest().search_parameter('calc_id', calc_id).execute_paginated()['results'][0]
+    search_result = search.SearchRequest().search_parameter('calc_id', calc_id).execute_paginated()['results'][0]
     assert 'optimade' in search_result
 
 
-class ESTransformer(Transformer):
-
-    cmp_operators = {'>': 'gt', '>=': 'gte', '<': 'lt', '<=': 'lte'}
-    has_operators = {'ALL': 'must', 'ANY': 'should'}
-    length_quantities = {'elements': 'nelements', 'elements_rations': 'nelements', 'dimension_types': 'dimension_types'}
-
-    def _field(self, quantity, nested=None):
-        optimade_field_name = quantity
-        if nested is not None:
-            optimade_field_name = '%s.%s' % (nested, optimade_field_name)
-        return 'optimade.%s' % optimade_field_name
-
-    def _order_terms(self, l, r):
-        if isinstance(l, Quantity):
-            if isinstance(r, Quantity):
-                raise Exception('Cannot compare two quantities: %s, %s' % (l.name, r.name))
-            else:
-                return l, r
-        else:
-            if isinstance(r, Quantity):
-                return r, l
-            else:
-                raise Exception('Cannot compare two values: %s, %s' % (str(l), str(l)))
-
-    def __default__(self, tree, children, *args, **kwargs):
-        return children[0]
-
-    def and_expr(self, args):
-        if len(args) == 1:
-            return args[0]
-        l, r = args
-        return l & r
-
-    def or_expr(self, args):
-        if len(args) == 1:
-            return args[0]
-        l, r = args
-        return l | r
-
-    def not_expr(self, args):
-        if len(args) == 1:
-            return args[0]
-        o, = args
-        return ~o
-
-    def _query(self, quantity, o, value, nested=None):
-        field = self._field(quantity, nested=nested)
-        if o in ESTransformer.cmp_operators:
-            return Q('range', **{field: {ESTransformer.cmp_operators[o]: value}})
-
-        elastic_annotation = quantity.m_annotations.get('elastic', None)
-        if elastic_annotation['type'] == Text:
-            query_type = 'match'
-        elif elastic_annotation['type'] in [Keyword, Integer]:
-            query_type = 'term'
-        else:
-            raise NotImplementedError('Quantity has unsupported ES field type')
-
-        if o in ['=', '']:
-            return Q(query_type, **{field: value})
-
-        if o == '!=':
-            return ~Q(query_type, **{field: value})  # pylint: disable=invalid-unary-operand-type
-
-        raise Exception('Unknown operator %s' % o)
-
-    def cmp_op(self, args):
-        l, o, r = args
-        field, value = self._order_terms(l, r)
-        return self._query(field, o, value)
-
-    def has_op(self, args):
-        quantities, predicates = args
-        return self._has_query(quantities, predicates)
-
-    def _has_query(self, quantities, predicates):
-        if len(quantities) != len(predicates):
-            raise Exception(
-                'Tuple length does not match: %s <o> %s ' %
-                (':'.join(quantities), ':'.join(predicates)))
-
-        if len(quantities) == 1:
-            o, value = predicates[0]
-            return self._query(quantities[0], o, value)
-
-        if any(quantity.name not in ['elements', 'elements_ratios'] for quantity in quantities):
-            raise Exception('Expression with tuples are only supported for elements and elements_positions')
-
-        queries = [
-            self._query(field, o, value, nested='elements_ratios')
-            for field, (o, value) in zip(quantities, predicates)]
-
-        return Q('nested', path='elements_ratios', query=dict(bool=dict(must=queries)))
-
-    def has_list_op(self, args):
-        quantities, o, predicates_list = args
-        queries = [
-            self._has_query(quantities, predicates)
-            for predicates in predicates_list]
-
-        if o in ESTransformer.has_operators:
-            return Q('bool', **{ESTransformer.has_operators[o]: queries})
-
-        raise Exception('Unknown operator %s' % o)
-
-    def has_only_op(self, args):
-        quantity, lst = args
-
-        if quantity.name != 'elements':
-            raise Exception('HAS ONLY is only supported for elements')
-
-        def values():
-            for predicates in lst:
-                if len(predicates) != 1:
-                    raise Exception('Tuples not supported in HAS ONLY')
-                op, value = predicates[0]
-                if op != '':
-                    raise Exception('Predicated not supported in HAS ONLY')
-                if not isinstance(value, str):
-                    raise Exception('Only strings supported in HAS ONLY')
-                yield value
-
-        try:
-            order_numbers = list([ase.data.atomic_numbers[element] for element in values()])
-            order_numbers.sort()
-            value = ''.join([ase.data.chemical_symbols[number] for number in order_numbers])
-        except KeyError as e:
-            raise Exception('Not a chemical symbol: %s' % str(e))
-
-        return Q('term', only_atoms=value)
-
-    def length(self, args):
-        quantity, = args
-        if quantity.name not in ESTransformer.length_quantities:
-            raise Exception('LENGTH is not supported for %s' % quantity.name)
-
-        return OptimadeStructureEntry.m_section.quantities[ESTransformer.length_quantities[quantity.name]]
-
-    def known_op(self, args):
-        quantity, qualifier = args
-        query = Q('exists', field=self._field(quantity))
-        if qualifier == 'KNOWN':
-            return query
-        elif qualifier == 'UNKNOWN':
-            return ~query  # pylint: disable=invalid-unary-operand-type
-
-        raise NotImplementedError
-
-    def _wildcard_query(self, quantity, wildcard):
-        return Q('wildcard', **{self._field(quantity): dict(value=wildcard)})
-
-    def contains_op(self, args):
-        quantity, value = args
-        return self._wildcard_query(quantity, '*%s*' % value)
-
-    def starts_op(self, args):
-        quantity, value = args
-        return self._wildcard_query(quantity, '%s*' % value)
-
-    def ends_op(self, args):
-        quantity, value = args
-        return self._wildcard_query(quantity, '*%s' % value)
-
-    def list(self, args):
-        return list(args)
-
-    def quantity_tuple(self, args):
-        return list(args)
-
-    def predicate_tuple(self, args):
-        return list(args)
-
-    def predicate(self, args):
-        if len(args) == 1:
-            return '', args[0]
-        else:
-            return args[0], args[1]
-
-    def quantity(self, args):
-        quantity_name = args[0]
-        quantity_def = OptimadeStructureEntry.m_section.quantities.get(quantity_name, None)
-
-        if quantity_def is None:
-            raise Exception('%s is not a known quantity' % quantity_name)
-
-        elastic_annotation = quantity_def.m_annotations.get('elastic', None)
-        if elastic_annotation is None:
-            raise Exception('%s is not supported in queries' % quantity_name)
-
-        return quantity_def
-
-    def int_literal(self, args):
-        return int(args[0])
-
-    def float_literal(self, args):
-        return float(args[0])
-
-    def string_literal(self, args):
-        return args[0].strip('"')
-
-
-def test_optimade_parser(published: Upload):
-    p = LarkParser(version=(0, 10, 0))
-    tree = p.parse('''
-        LENGTH elements > 2 AND
-        elements:elements_ratios HAS ALL "H":>0.66,"H":<0.67 AND
-        elements:elements_ratios:elements_ratios HAS ALL "O":>0.33:<0.34 AND
-        (chemical_formula_reduced IS UNKNOWN OR chemical_formula_reduced CONTAINS "H2") AND
-        elements HAS ONLY "O", "H" AND
-        LENGTH dimension_types = 0
-    ''')
-    transformer = ESTransformer()
-    query = transformer.transform(tree)
-
-    result = SearchRequest(query=query).execute_paginated()
-    print(result)
+def create_test_structure(meta_info, id: int, h: int, o: int, extra: List[str], periodicity: int):
+    atom_labels = ['H' for i in range(0, h)] + ['O' for i in range(0, o)] + extra
+    test_vector = np.array([0, 0, 0])
+
+    backend = LocalBackend(meta_info, False, True)  # type: ignore
+    backend.openSection('section_run')
+    backend.addValue('program_name', 'test_code')
+    backend.openSection('section_system')
+
+    backend.addArrayValues('atom_labels', np.array(atom_labels))
+    backend.addArrayValues(
+        'atom_positions', np.array([test_vector for i in range(0, len(atom_labels))]))
+    backend.addArrayValues(
+        'lattice_vectors', np.array([test_vector, test_vector, test_vector]))
+    backend.addArrayValues(
+        'configuration_periodic_dimensions',
+        np.array([True for _ in range(0, periodicity)] + [False for _ in range(periodicity, 3)]))
+
+    backend.closeSection('section_system', 0)
+    backend.closeSection('section_run', 0)
+
+    backend = run_normalize(backend)
+    calc = CalcWithMetadata(
+        upload_id='test_uload_id', calc_id='test_calc_id_%d' % id, mainfile='test_mainfile')
+    calc.apply_domain_metadata(backend)
+    search.Entry.from_calc_with_metadata(calc).save()
+
+
+@pytest.fixture(scope='module')
+def example_structures(meta_info, elastic_infra):
+    clear_elastic(elastic_infra)
+    create_test_structure(meta_info, 1, 2, 1, [], 0)
+    create_test_structure(meta_info, 2, 2, 1, ['C'], 0)
+    create_test_structure(meta_info, 3, 2, 1, [], 1)
+    create_test_structure(meta_info, 4, 1, 1, [], 0)
+    search.refresh()
+
+    yield
+    clear_elastic(elastic_infra)
+
+
+@pytest.mark.parametrize('query, results', [
+    ('nelements > 1', 4),
+    ('nelements >= 2', 4),
+    ('nelements > 2', 1),
+    ('nelements < 4', 4),
+    ('nelements < 3', 3),
+    ('nelements <= 3', 4),
+    ('nelements != 2', 1),
+    ('1 < nelements', 4),
+    ('elements HAS "H"', 4),
+    ('elements HAS ALL "H", "O"', 4),
+    ('elements HAS ALL "H", "C"', 1),
+    ('elements HAS ANY "H", "C"', 4),
+    ('elements HAS ANY "C"', 1),
+    ('elements HAS ONLY "C"', 0),
+    ('elements HAS ONLY "H", "O"', 3),
+    ('elements:elements_ratios HAS "H":>0.66', 2),
+    ('elements:elements_ratios HAS ALL "O":>0.33', 3),
+    ('elements:elements_ratios HAS ALL "O":>0.33,"O":<0.34', 2),
+    ('elements IS KNOWN', 4),
+    ('elements IS UNKNOWN', 0),
+    ('chemical_formula_reduced = "H2O"', 2),
+    ('chemical_formula_reduced CONTAINS "H2"', 3),
+    ('chemical_formula_reduced CONTAINS "H"', 4),
+    ('chemical_formula_reduced CONTAINS "C"', 1),
+    ('chemical_formula_reduced STARTS "H2"', 3),
+    ('chemical_formula_reduced STARTS WITH "H2"', 3),
+    ('chemical_formula_reduced ENDS WITH "C"', 1),
+    ('chemical_formula_reduced ENDS "C"', 1),
+    ('LENGTH elements = 2', 3),
+    ('LENGTH elements = 3', 1),
+    ('LENGTH dimension_types = 0', 3),
+    ('LENGTH dimension_types = 1', 1),
+    ('nelements = 2 AND LENGTH dimension_types = 1', 1),
+    ('nelements = 3 AND LENGTH dimension_types = 1', 0),
+    ('nelements = 3 OR LENGTH dimension_types = 1', 2),
+    ('nelements > 1 OR LENGTH dimension_types = 1 AND nelements = 2', 4),
+    ('(nelements > 1 OR LENGTH dimension_types = 1) AND nelements = 2', 3),
+    ('NOT LENGTH dimension_types = 1', 3)
+])
+def test_optimade_parser(example_structures, query, results):
+    query = parse_filter(query)
+    result = search.SearchRequest(query=query).execute_paginated()
+    assert result['pagination']['total'] == results
diff --git a/tests/conftest.py b/tests/conftest.py
index 4af92210b6..a5781ea80e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -30,6 +30,9 @@ import base64
 from bravado.client import SwaggerClient
 import elasticsearch.exceptions
 
+from nomadcore.local_meta_info import loadJsonFile
+import nomad_meta_info
+
 from nomad import config, infrastructure, parsing, processing, coe_repo, app
 
 from tests import test_parsing, test_normalizing
@@ -281,6 +284,14 @@ def postgres(postgres_infra):
     yield postgres_infra
 
 
+@pytest.fixture(scope='session')
+def meta_info():
+    file_dir = os.path.dirname(os.path.abspath(nomad_meta_info.__file__))
+    path = os.path.join(file_dir, 'all.nomadmetainfo.json')
+    meta_info, _ = loadJsonFile(path)
+    return meta_info
+
+
 @pytest.fixture(scope='module')
 def test_user(postgres_infra):
     from nomad import coe_repo
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 021ce32391..93f24fe8f0 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -18,9 +18,6 @@ import numpy as np
 import pytest
 import os
 
-from nomadcore.local_meta_info import loadJsonFile
-import nomad_meta_info
-
 from nomad import utils, files
 from nomad.parsing import JSONStreamWriter, parser_dict, match_parser, BrokenParser
 from nomad.parsing import LocalBackend, BadContextURI
@@ -82,13 +79,6 @@ correct_num_output_files = 43
 
 class TestLocalBackend(object):
 
-    @pytest.fixture(scope='session')
-    def meta_info(self):
-        file_dir = os.path.dirname(os.path.abspath(nomad_meta_info.__file__))
-        path = os.path.join(file_dir, 'all.nomadmetainfo.json')
-        meta_info, _ = loadJsonFile(path)
-        return meta_info
-
     @pytest.fixture(scope='function')
     def backend(self, meta_info):
         return LocalBackend(meta_info, debug=True)
@@ -298,7 +288,6 @@ def parsed_template_example() -> LocalBackend:
         'parsers/template', 'tests/data/parsers/template.json')
 
 
-# Function used by normalizer tests.
 def parse_file(parser_name_and_mainfile) -> LocalBackend:
     parser_name, mainfile = parser_name_and_mainfile
     return run_parser(parser_name, mainfile)
-- 
GitLab