Commit db4cb204 authored by Markus Scheidgen
Browse files

Added _nmd_ properties to optimade. #325 #362

parent f6443458
Pipeline #76918 passed with stages
in 20 minutes and 38 seconds
......@@ -198,7 +198,8 @@ def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str
try:
optimade = args.get('dft.optimade', None)
if optimade is not None:
q = filterparser.parse_filter(optimade)
q = filterparser.parse_filter(
optimade, nomad_properties=domain, without_prefix=True)
search_request.query(q)
except filterparser.FilterException as e:
abort(400, 'Could not parse optimade query: %s' % (str(e)))
......
......@@ -22,10 +22,10 @@ from nomad.datamodel import OptimadeEntry
from .api import api, url, base_request_args
from .models import json_api_single_response_model, entry_listing_endpoint_parser, Meta, \
Links as LinksModel, CalculationDataObject, single_entry_endpoint_parser, base_endpoint_parser, \
json_api_info_response_model, json_api_list_response_model, StructureObject, \
ToplevelLinks, \
json_api_structure_response_model, json_api_structures_response_model
Links as LinksModel, single_entry_endpoint_parser, base_endpoint_parser, \
json_api_info_response_model, json_api_list_response_model, EntryDataObject, \
ToplevelLinks, get_entry_properties, json_api_structure_response_model, \
json_api_structures_response_model
from .filterparser import parse_filter, FilterException
ns = api.namespace('v0', description='The version v0 API namespace with all OPTiMaDe endpoints.')
......@@ -100,7 +100,7 @@ class CalculationList(Resource):
available = result['pagination']['total']
results = to_calc_with_metadata(result['results'])
assert len(results) == len(result['results']), 'Mongodb and elasticsearch are not consistent'
assert len(results) == len(result['results']), 'archive and elasticsearch are not consistent'
return dict(
meta=Meta(
......@@ -114,7 +114,7 @@ class CalculationList(Resource):
page_number=page_number,
page_limit=page_limit,
sort=sort, filter=filter),
data=[CalculationDataObject(d, request_fields=request_fields) for d in results]
data=[EntryDataObject(d, request_fields=request_fields) for d in results]
), 200
......@@ -143,7 +143,7 @@ class Calculation(Resource):
return dict(
meta=Meta(query=request.url, returned=1),
data=CalculationDataObject(results[0], request_fields=request_fields)
data=EntryDataObject(results[0], request_fields=request_fields)
), 200
......@@ -159,9 +159,7 @@ class CalculationInfo(Resource):
result = {
'description': 'a calculation entry',
'properties': {
attr.name: dict(description=attr.description)
for attr in OptimadeEntry.m_def.all_properties.values()},
'properties': get_entry_properties(),
'formats': ['json'],
'output_fields_by_format': {
'json': list(OptimadeEntry.m_def.all_properties.keys())}
......@@ -343,7 +341,7 @@ class StructureList(Resource):
page_limit=page_limit,
sort=sort, filter=filter
),
data=[StructureObject(d, request_fields) for d in results]
data=[EntryDataObject(d, request_fields) for d in results]
), 200
......@@ -372,7 +370,7 @@ class Structure(Resource):
return dict(
meta=Meta(query=request.url, returned=1),
data=StructureObject(results[0], request_fields=request_fields)
data=EntryDataObject(results[0], request_fields=request_fields)
), 200
......@@ -388,9 +386,7 @@ class StructuresInfo(Resource):
result = {
'description': 'a structure entry',
'properties': {
attr.name: dict(description=attr.description)
for attr in OptimadeEntry.m_def.all_properties.values()},
'properties': get_entry_properties(),
'formats': ['json'],
'output_fields_by_format': {
'json': list(OptimadeEntry.m_def.all_properties.keys())}
......
......@@ -14,50 +14,75 @@
from typing import Dict
from elasticsearch_dsl import Q
from cachetools import cached
from optimade.filterparser import LarkParser
from optimade.filtertransformers.elasticsearch import ElasticTransformer, Quantity
from nomad.search import search_quantities
_parser = LarkParser(version=(0, 10, 1))
class FilterException(Exception):
    ''' Raised on parsing a filter expression with syntactic or semantic errors. '''
_quantities: Dict[str, Quantity] = None
_parser = LarkParser(version=(0, 10, 1))
_transformer = None
@cached(cache={})
def _get_transformer(nomad_properties, without_prefix):
    '''
    Creates the elasticsearch transformer for the given filter configuration.

    The result is memoized per ``(nomad_properties, without_prefix)`` combination
    through the ``cached`` decorator.

    Arguments:
        nomad_properties: Name of the domain (e.g. 'dft') whose search quantities are
            registered as additional filter properties. With None, only the
            searchable OptimadeEntry quantities are available.
        without_prefix: If true, each nomad property is registered under its plain
            name in addition to its '_nmd_' prefixed name.

    Returns:
        An ElasticTransformer that knows all registered quantities.
    '''
    from nomad.datamodel import OptimadeEntry

    quantities: Dict[str, Quantity] = {
        q.name: Quantity(
            q.name, es_field='dft.optimade.%s' % q.name,
            elastic_mapping_type=q.a_search.mapping.__class__)

        for q in OptimadeEntry.m_def.all_quantities.values()
        if 'search' in q.m_annotations}

    quantities['elements'].length_quantity = quantities['nelements']
    quantities['dimension_types'].length_quantity = quantities['dimension_types']
    quantities['elements'].has_only_quantity = Quantity(name='only_atoms')
    quantities['elements'].nested_quantity = quantities['elements_ratios']
    quantities['elements_ratios'].nested_quantity = quantities['elements_ratios']

    if nomad_properties is not None:
        # Match on the domain name *including* the separating dot. A bare
        # startswith(domain) would also accept names of other sections that merely
        # share the prefix and would then cut off one character too many.
        domain_prefix = nomad_properties + '.'

        for search_quantity in search_quantities.values():
            name = search_quantity.name
            if '.' in name:
                if not name.startswith(domain_prefix):
                    # qualified quantity of a different section/domain
                    continue
                name = name[len(domain_prefix):]

            names = ['_nmd_' + name]
            if without_prefix:
                names.append(name)

            for name in names:
                # never override an already registered (e.g. optimade) quantity
                if name not in quantities:
                    quantities[name] = Quantity(
                        name,
                        es_field=search_quantity.search_field,
                        elastic_mapping_type=search_quantity.mapping.__class__)

    return ElasticTransformer(quantities=quantities.values())
def parse_filter(filter_str: str) -> Q:
def parse_filter(filter_str: str, nomad_properties='dft', without_prefix=False) -> Q:
''' Parses the given optimade filter str and returns a suitable elastic search query.
Arguments:
filter_str: Can be direct user input with no prior processing.
nomad_properties: Also include the nomad proprietary properties of the given domain.
without_prefix: Do not prefix the nomad proprietary properties with _nmd_.
Raises:
FilterException: If the given str cannot be parsed, or if there are any semantic
errors in the given expression.
'''
global _quantities
global _transformer
if _quantities is None:
from nomad.datamodel import OptimadeEntry
_quantities = {
q.name: Quantity(
q.name, es_field='dft.optimade.%s' % q.name,
elastic_mapping_type=q.a_search.mapping.__class__)
for q in OptimadeEntry.m_def.all_quantities.values()
if 'search' in q.m_annotations}
_quantities['elements'].length_quantity = _quantities['nelements']
_quantities['dimension_types'].length_quantity = _quantities['dimension_types']
_quantities['elements'].has_only_quantity = Quantity(name='only_atoms')
_quantities['elements'].nested_quantity = _quantities['elements_ratios']
_quantities['elements_ratios'].nested_quantity = _quantities['elements_ratios']
_transformer = ElasticTransformer(quantities=_quantities.values())
transformer = _get_transformer(nomad_properties, without_prefix)
try:
parse_tree = _parser.parse(filter_str)
......@@ -65,7 +90,7 @@ def parse_filter(filter_str: str) -> Q:
raise FilterException('Syntax error: %s' % str(e))
try:
query = _transformer.transform(parse_tree)
query = transformer.transform(parse_tree)
except Exception as e:
raise FilterException('Semantic error: %s' % str(e))
......
......@@ -20,10 +20,13 @@ from typing import Set
from flask_restplus import fields
import datetime
import math
from cachetools import cached
from nomad import config
from nomad.app.common import RFC3339DateTime
from nomad.datamodel import EntryMetadata
from nomad.datamodel.dft import DFTMetadata
from nomad.datamodel.optimade import OptimadeEntry
from .api import api, url
......@@ -267,7 +270,24 @@ json_api_resource_model = api.model('Resource', {
})
class CalculationDataObject:
@cached({})
def get_entry_properties():
    '''
    Collects the descriptions of all properties that entries expose.

    The result contains every OptimadeEntry property under its own name, every
    EntryMetadata quantity prefixed with '_nmd_', and every DFTMetadata quantity
    prefixed with '_nmd_dft_'. The function is wrapped in ``cached``.

    Returns:
        A dict that maps each property name to ``dict(description=...)``.
    '''
    result = {}
    for prop in OptimadeEntry.m_def.all_properties.values():
        result[prop.name] = dict(description=prop.description)

    nmd_sections = (
        ('_nmd_', EntryMetadata),
        ('_nmd_dft_', DFTMetadata))

    for prefix, section_cls in nmd_sections:
        for quantity in section_cls.m_def.all_quantities.values():
            result[prefix + quantity.name] = dict(description=quantity.description)

    return result
class EntryDataObject:
def __init__(self, calc: EntryMetadata, request_fields: Set[str] = None):
def include(key):
......@@ -280,20 +300,21 @@ class CalculationDataObject:
attrs['immutable_id'] = calc.calc_id
attrs['last_modified'] = calc.last_processing if calc.last_processing is not None else calc.upload_time
self.type = 'calculation'
self.id = calc.calc_id
self.attributes = attrs
if request_fields is not None:
for request_field in request_fields:
if not request_field.startswith('_nmd_'):
continue
class StructureObject:
def __init__(self, calc: EntryMetadata, request_fields: Set[str] = None):
optimade_quantities = calc.dft.optimade.m_to_dict()
try:
if request_field.startswith('_nmd_dft_'):
attrs[request_field] = getattr(calc.dft, request_field[9:])
else:
attrs[request_field] = getattr(calc, request_field[5:])
except AttributeError:
# if unknown properties were provided, we will ignore them
pass
attrs = {key: val for key, val in optimade_quantities.items() if request_fields is None or key in request_fields}
attrs['immutable_id'] = calc.calc_id
attrs['last_modified'] = calc.last_processing if calc.last_processing is not None else calc.upload_time
self.type = 'structure'
self.type = 'calculation'
self.id = calc.calc_id
self.attributes = attrs
......
......@@ -224,13 +224,17 @@ def test_base_info_endpoint(api):
assert data['data']['id'] == '/'
def test_calculation_info_endpoint(api):
rv = api.get('/info/calculations')
@pytest.mark.parametrize('entry_type', ['calculations', 'structures'])
def test_entry_info_endpoint(api, entry_type):
    ''' The info endpoint of each entry type lists the required keys and nmd properties. '''
    response = api.get('/info/%s' % entry_type)
    assert response.status_code == 200

    info = json.loads(response.data)['data']
    required_keys = ('description', 'properties', 'formats', 'output_fields_by_format')
    missing_keys = [key for key in required_keys if key not in info]
    assert not missing_keys

    properties = info['properties']
    assert '_nmd_atoms' in properties
    assert '_nmd_dft_system' in properties
# TODO the implementation should be fixed to return actual references first
# def test_references_endpoint(api, example_structures):
......@@ -277,3 +281,15 @@ def test_structure_endpoint(api, example_structures):
assert attr is not None
assert attr.get('elements') == ['H', 'O']
assert len(attr.get('dimension_types')) == 3
def test_nmd_properties(api, example_structures):
    ''' Requested _nmd_ fields are returned as attributes, unknown ones are left out. '''
    response = api.get(
        '/structures/%s' % 'test_calc_id_1?request_fields=_nmd_atoms,_nmd_dft_system,_nmd_doesnotexist')
    assert response.status_code == 200

    payload = json.loads(response.data)
    entry = payload.get('data')
    assert entry is not None

    attributes = entry.get('attributes')
    assert attributes is not None
    assert attributes.get('_nmd_atoms') == ['H', 'O']
    assert '_nmd_dft_system' in attributes
    assert '_nmd_doesnotexist' not in attributes
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment