Commit 0fb3e3e3 authored by Markus Scheidgen
Browse files

Refactored dcat arg parsing. Hydra is now sibling of catalog.

parent e67118fd
Pipeline #90843 passed with stages
in 29 minutes and 53 seconds
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -7,30 +10,30 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask import Blueprint
from flask_restplus import Api
from flask import Blueprint, Response
from flask_restplus import Api, reqparse
import urllib.parse
from rdflib import Graph
from nomad import config
blueprint = Blueprint('dcat', __name__)
base_url = 'https://%s/%s/dcat' % (
config.services.api_host.strip('/'),
config.services.api_base_path.strip('/'))
base_url = config.api_url(api='dcat')
def url(*args, **kwargs):
    '''
    Returns the full dcat api url for the given path (args) and query (kwargs) parameters.

    Arguments:
        *args: Path segments. They are joined with '/' and appended to ``base_url``.
        **kwargs: Query parameters. If any are given, they are url-encoded and
            appended to the url after a '?'.

    Returns: The url as a string.
    '''
    # Strip slashes on both sides of the seam, so that exactly one '/' separates
    # the base url from the path, regardless of how either side is written.
    path = '/'.join(args).lstrip('/')
    full_url = f'{base_url.rstrip("/")}/{path}'

    if not kwargs:
        return full_url

    return f'{full_url}?{urllib.parse.urlencode(kwargs)}'
......@@ -48,3 +51,24 @@ api = Api(
def errorhandler(error):
'''When an internal server error is caused by an unexpected exception.'''
return str(error)
# Request parser shared by the dcat endpoints. The optional 'format' argument
# selects the RDF serialization of the response and is restricted to the
# choices listed here.
arg_parser = reqparse.RequestParser()
arg_parser.add_argument(
    'format',
    type=str,
    choices=['xml', 'n3', 'turtle', 'nt', 'pretty-xml', 'trig'])
def rdf_respose(g: Graph) -> Response:
    '''
    Serializes the given RDF graph into a flask response.

    The serialization format is read from the ``format`` request argument via
    ``arg_parser``. If the request does not specify a format, ``pretty-xml`` is used.

    Arguments:
        g: The graph to serialize.

    Returns: A response with status 200, the serialized graph as body, and a
        ``Content-Type`` header of ``application/xml`` for the xml formats and
        ``text/<format>`` for all other formats.
    '''
    args = arg_parser.parse_args()
    format_ = args.get('format')
    if format_ is None:
        format_ = 'pretty-xml'

    # This has to test the parsed ``format_``. Testing the ``format`` builtin
    # instead never matches and labels xml output as 'text/xml' or 'text/pretty-xml'.
    if format_ in ('xml', 'pretty-xml'):
        content_type = 'application/xml'
    else:
        content_type = 'text/%s' % format_

    body = g.serialize(format=format_)
    # Depending on the rdflib version, serialize returns bytes or an already
    # decoded str; only decode when necessary.
    if isinstance(body, bytes):
        body = body.decode('utf-8')

    return Response(body, 200, {'Content-Type': content_type})
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -7,32 +10,25 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask_restplus import Resource, reqparse, fields
from flask import Response
from flask_restplus import Resource, fields
from elasticsearch_dsl import Q
from nomad import search
from .api import api
from .api import api, arg_parser, rdf_respose
from .mapping import Mapping
ns = api.namespace('catalog', description='The API for DCAT catalog.')
iso8601 = fields.DateTime(dt_format='iso8601')
arg_parser = reqparse.RequestParser()
arg_parser.add_argument('format', type=str, choices=[
'xml',
'n3',
'turtle',
'nt',
'pretty-xml',
'trig'])
arg_parser = arg_parser.copy()
arg_parser.add_argument('after', type=str)
arg_parser.add_argument(
'modified_since', type=lambda x: iso8601.parse(x),
......@@ -51,13 +47,10 @@ class Catalog(Resource):
''' Returns a page of DCAT datasets. '''
args = arg_parser.parse_args()
format_ = args.get('format')
if format_ is None:
format_ = 'xml'
modified_since = args.get('modified_since', None)
modified_since = iso8601.parse(modified_since) if modified_since is not None else None
after = args.get('after', None)
after = args.get('after', '')
if after is None:
after = ''
search_request = search.SearchRequest().owner('public')
if modified_since is not None:
......@@ -67,14 +60,10 @@ class Catalog(Resource):
search_request.q &= modified_clause
es_search = search_request._search.query(search_request.q)
if after is not None:
if after is not '':
es_search = es_search.extra(search_after=[after], sort='calc_id')
es_response = es_search.execute()
mapping = Mapping()
mapping.map_catalog(es_response.hits)
content_type = 'application/xml' if format_ == 'xml' else 'text/%s' % format_
return Response(
mapping.g.serialize(format=format_).decode('utf-8'), 200,
{'Content-Type': content_type})
mapping.map_catalog(es_response.hits, after, modified_since)
return rdf_respose(mapping.g)
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -7,33 +10,22 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from flask_restplus import Resource, abort, reqparse
from flask import Response
#
from flask_restplus import Resource, abort
from elasticsearch.exceptions import NotFoundError
from nomad import search
from .api import api
from .api import api, arg_parser, rdf_respose
from .mapping import Mapping
ns = api.namespace('datasets', description='The API for DCAT datasets.')
arg_parser = reqparse.RequestParser()
arg_parser.add_argument('format', type=str, choices=[
'xml',
'n3',
'turtle',
'nt',
'pretty-xml',
'trig'])
@ns.route('/<string:entry_id>')
class Dataset(Resource):
@api.doc('get_dcat_dataset')
......@@ -44,11 +36,6 @@ class Dataset(Resource):
@api.response(200, 'Data send', headers={'Content-Type': 'application/xml'})
def get(self, entry_id):
''' Returns a DCAT dataset for a given NOMAD entry id. '''
format_ = arg_parser.parse_args().get('format')
if format_ is None:
format_ = 'xml'
try:
entry = search.entry_document.get(entry_id)
except NotFoundError:
......@@ -59,7 +46,4 @@ class Dataset(Resource):
mapping = Mapping()
mapping.map_entry(entry)
content_type = 'application/xml' if format_ == 'xml' else 'text/%s' % format_
return Response(
mapping.g.serialize(format=format_).decode('utf-8'), 200,
{'Content-Type': content_type})
return rdf_respose(mapping.g)
# Copyright 2020 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -7,13 +10,14 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from rdflib import Graph, Literal, RDF, URIRef, BNode
from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF
from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF
from nomad import config
from nomad.datamodel import User
......@@ -22,7 +26,6 @@ from nomad.datamodel import EntryMetadata, User
from .api import url
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
......@@ -37,6 +40,7 @@ def get_optional_entry_prop(entry, name):
class Mapping():
def __init__(self):
self.g = Graph()
self.g.namespace_manager.bind('rdf', RDF)
self.g.namespace_manager.bind('dcat', DCAT)
self.g.namespace_manager.bind('dct', DCT)
self.g.namespace_manager.bind('vcard', VCARD)
......@@ -45,25 +49,32 @@ class Mapping():
self.persons = {}
def map_catalog(self, entries):
catalog = URIRef(url('catalog'))
def map_catalog(self, entries, after: str, modified_since):
def uri_ref(after):
kwargs = dict()
if after is not None:
kwargs['after'] = after
if modified_since is not None:
kwargs['modified_since'] = modified_since.strftime('%Y-%m-%d')
return URIRef(url('catalog', **kwargs))
after = after.strip()
catalog = uri_ref(after=None)
self.g.add((catalog, RDF.type, DCAT.Catalog))
last_entry = None
for entry in entries:
self.g.add((catalog, DCT.dataset, self.map_entry(entry, slim=True)))
last_entry = entry
hydra_collection = BNode()
hydra_collection = uri_ref(after)
self.g.add((hydra_collection, RDF.type, HYDRA.Collection))
self.g.add((hydra_collection, HYDRA.totalItems, Literal(entries.total)))
self.g.add((
hydra_collection, HYDRA.first,
URIRef('%s/catalog' % config.api_url(api='dcat'), last_entry.calc_id)))
self.g.add((hydra_collection, HYDRA.first, uri_ref('')))
if last_entry is not None:
next_url = '%s/catalog?after=%s' % (config.api_url(api='dcat'), last_entry.calc_id)
self.g.add((hydra_collection, HYDRA.next, URIRef(next_url)))
self.g.add((hydra_collection, HYDRA.next, uri_ref(last_entry.calc_id)))
self.g.add((catalog, HYDRA.collection, hydra_collection))
self.g.add((hydra_collection, RDF.type, HYDRA.collection))
def map_entry(self, entry: EntryMetadata, slim=False):
dataset = URIRef(url('datasets', entry.calc_id))
......
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -7,10 +10,11 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from datetime import datetime
......@@ -62,7 +66,11 @@ def test_get_dataset(elastic_infra, api, example_entry):
clear_elastic(elastic_infra)
def test_get_catalog(elastic_infra, api, example_entry):
@pytest.mark.parametrize('after,modified_since', [
(None, None),
(None, '2020-01-07'),
('test-id-3', '2020-01-07')])
def test_get_catalog(elastic_infra, api, example_entry, after, modified_since):
clear_elastic(elastic_infra)
for i in range(1, 11):
......@@ -73,7 +81,12 @@ def test_get_catalog(elastic_infra, api, example_entry):
infrastructure.elastic_client.indices.refresh(index=config.elastic.index_name)
rv = api.get('/catalog/?after=test-id-3&modified_since=2020-01-07&format=nt')
url = '/catalog/?format=turtle'
if after:
url += '&after=' + after
if modified_since:
url += '&modified_since=' + modified_since
rv = api.get(url)
assert rv.status_code == 200
clear_elastic(elastic_infra)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment