Commit 0fb3e3e3 authored by Markus Scheidgen
Browse files

Refactored dcat arg parsing. Hydra is now sibling of catalog.

parent e67118fd
Pipeline #90843 passed with stages
in 29 minutes and 53 seconds
# Copyright 2018 Markus Scheidgen #
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#
from flask import Blueprint from flask import Blueprint, Response
from flask_restplus import Api from flask_restplus import Api, reqparse
import urllib.parse import urllib.parse
from rdflib import Graph
from nomad import config from nomad import config
blueprint = Blueprint('dcat', __name__) blueprint = Blueprint('dcat', __name__)
base_url = 'https://%s/%s/dcat' % ( base_url = config.api_url(api='dcat')
config.services.api_host.strip('/'),
config.services.api_base_path.strip('/'))
def url(*args, **kwargs): def url(*args, **kwargs):
''' Returns the full dcat api url for the given path (args) and query (kwargs) parameters. ''' ''' Returns the full dcat api url for the given path (args) and query (kwargs) parameters. '''
url = base_url + '/' + '/'.join(args) url = f'{base_url.rstrip("/")}/{"/".join(args).lstrip("/")}'
if len(kwargs) > 0: if len(kwargs) > 0:
return '%s?%s' % (url, urllib.parse.urlencode(kwargs)) return f'{url}?{urllib.parse.urlencode(kwargs)}'
else: else:
return url return url
...@@ -48,3 +51,24 @@ api = Api( ...@@ -48,3 +51,24 @@ api = Api(
def errorhandler(error): def errorhandler(error):
'''When an internal server error is caused by an unexpected exception.''' '''When an internal server error is caused by an unexpected exception.'''
return str(error) return str(error)
# Request parser for the `format` query argument; the catalog and datasets
# modules import it from here. Only the serialization names listed in
# `choices` are accepted.
arg_parser = reqparse.RequestParser()
arg_parser.add_argument(
    'format',
    type=str,
    choices=['xml', 'n3', 'turtle', 'nt', 'pretty-xml', 'trig'],
)
def rdf_respose(g: Graph) -> Response:
    '''
    Serializes the given graph into an HTTP response.

    The serialization is chosen by the ``format`` request argument (parsed with
    ``arg_parser``); ``pretty-xml`` is used when the argument is absent.

    Arguments:
        g: The rdflib graph to serialize.

    Returns:
        A flask ``Response`` with status 200 whose body is the serialized graph
        and whose ``Content-Type`` is ``application/xml`` for the XML formats and
        ``text/<format>`` for all others.
    '''
    # NOTE: the misspelled function name is kept because other modules import it.
    args = arg_parser.parse_args()
    format_ = args.get('format')
    if format_ is None:
        format_ = 'pretty-xml'

    # Compare the parsed value (format_), not the builtin `format`; the builtin
    # is never in the list, so XML output was never labelled application/xml.
    if format_ in ('xml', 'pretty-xml'):
        content_type = 'application/xml'
    else:
        content_type = 'text/%s' % format_

    # Depending on the rdflib version, serialize() yields bytes or str.
    body = g.serialize(format=format_)
    if isinstance(body, bytes):
        body = body.decode('utf-8')

    return Response(body, 200, {'Content-Type': content_type})
# Copyright 2018 Markus Scheidgen #
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#
from flask_restplus import Resource, reqparse, fields from flask_restplus import Resource, fields
from flask import Response
from elasticsearch_dsl import Q from elasticsearch_dsl import Q
from nomad import search from nomad import search
from .api import api from .api import api, arg_parser, rdf_respose
from .mapping import Mapping from .mapping import Mapping
ns = api.namespace('catalog', description='The API for DCAT catalog.') ns = api.namespace('catalog', description='The API for DCAT catalog.')
iso8601 = fields.DateTime(dt_format='iso8601') iso8601 = fields.DateTime(dt_format='iso8601')
arg_parser = reqparse.RequestParser() arg_parser = arg_parser.copy()
arg_parser.add_argument('format', type=str, choices=[
'xml',
'n3',
'turtle',
'nt',
'pretty-xml',
'trig'])
arg_parser.add_argument('after', type=str) arg_parser.add_argument('after', type=str)
arg_parser.add_argument( arg_parser.add_argument(
'modified_since', type=lambda x: iso8601.parse(x), 'modified_since', type=lambda x: iso8601.parse(x),
...@@ -51,13 +47,10 @@ class Catalog(Resource): ...@@ -51,13 +47,10 @@ class Catalog(Resource):
''' Returns a page of DCAT datasets. ''' ''' Returns a page of DCAT datasets. '''
args = arg_parser.parse_args() args = arg_parser.parse_args()
format_ = args.get('format')
if format_ is None:
format_ = 'xml'
modified_since = args.get('modified_since', None) modified_since = args.get('modified_since', None)
modified_since = iso8601.parse(modified_since) if modified_since is not None else None after = args.get('after', '')
after = args.get('after', None) if after is None:
after = ''
search_request = search.SearchRequest().owner('public') search_request = search.SearchRequest().owner('public')
if modified_since is not None: if modified_since is not None:
...@@ -67,14 +60,10 @@ class Catalog(Resource): ...@@ -67,14 +60,10 @@ class Catalog(Resource):
search_request.q &= modified_clause search_request.q &= modified_clause
es_search = search_request._search.query(search_request.q) es_search = search_request._search.query(search_request.q)
if after is not None: if after is not '':
es_search = es_search.extra(search_after=[after], sort='calc_id') es_search = es_search.extra(search_after=[after], sort='calc_id')
es_response = es_search.execute() es_response = es_search.execute()
mapping = Mapping() mapping = Mapping()
mapping.map_catalog(es_response.hits) mapping.map_catalog(es_response.hits, after, modified_since)
content_type = 'application/xml' if format_ == 'xml' else 'text/%s' % format_ return rdf_respose(mapping.g)
return Response(
mapping.g.serialize(format=format_).decode('utf-8'), 200,
{'Content-Type': content_type})
# Copyright 2018 Markus Scheidgen #
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#
from flask_restplus import Resource, abort, reqparse from flask_restplus import Resource, abort
from flask import Response
from elasticsearch.exceptions import NotFoundError from elasticsearch.exceptions import NotFoundError
from nomad import search from nomad import search
from .api import api from .api import api, arg_parser, rdf_respose
from .mapping import Mapping from .mapping import Mapping
ns = api.namespace('datasets', description='The API for DCAT datasets.') ns = api.namespace('datasets', description='The API for DCAT datasets.')
arg_parser = reqparse.RequestParser()
arg_parser.add_argument('format', type=str, choices=[
'xml',
'n3',
'turtle',
'nt',
'pretty-xml',
'trig'])
@ns.route('/<string:entry_id>') @ns.route('/<string:entry_id>')
class Dataset(Resource): class Dataset(Resource):
@api.doc('get_dcat_dataset') @api.doc('get_dcat_dataset')
...@@ -44,11 +36,6 @@ class Dataset(Resource): ...@@ -44,11 +36,6 @@ class Dataset(Resource):
@api.response(200, 'Data send', headers={'Content-Type': 'application/xml'}) @api.response(200, 'Data send', headers={'Content-Type': 'application/xml'})
def get(self, entry_id): def get(self, entry_id):
''' Returns a DCAT dataset for a given NOMAD entry id. ''' ''' Returns a DCAT dataset for a given NOMAD entry id. '''
format_ = arg_parser.parse_args().get('format')
if format_ is None:
format_ = 'xml'
try: try:
entry = search.entry_document.get(entry_id) entry = search.entry_document.get(entry_id)
except NotFoundError: except NotFoundError:
...@@ -59,7 +46,4 @@ class Dataset(Resource): ...@@ -59,7 +46,4 @@ class Dataset(Resource):
mapping = Mapping() mapping = Mapping()
mapping.map_entry(entry) mapping.map_entry(entry)
content_type = 'application/xml' if format_ == 'xml' else 'text/%s' % format_ return rdf_respose(mapping.g)
return Response(
mapping.g.serialize(format=format_).decode('utf-8'), 200,
{'Content-Type': content_type})
# Copyright 2020 Markus Scheidgen #
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#
from rdflib import Graph, Literal, RDF, URIRef, BNode from rdflib import Graph, Literal, RDF, URIRef, BNode
from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF
from nomad import config from nomad import config
from nomad.datamodel import User from nomad.datamodel import User
...@@ -22,7 +26,6 @@ from nomad.datamodel import EntryMetadata, User ...@@ -22,7 +26,6 @@ from nomad.datamodel import EntryMetadata, User
from .api import url from .api import url
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#') VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#') HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
...@@ -37,6 +40,7 @@ def get_optional_entry_prop(entry, name): ...@@ -37,6 +40,7 @@ def get_optional_entry_prop(entry, name):
class Mapping(): class Mapping():
def __init__(self): def __init__(self):
self.g = Graph() self.g = Graph()
self.g.namespace_manager.bind('rdf', RDF)
self.g.namespace_manager.bind('dcat', DCAT) self.g.namespace_manager.bind('dcat', DCAT)
self.g.namespace_manager.bind('dct', DCT) self.g.namespace_manager.bind('dct', DCT)
self.g.namespace_manager.bind('vcard', VCARD) self.g.namespace_manager.bind('vcard', VCARD)
...@@ -45,25 +49,32 @@ class Mapping(): ...@@ -45,25 +49,32 @@ class Mapping():
self.persons = {} self.persons = {}
def map_catalog(self, entries): def map_catalog(self, entries, after: str, modified_since):
catalog = URIRef(url('catalog')) def uri_ref(after):
kwargs = dict()
if after is not None:
kwargs['after'] = after
if modified_since is not None:
kwargs['modified_since'] = modified_since.strftime('%Y-%m-%d')
return URIRef(url('catalog', **kwargs))
after = after.strip()
catalog = uri_ref(after=None)
self.g.add((catalog, RDF.type, DCAT.Catalog)) self.g.add((catalog, RDF.type, DCAT.Catalog))
last_entry = None last_entry = None
for entry in entries: for entry in entries:
self.g.add((catalog, DCT.dataset, self.map_entry(entry, slim=True))) self.g.add((catalog, DCT.dataset, self.map_entry(entry, slim=True)))
last_entry = entry last_entry = entry
hydra_collection = BNode() hydra_collection = uri_ref(after)
self.g.add((hydra_collection, RDF.type, HYDRA.Collection)) self.g.add((hydra_collection, RDF.type, HYDRA.Collection))
self.g.add((hydra_collection, HYDRA.totalItems, Literal(entries.total))) self.g.add((hydra_collection, HYDRA.totalItems, Literal(entries.total)))
self.g.add(( self.g.add((hydra_collection, HYDRA.first, uri_ref('')))
hydra_collection, HYDRA.first,
URIRef('%s/catalog' % config.api_url(api='dcat'), last_entry.calc_id)))
if last_entry is not None: if last_entry is not None:
next_url = '%s/catalog?after=%s' % (config.api_url(api='dcat'), last_entry.calc_id) self.g.add((hydra_collection, HYDRA.next, uri_ref(last_entry.calc_id)))
self.g.add((hydra_collection, HYDRA.next, URIRef(next_url)))
self.g.add((catalog, HYDRA.collection, hydra_collection)) self.g.add((hydra_collection, RDF.type, HYDRA.collection))
def map_entry(self, entry: EntryMetadata, slim=False): def map_entry(self, entry: EntryMetadata, slim=False):
dataset = URIRef(url('datasets', entry.calc_id)) dataset = URIRef(url('datasets', entry.calc_id))
......
# Copyright 2018 Markus Scheidgen #
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#
import pytest import pytest
from datetime import datetime from datetime import datetime
...@@ -62,7 +66,11 @@ def test_get_dataset(elastic_infra, api, example_entry): ...@@ -62,7 +66,11 @@ def test_get_dataset(elastic_infra, api, example_entry):
clear_elastic(elastic_infra) clear_elastic(elastic_infra)
def test_get_catalog(elastic_infra, api, example_entry): @pytest.mark.parametrize('after,modified_since', [
(None, None),
(None, '2020-01-07'),
('test-id-3', '2020-01-07')])
def test_get_catalog(elastic_infra, api, example_entry, after, modified_since):
clear_elastic(elastic_infra) clear_elastic(elastic_infra)
for i in range(1, 11): for i in range(1, 11):
...@@ -73,7 +81,12 @@ def test_get_catalog(elastic_infra, api, example_entry): ...@@ -73,7 +81,12 @@ def test_get_catalog(elastic_infra, api, example_entry):
infrastructure.elastic_client.indices.refresh(index=config.elastic.index_name) infrastructure.elastic_client.indices.refresh(index=config.elastic.index_name)
rv = api.get('/catalog/?after=test-id-3&modified_since=2020-01-07&format=nt') url = '/catalog/?format=turtle'
if after:
url += '&after=' + after
if modified_since:
url += '&modified_since=' + modified_since
rv = api.get(url)
assert rv.status_code == 200 assert rv.status_code == 200
clear_elastic(elastic_infra) clear_elastic(elastic_infra)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment