From 0fc2acf1a6710ba4bd5e135097f2bf212724b206 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Fri, 27 Nov 2020 14:43:55 +0100 Subject: [PATCH] Added catalog endpoint to dcat. --- nomad/app/dcat/__init__.py | 1 + nomad/app/dcat/catalog.py | 79 ++++++++++++++++++++++++++++++++++++++ nomad/app/dcat/mapping.py | 6 +++ tests/app/test_dcat.py | 18 +++++++++ 4 files changed, 104 insertions(+) create mode 100644 nomad/app/dcat/catalog.py diff --git a/nomad/app/dcat/__init__.py b/nomad/app/dcat/__init__.py index 77cc4f59c4..fe92905faa 100644 --- a/nomad/app/dcat/__init__.py +++ b/nomad/app/dcat/__init__.py @@ -21,3 +21,4 @@ from flask_restplus import Api from .api import blueprint, api from .datasets import Dataset +from .catalog import Catalog diff --git a/nomad/app/dcat/catalog.py b/nomad/app/dcat/catalog.py new file mode 100644 index 0000000000..54c7d7ceb1 --- /dev/null +++ b/nomad/app/dcat/catalog.py @@ -0,0 +1,79 @@ +# Copyright 2018 Markus Scheidgen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from flask_restplus import Resource, reqparse, fields +from flask import Response +from elasticsearch_dsl import Q + +from nomad import search + +from .api import api +from .mapping import Mapping + +ns = api.namespace('catalog', description='The API for DCAT catalog.') + +iso8601 = fields.DateTime(dt_format='iso8601') + +arg_parser = reqparse.RequestParser() +arg_parser.add_argument('format', type=str, choices=[ + 'xml', + 'n3', + 'turtle', + 'nt', + 'pretty-xml', + 'trig']) +arg_parser.add_argument('after', type=str) +arg_parser.add_argument( + 'modified_since', type=lambda x: iso8601.parse(x), + help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)') + + +@ns.route('/') +class Catalog(Resource): + @api.doc('get_dcat_datasets') + @api.expect(arg_parser) + @api.produces(['application/xml']) + @api.response(404, 'There is no entry with the given id.') + @api.response(401, 'This entry is not publically accessible.') + @api.response(200, 'Data send', headers={'Content-Type': 'application/xml'}) + def get(self): + ''' Returns a page of DCAT datasets. 
''' + + args = arg_parser.parse_args() + format_ = args.get('format') + if format_ is None: + format_ = 'xml' + + modified_since = args.get('modified_since', None) + modified_since = iso8601.parse(modified_since) if modified_since is not None else None + after = args.get('after', None) + + search_request = search.SearchRequest().owner('public') + if modified_since is not None: + modified_clause = Q('range', upload_time=dict(gte=modified_since)) + modified_clause |= Q('range', last_edit=dict(gte=modified_since)) + modified_clause |= Q('range', last_processing=dict(gte=modified_since)) + search_request.q &= modified_clause + + es_search = search_request._search.query(search_request.q) + if after is not None: + es_search = es_search.extra(search_after=[after], sort='calc_id') + es_response = es_search.execute() + + mapping = Mapping() + mapping.map_catalog(es_response.hits) + + return Response( + mapping.g.serialize(format=format_).decode('utf-8'), 200, + {'Content-Type': 'application/xml'}) diff --git a/nomad/app/dcat/mapping.py b/nomad/app/dcat/mapping.py index 5af1141423..0151b75224 100644 --- a/nomad/app/dcat/mapping.py +++ b/nomad/app/dcat/mapping.py @@ -36,6 +36,12 @@ class Mapping(): self.persons = {} + def map_catalog(self, entries): + catalog = URIRef(url('catalog')) + self.g.add((catalog, RDF.type, DCAT.Catalog)) + for entry in entries: + self.g.add((catalog, DCT.dataset, self.map_entry(entry))) + def map_entry(self, entry: EntryMetadata): dataset = URIRef(url('datasets', entry.calc_id)) diff --git a/tests/app/test_dcat.py b/tests/app/test_dcat.py index 6afa483fe3..abdc406d05 100644 --- a/tests/app/test_dcat.py +++ b/tests/app/test_dcat.py @@ -15,6 +15,7 @@ import pytest from datetime import datetime +from nomad import infrastructure, config from nomad.datamodel import EntryMetadata from nomad.app.dcat.mapping import Mapping @@ -59,3 +60,20 @@ def test_get_dataset(elastic_infra, api, example_entry): assert rv.status_code == 200 clear_elastic(elastic_infra) + 
+ +def test_get_catalog(elastic_infra, api, example_entry): + clear_elastic(elastic_infra) + + for i in range(1, 11): + example_entry.calc_id = 'test-id-%d' % i + example_entry.upload_time = datetime(2000, 1, 1) + example_entry.last_processing = datetime(2020, 1, i) + example_entry.a_elastic.index() + + infrastructure.elastic_client.indices.refresh(index=config.elastic.index_name) + + rv = api.get('/catalog/?after=test-id-3&modified_since=2020-01-07&format=nt') + assert rv.status_code == 200 + + clear_elastic(elastic_infra) -- GitLab