From 0fc2acf1a6710ba4bd5e135097f2bf212724b206 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Fri, 27 Nov 2020 14:43:55 +0100
Subject: [PATCH] Added catalog endpoint to dcat.

---
 nomad/app/dcat/__init__.py |  1 +
 nomad/app/dcat/catalog.py  | 79 ++++++++++++++++++++++++++++++++++++++
 nomad/app/dcat/mapping.py  |  6 +++
 tests/app/test_dcat.py     | 18 +++++++++
 4 files changed, 104 insertions(+)
 create mode 100644 nomad/app/dcat/catalog.py

diff --git a/nomad/app/dcat/__init__.py b/nomad/app/dcat/__init__.py
index 77cc4f59c4..fe92905faa 100644
--- a/nomad/app/dcat/__init__.py
+++ b/nomad/app/dcat/__init__.py
@@ -21,3 +21,4 @@ from flask_restplus import Api
 
 from .api import blueprint, api
 from .datasets import Dataset
+from .catalog import Catalog
diff --git a/nomad/app/dcat/catalog.py b/nomad/app/dcat/catalog.py
new file mode 100644
index 0000000000..54c7d7ceb1
--- /dev/null
+++ b/nomad/app/dcat/catalog.py
@@ -0,0 +1,79 @@
+# Copyright 2018 Markus Scheidgen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from flask_restplus import Resource, reqparse, fields
+from flask import Response
+from elasticsearch_dsl import Q
+
+from nomad import search
+
+from .api import api
+from .mapping import Mapping
+
+ns = api.namespace('catalog', description='The API for DCAT catalog.')
+
+# Parses ISO 8601 date-time strings for the modified_since argument.
+iso8601 = fields.DateTime(dt_format='iso8601')
+
+# Query arguments of the catalog endpoint.
+arg_parser = reqparse.RequestParser()
+# RDF serialization formats that can be requested for the response.
+arg_parser.add_argument('format', type=str, choices=[
+    'xml', 'n3', 'turtle', 'nt', 'pretty-xml', 'trig'])
+# Cursor for paging; used as the search_after value on calc_id.
+arg_parser.add_argument('after', type=str)
+# Lower bound that is applied to upload, last edit, and last processing times.
+arg_parser.add_argument(
+    'modified_since', type=lambda x: iso8601.parse(x),
+    help='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)')
+
+
+@ns.route('/')
+class Catalog(Resource):
+    _content_types = {  # media types of the non-XML RDF serializations
+        'n3': 'text/n3', 'turtle': 'text/turtle', 'nt': 'application/n-triples',
+        'trig': 'application/trig'}
+
+    @api.doc('get_dcat_datasets')
+    @api.expect(arg_parser)
+    @api.produces(['application/xml'])
+    @api.response(404, 'There is no entry with the given id.')
+    @api.response(401, 'This entry is not publically accessible.')
+    @api.response(200, 'Data send', headers={'Content-Type': 'application/xml'})
+    def get(self):
+        '''
+        Returns a page of DCAT datasets as one RDF catalog of public entries. Pages are
+        sorted by calc_id; pass the last calc_id of a page as ``after`` to get the next.
+        '''
+        args = arg_parser.parse_args()
+        format_ = args.get('format') or 'xml'
+        modified_since = args.get('modified_since')  # arg_parser already parsed it
+        after = args.get('after')
+
+        search_request = search.SearchRequest().owner('public')
+        if modified_since is not None:
+            modified_clause = Q('range', upload_time=dict(gte=modified_since))
+            modified_clause |= Q('range', last_edit=dict(gte=modified_since))
+            modified_clause |= Q('range', last_processing=dict(gte=modified_since))
+            search_request.q &= modified_clause
+
+        # Sort every page (not only ``after`` pages) so that the cursor is consistent.
+        es_search = search_request._search.query(search_request.q).sort('calc_id')
+        if after is not None:
+            es_search = es_search.extra(search_after=[after])
+        mapping = Mapping()
+        mapping.map_catalog(es_search.execute().hits)
+        return Response(
+            mapping.g.serialize(format=format_).decode('utf-8'), 200,
+            {'Content-Type': self._content_types.get(format_, 'application/xml')})
diff --git a/nomad/app/dcat/mapping.py b/nomad/app/dcat/mapping.py
index 5af1141423..0151b75224 100644
--- a/nomad/app/dcat/mapping.py
+++ b/nomad/app/dcat/mapping.py
@@ -36,6 +36,12 @@ class Mapping():
 
         self.persons = {}
 
+    def map_catalog(self, entries):  # adds a dcat:Catalog that lists all given entries
+        catalog = URIRef(url('catalog'))
+        self.g.add((catalog, RDF.type, DCAT.Catalog))
+        for entry in entries:  # DCAT links catalogs to datasets via dcat:dataset
+            self.g.add((catalog, DCAT.dataset, self.map_entry(entry)))
+
     def map_entry(self, entry: EntryMetadata):
         dataset = URIRef(url('datasets', entry.calc_id))
 
diff --git a/tests/app/test_dcat.py b/tests/app/test_dcat.py
index 6afa483fe3..abdc406d05 100644
--- a/tests/app/test_dcat.py
+++ b/tests/app/test_dcat.py
@@ -15,6 +15,7 @@
 import pytest
 from datetime import datetime
 
+from nomad import infrastructure, config
 from nomad.datamodel import EntryMetadata
 from nomad.app.dcat.mapping import Mapping
 
@@ -59,3 +60,20 @@ def test_get_dataset(elastic_infra, api, example_entry):
     assert rv.status_code == 200
 
     clear_elastic(elastic_infra)
+
+
+def test_get_catalog(elastic_infra, api, example_entry):
+    ''' Indexes ten entries and requests a filtered, paged catalog in nt format. '''
+    clear_elastic(elastic_infra)
+
+    # ten copies of the example entry, processed on consecutive days of January 2020
+    for day in range(1, 11):
+        example_entry.calc_id = 'test-id-%d' % day
+        example_entry.upload_time = datetime(2000, 1, 1)
+        example_entry.last_processing = datetime(2020, 1, day)
+        example_entry.a_elastic.index()
+    infrastructure.elastic_client.indices.refresh(index=config.elastic.index_name)
+
+    response = api.get('/catalog/?after=test-id-3&modified_since=2020-01-07&format=nt')
+    assert response.status_code == 200
+    clear_elastic(elastic_infra)
-- 
GitLab