Commit 5d37c347 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'dcat' into 'v0.9.9'

Add dcat API for stream project.

See merge request !238
parents 7554fc1d 0fb3e3e3
Pipeline #90854 passed with stages
in 25 minutes and 52 seconds
...@@ -60,6 +60,15 @@ export default function About() { ...@@ -60,6 +60,15 @@ export default function About() {
open API standard for materials science databases. This API can be used to search open API standard for materials science databases. This API can be used to search
and access NOMAD metadata in a standardized way that can also be applied to many and access NOMAD metadata in a standardized way that can also be applied to many
[other materials science databses](https://providers.optimade.org/). [other materials science databses](https://providers.optimade.org/).
## DCAT
- [DCAT API dashboard](${appBase}/dcat/)
[DCAT](https://www.w3.org/TR/vocab-dcat-2/) is an RDF vocabulary designed to facilitate
interoperability between data catalogs published on the Web. This API gives you
access to NOMAD via RDF documents following DCAT. You can access individual NOMAD
entries as DCAT Datasets or all NOMAD entries as a DCAT Catalog.
`}</Markdown> `}</Markdown>
</div> </div>
} }
...@@ -19,7 +19,7 @@ import React, { useContext, useLayoutEffect, useRef, useCallback, useEffect, use ...@@ -19,7 +19,7 @@ import React, { useContext, useLayoutEffect, useRef, useCallback, useEffect, use
import {ReactComponent as AboutSvg} from './about.svg' import {ReactComponent as AboutSvg} from './about.svg'
import PropTypes from 'prop-types' import PropTypes from 'prop-types'
import Markdown from './Markdown' import Markdown from './Markdown'
import { appBase, optimadeBase, apiBase, debug, consent, aitoolkitEnabled, encyclopediaEnabled } from '../config' import { appBase, debug, consent, aitoolkitEnabled, encyclopediaEnabled } from '../config'
import { apiContext } from './api' import { apiContext } from './api'
import packageJson from '../../package.json' import packageJson from '../../package.json'
import { domains } from './domains' import { domains } from './domains'
...@@ -318,20 +318,16 @@ export default function About() { ...@@ -318,20 +318,16 @@ export default function About() {
</InfoCard> </InfoCard>
<InfoCard xs={4} title="APIs" bottom><Markdown>{` <InfoCard xs={4} title="APIs" bottom><Markdown>{`
The NOMAD can also be accessed programmatically via ReST APIs. The NOMAD can also be accessed programmatically via ReST APIs.
There is the proprietary NOMAD API and an implementation of the There is the proprietary NOMAD API,an implementation of the
standardized [OPTiMaDe API (0.10.0)](https://github.com/Materials-Consortia/OPTiMaDe/tree/master) standardized [OPTiMaDe API](https://github.com/Materials-Consortia/OPTiMaDe/tree/master)
materials science database API. materials science database API, and more.
Both APIs are described via [swagger/OpenAPI spec.](https://swagger.io/), We offer a [tutorial on how to use the API with plain Python](${appBase}/docs/api_tutorial.html).
therefore you can use your favorite swagger client library
(e.g. [bravado](https://github.com/Yelp/bravado) for Python):
- [NOMAD API](${apiBase}/)
- [OPTiMaDe API](${optimadeBase}/)
There is a [tutorial on how to use the API with plain Python](${appBase}/docs/api_tutorial.html).
Another [tutorial covers how to install and use NOMAD's Python client library](${appBase}/docs/archive_tutorial.html). Another [tutorial covers how to install and use NOMAD's Python client library](${appBase}/docs/archive_tutorial.html).
The [NOMAD Analytics Toolkit](https://nomad-lab.eu/AIToolkit) allows to use The [NOMAD Analytics Toolkit](https://nomad-lab.eu/AIToolkit) allows to use
this without installation and directly on NOMAD servers. this without installation and directly on NOMAD servers.
Visit our [API page](/apis).
`}</Markdown></InfoCard> `}</Markdown></InfoCard>
<Grid item xs={12}> <Grid item xs={12}>
<Markdown>{` <Markdown>{`
......
...@@ -17,8 +17,8 @@ ...@@ -17,8 +17,8 @@
# #
''' '''
This module comprises the nomad@FAIRDI APIs. Currently there is NOMAD's official api, and This module comprises the nomad@FAIRDI APIs. Currently there is NOMAD's official api, optimade api,
we will soon at the optimade api. The app module also servers documentation, gui, and and dcat api. The app module also servers documentation, gui, and
alive. alive.
''' '''
from flask import Flask, Blueprint, jsonify, url_for, abort, request, make_response from flask import Flask, Blueprint, jsonify, url_for, abort, request, make_response
...@@ -37,6 +37,7 @@ from nomad import config, utils as nomad_utils ...@@ -37,6 +37,7 @@ from nomad import config, utils as nomad_utils
from .api import blueprint as api_blueprint, api from .api import blueprint as api_blueprint, api
from .optimade import blueprint as optimade_blueprint, api as optimade from .optimade import blueprint as optimade_blueprint, api as optimade
from .dcat import blueprint as dcat_blueprint
from .docs import blueprint as docs_blueprint from .docs import blueprint as docs_blueprint
from .dist import blueprint as dist_blueprint from .dist import blueprint as dist_blueprint
from .gui import blueprint as gui_blueprint from .gui import blueprint as gui_blueprint
...@@ -112,6 +113,7 @@ CORS(app) ...@@ -112,6 +113,7 @@ CORS(app)
app.register_blueprint(api_blueprint, url_prefix='/api') app.register_blueprint(api_blueprint, url_prefix='/api')
app.register_blueprint(optimade_blueprint, url_prefix='/optimade') app.register_blueprint(optimade_blueprint, url_prefix='/optimade')
app.register_blueprint(dcat_blueprint, url_prefix='/dcat')
app.register_blueprint(docs_blueprint, url_prefix='/docs') app.register_blueprint(docs_blueprint, url_prefix='/docs')
app.register_blueprint(dist_blueprint, url_prefix='/dist') app.register_blueprint(dist_blueprint, url_prefix='/dist')
app.register_blueprint(gui_blueprint, url_prefix='/gui') app.register_blueprint(gui_blueprint, url_prefix='/gui')
......
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
The DCAT API implementation of NOMAD.
'''
from flask import Blueprint
from flask_restplus import Api
from .api import blueprint, api
from .datasets import Dataset
from .catalog import Catalog
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask import Blueprint, Response
from flask_restplus import Api, reqparse
import urllib.parse
from rdflib import Graph
from nomad import config
# Flask blueprint that carries the DCAT API.
blueprint = Blueprint('dcat', __name__)
# Root URL of the DCAT API as reported by the NOMAD config; evaluated once at import time.
base_url = config.api_url(api='dcat')
def url(*args, **kwargs):
    '''
    Builds an absolute DCAT API URL.

    The positional arguments are joined with ``/`` and appended to the module-level
    ``base_url``; the keyword arguments, if any, are url-encoded into the query string.
    '''
    path = '/'.join(args).lstrip('/')
    location = f'{base_url.rstrip("/")}/{path}'
    if not kwargs:
        return location

    return f'{location}?{urllib.parse.urlencode(kwargs)}'
# The flask-restplus API object for the DCAT blueprint; validate=True turns on
# request payload validation for all resources registered on it.
api = Api(
blueprint,
version='1.0', title='NOMAD\'s API for servicing dcat resources',
description='NOMAD\'s API for serving dcat resources',
validate=True)
# For some unknown reason it is necessary for each flask-restplus (fr) api to have a handler.
# Otherwise the global app error handler won't be called.
@api.errorhandler(Exception)
def errorhandler(error):
    '''When an internal server error is caused by an unexpected exception.'''
    # The response body is simply the textual form of the exception.
    message = str(error)
    return message
# Request parser shared by the DCAT endpoints. The optional 'format' argument
# selects the RDF serialization; only the listed values are accepted.
arg_parser = reqparse.RequestParser()
arg_parser.add_argument('format', type=str, choices=[
'xml',
'n3',
'turtle',
'nt',
'pretty-xml',
'trig'])
def rdf_respose(g: Graph) -> Response:
    '''
    Serializes the given RDF graph into a flask response.

    The serialization is chosen via the ``format`` request argument (see ``arg_parser``)
    and defaults to ``pretty-xml``. Both XML flavours are delivered as
    ``application/xml``; every other format is delivered as ``text/<format>``.

    Arguments:
        g: The graph to serialize.

    Returns: A 200 response with the serialized graph as body.
    '''
    args = arg_parser.parse_args()
    format_ = args.get('format')
    if format_ is None:
        format_ = 'pretty-xml'

    # The check has to look at the requested format (format_). Comparing the
    # builtin `format` function here would never match and XML would be sent
    # with a bogus 'text/pretty-xml' content type.
    if format_ in ('xml', 'pretty-xml'):
        content_type = 'application/xml'
    else:
        content_type = 'text/%s' % format_

    data = g.serialize(format=format_)
    # Older rdflib versions return bytes here, newer ones return str; accept both.
    if isinstance(data, bytes):
        data = data.decode('utf-8')

    return Response(data, 200, {'Content-Type': content_type})
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask_restplus import Resource, fields
from elasticsearch_dsl import Q
from nomad import search
from .api import api, arg_parser, rdf_respose
from .mapping import Mapping
# Namespace that groups the catalog routes of the DCAT API.
ns = api.namespace('catalog', description='The API for DCAT catalog.')
# Used to parse the ISO 8601 value of the 'modified_since' argument.
iso8601 = fields.DateTime(dt_format='iso8601')
# Extend a copy of the parser imported from .api with the catalog specific arguments.
arg_parser = arg_parser.copy()
# Paging cursor; the Catalog resource passes it on as search_after value on calc_id.
arg_parser.add_argument('after', type=str)
# NOTE(review): the help text says "maximum", but the Catalog resource filters
# with gte, i.e. it is used as a lower bound - confirm and adjust the wording.
arg_parser.add_argument(
'modified_since', type=lambda x: iso8601.parse(x),
help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
@ns.route('/')
class Catalog(Resource):
    @api.doc('get_dcat_datasets')
    @api.expect(arg_parser)
    @api.produces(['application/xml'])
    @api.response(404, 'There is no entry with the given id.')
    @api.response(401, 'This entry is not publically accessible.')
    @api.response(200, 'Data send', headers={'Content-Type': 'application/xml'})
    def get(self):
        ''' Returns a page of DCAT datasets. '''
        # Request arguments:
        #   after: calc_id of the last entry of the previous page, empty for the first page
        #   modified_since: only entries uploaded, edited, or processed since then are included
        args = arg_parser.parse_args()
        modified_since = args.get('modified_since', None)
        after = args.get('after', '')
        if after is None:
            after = ''

        # Only published, non-embargoed entries are exposed through the catalog.
        search_request = search.SearchRequest().owner('public')
        if modified_since is not None:
            modified_clause = Q('range', upload_time=dict(gte=modified_since))
            modified_clause |= Q('range', last_edit=dict(gte=modified_since))
            modified_clause |= Q('range', last_processing=dict(gte=modified_since))
            search_request.q &= modified_clause

        es_search = search_request._search.query(search_request.q)
        # Every page, including the first one, has to use the same sort order.
        # Otherwise the calc_id cursor handed out as "next" page does not match
        # the order of the page it was taken from and entries get skipped or repeated.
        es_search = es_search.extra(sort='calc_id')
        # Compare by value; an identity check (`is not ''`) against a string
        # literal depends on interning and is not reliable.
        if after != '':
            es_search = es_search.extra(search_after=[after])
        es_response = es_search.execute()

        mapping = Mapping()
        mapping.map_catalog(es_response.hits, after, modified_since)
        return rdf_respose(mapping.g)
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask_restplus import Resource, abort
from elasticsearch.exceptions import NotFoundError
from nomad import search
from .api import api, arg_parser, rdf_respose
from .mapping import Mapping
# Namespace that groups the dataset routes of the DCAT API.
ns = api.namespace('datasets', description='The API for DCAT datasets.')
@ns.route('/<string:entry_id>')
class Dataset(Resource):
    @api.doc('get_dcat_dataset')
    @api.expect(arg_parser)
    @api.produces(['application/xml'])
    @api.response(404, 'There is no entry with the given id.')
    @api.response(401, 'This entry is not publically accessible.')
    @api.response(200, 'Data send', headers={'Content-Type': 'application/xml'})
    def get(self, entry_id):
        ''' Returns a DCAT dataset for a given NOMAD entry id. '''
        # Look the entry up in the search index; unknown ids end in a 404.
        try:
            entry = search.entry_document.get(entry_id)
        except NotFoundError:
            abort(404, message='There is no calculation with id %s' % entry_id)

        # Embargoed or unpublished entries are not served.
        restricted = entry.with_embargo or not entry.published
        if restricted:
            abort(401, message='Not authorized to access %s' % entry_id)

        # Map the single entry into a fresh graph and serialize it.
        dataset_mapping = Mapping()
        dataset_mapping.map_entry(entry)
        return rdf_respose(dataset_mapping.g)
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from rdflib import Graph, Literal, RDF, URIRef, BNode
from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF
from nomad import config
from nomad.datamodel import User
from nomad.datamodel import EntryMetadata, User
from .api import url
# RDF namespaces that are used by the mapping in addition to those imported from rdflib.
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
def get_optional_entry_prop(entry, name):
    '''
    Reads ``entry[name]`` and falls back to the string ``'unavailable'`` if the
    lookup raises a ``KeyError``.
    '''
    try:
        value = entry[name]
    except KeyError:
        value = 'unavailable'

    return value
class Mapping():
    '''
    Maps NOMAD entries and users to DCAT/RDF triples.

    All triples are collected in the rdflib graph ``self.g``. Nodes created for
    users are remembered in ``self.persons`` (keyed by user id), so that each user
    is represented by a single node per mapping.
    '''
    def __init__(self):
        self.g = Graph()
        # Bind readable prefixes for all vocabularies used in the serialized output.
        self.g.namespace_manager.bind('rdf', RDF)
        self.g.namespace_manager.bind('dcat', DCAT)
        self.g.namespace_manager.bind('dct', DCT)
        self.g.namespace_manager.bind('vcard', VCARD)
        self.g.namespace_manager.bind('foaf', FOAF)
        self.g.namespace_manager.bind('hydra', HYDRA)

        self.persons = {}

    def map_catalog(self, entries, after: str, modified_since):
        '''
        Adds a DCAT catalog with the given page of entries and the hydra paging
        information to the graph.

        Arguments:
            entries: The entries of the current page; must be iterable and provide ``total``.
            after: The paging cursor that was used to request this page ('' for the first page).
            modified_since: The optional datetime the page was filtered with; it is
                carried over into the generated catalog and page URLs.
        '''
        def uri_ref(after):
            kwargs = dict()
            if after is not None:
                kwargs['after'] = after
            if modified_since is not None:
                kwargs['modified_since'] = modified_since.strftime('%Y-%m-%d')
            return URIRef(url('catalog', **kwargs))

        after = after.strip()

        # The catalog itself is identified without paging cursor.
        catalog = uri_ref(after=None)
        self.g.add((catalog, RDF.type, DCAT.Catalog))
        last_entry = None
        for entry in entries:
            # dcat:dataset is the DCAT property that links a catalog to its
            # datasets; there is no such term in the dcterms vocabulary.
            self.g.add((catalog, DCAT.dataset, self.map_entry(entry, slim=True)))
            last_entry = entry

        # The current page is described as hydra:Collection. Only the class
        # hydra:Collection is a valid rdf:type here; the lower case
        # hydra:collection is a property and must not be used as a type.
        hydra_collection = uri_ref(after)
        self.g.add((hydra_collection, RDF.type, HYDRA.Collection))
        self.g.add((hydra_collection, HYDRA.totalItems, Literal(entries.total)))
        self.g.add((hydra_collection, HYDRA.first, uri_ref('')))
        if last_entry is not None:
            # The id of the last entry serves as cursor for the next page.
            self.g.add((hydra_collection, HYDRA.next, uri_ref(last_entry.calc_id)))

    def map_entry(self, entry: EntryMetadata, slim=False):
        '''
        Adds a DCAT dataset for the given entry to the graph.

        Arguments:
            entry: The entry to map.
            slim: If true, only identifier, dates, title, and description are added;
                landing page, license, persons, and distributions are left out.

        Returns: The node of the dataset.
        '''
        dataset = URIRef(url('datasets', entry.calc_id))

        self.g.add((dataset, RDF.type, DCAT.Dataset))
        self.g.add((dataset, DCT.identifier, Literal(entry.calc_id)))
        self.g.add((dataset, DCT.issued, Literal(entry.upload_time)))
        self.g.add((dataset, DCT.modified, Literal(entry.last_processing)))
        self.g.add((dataset, DCT.title, Literal(get_optional_entry_prop(entry, 'formula'))))
        self.g.add((dataset, DCT.description, Literal(get_optional_entry_prop(entry, 'comment'))))

        if slim:
            return dataset

        # The DCAT term is dcat:landingPage (camel case).
        self.g.add((dataset, DCAT.landingPage, URIRef('%s/entry/id/%s/%s' % (
            config.gui_url(), entry.upload_id, entry.calc_id))))

        self.g.add((dataset, DCT.license, URIRef('https://creativecommons.org/licenses/by/4.0/legalcode')))
        self.g.add((dataset, DCT.language, URIRef('http://id.loc.gov/vocabulary/iso639-1/en')))

        self.g.add((dataset, DCT.publisher, self.map_user(entry.uploader)))
        for author in entry.authors:
            self.g.add((dataset, DCT.creator, self.map_user(author)))
        self.g.add((dataset, DCAT.contactPoint, self.map_contact(entry.uploader)))

        self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, 'api')))
        self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, 'json')))
        self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, 'raw')))

        return dataset

    def map_user(self, user: User):
        '''
        Adds a foaf:Person for the given user to the graph, unless the user was
        mapped before.

        Returns: The (possibly cached) node of the person.
        '''
        person = self.persons.get(user.user_id)
        if person is not None:
            return person

        # Load the full user record; the given object is only used for its id.
        user = User.get(user.user_id)
        person = BNode()

        self.g.add((person, RDF.type, FOAF.Person))
        self.g.add((person, FOAF.givenName, Literal(user.first_name)))
        self.g.add((person, FOAF.familyName, Literal(user.last_name)))
        self.g.add((person, FOAF.nick, Literal(user.username)))
        self.g.add((person, FOAF.mbox, URIRef('mailto:%s' % (user.email))))

        self.persons[user.user_id] = person

        return person

    def map_contact(self, user: User):
        '''
        Adds vcard contact information for the given user to the graph. The
        information is attached to the same node that ``map_user`` uses for the user.

        Returns: The node of the person.
        '''
        # map_user returns the cached node, or creates and caches a new one.
        person = self.map_user(user)

        user = User.get(user.user_id)
        self.g.add((person, RDF.type, VCARD.Individual))
        self.g.add((person, VCARD.givenName, Literal(user.first_name)))
        self.g.add((person, VCARD.familyName, Literal(user.last_name)))
        self.g.add((person, VCARD.nickName, Literal(user.username)))
        self.g.add((person, VCARD.hasEmail, Literal(user.email)))
        self.g.add((person, VCARD.organizationName, Literal('unavailable' if user.affiliation is None else user.affiliation)))

        # TODO: add a vcard:Address (vcard:hasAddress) once it is clear how the
        # affiliation address maps to street, postal code, country, locality, and region.

        return person

    def map_distribution(self, entry, dist_kind):
        '''
        Adds a DCAT distribution for the given entry to the graph.

        Arguments:
            entry: The entry the distribution belongs to.
            dist_kind: Either 'api' (access via the NOMAD API), 'json' (download of
                the archive), or 'raw' (access to the raw files).

        Returns: The node of the distribution.

        Raises:
            ValueError: If dist_kind is none of the supported kinds.
        '''
        if dist_kind == 'api':
            # DataService: API
            service = BNode()
            self.g.add((service, RDF.type, DCAT.DataService))
            self.g.add((service, DCT.title, Literal('NOMAD API')))  # How to include terms from swagger document here?
            self.g.add((service, DCT.description, Literal('Official NOMAD API')))  # same question
            self.g.add((service, DCAT.endpointURL, URIRef('https://nomad-lab.eu/prod/rae/api/')))  # config.api_url() ?
            # not sure if the following needs to be dataset specific:
            self.g.add((service, DCAT.endpointDescription, URIRef('https://nomad-lab.eu/prod/rae/api/swagger.json')))

            # Distribution over API
            # NOTE(review): unlike the other kinds, this reads entry.formula directly
            # and yields 'unavailable' without the '_api' suffix - confirm intent.
            dist = BNode()
            self.g.add((dist, DCT.title, Literal('unavailable' if entry.formula is None else entry.formula + '_api')))
            self.g.add((dist, RDF.type, DCAT.Distribution))
            self.g.add((dist, DCAT.accessService, service))

        elif dist_kind == 'json':
            # Distribution as JSON
            dist = BNode()
            self.g.add((dist, RDF.type, DCAT.Distribution))
            self.g.add((dist, DCT.title, Literal(get_optional_entry_prop(entry, 'formula') + '_json')))
            self.g.add((dist, DCAT.mediaType, URIRef('https://www.iana.org/assignments/media-types/application/json')))
            self.g.add((dist, DCAT.packageFormat, URIRef('https://www.iana.org/assignments/media-types/application/zip')))
            self.g.add((dist, DCAT.downloadURL, URIRef(
                'http://nomad-lab.eu/prod/rae/api/archive/download?upload_id=%s&calc_id=%s' % (entry.upload_id, entry.calc_id))))
            self.g.add((dist, DCAT.accessURL, URIRef('%s/entry/id/%s/%s' % (
                config.gui_url(), entry.upload_id, entry.calc_id))))

        elif dist_kind == 'raw':
            # Distribution of the raw data
            dist = BNode()
            self.g.add((dist, RDF.type, DCAT.Distribution))
            self.g.add((dist, DCT.title, Literal(get_optional_entry_prop(entry, 'formula') + '_raw')))
            self.g.add((dist, DCAT.accessURL, URIRef('https://nomad-lab.eu/prod/rae/api/raw/calc/%s/%s' % (
                entry.upload_id, entry.calc_id))))
            self.g.add((dist, DCAT.packageFormat, URIRef('https://www.iana.org/assignments/media-types/application/zip')))

        else:
            # Fail with a clear message instead of an UnboundLocalError on return.
            raise ValueError('Unknown distribution kind: %r' % (dist_kind,))

        return dist
...@@ -173,12 +173,14 @@ tests = NomadConfig( ...@@ -173,12 +173,14 @@ tests = NomadConfig(
) )
def api_url(ssl: bool = True): def api_url(ssl: bool = True, api: str = 'api'):
return '%s://%s/%s/api' % ( base_url = '%s://%s/%s' % (
'https' if services.https and ssl else 'http', 'https' if services.https and ssl else 'http',
services.api_host.strip('/'), services.api_host.strip('/'),
services.api_base_path.strip('/')) services.api_base_path.strip('/'))
return '%s/%s' % (base_url.strip('/'), api)
def gui_url(page: str = None): def gui_url(page: str = None):
base = api_url(True)[:-3] base = api_url(True)[:-3]
......
...@@ -75,6 +75,7 @@ unidecode ...@@ -75,6 +75,7 @@ unidecode
python-json-logger python-json-logger
recommonmark recommonmark
jinja2 jinja2
rdflib
# [dev] # [dev]
setuptools setuptools
......
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from datetime import datetime
from nomad import infrastructure, config
from nomad.datamodel import EntryMetadata
from nomad.app.dcat.mapping import Mapping
from tests.conftest import clear_elastic
from tests.app.test_app import BlueprintClient
@pytest.fixture(scope='session')
def api(session_client):
    ''' Session-wide test client whose requests are issued under the /dcat prefix. '''
    dcat_client = BlueprintClient(session_client, '/dcat')
    return dcat_client
@pytest.fixture(scope='module')
def example_entry(test_user, other_test_user):
entry = EntryMetadata(
calc_id='test-id',
upload_id='upload-id',
upload_time=datetime.now(),
last_processing=datetime.now(),
uploader=test_user,
coauthors=[other_test_user],
comment='this is a calculation comment',
formula='H20',
published=True)