Commit 0d2cd79c authored by Markus Scheidgen
Browse files

Merge branch 'remove-flask-restplus' into 'v1.0.3'

Moved dcat to fastapi. Added dois to dcat. Removed flask_restplus.

See merge request !569
parents 6ac10391 d4c5e242
Pipeline #123315 passed with stages
in 40 minutes and 18 seconds
......@@ -16,15 +16,16 @@
# limitations under the License.
#
from flask import Blueprint, Response, request
from flask_restplus import Api, reqparse
from typing import Optional
from fastapi import Response, Query, Header
import urllib.parse
from rdflib import Graph
from enum import Enum
from nomad import config
blueprint = Blueprint('dcat', __name__)
root_path = f'{config.services.api_base_path}/dcat'
base_url = config.api_url(api='dcat')
......@@ -38,30 +39,15 @@ def url(*args, **kwargs):
return url
# The flask_restplus Api object for the dcat resources. It is attached to the
# module's ``blueprint``; version, title, and description are passed on to the Api.
api = Api(
    blueprint,
    version='1.0', title='NOMAD\'s API for servicing dcat resources',
    description='NOMAD\'s API for serving dcat resources',
    validate=True)
class Formats(str, Enum):
    '''
    The RDF serialization formats that can be selected by name.

    Members are plain strings (``str`` mixin), so a member compares equal to its
    value and can be looked up from it, e.g. ``Formats('pretty-xml')``.
    '''
    # No trailing commas: a trailing comma would turn the value into a
    # one-element tuple that only works by accident through the ``str`` mixin.
    xml = 'xml'
    n3 = 'n3'
    turtle = 'turtle'
    nt = 'nt'
    pretty_xml = 'pretty-xml'
    trig = 'trig'
# For some unknown reason it is necessary for each fr (presumably flask_restplus)
# api to have a handler. Otherwise the global app error handler won't be called.
@api.errorhandler(Exception)
def errorhandler(error):
    '''
    Handler registered on ``api`` for any ``Exception``, i.e. when an internal
    server error is caused by an unexpected exception.

    Returns the string representation of the given error.
    '''
    return str(error)
# Request parser with a single string argument ``format``; its value is
# restricted to the serialization format names listed in ``choices``.
arg_parser = reqparse.RequestParser()
arg_parser.add_argument('format', type=str, choices=[
    'xml',
    'n3',
    'turtle',
    'nt',
    'pretty-xml',
    'trig'])
all_repsonse_types = {
'application/xml': 'xml',
'application/rdf+prettyxml': 'pretty-xml',
......@@ -88,21 +74,23 @@ response_types = [
'application/x-trig']
def rdf_response(
    format: Optional[Formats] = Query(None), accept: Optional[str] = Header(None)
):
    '''
    FastAPI dependency that determines the RDF serialization format of a request.

    The format is taken from the ``format`` query parameter if it is given.
    Otherwise the ``Accept`` header is looked up in ``all_repsonse_types``. A
    missing or unknown header falls back to ``pretty-xml``.

    Arguments:
        format: The optional ``format`` query parameter.
        accept: The optional value of the ``Accept`` header.

    Returns:
        A function ``create_response(g)`` that serializes the given rdflib graph
        in the determined format and wraps the result in a ``Response`` with the
        matching media type.
    '''
    if format is not None:
        format_ = format.value
    elif accept:
        format_ = all_repsonse_types.get(accept, 'pretty-xml')
    else:
        format_ = 'pretty-xml'

    def create_response(g: Graph):
        # reverse lookup: the first content type that is mapped to the format
        content_type = next(
            (key for key, value in all_repsonse_types.items() if value == format_),
            None)
        if content_type is None:
            # The format has to be interpolated here; a plain 'text/format'
            # literal is not a valid content type for any serialization.
            if format_ in ('xml', 'pretty-xml'):
                content_type = 'application/xml'
            else:
                content_type = f'text/{format_}'

        content = g.serialize(format=format_)
        # Depending on the rdflib version, serialize returns bytes or str.
        if isinstance(content, bytes):
            content = content.decode('utf-8')

        return Response(content, media_type=content_type)

    return create_response
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from fastapi import FastAPI, status, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse
import traceback
from nomad import config, utils
from .common import root_path
from .routers import dcat
# The logger of this module.
logger = utils.get_logger(__name__)

# The FastAPI app for the DCAT API. The OpenAPI schema and the documentation UIs
# are served under the paths given here; ``root_path`` is passed on to FastAPI.
app = FastAPI(
    title='DCAT API',
    description='NOMAD\'s API for serving dcat resources',
    version=f'v1, NOMAD {config.meta.version}@{config.meta.commit}',
    root_path=root_path,
    openapi_url='/openapi.json',
    docs_url='/extensions/docs',
    redoc_url='/extensions/redoc',
    swagger_ui_oauth2_redirect_url='/extensions/docs/oauth2-redirect')

# CORS: any origin, method, and header is allowed; credentials are allowed too.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=['*'],
    allow_methods=['*'],
    allow_headers=['*'])
async def redirect_to_docs(req: Request):
    ''' Answers with a redirect to the docs page below the app's root path. '''
    docs_url = f'{root_path}/extensions/docs'
    return RedirectResponse(docs_url)


# app.add_route(f'{root_path}', redirect_to_docs, include_in_schema=False)
# The redirect is registered for the app's root and kept out of the schema.
app.add_route('/', redirect_to_docs, include_in_schema=False)
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, e: Exception):
    '''
    Turns an otherwise unhandled exception into a JSON response with status 500.

    The exception is logged together with the request url. The response body
    contains a ``detail`` object with a fixed reason, the exception message,
    the exception class name, and the formatted traceback.

    Arguments:
        request: The request that caused the exception.
        e: The exception that was raised while handling the request.

    Returns:
        The ``JSONResponse`` with status code 500.
    '''
    logger.error('unexpected exception in API', url=str(request.url), exc_info=e)

    # Format the traceback of the given exception explicitly. format_exc() would
    # format whatever exception is currently being handled by the interpreter,
    # which is only the same exception as long as this handler is called from
    # within the respective except block.
    exception_traceback = ''.join(
        traceback.format_exception(type(e), e, e.__traceback__))

    # NOTE(review): the traceback is sent to the client, which exposes internals.
    # This is kept as is, because clients might rely on it.
    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content={
            'detail': {
                'reason': 'Unexpected exception while handling your request',
                'exception': str(e),
                'exception_class': e.__class__.__name__,
                'exception_traceback': exception_traceback
            }
        }
    )
app.include_router(dcat.router)
......@@ -22,9 +22,8 @@ from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF
from nomad import config
from nomad.datamodel import User
from nomad.datamodel import User
from .common import url
from .api import url
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
......@@ -92,7 +91,8 @@ class Mapping():
self.g.add((dataset, DCT.issued, Literal(entry['upload_create_time'])))
self.g.add((dataset, DCT.modified, Literal(entry['last_processing_time'])))
self.g.add((dataset, DCT.title, Literal(get_optional_entry_prop(entry, 'results.material.chemical_formula_descriptive'))))
self.g.add((dataset, DCT.description, Literal(get_optional_entry_prop(entry, 'comment'))))
if 'comment' in entry:
self.g.add((dataset, DCT.description, Literal(get_optional_entry_prop(entry, 'comment'))))
if slim:
return dataset
......@@ -115,6 +115,10 @@ class Mapping():
self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, 'json')))
self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, 'raw')))
if 'datasets' in entry:
for nomad_dataset in entry['datasets']:
self.g.add((dataset, DCAT.distribution, self.map_nomad_dataset(nomad_dataset)))
return dataset
def map_user(self, user_id: str):
......@@ -147,50 +151,46 @@ class Mapping():
self.g.add((person, VCARD.nickName, Literal(user.username)))
self.g.add((person, VCARD.hasEmail, Literal(user.email)))
self.g.add((person, VCARD.organization, Literal(get_optional_entry_prop(user, 'affiliation'))))
# address = BNode()
# self.g.add((address, RDF.type, VCARD.Address))
# self.g.add((address, VCARD.street_address, )) # affiliation_address?
# self.g.add((address, VCARD.postal_code, )) # affiliation_address?
# self.g.add((address, VCARD.country_name, )) # affiliation_address?
# self.g.add((address, VCARD.locality, )) # affiliation_address?
# self.g.add((address, VCARD.region, )) # affiliation_address?
# self.g.add((person, VCARD.hasAddress, address))
return person
def map_distribution(self, entry: dict, dist_kind):
    '''
    Creates a ``dcat:Distribution`` blank node for an entry and adds its
    triples to ``self.g``.

    Arguments:
        entry: The entry data. Only ``entry['entry_id']`` is read here.
        dist_kind: The kind of distribution: ``'api'``, ``'json'``, or ``'raw'``.

    Returns:
        The blank node that represents the distribution.

    Raises:
        ValueError: If ``dist_kind`` is not one of the supported kinds.
    '''
    # URLs are built with plain f-string placeholders. A leading '$' (as in
    # JavaScript template literals) would end up verbatim in the URL.
    if dist_kind == 'api':
        # Distribution over API
        dist = BNode()
        self.g.add((dist, RDF.type, DCAT.Distribution))
        self.g.add((dist, DCT.title, Literal(f'{entry["entry_id"]}_metadata')))
        self.g.add((dist, DCAT.mediaType, URIRef('https://www.iana.org/assignments/media-types/application/json')))
        self.g.add((dist, DCAT.accessURL, URIRef(f'{config.api_url()}/v1/entries/{entry["entry_id"]}/archive/download')))
    elif dist_kind == 'json':
        # Distribution as JSON
        dist = BNode()
        self.g.add((dist, RDF.type, DCAT.Distribution))
        self.g.add((dist, DCT.title, Literal(f'{entry["entry_id"]}_archive')))
        self.g.add((dist, DCAT.mediaType, URIRef('https://www.iana.org/assignments/media-types/application/json')))
        self.g.add((dist, DCAT.accessURL, URIRef(f'{config.api_url()}/v1/entries/{entry["entry_id"]}/archive/download')))
    elif dist_kind == 'raw':
        # Distribution of the raw data
        dist = BNode()
        self.g.add((dist, RDF.type, DCAT.Distribution))
        self.g.add((dist, DCT.title, Literal(f'{entry["entry_id"]}_raw_files')))
        self.g.add((dist, DCAT.accessURL, URIRef(f'{config.api_url()}/v1/entries/{entry["entry_id"]}/raw')))
        self.g.add((dist, DCAT.mediaType, URIRef('https://www.iana.org/assignments/media-types/application/zip')))
    else:
        # Without this branch, the return below would fail with an
        # UnboundLocalError that does not tell what is wrong.
        raise ValueError(f'unknown distribution kind: {dist_kind!r}')

    return dist
def map_nomad_dataset(self, dataset: dict):
    '''
    Creates a ``dcat:Distribution`` blank node for a NOMAD dataset and adds its
    triples to ``self.g``.

    Arguments:
        dataset: The dataset data. ``dataset_id`` and ``dataset_name`` are
            required; ``doi`` is optional.

    Returns:
        The blank node that represents the dataset.

    Raises:
        KeyError: If ``dataset_id`` or ``dataset_name`` is missing.
    '''
    dist = BNode()
    self.g.add((dist, RDF.type, DCAT.Distribution))

    # The DOI is the preferred identifier. The dataset id is used if there is
    # no DOI, which includes a 'doi' key that is present but set to None.
    dataset_id = dataset['dataset_id']
    doi = dataset.get('doi')
    id_literal = doi if doi is not None else dataset_id

    self.g.add((dist, DCT.identifier, Literal(id_literal)))
    self.g.add((dist, DCT.title, Literal(dataset['dataset_name'])))
    # accessURL is a term of the DCAT vocabulary, not of DCTERMS
    self.g.add((dist, DCAT.accessURL, URIRef(
        f'{config.gui_url()}/dataset/id/{dataset_id}')))

    return dist
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Union
from fastapi import APIRouter, Query, Path, HTTPException, status, Depends
from datetime import datetime, date
from elasticsearch_dsl import Q
from nomad import utils
from nomad.utils import strip
from nomad.search import search
from nomad.app.v1.models import MetadataPagination, HTTPExceptionModel
from nomad.app.v1.utils import create_responses
from ..common import rdf_response
from ..mapping import Mapping
# The router that holds the dcat routes of this module.
router = APIRouter()
# The tag that the routes of this module are registered with.
default_tag = 'dcat'
logger = utils.get_logger(__name__)

# A (status code, response specification) pair for ids that match no dataset.
# The description is also used as the detail of the raised HTTPException.
_bad_id_response = status.HTTP_404_NOT_FOUND, {
    'model': HTTPExceptionModel,
    'description': strip('''
Dataset not found. The given id does not match any dataset.''')}
# A (status code, response specification) pair for the successful response.
_raw_response = 200, {
    'content': {'application/octet': {}},
    'description': 'The response. The returned content type depends on the format parameter.'}
@router.get(
    '/datasets/{entry_id}',
    tags=[default_tag],
    summary='Returns a DCAT dataset for a given NOMAD entry id.',
    responses=create_responses(_bad_id_response, _raw_response))
async def get_dataset(
    entry_id: str = Path(..., description='The unique NOMAD entry id.'),
    rdf_respose=Depends(rdf_response)
):
    ''' Returns a DCAT dataset for a given NOMAD entry id. '''
    # Only public entries are searched; no hit means the id is unknown.
    search_response = search(owner='public', query={'entry_id': entry_id})
    if search_response.pagination.total == 0:
        status_code, response_spec = _bad_id_response
        raise HTTPException(
            status_code=status_code, detail=response_spec['description'])

    # Map the first hit to a graph and let the dependency create the response.
    mapping = Mapping()
    mapping.map_entry(search_response.data[0])
    return rdf_respose(mapping.g)
@router.get(
    '/catalog/', tags=[default_tag],
    summary='Returns a DCAT catalog of public NOMAD entries.',
    responses=create_responses(_raw_response)
)
async def get_catalog(
    after: str = Query(None, description='return entries after the given entry_id'),
    modified_since: Union[datetime, date] = Query(
        None, description='minimum entry time (e.g. upload time)'),
    rdf_respose=Depends(rdf_response)
):
    ''' Returns a page of a DCAT catalog with public NOMAD entries as datasets. '''
    search_query = Q()
    if modified_since is not None:
        # An entry matches if any of its times is at or after the given time.
        modified_clause = Q('range', upload_create_time=dict(gte=modified_since))
        modified_clause |= Q('range', last_edit_time=dict(gte=modified_since))
        modified_clause |= Q('range', last_processing_time=dict(gte=modified_since))
        search_query &= modified_clause

    # ``after`` is used as the page-after value of the search pagination.
    pagination = MetadataPagination(page_after_value=after)

    search_response = search(owner='public', query=search_query, pagination=pagination)

    mapping = Mapping()
    mapping.map_catalog(
        search_response.data,
        total=search_response.pagination.total,
        after=after,
        modified_since=modified_since, slim=False)

    return rdf_respose(mapping.g)
......@@ -21,111 +21,25 @@ This module comprises the nomad@FAIRDI APIs. Currently there is NOMAD's official
and dcat api. The app module also serves documentation, gui, and
alive.
'''
from flask import Flask, jsonify, url_for, abort, request
from flask_restplus import Api
from flask import Flask
from flask_cors import CORS
import random
from nomad import config, utils as nomad_utils
from .dcat import blueprint as dcat_blueprint
from .docs import blueprint as docs_blueprint
from .dist import blueprint as dist_blueprint
from .gui import blueprint as gui_blueprint
from . import common
@property  # type: ignore
def specs_url(self):
    '''
    Replacement for the Api's specs url that always uses the https scheme.

    It fixes the issue where swagger-ui makes a call to swagger.json over HTTP.
    This can ONLY be used on servers that actually use HTTPS. On servers that use
    HTTP, this code should not be used at all.
    '''
    specs_endpoint = self.endpoint('specs')
    return url_for(specs_endpoint, _external=True, _scheme='https')


# The Api class is only patched if the services are configured to use https.
if config.services.https:
    Api.specs_url = specs_url
app = Flask(__name__)
''' The Flask app that serves all APIs. '''

# settings on the app config for flask_restplus (RESTPLUS_*) and swagger-ui (SWAGGER_UI_*)
app.config.RESTPLUS_MASK_HEADER = False # type: ignore
app.config.RESTPLUS_MASK_SWAGGER = False # type: ignore
app.config.SWAGGER_UI_OPERATION_ID = True # type: ignore
app.config.SWAGGER_UI_REQUEST_DURATION = True # type: ignore
# the secret key is taken from the nomad config
app.config['SECRET_KEY'] = config.services.api_secret

CORS(app)

# each blueprint is served under its own url prefix
app.register_blueprint(dcat_blueprint, url_prefix='/dcat')
app.register_blueprint(docs_blueprint, url_prefix='/docs')
app.register_blueprint(dist_blueprint, url_prefix='/dist')
app.register_blueprint(gui_blueprint, url_prefix='/gui')
@app.errorhandler(Exception)
def handle(error: Exception):
    '''
    The app-wide error handler. It creates a JSON response with ``code``,
    ``name``, and ``description`` taken from the error (with defaults if the
    error does not provide them), updated with the error's ``data`` if present.
    Errors that result in status 500 are logged.
    '''
    # anything that is not a usable HTTP status code is treated as 500
    code = getattr(error, 'code', 500)
    if not isinstance(code, int) or code < 100:
        code = 500

    payload = dict(
        code=code,
        name=getattr(error, 'name', 'Internal Server Error'),
        description=getattr(error, 'description', 'No description available'))
    payload.update(getattr(error, 'data', []))

    response = jsonify(payload)
    response.status_code = code

    if code == 500:
        # the logger is created in before_request, if the error was created before that
        # logger can be None
        local_logger = common.logger
        if local_logger is None:
            local_logger = nomad_utils.get_logger(__name__)

        # TODO the error seems not to be the actual exception, therefore
        # there might be no stacktrace. Maybe there is a way to get the actual
        # exception/stacktrace
        local_logger.error('internal server error', error=str(error), exc_info=error)

    return response
@app.route('/alive')
def alive():
    '''
    Simple endpoint to utilize kubernetes liveness/readiness probing.

    It does not check anything and always returns the same constant string.
    '''
    return "I am, alive!"
@app.before_request
def before_request():
    '''
    Runs before each request. It sets ``common.logger`` to a logger that is bound
    to information about the current request and, if configured, lets the chaos
    monkey abort the request at random.
    '''
    # api logger: the view args (if any) plus general request information
    view_args = request.view_args
    logger_kwargs = {} if view_args is None else dict(**view_args)
    logger_kwargs.update(
        name=__name__,
        blueprint=str(request.blueprint),
        endpoint=request.endpoint,
        method=request.method,
        url=request.url,
        json=request.json,
        args=request.args)
    common.logger = nomad_utils.get_logger(**logger_kwargs)

    # chaos monkey: only active for a positive api_chaos setting
    if config.services.api_chaos <= 0:
        return

    if random.randint(0, 100) <= config.services.api_chaos:
        abort(random.choice([400, 404, 500]), 'With best wishes from the chaos monkey.')
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from structlog import BoundLogger
from flask_restplus import fields
from datetime import datetime
import pytz
from contextlib import contextmanager
logger: BoundLogger = None
''' A logger pre configured with information about the current request. '''
class RFC3339DateTime(fields.DateTime):
    '''
    A date time field that formats ``datetime`` values after setting their
    tzinfo to UTC. All other values are formatted via ``str``.
    '''

    def format(self, value):
        if not isinstance(value, datetime):
            return str(value)

        utc_value = value.replace(tzinfo=pytz.utc)
        return super().format(utc_value)


# a shared instance of the field
rfc3339DateTime = RFC3339DateTime()
class DotKeyFieldMixin:
    ''' Allows use of flask_restplus fields with '.' in key names. By default, '.'
    is used as a separator for accessing nested properties. Mixin prevents this,
    allowing fields to use '.' in the key names.

    Example of issue:
    >>> data = {"my.dot.field": 1234}
    >>> model = {"my.dot.field": fields.String}
    >>> marshal(data, model)
    {"my.dot.field": None}

    flask_restplus tries to fetch values for data['my']['dot']['field'] instead
    of data['my.dot.field'] which is the desired behaviour in this case.
    '''

    def output(self, key, obj, **kwargs):
        ''' Calls the field's ``output`` with all '.' in ``key`` and in the keys
        of ``obj`` replaced by '___', so that no nested access is attempted.
        '''
        transformed_obj = {k.replace(".", "___"): v for k, v in obj.items()}
        transformed_key = key.replace(".", "___")
        # if self.attribute is set and contains '.' super().output() will
        # use '.' as a separator for nested access.
        # -> temporarily set to None to overcome this
        with self.toggle_attribute():
            data = super().output(transformed_key, transformed_obj)
        return data

    @contextmanager
    def toggle_attribute(self):
        ''' Context manager to temporarily set self.attribute to None

        Yields self.attribute before setting to None. The original value is
        restored on exit, also if the body of the with statement raises.
        '''
        attribute = self.attribute
        self.attribute = None
        try:
            yield attribute
        finally:
            # without the finally, an exception in the with body would leave
            # the field with attribute=None for all subsequent uses
            self.attribute = attribute
class DotKeyNested(DotKeyFieldMixin, fields.Nested):
    ''' A ``fields.Nested`` field combined with the ``DotKeyFieldMixin``. '''
    pass
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
The dcat implementation of NOMAD.
'''
from flask import Blueprint
from flask_restplus import Api
from .api import blueprint, api
from .datasets import Dataset
from .catalog import Catalog