Commit bf8e6b03 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added a first implementation of a new API with fastapi.

parent f44449bf
......@@ -5,6 +5,7 @@
......@@ -12,7 +13,8 @@ __pycache__
......@@ -23,8 +25,8 @@ target/
......@@ -32,4 +34,3 @@ parser.osio.log
......@@ -3,7 +3,7 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
# Add files or directories to the blacklist. They should be base names, not
# paths.
......@@ -666,7 +666,7 @@ ignore-on-opaque-inference=yes
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
......@@ -3,7 +3,7 @@ window.nomadEnv = {
'keycloakRealm': 'fairdi_nomad_test',
'keycloakClientId': 'nomad_gui_dev',
'appBase': '',
'appBase': 'http://localhost:8000/fairdi/nomad/latest',
'appBase': 'http://localhost:8000',
'debug': false,
'matomoEnabled': false,
'matomoUrl': '',
......@@ -22,6 +22,7 @@ export const version = window.nomadEnv.version
// Base URL of the app as configured in window.nomadEnv, with one trailing slash removed.
export const appBase = window.nomadEnv.appBase.replace(/\/$/, '')
// export const apiBase = ''
// Base URL of the API: `/api` below appBase.
export const apiBase = `${appBase}/api`
// Base URL of the v1 API: `/api/v1` below appBase.
export const apiV1Base = `${appBase}/api/v1`
// Base URL of the optimade API: `/optimade` below appBase.
export const optimadeBase = `${appBase}/optimade`
// Base URL of the GUI, read from process.env.PUBLIC_URL.
export const guiBase = process.env.PUBLIC_URL
// The matomo URL as configured in window.nomadEnv.
export const matomoUrl = window.nomadEnv.matomoUrl
......@@ -90,7 +90,6 @@ if
app = Flask(__name__)
''' The Flask app that serves all APIs. '''
app.config.APPLICATION_ROOT = common.base_path # type: ignore
app.config.RESTPLUS_MASK_HEADER = False # type: ignore
app.config.RESTPLUS_MASK_SWAGGER = False # type: ignore
app.config.SWAGGER_UI_OPERATION_ID = True # type: ignore
......@@ -98,17 +97,6 @@ app.config.SWAGGER_UI_REQUEST_DURATION = True # type: ignore
app.config['SECRET_KEY'] =
def api_base_path_response(env, resp):
resp('200 OK', [('Content-Type', 'text/plain')])
return [
('Development nomad api server. Api is served under %s/.' %'utf-8')]
app.wsgi_app = DispatcherMiddleware( # type: ignore
api_base_path_response, { app.wsgi_app})
app.register_blueprint(api_blueprint, url_prefix='/api')
......@@ -182,18 +170,3 @@ def before_request():
if > 0:
if random.randint(0, 100) <=
abort(random.choice([400, 404, 500]), 'With best wishes from the chaos monkey.')
def setup():
from nomad import infrastructure
if not app.config['TESTING']:
# each subprocess is supposed to disconnect and connect again:
from mongoengine import disconnect
except Exception:
......@@ -213,8 +213,8 @@ def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str
g.user.user_id if g.user is not None else None)
except ValueError as e:
abort(401, getattr(e, 'message', 'Invalid owner parameter: %s' % owner))
except search.AuthenticationRequiredError as e:
abort(401, str(e))
except Exception as e:
abort(400, getattr(e, 'message', 'Invalid owner parameter'))
......@@ -22,15 +22,10 @@ from datetime import datetime
import pytz
from contextlib import contextmanager
from nomad import config
logger: BoundLogger = None
''' A logger pre configured with information about the current request. '''
base_path =
''' Provides the root path of the nomad APIs. '''
class RFC3339DateTime(fields.DateTime):
......@@ -26,7 +26,7 @@ blueprint = Blueprint('optimade', __name__)
base_url = 'https://%s/%s/optimade' % ('/'),'/'))'/'))
def url(endpoint: str = None, version='v1', prefix=None, **kwargs):
from fastapi import FastAPI, status, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.wsgi import WSGIMiddleware
import traceback
from nomad import config, utils
from import app as flask_app
from nomad.app_fastapi.routers import users, entries, auth
logger = utils.get_logger(__name__)
app = FastAPI(,
title='NOMAD API',
version='v1, NOMAD %s@%s' % (config.meta.version, config.meta.commit),
**Disclaimer!** This is the new NOMAD API. It is still under development and only includes a
part of the NOMAD API functionality. You can still use the old flask-based API
as `/api` and the optimade API as `/optimade/v1`.
## Getting started
... TODO put the examples and tutorial here ...
## Conventions
### Paths
The various API operations are organized with the following path scheme. The first
part of the path, describes the data entity that is covered by
the operations below (e.g. `entries`, `users`, `datasets`, `uploads`). For example
everything below `entries` will be about searching entries, getting
an entry, editing entries, etc.
The second (optional and variable) path segment allows to denote a specific entity instance,
e.g. a specific entry or dataset, usually by id. Without such a variable second
path segment, it is about all instances, e.g. searching entries or listing all datasets.
Optional (if available) further path segments will determine the variety and format
of data. This is mostly for entries to distinguish the metadata, raw, and archive
data or distinguish between listing (i.e. paginated json) and downloading
(i.e. streaming a zip-file)
Further, we try to adhere to the paradigm of getting and posting resources. Therefore,
when you post a complex query, you will not post it to `/entries` (a query is not an entry),
but `/entries/query`. Here *query* being a kind of virtual resource.
### Parameters and bodies for GET and POST operations
We offer **GET** and **POST** versions for many complex operations. The idea is that
**GET** is easy to use, e.g. via curl or simply in the browser, while **POST**
allows to provide more complex parameters (i.e. a JSON body). For example to
search for entries, you can use the **GET** operation `/entries` to specify simple
queries via URL, e.g. `/entries?code_name=VASP&atoms=Ti`, but you would use
**POST** `/entries/query` to provide complex nested queries, e.g. with logical
Typically the **POST** version is a super-set of the functionality of the **GET**
version. But, most top-level parameters in the **POST** body, will be available
in the **GET** version as URL parameters with the same name and meaning. This
is especially true for recurring parameters for general API concepts like pagination
or specifying required result fields.
### Response layout
Typically a response will mirror all input parameters in the normalized form that
was used to perform the operation.
Some of these will be augmented with result values. For example the pagination
section of a request will be augmented with the total available number.
The actual requested data, will be placed under the key `data`.
## About Authentication
NOMAD is an open data-sharing platform, and most of the API operations do not require
any authorization and can be freely used without a user or credentials. However,
to upload data, edit data, or view your own and potentially unpublished data,
the API needs to authenticate you.
The NOMAD API uses OAuth and tokens to authenticate users. We provide simple operations
that allow you to acquire an *access token* via username and password based
authentication (`/auth/token`). The resulting access token can then be used on all operations
(e.g. that support or require authentication).
To use authentication in the dashboard, simply use the Authorize button. The
dashboard GUI will manage the access token and use it while you try out the various
async def startup_event():
from nomad import infrastructure
# each subprocess is supposed to disconnect and connect again:
from mongoengine import disconnect
except Exception:
async def unicorn_exception_handler(request: Request, e: Exception):
logger.error('unexpected exception in API', url=request.url, exc_info=e)
return JSONResponse(
'detail': {
'reason': 'Unexpected exception while handling your request',
'exception': str(e),
'exception_class': e.__class__.__name__,
'exception_traceback': traceback.format_exc()
app.include_router(auth.router, prefix='/api/v1/auth')
app.include_router(users.router, prefix='/api/v1/users')
app.include_router(entries.router, prefix='/api/v1/entries')
app.mount('/', WSGIMiddleware(flask_app))
This diff is collapsed.
from fastapi import Depends, APIRouter, HTTPException, status
from import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from pydantic import BaseModel
from nomad import infrastructure
from nomad.utils import get_logger, strip
from nomad.app_fastapi.models import User, HTTPExceptionModel
from nomad.app_fastapi.utils import create_responses
logger = get_logger(__name__)
router = APIRouter()
default_tag = 'auth'
# Shape of a token payload: both token operations in this module (get_token and
# get_token_via_query) return a dict with exactly these two keys.
class Token(BaseModel):
    # the access token as returned by infrastructure.keycloak.basicauth
    access_token: str
    # the token type; the operations in this module always set it to 'bearer'
    token_type: str
oauth2_scheme = OAuth2PasswordBearer(tokenUrl='/api/v1/auth/token', auto_error=False)
async def get_optional_user(access_token: str = Depends(oauth2_scheme)) -> User:
A dependency that provides the authenticated user (if credentials are available) or None.
if access_token is None:
return None
return User(**infrastructure.keycloak.tokenauth(access_token))
except infrastructure.KeycloakError as e:
raise HTTPException(
detail=str(e), headers={'WWW-Authenticate': 'Bearer'})
async def get_required_user(user: User = Depends(get_optional_user)) -> User:
A dependency that provides the authenticated user or raises 401 if no user is
if user is None:
raise HTTPException(
detail='Authentication required',
headers={'WWW-Authenticate': 'Bearer'})
return user
_bad_credentials_response = status.HTTP_401_UNAUTHORIZED, {
'model': HTTPExceptionModel,
'description': strip('''
Unauthorized. The provided credentials were not recognized.''')}
summary='Get an access token',
async def get_token(form_data: OAuth2PasswordRequestForm = Depends()):
This API uses OAuth as an authentication mechanism. This operation allows you to
retrieve an *access token* by posting username and password as form data.
This token can be used on subsequent API calls to authenticate
you. Operations that support or require authentication will expect the *access token*
in an HTTP Authorization header like this: `Authorization: Bearer <access token>`.
On the OpenAPI dashboard, you can use the *Authorize* button at the top.
You only need to provide `username` and `password` values. You can ignore the other
access_token = infrastructure.keycloak.basicauth(
form_data.username, form_data.password)
except infrastructure.KeycloakError:
raise HTTPException(
detail='Incorrect username or password',
headers={'WWW-Authenticate': 'Bearer'})
return {'access_token': access_token, 'token_type': 'bearer'}
summary='Get an access token',
async def get_token_via_query(username: str, password: str):
This is a convenience alternative to the **POST** version of this operation.
It allows you to retrieve an *access token* by providing username and password.
access_token = infrastructure.keycloak.basicauth(username, password)
except infrastructure.KeycloakError:
raise HTTPException(
detail='Incorrect username or password',
headers={'WWW-Authenticate': 'Bearer'})
return {'access_token': access_token, 'token_type': 'bearer'}
This diff is collapsed.
from fastapi import Depends, APIRouter, status
from nomad.app_fastapi.routers.auth import get_required_user
from nomad.app_fastapi.models import User, HTTPExceptionModel
from nomad.app_fastapi.utils import create_responses
from nomad.utils import strip
router = APIRouter()
default_tag = 'users'
_authentication_required_response = status.HTTP_401_UNAUTHORIZED, {
'model': HTTPExceptionModel,
'description': strip('''
Unauthorized. The operation requires authorization,
but no or bad authentication credentials are given.''')}
summary='Get your account data',
description='Returnes the account data of the authenticated user.',
# Returns the user resolved by the get_required_user dependency unchanged; that
# dependency raises an HTTPException ('Authentication required') if there is no user.
async def read_users_me(current_user: User = Depends(get_required_user)):
    return current_user
from typing import Dict, Iterator, Any
from types import FunctionType
import sys
import inspect
from fastapi import Query, HTTPException # pylint: disable=unused-import
from pydantic import ValidationError, BaseModel # pylint: disable=unused-import
import zipstream
if sys.version_info >= (3, 7):
import zipfile
import zipfile37 as zipfile # pragma: no cover
def parameter_dependency_from_model(name: str, model_cls):
Takes a pydantic model class as input and creates a dependency with corresponding
Query parameter definitions that can be used for GET
This will only work, if the fields defined in the input model can be turned into
suitable query parameters. Otherwise fastapi will complain down the road.
name: Name for the dependency function.
model_cls: A ``BaseModel`` inheriting model class as input.
names = []
annotations: Dict[str, type] = {}
defaults = []
for field_model in model_cls.__fields__.values():
field_info = field_model.field_info
annotations[] = field_model.outer_type_
defaults.append(Query(field_model.default, description=field_info.description))
code = inspect.cleandoc('''
def %s(%s):
return %s(%s)
except ValidationError as e:
errors = e.errors()
for error in errors:
error['loc'] = ['query'] + list(error['loc'])
raise HTTPException(422, detail=errors)
''' % (
name, ', '.join(names), model_cls.__name__,
', '.join(['%s=%s' % (name, name) for name in names])))
compiled = compile(code, 'string', 'exec')
env = {model_cls.__name__: model_cls}
func = FunctionType(compiled.co_consts[0], env, name)
func.__annotations__ = annotations
func.__defaults__ = (*defaults,)
return func
# Describes one file to put into a streamed zipfile; create_streamed_zipfile consumes
# an iterator of these.
class File(BaseModel):
    # path of the file — presumably the name used inside the zip archive; TODO confirm
    path: str
    # the file content source — presumably a readable file-like object; TODO confirm
    f: Any
    # size of the file — presumably in bytes; TODO confirm
    size: int
def create_streamed_zipfile(
files: Iterator[File],
compress: bool = False) -> Iterator[bytes]:
Creates a streaming zipfile object that can be used in fastapi's ``StreamingResponse``.
def path_to_write_generator():
for file_obj in files:
def content_generator():
while True:
data = * 64)
if not data:
yield data
yield dict(
compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
zip_stream = zipstream.ZipFile(mode='w', compression=compression, allowZip64=True)
zip_stream.paths_to_write = path_to_write_generator()
for chunk in zip_stream:
yield chunk
def create_responses(*args):
    '''
    Collects ``(status_code, response)`` pairs into a dictionary.

    Arguments:
        *args: Any number of two-element pairs, each consisting of a status code and
            the response that belongs to it.

    Returns:
        A dict that maps each status code to its response. If a status code is given
        more than once, the last pair wins.
    '''
    responses = {}
    for pair in args:
        # unpacking enforces that each argument is a two-element pair
        status_code, response = pair
        responses[status_code] = response
    return responses
......@@ -812,7 +812,7 @@ def delete_partial_archives_from_mongo(entry_ids: List[str]):
def read_partial_archives_from_mongo(entry_ids: List[str], as_dict=False) -> Dict[str, Union[EntryArchive, Dict]]:
Reads the partial archives for a set of entries of the same upload.
Reads the partial archives for a set of entries.
entry_ids: A list of entry ids.
......@@ -866,7 +866,7 @@ def compute_required_with_referenced(required):
# TODO this function should be based on the metainfo
if not isinstance(required, dict):
return required
return None
if any(key.startswith('section_run') for key in required):
return None
......@@ -149,7 +149,7 @@ logstash = NomadConfig(
services = NomadConfig(
......@@ -177,7 +177,7 @@ def api_url(ssl: bool = True, api: str = 'api'):
base_url = '%s://%s/%s' % (
'https' if services.https and ssl else 'http',
return '%s/%s' % (base_url.strip('/'), api)
......@@ -305,6 +305,7 @@ parser_matching_size = 150 * 80 # 150 lines of 80 ASCII characters per line
console_log_level = logging.WARNING
max_upload_size = 32 * (1024 ** 3)
raw_file_strip_cutoff = 1000
max_entry_download = 500000
use_empty_parsers = False
reprocess_unmatched = True
metadata_file_name = 'nomad'
......@@ -580,3 +580,8 @@ class EntryArchive(metainfo.MSection):
processing_logs = metainfo.Quantity(
type=Any, shape=['0..*'],
description='The processing logs for this entry as a list of structlog entries.')
# preemptively create the elasticsearch document definition, which populates metrics and
# search quantities in the search_extension
......@@ -23,6 +23,7 @@ is run once for each *api* and *worker* process. Individual functions for partia
exist to facilitate testing, aspects of :py:mod:`nomad.cli`, etc.
from typing import Dict, Any
import os.path
import os
import shutil
......@@ -134,6 +135,9 @@ def setup_elastic(create_mappings=True):
return elastic_client
class KeycloakError(Exception):
    '''
    Error type for keycloak related failures. ``Keycloak.basicauth`` raises it in
    place of a ``KeycloakAuthenticationError``.
    '''
class Keycloak():
A class that encapsulates all keycloak related functions for easier mocking and
......@@ -171,6 +175,63 @@ class Keycloak():
return self.__public_keys
def basicauth(self, username: str, password: str) -> str:
Performs basic authentication and returns an access token.
token_info = self._oidc_client.token(username=username, password=password)
except KeycloakAuthenticationError as e:
raise KeycloakError(e)
except Exception as e:
logger.error('cannot perform basicauth', exc_info=e)
raise e