From 5efd12d31e854d52f03c4cdd181ae8c70ca7e2cc Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Fri, 19 Feb 2021 16:25:19 +0100
Subject: [PATCH] Added Oasis access restriction via optional white-listing.

---
 gui/src/components/errors.js             |   2 +
 nomad/app/flask/api/auth.py              |  82 ++++++++---------
 nomad/app/flask/dcat/catalog.py          |   2 +
 nomad/app/flask/dcat/datasets.py         |   2 +
 nomad/app/main.py                        |  29 +++++-
 nomad/app/v1/routers/auth.py             |  32 +++++--
 nomad/config.py                          |   3 +-
 nomad/infrastructure.py                  | 111 +++++++----------------
 ops/docker-compose/nomad-oasis/README.md |  22 +++++
 tests/conftest.py                        |  15 +--
 10 files changed, 161 insertions(+), 139 deletions(-)

diff --git a/gui/src/components/errors.js b/gui/src/components/errors.js
index 8a6ea93a33..6c1cb6c5f4 100644
--- a/gui/src/components/errors.js
+++ b/gui/src/components/errors.js
@@ -69,6 +69,8 @@ class ErrorSnacksUnstyled extends React.Component {
     if (error instanceof Error) {
       if (error.name === 'CannotReachApi') {
         errorStr = 'Cannot reach NOMAD, please try again later.'
+      } else if (error.name === 'NotAuthorized') {
+        errorStr = error.message
       } else if (error.name === 'DoesNotExist') {
         errorStr = 'You are trying to access information that does not exist. Please try again and let us know, if this error keeps happening.'
       } else if (error.name === 'VersionMismatch') {
diff --git a/nomad/app/flask/api/auth.py b/nomad/app/flask/api/auth.py
index 2a8ca9e837..899005f2f3 100644
--- a/nomad/app/flask/api/auth.py
+++ b/nomad/app/flask/api/auth.py
@@ -72,27 +72,6 @@ api.authorizations = {
 }
 
 
-def _verify_upload_token(token) -> str:
-    '''
-    Verifies the upload token generated with :func:`generate_upload_token`.
-
-    Returns: The user UUID or None if the toke could not be verified.
-    '''
-    payload, signature = token.split('.')
-    payload = utils.base64_decode(payload)
-    signature = utils.base64_decode(signature)
-
-    compare = hmac.new(
-        bytes(config.services.api_secret, 'utf-8'),
-        msg=payload,
-        digestmod=hashlib.sha1)
-
-    if signature != compare.digest():
-        return None
-
-    return str(uuid.UUID(bytes=payload))
-
-
 def authenticate(
         basic: bool = False, upload_token: bool = False, signature_token: bool = False,
         required: bool = False, admin_only: bool = False):
@@ -124,35 +103,60 @@ def authenticate(
             g.user = None
+            # always define the attribute; only header based auth sets an actual token
+            g.oidc_access_token = None
 
             if upload_token and 'token' in request.args:
-                token = request.args['token']
-                user_id = _verify_upload_token(token)
-                if user_id is not None:
+                try:
+                    token = request.args['token']
+                    payload, signature = token.split('.')
+                    payload = utils.base64_decode(payload)
+                    signature = utils.base64_decode(signature)
+
+                    compare = hmac.new(
+                        bytes(config.services.api_secret, 'utf-8'),
+                        msg=payload,
+                        digestmod=hashlib.sha1)
+
+                    # Returning from here would skip the wrapped endpoint without signalling
+                    # an error; reject the token instead. compare_digest is constant-time.
+                    if not hmac.compare_digest(signature, compare.digest()):
+                        abort(401, 'Invalid token')
+
+                    user_id = str(uuid.UUID(bytes=payload))
                     g.user = infrastructure.keycloak.get_user(user_id)
+                except (KeyError, ValueError):
+                    # ValueError: malformed token, e.g. wrong number of '.' separated
+                    # parts or a payload that is not a valid UUID
+                    abort(401, 'Invalid token')
 
             elif signature_token and 'signature_token' in request.args:
                 token = request.args.get('signature_token', None)
-                if token is not None:
-                    try:
-                        decoded = jwt.decode(token, config.services.api_secret, algorithms=['HS256'])
-                        user = datamodel.User(user_id=decoded['user'])
-                        if user is None:
-                            abort(401, 'User for the given signature does not exist')
-                        else:
-                            g.user = user
-                    except KeyError:
-                        abort(401, 'Token with invalid/unexpected payload')
-                    except jwt.ExpiredSignatureError:
-                        abort(401, 'Expired token')
-                    except jwt.InvalidTokenError:
-                        abort(401, 'Invalid token')
+                try:
+                    decoded = jwt.decode(token, config.services.api_secret, algorithms=['HS256'])
+                    g.user = datamodel.User.get(user_id=decoded['user'])
+                except KeyError:
+                    abort(401, 'Token with invalid/unexpected payload')
+                except jwt.ExpiredSignatureError:
+                    abort(401, 'Expired token')
+                except jwt.InvalidTokenError:
+                    abort(401, 'Invalid token')
 
             elif 'token' in request.args:
                 abort(401, 'Query param token not supported for this endpoint')
 
+            elif 'signature_token' in request.args:
+                abort(401, 'Query param signature_token not supported for this endpoint')
+
             else:
-                error = infrastructure.keycloak.authorize_flask(basic=basic)
-                if error is not None:
-                    abort(401, message=error)
+                try:
+                    g.user, g.oidc_access_token = infrastructure.keycloak.auth(request.headers, allow_basic=basic)
+                except infrastructure.KeycloakError as e:
+                    abort(401, message=str(e))
+
+            if config.oasis.allowed_users is not None:
+                if g.user is None:
+                    abort(401, message='Authentication is required for this Oasis')
+                if g.user.email not in config.oasis.allowed_users:
+                    abort(401, message='You are not authorized to access this Oasis')
 
             if required and g.user is None:
                 abort(401, message='Authentication is required for this endpoint')
@@ -218,7 +216,7 @@ class AuthResource(Resource):
             return {
                 'upload_token': generate_upload_token(g.user),
                 'signature_token': signature_token(),
-                'access_token': infrastructure.keycloak.access_token
+                'access_token': g.oidc_access_token
             }
 
         except KeyError:
diff --git a/nomad/app/flask/dcat/catalog.py b/nomad/app/flask/dcat/catalog.py
index 39eb4aa4de..9eb4c0daf9 100644
--- a/nomad/app/flask/dcat/catalog.py
+++ b/nomad/app/flask/dcat/catalog.py
@@ -20,6 +20,7 @@ from flask_restplus import Resource, fields
 from elasticsearch_dsl import Q
 
 from nomad import search
+from nomad.app.flask.api.auth import authenticate
 
 from .api import api, arg_parser, rdf_respose, response_types
 from .mapping import Mapping
@@ -44,6 +45,7 @@ class Catalog(Resource):
     @api.response(404, 'There is no entry with the given id.')
     @api.response(401, 'This entry is not publically accessible.')
     @api.response(200, 'Data send', headers={'Content-Type': 'application/xml'})
+    @authenticate()
     def get(self):
         ''' Returns a page of DCAT datasets. '''
         args = arg_parser.parse_args()
diff --git a/nomad/app/flask/dcat/datasets.py b/nomad/app/flask/dcat/datasets.py
index 5e3c7d0209..8cb34edc1c 100644
--- a/nomad/app/flask/dcat/datasets.py
+++ b/nomad/app/flask/dcat/datasets.py
@@ -19,6 +19,7 @@ from flask_restplus import Resource, abort
 from elasticsearch.exceptions import NotFoundError
 
 from nomad import search
+from nomad.app.flask.api.auth import authenticate
 
 from .api import api, arg_parser, rdf_respose, response_types
 from .mapping import Mapping
@@ -35,6 +36,7 @@ class Dataset(Resource):
     @api.response(404, 'There is no entry with the given id.')
     @api.response(401, 'This entry is not publically accessible.')
     @api.response(200, 'Data send', headers={'Content-Type': 'application/xml'})
+    @authenticate()
     def get(self, entry_id):
         ''' Returns a DCAT dataset for a given NOMAD entry id. '''
         try:
diff --git a/nomad/app/main.py b/nomad/app/main.py
index 3bdceecf16..cb4deea5ae 100644
--- a/nomad/app/main.py
+++ b/nomad/app/main.py
@@ -16,18 +16,60 @@
 # limitations under the License.
 #
 
-from fastapi import FastAPI
+from fastapi import FastAPI, status, Response
 from fastapi.middleware.wsgi import WSGIMiddleware
+from starlette.middleware.base import BaseHTTPMiddleware
 
-from nomad import config
+from nomad import config, infrastructure
 
 from .optimade import optimade_app
 from .flask import app as flask_app
 from .v1.main import app as v1_app
 
 
+class OasisAuthenticationMiddleware(BaseHTTPMiddleware):
+    '''
+    Restricts access to the wrapped app to the users in ``config.oasis.allowed_users``.
+
+    Requests to the ``extensions``, ``info``, and ``versions`` endpoints pass without
+    authentication. All other requests need an ``Authorization`` header that
+    authenticates a user whose email is in ``config.oasis.allowed_users``; otherwise
+    a 401 response is returned.
+    '''
+
+    open_endpoints = ('extensions', 'info', 'versions')
+
+    async def dispatch(self, request, call_next):
+        # Match whole path segments; a substring test would also let through
+        # unrelated paths (e.g. ids) that merely contain one of these words.
+        segments = request.url.path.split('/')
+        if any(segment in self.open_endpoints for segment in segments):
+            return await call_next(request)
+
+        if 'Authorization' not in request.headers:
+            return Response(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                content='You have to authenticate to use this Oasis endpoint.')
+
+        try:
+            user, _ = infrastructure.keycloak.auth(request.headers)
+        except infrastructure.KeycloakError as e:
+            # invalid or expired credentials are a client error, not a server error
+            return Response(status_code=status.HTTP_401_UNAUTHORIZED, content=str(e))
+
+        if user is None or user.email not in config.oasis.allowed_users:
+            return Response(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                content='You are not authorized to access this Oasis endpoint.')
+
+        return await call_next(request)
+
+
 app = FastAPI()
 
+if config.oasis.allowed_users is not None:
+    optimade_app.add_middleware(OasisAuthenticationMiddleware)
+
 app_base = config.services.api_base_path
 app.mount(f'{app_base}/api/v1', v1_app)
 app.mount(f'{app_base}/optimade', optimade_app)
diff --git a/nomad/app/v1/routers/auth.py b/nomad/app/v1/routers/auth.py
index c7fb6b2ffe..be269cfce7 100644
--- a/nomad/app/v1/routers/auth.py
+++ b/nomad/app/v1/routers/auth.py
@@ -16,11 +16,12 @@
 # limitations under the License.
 #
 
+from typing import cast
 from fastapi import Depends, APIRouter, HTTPException, status
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from pydantic import BaseModel
 
-from nomad import infrastructure
+from nomad import infrastructure, config, datamodel
 from nomad.utils import get_logger, strip
 
 from ..common import root_path
@@ -46,14 +47,29 @@ async def get_optional_user(access_token: str = Depends(oauth2_scheme)) -> User:
     A dependency that provides the authenticated (if credentials are available) or None.
     '''
     if access_token is None:
-        return None
+        user: datamodel.User = None
+    else:
+        try:
+            user = cast(datamodel.User, infrastructure.keycloak.tokenauth(access_token))
+        except infrastructure.KeycloakError as e:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail=str(e), headers={'WWW-Authenticate': 'Bearer'})
+
+    if config.oasis.allowed_users is not None:
+        if user is None:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail='Authentication is required for this Oasis',
+                headers={'WWW-Authenticate': 'Bearer'})
+
+        if user.email not in config.oasis.allowed_users:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail='You are not authorized to access this Oasis',
+                headers={'WWW-Authenticate': 'Bearer'})
 
-    try:
-        return User(**infrastructure.keycloak.tokenauth(access_token))
-    except infrastructure.KeycloakError as e:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail=str(e), headers={'WWW-Authenticate': 'Bearer'})
+    return user
 
 
 async def get_required_user(user: User = Depends(get_optional_user)) -> User:
diff --git a/nomad/config.py b/nomad/config.py
index 9310a2c322..b9b40d67f2 100644
--- a/nomad/config.py
+++ b/nomad/config.py
@@ -166,7 +166,8 @@ services = NomadConfig(
 
 oasis = NomadConfig(
     central_nomad_api_url='https://nomad-lab.eu/prod/rae/api',
-    central_nomad_deployment_id='nomad-lab.eu/prod/rae'
+    central_nomad_deployment_id='nomad-lab.eu/prod/rae',
+    allowed_users=None  # a list of allowed user account emails, None disables the restriction
 )
 
 tests = NomadConfig(
diff --git a/nomad/infrastructure.py b/nomad/infrastructure.py
index 8812c95cc7..1a719dbb9a 100644
--- a/nomad/infrastructure.py
+++ b/nomad/infrastructure.py
@@ -23,7 +23,7 @@ is run once for each *api* and *worker* process. Individual functions for partia
 exist to facilitate testing, aspects of :py:mod:`nomad.cli`, etc.
 '''
 
-from typing import Dict, Any
+from typing import Tuple, Dict
 import os.path
 import os
 import shutil
@@ -37,7 +37,6 @@ from keycloak import KeycloakOpenID, KeycloakAdmin
 from keycloak.exceptions import KeycloakAuthenticationError, KeycloakGetError
 import json
 import jwt
-from flask import g, request
 import basicauth
 from datetime import datetime
 import re
@@ -175,6 +174,29 @@ class Keycloak():
 
         return self.__public_keys
 
+    def auth(self, headers: Dict[str, str], allow_basic: bool = False) -> Tuple[object, str]:
+        '''
+        Performs authentication based on the provided headers. Either basic or bearer.
+
+        Returns:
+            The user and its access_token
+
+        Raises:
+            KeycloakError
+        '''
+
+        if headers.get('Authorization', '').startswith('Bearer '):
+            access_token = headers['Authorization'].split(None, 1)[1].strip()
+            return self.tokenauth(access_token), access_token
+
+        if allow_basic and headers.get('Authorization', '').startswith('Basic '):
+            auth = headers['Authorization'].split(None, 1)[1].strip()
+            username, password = basicauth.decode(auth)
+            access_token = self.basicauth(username, password)
+            return self.tokenauth(access_token), access_token
+
+        return None, None
+
     def basicauth(self, username: str, password: str) -> str:
         '''
         Performs basic authentication and returns an access token.
@@ -192,9 +214,12 @@ class Keycloak():
 
         return token_info['access_token']
 
-    def tokenauth(self, access_token: str) -> Dict[str, Any]:
+    def tokenauth(self, access_token: str) -> object:
         '''
-        Authenticates the given token and returns the user record.
+        Authenticates the given access_token
+
+        Returns:
+            The user
 
         Raises:
             KeycloakError
@@ -220,7 +245,8 @@ class Keycloak():
                     Could not validate credentials.
                     The given token does not contain a user_id.'''))
 
-            return dict(
+            from nomad import datamodel
+            return datamodel.User(
                 user_id=user_id,
                 email=payload.get('email', None),
                 first_name=payload.get('given_name', None),
@@ -232,74 +258,6 @@ class Keycloak():
             logger.error('cannot perform tokenauth', exc_info=e)
             raise e
 
-    def authorize_flask(self, basic: bool = True) -> str:
-        '''
-        Authorizes the current flask request with keycloak. Uses either Bearer or Basic
-        authentication, depending on available headers in the request. Bearer auth is
-        basically offline (besides retrieving and caching keycloaks public key for signature
-        validation). Basic auth causes authentication agains keycloak with each request.
-
-        Will set ``g.user``, either with None or user data from the respective OIDC token.
-
-        Returns: An error message or None
-        '''
-        g.oidc_access_token = None
-        if 'Authorization' in request.headers and request.headers['Authorization'].startswith('Bearer '):
-            g.oidc_access_token = request.headers['Authorization'].split(None, 1)[1].strip()
-        elif 'Authorization' in request.headers and request.headers['Authorization'].startswith('Basic '):
-            if not basic:
-                return 'Basic authentication not allowed, use Bearer token instead'
-
-            try:
-                auth = request.headers['Authorization'].split(None, 1)[1].strip()
-                username, password = basicauth.decode(auth)
-                token_info = self._oidc_client.token(username=username, password=password)
-                g.oidc_access_token = token_info['access_token']
-            except KeycloakAuthenticationError:
-                return 'Could not authenticate, wrong credentials'
-            except Exception as e:
-                logger.error('Could not authenticate Basic auth', exc_info=e)
-                return 'Could not authenticate Basic auth: %s' % str(e)
-
-        if g.oidc_access_token is not None:
-            auth_error: str = None
-            try:
-                kid = jwt.get_unverified_header(g.oidc_access_token)['kid']
-                key = self._public_keys.get(kid)
-                if key is None:
-                    logger.error('The user provided keycloak public key does not exist. Does the UI use the right realm?')
-                    auth_error = 'Could not verify JWT token: public key does not exist'
-                else:
-                    options = dict(verify_aud=False, verify_exp=True, verify_iss=True)
-                    payload = jwt.decode(
-                        g.oidc_access_token, key=key, algorithms=['RS256'], options=options,
-                        issuer='%s/realms/%s' % (config.keycloak.server_url.rstrip('/'), config.keycloak.realm_name))
-
-            except jwt.InvalidTokenError as e:
-                auth_error = str(e)
-            except Exception as e:
-                logger.error('Could not verify JWT token', exc_info=e)
-                raise e
-
-            if auth_error is not None:
-                g.user = None
-                return auth_error
-
-            else:
-                from nomad import datamodel
-                g.user = datamodel.User(
-                    user_id=payload.get('sub', None),
-                    email=payload.get('email', None),
-                    first_name=payload.get('given_name', None),
-                    last_name=payload.get('family_name', None))
-
-                return None
-
-        else:
-            g.user = None
-            # Do not return an error. This is the case were there are no credentials
-            return None
-
     def __create_username(self, user):
         if user.first_name is not None and user.last_name is not None:
             user.username = '%s%s' % (user.first_name[:1], user.last_name)
@@ -427,8 +385,7 @@ class Keycloak():
         '''
         Retrives all available information about a user from the keycloak admin
         interface. This must be used to retrieve complete user information, because
-        the info solely gathered from tokens (i.e. for the authenticated user ``g.user``)
-        is generally incomplete.
+        the info solely gathered from tokens is generally incomplete.
         '''
 
         if user is not None and user_id is None:
@@ -468,10 +425,6 @@ class Keycloak():
 
         return self.__admin_client
 
-    @property
-    def access_token(self):
-        return getattr(g, 'oidc_access_token', None)
-
 
 keycloak = Keycloak()
 
diff --git a/ops/docker-compose/nomad-oasis/README.md b/ops/docker-compose/nomad-oasis/README.md
index 35039f478f..08a5d81a45 100644
--- a/ops/docker-compose/nomad-oasis/README.md
+++ b/ops/docker-compose/nomad-oasis/README.md
@@ -504,6 +504,28 @@ docker exec nomad_oasis_elastic bash -c 'curl -X DELETE http://elastic:9200/noma
 docker exec nomad_oasis_mongo bash -c 'mongo nomad_fairdi --eval "printjson(db.dropDatabase())"'
 ```
 
+## Restricting access to your Oasis
+
+An Oasis works exactly the same way the official NOMAD works. It is open and everybody
+can access published data. Everybody with an account can upload data. This might not be
+what you want.
+
+Currently there are two ways to restrict access to your Oasis. First, you can avoid
+exposing the Oasis to the public internet, e.g. by only making it available on an
+intranet or through a VPN.
+
+Second, we offer a simple white-list mechanism. As the Oasis administrator, you provide a
+list of accounts as part of your Oasis configuration. To use the Oasis, all users have to
+be logged in and be on your white list of allowed users. To enable white-listing, you
+can provide a list of NOMAD account email addresses in your `nomad.yaml` like this:
+
+```
+oasis:
+    allowed_users:
+        - user1@gmail.com
+        - user2@gmail.com
+```
+
 ## NOMAD Oasis FAQ
 
 ### Why use an Oasis?
diff --git a/tests/conftest.py b/tests/conftest.py
index cb27f7bd72..01b244bcf2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 #
 
-from typing import Tuple, List, Dict, Any
+from typing import Tuple, List
 import pytest
 import logging
 from collections import namedtuple
@@ -274,17 +274,20 @@ class KeycloakMock:
         self.id_counter = 2
         self.users = dict(**test_users)
 
-    def tokenauth(self, access_token: str) -> Dict[str, Any]:
+    def tokenauth(self, access_token: str):
         if access_token in self.users:
-            return self.users[access_token]
+            return User(**self.users[access_token])
         else:
             raise infrastructure.KeycloakError('user does not exist')
 
-    def authorize_flask(self, *args, **kwargs):
-        if 'Authorization' in request.headers and request.headers['Authorization'].startswith('Bearer '):
-            user_id = request.headers['Authorization'].split(None, 1)[1].strip()
-            g.oidc_access_token = user_id
-            g.user = User(**self.users[user_id])
+    def auth(self, headers, **kwargs):
+        # Only read the given headers, not flask's request, so that the mock also
+        # works outside of a flask request context, e.g. when called from middleware.
+        if 'Authorization' in headers and headers['Authorization'].startswith('Bearer '):
+            user_id = headers['Authorization'].split(None, 1)[1].strip()
+            return User(**self.users[user_id]), user_id
+
+        return None, None
 
     def add_user(self, user, *args, **kwargs):
         self.id_counter += 1
-- 
GitLab