From d3deb6c9689a5ba2001053508f79e4c1dc83a8d3 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Tue, 21 Sep 2021 15:55:46 +0200
Subject: [PATCH] Migrated integrationtests to v1 api. #591

---
 nomad/cli/admin/uploads.py           |   3 +-
 nomad/cli/client/__init__.py         |   2 +
 nomad/cli/client/client.py           |   1 +
 nomad/cli/client/integrationtests.py | 205 +++++++++++++++------------
 nomad/client/__init__.py             |   2 +-
 nomad/client/{auth.py => api.py}     |  60 +++++++-
 nomad/client/archive.py              |   2 +-
 nomad/config.py                      |   3 +
 tests/test_client.py                 |   6 +-
 9 files changed, 179 insertions(+), 105 deletions(-)
 rename nomad/client/{auth.py => api.py} (63%)

diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py
index a9cc574af2..51ed2686f5 100644
--- a/nomad/cli/admin/uploads.py
+++ b/nomad/cli/admin/uploads.py
@@ -24,7 +24,6 @@ import json
 import elasticsearch_dsl as es
 
 from nomad import processing as proc, config, infrastructure, utils, files, datamodel, search
-from nomad.search.v1 import quantity_values
 
 from .admin import admin, __run_processing, __run_parallel
 
@@ -102,6 +101,7 @@ def uploads(
         query |= mongoengine.Q(process_status__in=proc.ProcessStatus.STATUSES_PROCESSING)
 
     if unindexed:
+        from nomad.search.v1 import quantity_values
         uploads_in_es = set(quantity_values('upload_id', page_size=1000, owner='all'))
 
         uploads_in_mongo = mongo_client[config.mongo.db_name]['calc'].distinct('upload_id')
@@ -126,6 +126,7 @@ def query_uploads(ctx, uploads):
         if ctx.obj.query_mongo:
             uploads = proc.Calc.objects(**json_query).distinct(field="upload_id")
         else:
+            from nomad.search.v1 import quantity_values
             uploads = list(quantity_values(
                 'upload_id', query=es.Q(json_query), page_size=1000, owner='all'))
     except Exception:
diff --git a/nomad/cli/client/__init__.py b/nomad/cli/client/__init__.py
index aa61faa371..d1aaaf15e8 100644
--- a/nomad/cli/client/__init__.py
+++ b/nomad/cli/client/__init__.py
@@ -53,6 +53,8 @@ lazy_import.lazy_module('nomad.parsing.parsers')
 lazy_import.lazy_module('nomad.infrastructure')
 lazy_import.lazy_module('nomad.doi')
 lazy_import.lazy_module('nomad.client')
+lazy_import.lazy_module('nomad.client.api')
+lazy_import.lazy_module('nomad.client.archive')
 
 from . import local, upload, integrationtests, statistics, update_database  # noqa
 from .client import create_client  # noqa
diff --git a/nomad/cli/client/client.py b/nomad/cli/client/client.py
index ed31e45776..9dfa27d177 100644
--- a/nomad/cli/client/client.py
+++ b/nomad/cli/client/client.py
@@ -110,6 +110,7 @@ def client(ctx, url: str, user: str, password: str, no_ssl_verify: bool, no_toke
     nomad_config.client.url = url
 
     ctx.obj.user = user
+    ctx.obj.auth = nomad_client.Auth(user=user, password=password)
 
     global _create_client
 
diff --git a/nomad/cli/client/integrationtests.py b/nomad/cli/client/integrationtests.py
index f69ab617fb..d0b2e096de 100644
--- a/nomad/cli/client/integrationtests.py
+++ b/nomad/cli/client/integrationtests.py
@@ -24,6 +24,7 @@ as a final integration test.
 import time
 import os
 import click
+import json
 
 from .client import client
 
@@ -42,31 +43,35 @@ simple_example_file = 'tests/data/integration/examples_vasp.zip'
 @click.option(
     '--skip-doi', is_flag=True,
     help='Skip assigning a doi to a dataset.')
-@click.option(
-    '--skip-mirror', is_flag=True,
-    help='Skip get mirror tests.')
 @click.pass_context
-def integrationtests(ctx, skip_parsers, skip_publish, skip_doi, skip_mirror):
-    from .client import create_client
-    client = create_client()
+def integrationtests(ctx, skip_parsers, skip_publish, skip_doi):
+    from nomad.client import api
+    auth = ctx.obj.auth
+
     has_doi = False
     published = False
 
     print('get the upload command')
-    command = client.uploads.get_upload_command().response().result.upload_command_with_name
+    response = api.get('uploads/command-examples', auth=auth)
+    assert response.status_code == 200, response.text
+    command = response.json()['upload_command']
 
     def get_upload(upload):
-        upload = client.uploads.get_upload(
-            upload_id=upload.upload_id, per_page=100).response().result
-
-        while upload.process_running:
+        first = True
+        while first or upload['process_running']:
+            first = False
+            response = api.get(f'uploads/{upload["upload_id"]}', auth=auth)
+            if response.status_code == 404:
+                return None
+            assert response.status_code == 200, response.text
+            upload = response.json()['data']
             time.sleep(0.3)
-            upload = client.uploads.get_upload(
-                upload_id=upload.upload_id, per_page=100).response().result
 
         return upload
 
-    uploads = client.uploads.get_uploads(name='integration_test_upload').response().result.results
+    response = api.get('uploads', params=dict(name='integration_test_upload'), auth=auth)
+    assert response.status_code == 200, response.text
+    uploads = response.json()['data']
     assert len(uploads) == 0, 'the test upload must not exist before'
 
     if not skip_parsers:
@@ -76,136 +81,148 @@ def integrationtests(ctx, skip_parsers, skip_publish, skip_doi, skip_mirror):
         command += ' -k'
         code = os.system(command)
         assert code == 0, 'curl command must be successful'
-        uploads = client.uploads.get_uploads(name='integration_test_upload').response().result.results
-        assert len(uploads) == 1, 'exactly one test upload must be on the server'
-        upload = uploads[0]
+        response = api.get('uploads', params=dict(name='integration_test_upload'), auth=auth)
+        assert response.status_code == 200, response.text
+        response_json = response.json()
+        assert len(response_json['data']) == 1, 'exactly one test upload must be on the server'
+        upload = response_json['data'][0]
 
         print('observe the upload process to be finished')
         upload = get_upload(upload)
 
-        assert upload.process_status == 'SUCCESS'
-        total = upload.calcs.pagination.total
-        assert 100 > total > 0
-        assert len(upload.calcs.results) == total
+        assert upload['process_status'] == 'SUCCESS'
 
         print('delete the upload again')
-        client.uploads.delete_upload(upload_id=upload.upload_id).response()
-        while upload.process_running:
-            upload = client.uploads.get_upload(
-                upload_id=upload.upload_id).response().result
+        upload = api.delete(f'uploads/{upload["upload_id"]}', auth=auth).json()['data']
+        upload = get_upload(upload)
 
     print('upload simple data with API')
     with open(simple_example_file, 'rb') as f:
-        upload = client.uploads.upload(
-            name='integration test upload', file=f).response().result
+        response = api.post(
+            'uploads', files=dict(file=f), params=dict(name='integration_test_upload'),
+            auth=auth, headers={'Accept': 'application/json'})
+        assert response.status_code == 200, response.text
+        upload = response.json()['data']
 
     print('observe the upload process to be finished')
     upload = get_upload(upload)
-    total = upload.calcs.pagination.total
-    assert total > 0
-    assert len(upload.calcs.results) == total
+    response = api.get(f'uploads/{upload["upload_id"]}/entries', auth=auth)
+    assert response.status_code == 200, response.text
+    entries = response.json()['data']
+    assert upload['entries'] == len(entries)
 
     try:
         print('get repo data')
-        for calc in upload.calcs.results:
-            repo = client.repo.get_repo_calc(
-                upload_id=upload.upload_id, calc_id=calc.calc_id).response().result
-            repo['calc_id'] == calc.calc_id
+        for entry in entries:
+            response = api.get(f'entries/{entry["entry_id"]}', auth=auth)
+            assert response.status_code == 200, response.text
+            entry_metadata = response.json()['data']
+            assert entry_metadata['entry_id'] == entry['entry_id']
 
         print('get archive data')
-        for calc in upload.calcs.results:
-            client.archive.get_archive_calc(
-                upload_id=upload.upload_id, calc_id=calc.calc_id).response()
+        for entry in entries:
+            response = api.get(f'entries/{entry["entry_id"]}/archive/download', auth=auth)
+            assert response.status_code == 200, response.text
 
         print('get archive logs')
-        for calc in upload.calcs.results:
-            client.archive.get_archive_logs(
-                upload_id=upload.upload_id, calc_id=calc.calc_id).response()
-
-        query = dict(owner='staging', upload_id=[upload.upload_id])
+        for entry in entries:
+            response = api.post(
+                f'entries/{entry["entry_id"]}/archive/query',
+                data=json.dumps({
+                    'required': {
+                        'processing_logs': '*'
+                    }
+                }), auth=auth)
+            assert response.status_code == 200, response.text
+            assert list(response.json()['data']['archive'].keys()) == ['processing_logs']
+
+        query_request_params = dict(
+            owner='staging',
+            query={
+                'upload_id': upload['upload_id']
+            })
 
         print('perform repo search on data')
-        search = client.repo.search(per_page=100, **query).response().result
-        assert search.pagination.total >= total
-        assert len(search.results) <= search.pagination.total
+        response = api.post('entries/query', data=json.dumps(query_request_params), auth=auth)
+        assert response.status_code == 200, response.text
+        response_json = response.json()
+        assert response_json['pagination']['total'] == 2
+        assert response_json['pagination']['total'] == len(response_json['data'])
 
         print('performing archive paginated search')
-        result = client.archive.post_archive_query(payload={
-            'pagination': {
-                'page': 1,
-                'per_page': 10
-            },
-            'query': query
-        }).response().result
-        assert len(result.results) > 0
+        response = api.post('entries/archive/query', data=json.dumps(dict(
+            pagination=dict(page_size=1, page_offset=1),
+            **query_request_params)), auth=auth)
+        assert response.status_code == 200, response.text
+        response_json = response.json()
+        assert response_json['pagination']['total'] == 2
+        assert len(response_json['data']) == 1
 
         print('performing archive scrolled search')
-        result = client.archive.post_archive_query(payload={
-            'scroll': {
-                'scroll': True
-            },
-            'query': query
-        }).response().result
-        assert len(result.results) > 0
+        response = api.post('entries/archive/query', data=json.dumps(dict(
+            pagination=dict(page_size=1),
+            **query_request_params)), auth=auth)
+        response_json = response.json()
+        response = api.post('entries/archive/query', data=json.dumps(dict(
+            pagination=dict(page_size=1, page_after_value=response_json['pagination']['next_page_after_value']),
+            **query_request_params)), auth=auth)
+        assert response.status_code == 200, response.text
+        response_json = response.json()
+        assert response_json['pagination']['total'] == 2
+        assert len(response_json['data']) == 1
 
         print('performing download')
-        client.raw.raw_files_from_query(**query)
+        response = api.get(
+            'entries/raw/download',
+            params=dict(upload_id=upload['upload_id'], owner='visible'), auth=auth)
+        assert response.status_code == 200, response.text
 
         if not skip_publish:
             print('publish upload')
-            client.uploads.exec_upload_operation(
-                upload_id=upload.upload_id,
-                payload=dict(operation='publish')).response()
-
-            while upload.process_running:
-                upload = client.uploads.get_upload(
-                    upload_id=upload.upload_id).response().result
+            api.post(f'uploads/{upload["upload_id"]}/action/publish', auth=auth)
 
-            assert upload.process_status == 'SUCCESS', 'publish must be successful'
+            upload = get_upload(upload)
+            assert upload['process_status'] == 'SUCCESS', 'publish must be successful'
             published = True
 
         print('editing upload')
+        response = api.get('users', params=dict(prefix='Markus Scheidgen'))
+        assert response.status_code == 200, response.text
+        user = response.json()['data'][0]
         dataset = 'test_dataset'
         actions = {
             'comment': {'value': 'Test comment'},
             'references': [{'value': 'http;//test_reference.com'}],
-            'coauthors': [{'value': 'author1-id'}, {'value': 'author2-id'}],
-            'shared_with': [{'value': 'author3-id'}],
+            'coauthors': [{'value': user['user_id']}],
+            'shared_with': [{'value': user['user_id']}],
             'datasets': [{'value': dataset}]}
 
-        payload = dict(actions=actions, query=dict(upload_id=[upload.upload_id]))
-        result = client.repo.edit_repo(payload=payload).response().result
-        assert result.success
-        assert client.datasets.get_dataset(name=dataset).response().result['name'] == dataset
+        response = api.post(
+            'entries/edit',
+            data=json.dumps(dict(actions=actions, **query_request_params)),
+            auth=auth)
+        assert response.status_code == 200, response.text
 
         print('list datasets')
-        result = client.datasets.list_datasets(page=1, per_page=10).response().result
-        results = result.results
-        assert len(results) > 0
+        response = api.get('datasets', auth=auth, params=dict(name=dataset))
+        assert response.status_code == 200, response.text
+        response_json = response.json()
+        assert len(response_json['data']) == 1, response.text
+        dataset_id = response_json['data'][0]['dataset_id']
 
         if not skip_doi and published:
             print('assigning a DOI')
-            result = client.datasets.assign_doi(name=dataset).response().result
-            doi = result.doi
-            assert doi
+            response = api.post(f'datasets/{dataset_id}/doi', auth=auth)
+            assert response.status_code == 200, response.text
             has_doi = True
 
         if not has_doi or ctx.obj.user == 'admin':
             print('deleting dataset')
-            result = client.datasets.delete_dataset(name=dataset).response().result
-
-        if not skip_mirror and ctx.obj.user == 'admin':
-            print('getting upload mirror')
-            # get_upload_mirror gives 404
-            payload = dict(query=dict(upload_id=upload.upload_id))
-            result = client.mirror.get_uploads_mirror(payload=payload).response().result
-            assert len(result) == 1
-            assert len(client.mirror.get_upload_mirror(upload_id=upload.upload_id).response().result.calcs) > 0
+            response = api.delete(f'datasets/{dataset_id}', auth=auth)
+            assert response.status_code == 200, response.text
 
     finally:
         if not published or ctx.obj.user == 'admin':
             print('delete the upload again')
-            client.uploads.delete_upload(upload_id=upload.upload_id).response()
-            while upload.process_running:
-                upload = client.uploads.get_upload(
-                    upload_id=upload.upload_id).response().result
+            upload = api.delete(f'uploads/{upload["upload_id"]}', auth=auth).json()['data']
+            assert get_upload(upload) is None
diff --git a/nomad/client/__init__.py b/nomad/client/__init__.py
index e0207496a0..3d1aefa93f 100644
--- a/nomad/client/__init__.py
+++ b/nomad/client/__init__.py
@@ -17,4 +17,4 @@
 #
 
 from .archive import ArchiveQuery, query_archive
-from .auth import Auth, KeycloakAuthenticator
+from .api import Auth, KeycloakAuthenticator
diff --git a/nomad/client/auth.py b/nomad/client/api.py
similarity index 63%
rename from nomad/client/auth.py
rename to nomad/client/api.py
index 3ac005c40d..a069848667 100644
--- a/nomad/client/auth.py
+++ b/nomad/client/api.py
@@ -27,6 +27,27 @@ from nomad import config
 class APIError(Exception): pass
 
 
+def _call_requests(method, path: str, ssl: bool = True, *args, **kwargs):
+    url = f'{config.api_url(ssl=ssl)}/v1/{path}'
+    return getattr(requests, method)(url, *args, **kwargs)
+
+
+def get(*args, **kwargs):
+    return _call_requests('get', *args, **kwargs)
+
+
+def post(*args, **kwargs):
+    return _call_requests('post', *args, **kwargs)
+
+
+def put(*args, **kwargs):
+    return _call_requests('put', *args, **kwargs)
+
+
+def delete(*args, **kwargs):
+    return _call_requests('delete', *args, **kwargs)
+
+
 def url(path):
     ''' Returns the full NOMAD API url for the given api path. '''
     return f'{config.client.url}/v1/{path}'
@@ -34,6 +55,7 @@ def url(path):
 
 # This class is somewhat questionable, because there might be very similar functionality
 # already in requests. But it is somewhere hidden in OAuth flow implementations.
+# Maybe there is also a way to use the KeycloakAuthenticator in requests.
 class Auth(requests.auth.AuthBase):
     '''
     A request Auth class that can be used to authenticate in request callcs like this:
@@ -48,14 +70,26 @@ class Auth(requests.auth.AuthBase):
     Arguments:
         user: Optional user name or email, default is take from ``config.client.user``
         password: Optional password, default is taken from ``config.client.password``
+        from_api: If true, the necessary access token is acquired through the NOMAD api via basic auth
+            and not via keycloak directly. Default is False. Not recommended, but might
+            be useful, if keycloak can't be configured (e.g. during tests) or reached.
     '''
-    def __init__(self, user: str = config.client.user, password: str = config.client.password):
+    def __init__(
+            self, user: str = config.client.user,
+            password: str = config.client.password,
+            from_api: bool = False):
         self.user = user
         self._password = password
+        self.from_api = from_api
+
+        self.__oidc = KeycloakOpenID(
+            server_url=config.keycloak.server_url,
+            realm_name=config.keycloak.realm_name,
+            client_id=config.keycloak.client_id)
 
         self._token = None
 
-    def __call__(self, request):
+    def get_access_token_from_api(self):
         if self._token is None:
             response = requests.get(
                 url('auth/token'),
@@ -68,11 +102,27 @@ class Auth(requests.auth.AuthBase):
                     f'{response_json.get("description") or response_json.get("detail") or "unknown reason"} '
                     f'({response_json.get("code", response.status_code)})')
 
-            self._token = response.json()['access_token']
+            self._token = response.json()
 
-        # TODO check if token is still valid and refresh
+    def get_access_token_from_keycloak(self):
+        if self._token is None:
+            self._token = self.__oidc.token(username=self.user, password=self._password)
+            self._token['time'] = time.time()
+        elif self._token['expires_in'] < int(time.time()) - self._token['time'] + 10:
+            try:
+                self._token = self.__oidc.refresh_token(self._token['refresh_token'])
+                self._token['time'] = time.time()
+            except Exception:
+                self._token = self.__oidc.token(username=self.user, password=self._password)
+                self._token['time'] = time.time()
+
+    def __call__(self, request):
+        if self.from_api:
+            self.get_access_token_from_api()
+        else:
+            self.get_access_token_from_keycloak()
 
-        request.headers['Authorization'] = f'Bearer {self._token}'
+        request.headers['Authorization'] = f'Bearer {self._token["access_token"]}'
         return request
 
 
diff --git a/nomad/client/archive.py b/nomad/client/archive.py
index b8cc18f794..4655a05469 100644
--- a/nomad/client/archive.py
+++ b/nomad/client/archive.py
@@ -144,7 +144,7 @@ from nomad.datamodel import EntryArchive
 # TODO this import is necessary to load all metainfo defintions that the parsers are using
 from nomad import parsing  # pylint: disable=unused-import
 
-from .auth import Auth
+from .api import Auth
 
 
 class QueryError(Exception):
diff --git a/nomad/config.py b/nomad/config.py
index cdbc00f6d4..3521efce6e 100644
--- a/nomad/config.py
+++ b/nomad/config.py
@@ -474,6 +474,9 @@ def _apply_nomad_yaml():
             logger.error(f'cannot read nomad config: {e}')
             return
 
+    if not config_data:
+        return
+
     for key, value in config_data.items():
         if isinstance(value, dict):
             group_key = key
diff --git a/tests/test_client.py b/tests/test_client.py
index 72428265c9..8e6ccbc35a 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -29,7 +29,7 @@ from tests.processing import test_data as test_processing
 
 
 def test_requests_auth(api_v1):
-    rv = api_v1.get('users/me', auth=Auth())
+    rv = api_v1.get('users/me', auth=Auth(from_api=True))
     assert rv.status_code == 200
 
 
@@ -76,8 +76,8 @@ def test_query_authentication(api_v1, published, other_test_user, test_user):
     # The published test uploads uploader in calc and upload's user id do not match
     # due to testing the uploader change via publish metadata.
 
-    assert_results(query_archive(authentication=Auth(other_test_user.username, 'password')), total=0)
-    assert_results(query_archive(authentication=Auth(test_user.username, 'password')), total=1)
+    assert_results(query_archive(authentication=Auth(other_test_user.username, 'password', from_api=True)), total=0)
+    assert_results(query_archive(authentication=Auth(test_user.username, 'password', from_api=True)), total=1)
 
 
 @pytest.fixture(scope='function')
-- 
GitLab