From d3deb6c9689a5ba2001053508f79e4c1dc83a8d3 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Tue, 21 Sep 2021 15:55:46 +0200 Subject: [PATCH] Migrated integrationtests to v1 api. #591 --- nomad/cli/admin/uploads.py | 3 +- nomad/cli/client/__init__.py | 2 + nomad/cli/client/client.py | 1 + nomad/cli/client/integrationtests.py | 205 +++++++++++++++------------ nomad/client/__init__.py | 2 +- nomad/client/{auth.py => api.py} | 60 +++++++- nomad/client/archive.py | 2 +- nomad/config.py | 3 + tests/test_client.py | 6 +- 9 files changed, 179 insertions(+), 105 deletions(-) rename nomad/client/{auth.py => api.py} (63%) diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index a9cc574af2..51ed2686f5 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -24,7 +24,6 @@ import json import elasticsearch_dsl as es from nomad import processing as proc, config, infrastructure, utils, files, datamodel, search -from nomad.search.v1 import quantity_values from .admin import admin, __run_processing, __run_parallel @@ -102,6 +101,7 @@ def uploads( query |= mongoengine.Q(process_status__in=proc.ProcessStatus.STATUSES_PROCESSING) if unindexed: + from nomad.search.v1 import quantity_values uploads_in_es = set(quantity_values('upload_id', page_size=1000, owner='all')) uploads_in_mongo = mongo_client[config.mongo.db_name]['calc'].distinct('upload_id') @@ -126,6 +126,7 @@ def query_uploads(ctx, uploads): if ctx.obj.query_mongo: uploads = proc.Calc.objects(**json_query).distinct(field="upload_id") else: + from nomad.search.v1 import quantity_values uploads = list(quantity_values( 'upload_id', query=es.Q(json_query), page_size=1000, owner='all')) except Exception: diff --git a/nomad/cli/client/__init__.py b/nomad/cli/client/__init__.py index aa61faa371..d1aaaf15e8 100644 --- a/nomad/cli/client/__init__.py +++ b/nomad/cli/client/__init__.py @@ -53,6 +53,8 @@ lazy_import.lazy_module('nomad.parsing.parsers') 
lazy_import.lazy_module('nomad.infrastructure') lazy_import.lazy_module('nomad.doi') lazy_import.lazy_module('nomad.client') +lazy_import.lazy_module('nomad.client.api') +lazy_import.lazy_module('nomad.client.archive') from . import local, upload, integrationtests, statistics, update_database # noqa from .client import create_client # noqa diff --git a/nomad/cli/client/client.py b/nomad/cli/client/client.py index ed31e45776..9dfa27d177 100644 --- a/nomad/cli/client/client.py +++ b/nomad/cli/client/client.py @@ -110,6 +110,7 @@ def client(ctx, url: str, user: str, password: str, no_ssl_verify: bool, no_toke nomad_config.client.url = url ctx.obj.user = user + ctx.obj.auth = nomad_client.Auth(user=user, password=password) global _create_client diff --git a/nomad/cli/client/integrationtests.py b/nomad/cli/client/integrationtests.py index f69ab617fb..d0b2e096de 100644 --- a/nomad/cli/client/integrationtests.py +++ b/nomad/cli/client/integrationtests.py @@ -24,6 +24,7 @@ as a final integration test. 
import time import os import click +import json from .client import client @@ -42,31 +43,35 @@ simple_example_file = 'tests/data/integration/examples_vasp.zip' @click.option( '--skip-doi', is_flag=True, help='Skip assigning a doi to a dataset.') -@click.option( - '--skip-mirror', is_flag=True, - help='Skip get mirror tests.') @click.pass_context -def integrationtests(ctx, skip_parsers, skip_publish, skip_doi, skip_mirror): - from .client import create_client - client = create_client() +def integrationtests(ctx, skip_parsers, skip_publish, skip_doi): + from nomad.client import api + auth = ctx.obj.auth + has_doi = False published = False print('get the upload command') - command = client.uploads.get_upload_command().response().result.upload_command_with_name + response = api.get('uploads/command-examples', auth=auth) + assert response.status_code == 200, response.text + command = response.json()['upload_command'] def get_upload(upload): - upload = client.uploads.get_upload( - upload_id=upload.upload_id, per_page=100).response().result - - while upload.process_running: + first = True + while first or upload['process_running']: + first = False + response = api.get(f'uploads/{upload["upload_id"]}', auth=auth) + if response.status_code == 404: + return None + assert response.status_code == 200, response.text + upload = response.json()['data'] time.sleep(0.3) - upload = client.uploads.get_upload( - upload_id=upload.upload_id, per_page=100).response().result return upload - uploads = client.uploads.get_uploads(name='integration_test_upload').response().result.results + response = api.get('uploads', params=dict(name='integration_test_upload'), auth=auth) + assert response.status_code == 200, response.text + uploads = response.json()['data'] assert len(uploads) == 0, 'the test upload must not exist before' if not skip_parsers: @@ -76,136 +81,148 @@ def integrationtests(ctx, skip_parsers, skip_publish, skip_doi, skip_mirror): command += ' -k' code = os.system(command) assert 
code == 0, 'curl command must be successful' - uploads = client.uploads.get_uploads(name='integration_test_upload').response().result.results - assert len(uploads) == 1, 'exactly one test upload must be on the server' - upload = uploads[0] + response = api.get('uploads', params=dict(name='integration_test_upload'), auth=auth) + assert response.status_code == 200, response.text + response_json = response.json() + assert len(response_json['data']) == 1, 'exactly one test upload must be on the server' + upload = response_json['data'][0] print('observe the upload process to be finished') upload = get_upload(upload) - assert upload.process_status == 'SUCCESS' - total = upload.calcs.pagination.total - assert 100 > total > 0 - assert len(upload.calcs.results) == total + assert upload['process_status'] == 'SUCCESS' print('delete the upload again') - client.uploads.delete_upload(upload_id=upload.upload_id).response() - while upload.process_running: - upload = client.uploads.get_upload( - upload_id=upload.upload_id).response().result + upload = api.delete(f'uploads/{upload["upload_id"]}', auth=auth).json()['data'] + upload = get_upload(upload) print('upload simple data with API') with open(simple_example_file, 'rb') as f: - upload = client.uploads.upload( - name='integration test upload', file=f).response().result + response = api.post( + 'uploads', files=dict(file=f), params=dict(name='integration_test_upload'), + auth=auth, headers={'Accept': 'application/json'}) + assert response.status_code == 200, response.text + upload = response.json()['data'] print('observe the upload process to be finished') upload = get_upload(upload) - total = upload.calcs.pagination.total - assert total > 0 - assert len(upload.calcs.results) == total + response = api.get(f'uploads/{upload["upload_id"]}/entries', auth=auth) + assert response.status_code == 200, response.text + entries = response.json()['data'] + assert upload['entries'] == len(entries) try: print('get repo data') - for calc in 
upload.calcs.results: - repo = client.repo.get_repo_calc( - upload_id=upload.upload_id, calc_id=calc.calc_id).response().result - repo['calc_id'] == calc.calc_id + for entry in entries: + response = api.get(f'entries/{entry["entry_id"]}', auth=auth) + assert response.status_code == 200, response.text + entry_metadata = response.json()['data'] + assert entry_metadata['entry_id'] == entry['entry_id'] print('get archive data') - for calc in upload.calcs.results: - client.archive.get_archive_calc( - upload_id=upload.upload_id, calc_id=calc.calc_id).response() + for entry in entries: + response = api.get(f'entries/{entry["entry_id"]}/archive/download', auth=auth) + assert response.status_code == 200, response.text print('get archive logs') - for calc in upload.calcs.results: - client.archive.get_archive_logs( - upload_id=upload.upload_id, calc_id=calc.calc_id).response() - - query = dict(owner='staging', upload_id=[upload.upload_id]) + for entry in entries: + response = api.post( + f'entries/{entry["entry_id"]}/archive/query', + data=json.dumps({ + 'required': { + 'processing_logs': '*' + } + }), auth=auth) + assert response.status_code == 200, response.text + assert list(response.json()['data']['archive'].keys()) == ['processing_logs'] + + query_request_params = dict( + owner='staging', + query={ + 'upload_id': upload['upload_id'] + }) print('perform repo search on data') - search = client.repo.search(per_page=100, **query).response().result - assert search.pagination.total >= total - assert len(search.results) <= search.pagination.total + response = api.post('entries/query', data=json.dumps(query_request_params), auth=auth) + assert response.status_code == 200, response.text + response_json = response.json() + assert response_json['pagination']['total'] == 2 + assert response_json['pagination']['total'] == len(response_json['data']) print('performing archive paginated search') - result = client.archive.post_archive_query(payload={ - 'pagination': { - 'page': 1, - 'per_page': 10 - }, - 
'query': query - }).response().result - assert len(result.results) > 0 + response = api.post('entries/archive/query', data=json.dumps(dict( + pagination=dict(page_size=1, page_offset=1), + **query_request_params)), auth=auth) + assert response.status_code == 200, response.text + response_json = response.json() + assert response_json['pagination']['total'] == 2 + assert len(response_json['data']) == 1 print('performing archive scrolled search') - result = client.archive.post_archive_query(payload={ - 'scroll': { - 'scroll': True - }, - 'query': query - }).response().result - assert len(result.results) > 0 + response = api.post('entries/archive/query', data=json.dumps(dict( + pagination=dict(page_size=1), + **query_request_params)), auth=auth) + response_json = response.json() + response = api.post('entries/archive/query', data=json.dumps(dict( + pagination=dict(page_size=1, page_after_value=response_json['pagination']['next_page_after_value']), + **query_request_params)), auth=auth) + assert response.status_code == 200, response.text + response_json = response.json() + assert response_json['pagination']['total'] == 2 + assert len(response_json['data']) == 1 print('performing download') - client.raw.raw_files_from_query(**query) + response = api.get( + 'entries/raw/download', + params=dict(upload_id=upload['upload_id'], owner='visible'), auth=auth) + assert response.status_code == 200, response.text if not skip_publish: print('publish upload') - client.uploads.exec_upload_operation( - upload_id=upload.upload_id, - payload=dict(operation='publish')).response() - - while upload.process_running: - upload = client.uploads.get_upload( - upload_id=upload.upload_id).response().result + api.post(f'uploads/{upload["upload_id"]}/action/publish', auth=auth) - assert upload.process_status == 'SUCCESS', 'publish must be successful' + upload = get_upload(upload) + assert upload['process_status'] == 'SUCCESS', 'publish must be successful' published = True print('editing upload') + response = 
api.get('users', params=dict(prefix='Markus Scheidgen')) + assert response.status_code == 200, response.text + user = response.json()['data'][0] dataset = 'test_dataset' actions = { 'comment': {'value': 'Test comment'}, 'references': [{'value': 'http;//test_reference.com'}], - 'coauthors': [{'value': 'author1-id'}, {'value': 'author2-id'}], - 'shared_with': [{'value': 'author3-id'}], + 'coauthors': [{'value': user['user_id']}], + 'shared_with': [{'value': user['user_id']}], 'datasets': [{'value': dataset}]} - payload = dict(actions=actions, query=dict(upload_id=[upload.upload_id])) - result = client.repo.edit_repo(payload=payload).response().result - assert result.success - assert client.datasets.get_dataset(name=dataset).response().result['name'] == dataset + response = api.post( + 'entries/edit', + data=json.dumps(dict(actions=actions, **query_request_params)), + auth=auth) + assert response.status_code == 200, response.text print('list datasets') - result = client.datasets.list_datasets(page=1, per_page=10).response().result - results = result.results - assert len(results) > 0 + response = api.get('datasets', auth=auth, params=dict(name=dataset)) + assert response.status_code == 200, response.text + response_json = response.json() + assert len(response_json['data']) == 1, response.text + dataset_id = response_json['data'][0]['dataset_id'] if not skip_doi and published: print('assigning a DOI') - result = client.datasets.assign_doi(name=dataset).response().result - doi = result.doi - assert doi + response = api.post(f'datasets/{dataset_id}/doi', auth=auth) + assert response.status_code == 200, response.text has_doi = True if not has_doi or ctx.obj.user == 'admin': print('deleting dataset') - result = client.datasets.delete_dataset(name=dataset).response().result - - if not skip_mirror and ctx.obj.user == 'admin': - print('getting upload mirror') - # get_upload_mirror gives 404 - payload = dict(query=dict(upload_id=upload.upload_id)) - result = 
client.mirror.get_uploads_mirror(payload=payload).response().result - assert len(result) == 1 - assert len(client.mirror.get_upload_mirror(upload_id=upload.upload_id).response().result.calcs) > 0 + response = api.delete(f'datasets/{dataset_id}', auth=auth) + assert response.status_code == 200, response.text finally: if not published or ctx.obj.user == 'admin': print('delete the upload again') - client.uploads.delete_upload(upload_id=upload.upload_id).response() - while upload.process_running: - upload = client.uploads.get_upload( - upload_id=upload.upload_id).response().result + upload = api.delete(f'uploads/{upload["upload_id"]}', auth=auth).json()['data'] + assert get_upload(upload) is None diff --git a/nomad/client/__init__.py b/nomad/client/__init__.py index e0207496a0..3d1aefa93f 100644 --- a/nomad/client/__init__.py +++ b/nomad/client/__init__.py @@ -17,4 +17,4 @@ # from .archive import ArchiveQuery, query_archive -from .auth import Auth, KeycloakAuthenticator +from .api import Auth, KeycloakAuthenticator diff --git a/nomad/client/auth.py b/nomad/client/api.py similarity index 63% rename from nomad/client/auth.py rename to nomad/client/api.py index 3ac005c40d..a069848667 100644 --- a/nomad/client/auth.py +++ b/nomad/client/api.py @@ -27,6 +27,27 @@ from nomad import config class APIError(Exception): pass +def _call_requests(method, path: str, ssl: bool = True, *args, **kwargs): + url = f'{config.api_url(ssl=ssl)}/v1/{path}' + return getattr(requests, method)(url, *args, **kwargs) + + +def get(*args, **kwargs): + return _call_requests('get', *args, **kwargs) + + +def post(*args, **kwargs): + return _call_requests('post', *args, **kwargs) + + +def put(*args, **kwargs): + return _call_requests('put', *args, **kwargs) + + +def delete(*args, **kwargs): + return _call_requests('delete', *args, **kwargs) + + def url(path): ''' Returns the full NOMAD API url for the given api path. 
''' return f'{config.client.url}/v1/{path}' @@ -34,6 +55,7 @@ def url(path): # This class is somewhat questionable, because there might be very similar functionality # already in requests. But it is somewhere hidden in OAuth flow implementations. +# Maybe there is also a way to use the KeycloakAuthenticator in requests. class Auth(requests.auth.AuthBase): ''' A request Auth class that can be used to authenticate in request callcs like this: @@ -48,14 +70,26 @@ class Auth(requests.auth.AuthBase): Arguments: user: Optional user name or email, default is take from ``config.client.user`` password: Optional password, default is taken from ``config.client.password`` + from_api: If true, the necessary access token is acquired through the NOMAD api via basic auth + and not via keycloak directly. Default is False. Not recommended, but might + be useful, if keycloak can't be configured (e.g. during tests) or reached. ''' - def __init__(self, user: str = config.client.user, password: str = config.client.password): + def __init__( + self, user: str = config.client.user, + password: str = config.client.password, + from_api: bool = False): self.user = user self._password = password + self.from_api = from_api + + self.__oidc = KeycloakOpenID( + server_url=config.keycloak.server_url, + realm_name=config.keycloak.realm_name, + client_id=config.keycloak.client_id) self._token = None - def __call__(self, request): + def get_access_token_from_api(self): if self._token is None: response = requests.get( url('auth/token'), @@ -68,11 +102,27 @@ class Auth(requests.auth.AuthBase): f'{response_json.get("description") or response_json.get("detail") or "unknown reason"} ' f'({response_json.get("code", response.status_code)})') - self._token = response.json()['access_token'] + self._token = response.json() - # TODO check if token is still valid and refresh + def get_access_token_from_keycloak(self): + if self._token is None: + self._token = self.__oidc.token(username=self.user, 
password=self._password) + self._token['time'] = time.time() + elif self._token['expires_in'] < int(time.time()) - self._token['time'] + 10: + try: + self._token = self.__oidc.refresh_token(self._token['refresh_token']) + self._token['time'] = time.time() + except Exception: + self._token = self.__oidc.token(username=self.user, password=self._password) + self._token['time'] = time.time() + + def __call__(self, request): + if self.from_api: + self.get_access_token_from_api() + else: + self.get_access_token_from_keycloak() - request.headers['Authorization'] = f'Bearer {self._token}' + request.headers['Authorization'] = f'Bearer {self._token["access_token"]}' return request diff --git a/nomad/client/archive.py b/nomad/client/archive.py index b8cc18f794..4655a05469 100644 --- a/nomad/client/archive.py +++ b/nomad/client/archive.py @@ -144,7 +144,7 @@ from nomad.datamodel import EntryArchive # TODO this import is necessary to load all metainfo defintions that the parsers are using from nomad import parsing # pylint: disable=unused-import -from .auth import Auth +from .api import Auth class QueryError(Exception): diff --git a/nomad/config.py b/nomad/config.py index cdbc00f6d4..3521efce6e 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -474,6 +474,9 @@ def _apply_nomad_yaml(): logger.error(f'cannot read nomad config: {e}') return + if not config_data: + return + for key, value in config_data.items(): if isinstance(value, dict): group_key = key diff --git a/tests/test_client.py b/tests/test_client.py index 72428265c9..8e6ccbc35a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -29,7 +29,7 @@ from tests.processing import test_data as test_processing def test_requests_auth(api_v1): - rv = api_v1.get('users/me', auth=Auth()) + rv = api_v1.get('users/me', auth=Auth(from_api=True)) assert rv.status_code == 200 @@ -76,8 +76,8 @@ def test_query_authentication(api_v1, published, other_test_user, test_user): # The published test uploads uploader in calc and 
upload's user id do not match # due to testing the uploader change via publish metadata. - assert_results(query_archive(authentication=Auth(other_test_user.username, 'password')), total=0) - assert_results(query_archive(authentication=Auth(test_user.username, 'password')), total=1) + assert_results(query_archive(authentication=Auth(other_test_user.username, 'password', from_api=True)), total=0) + assert_results(query_archive(authentication=Auth(test_user.username, 'password', from_api=True)), total=1) @pytest.fixture(scope='function') -- GitLab