Commit 6dee8979 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Minor client lib improvements.

parent da65222c
Pipeline #72457 passed with stages
in 17 minutes and 32 seconds
.DS_Store
.env/
.pyenv/
.*env/
.pytest/
.python-version
.ipynb_checkpoints/
......
This diff is collapsed.
This diff is collapsed.
......@@ -288,18 +288,14 @@ class ArchiveQueryResource(Resource):
calcs = results['results']
upload_files = None
current_upload_id = None
check_restricted = g.user is not None
for entry in calcs:
with_embargo = entry['with_embargo']
if (not entry['published'] or with_embargo) and not check_restricted:
continue
upload_id = entry['upload_id']
calc_id = entry['calc_id']
if upload_files is None or current_upload_id != upload_id:
if upload_files is not None:
check_restricted = g.user is not None # maybe the user has restricted access for the next upload
upload_files.close()
upload_files = UploadFiles.get(upload_id, create_authorization_predicate(upload_id))
......@@ -326,7 +322,8 @@ class ArchiveQueryResource(Resource):
abort(400, str(e))
except Restricted:
check_restricted = False
# TODO in reality this should not happen
pass
if upload_files is not None:
upload_files.close()
......
......@@ -320,11 +320,15 @@ def create_authorization_predicate(upload_id, calc_id=None):
if g.user.user_id == upload.user_id:
return True
try:
calc = processing.Calc.get(calc_id)
except KeyError:
return False
return g.user.user_id in calc.metadata.get('shared_with', [])
# TODO I doubt if shared_with is actually working
if calc_id is not None:
try:
calc = processing.Calc.get(calc_id)
except KeyError:
return False
return g.user.user_id in calc.metadata.get('shared_with', [])
return False
except KeyError as e:
logger = utils.get_logger(__name__, upload_id=upload_id, calc_id=calc_id)
......
......@@ -36,7 +36,7 @@ and a query schema similar to the archive json format can be provided to filter
'''
from typing import Dict, Union, Any, List
from collections import Sequence
import collections.abc
import requests
from urllib.parse import urlparse
from bravado import requests_client as bravado_requests_client
......@@ -89,14 +89,22 @@ class ApiStatistics(mi.MSection):
type=int,
description='Number of entries loaded in the last api call')
last_response_size = mi.Quantity(
last_response_data_size = mi.Quantity(
type=int, unit=mi.units.bytes,
description='Bytes loaded in the last api call')
loaded_data_size = mi.Quantity(
type=int, unit=mi.units.bytes,
description='Bytes loaded from this query')
loaded_nentries = mi.Quantity(
type=int,
description='Number of downloaded entries')
napi_calls = mi.Quantity(
type=int,
description='Number of made api calls')
def __repr__(self):
out = StringIO()
for quantity in self.m_def.all_quantities.values():
......@@ -105,27 +113,27 @@ class ApiStatistics(mi.MSection):
return out.getvalue()
class ArchiveQuery(Sequence):
class ArchiveQuery(collections.abc.Sequence):
def __init__(
self,
query: dict = None, query_schema: dict = None,
query: dict = None, required: dict = None,
url: str = None, username: str = None, password: str = None,
scroll: bool = False, per_page: int = 10,
authentication: Union[Dict[str, str], KeycloakAuthenticator] = None, **kwargs):
scroll: bool = False, per_page: int = 10, max: int = None,
authentication: Union[Dict[str, str], KeycloakAuthenticator] = None):
self.scroll = scroll
self._scroll_id = None
self._page = 1
self._per_page = per_page
self.page = 1
self.per_page = per_page
self.max = max
self.query: Dict[str, Any] = {
'query': {}
}
if query is not None:
self.query['query'].update(query)
if query_schema is not None:
self.query['query_schema'] = query_schema
self.query['query'].update(kwargs)
if required is not None:
self.query['query_schema'] = required
self.password = password
self.username = username
......@@ -133,8 +141,9 @@ class ArchiveQuery(Sequence):
self._authentication = authentication
self._total = -1
self._capped_total = -1
self._results: List[dict] = []
self._api_statistics = ApiStatistics()
self._statistics = ApiStatistics()
@property
def authentication(self):
......@@ -164,7 +173,7 @@ class ArchiveQuery(Sequence):
else:
self.query.setdefault('pagination', {}).update(
page=self._page, per_page=self._per_page)
page=self.page, per_page=self.per_page)
response = requests.post(url, headers=self.authentication, json=self.query)
if response.status_code != 200:
......@@ -182,7 +191,12 @@ class ArchiveQuery(Sequence):
else:
pagination = data['pagination']
self._total = pagination['total']
self._page = pagination['page'] + 1
self.page = pagination['page'] + 1
if self.max is not None:
self._capped_total = min(self.max, self._total)
else:
self._capped_total = self._total
results = data.get('results', [])
......@@ -192,10 +206,13 @@ class ArchiveQuery(Sequence):
self._results.append(archive)
try:
self._api_statistics.last_response_size = len(response.content)
self._api_statistics.nentries = self._total
self._api_statistics.last_response_nentries = len(results)
self._api_statistics.loaded_nentries = len(self._results)
data_size = len(response.content)
self._statistics.last_response_data_size = data_size
self._statistics.loaded_data_size += data_size
self._statistics.nentries = self._total
self._statistics.last_response_nentries = len(results)
self._statistics.loaded_nentries = len(self._results)
self._statistics.napi_calls += 1
except Exception:
# fails in test due to mocked requests library
pass
......@@ -204,23 +221,40 @@ class ArchiveQuery(Sequence):
if self._total == -1:
self.call_api()
return str(self._api_statistics)
return str(self._statistics)
def __getitem__(self, key):
if isinstance(key, slice):
return [self[i] for i in range(*key.indices(len(self)))]
if key >= self.__len__():
raise IndexError()
while len(self._results) < key:
while len(self._results) < key + 1:
self.call_api()
return self._results[key]
def __len__(self):  # pylint: disable=invalid-length-returned
    """Return the number of entries this query yields, capped by ``max``.

    ``-1`` is the sentinel for "not yet known"; in that case a first API
    call is issued to populate the pagination data before returning.
    """
    if self._capped_total == -1:
        self.call_api()
    return self._capped_total
@property
def total(self):
    """The total number of entries matching the query (not capped by ``max``).

    Lazily issues the first API call if the total has not been fetched yet
    (``-1`` sentinel).
    """
    if self._total == -1:
        self.call_api()
    return self._total
@property
def statistics(self):
    """The accumulated :class:`ApiStatistics` for this query.

    Issues a first API call if none has been made yet, so the returned
    statistics always reflect at least one response.
    """
    if self._total == -1:
        self.call_api()
    return self._statistics
def query_archive(*args, **kwargs):
    """Convenience factory that forwards all arguments to :class:`ArchiveQuery`."""
    return ArchiveQuery(*args, **kwargs)
......
......@@ -53,13 +53,20 @@ def test_query_query(api, published_wo_user_metadata):
({'section_run[0]': {'section_system': '*'}}, [EntryArchive.section_run, section_run.section_system])
])
def test_query_schema(api, published_wo_user_metadata, q_schema, sub_sections):
assert_results(query_archive(query_schema=q_schema), sub_section_defs=sub_sections)
assert_results(query_archive(required=q_schema), sub_section_defs=sub_sections)
def test_query_scroll(api, published_wo_user_metadata):
    # Exercise the scroll-based pagination path of the archive query.
    assert_results(query_archive(scroll=True))
def test_query_authentication(api, published, other_test_user_auth, test_user_auth):
assert_results(query_archive(authentication=other_test_user_auth))
def test_query_authentication(api, published, other_test_user_auth, test_user_auth, other_test_user):
    # The published test upload's uploader in calc and the upload's user id do not
    # match, due to testing the uploader change via publish metadata.
    # TODO this is a workaround and should be changed in conftest, especially since
    # we do not need this feature anymore.
    published.user_id = other_test_user.user_id
    published.save()

    assert_results(query_archive(authentication=other_test_user_auth), total=1)
    assert_results(query_archive(authentication=test_user_auth), total=0)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment