Commit 5d54a7a9 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Fixed retrieval of all uploads/calcs in ArchiveQuery with max.

parent 3cea3dda
Pipeline #82799 passed with stages
in 22 minutes and 43 seconds
......@@ -316,7 +316,8 @@ class ArchiveQuery(collections.abc.Sequence):
per_page: Determine how many results are downloaded per page (or scroll window).
Default is 10.
max: Optionally determine the maximum amount of downloaded archives. The iteration
will stop if max is surpassed even if more results are available. Default is unlimited.
will stop if max is surpassed even if more results are available. Default is 10.000.
None value will set it to unlimited.
raise_errors: There are situations where archives for certain entries are unavailable.
If set to True, these cases will raise an Exception. Otherwise, the entries
with missing archives are simply skipped (default).
......@@ -331,7 +332,7 @@ class ArchiveQuery(collections.abc.Sequence):
self,
query: dict = None, required: dict = None,
url: str = None, username: str = None, password: str = None,
parallel: int = 1, per_page: int = 10, max: int = None,
parallel: int = 1, per_page: int = 10, max: int = 10000,
raise_errors: bool = False,
authentication: Union[Dict[str, str], KeycloakAuthenticator] = None):
......@@ -410,8 +411,7 @@ class ArchiveQuery(collections.abc.Sequence):
while True:
response = requests.get(
url if after is None else '%s&after=%s' % (url, after),
# TODO size=1000,
url if after is None else '%s&after=%s&size=1000' % (url, after),
headers=self.authentication)
if response.status_code != 200:
......@@ -432,6 +432,9 @@ class ArchiveQuery(collections.abc.Sequence):
for upload in values.values():
nentries += upload['total']
if self.max is not None and nentries >= self.max:
break
# distribute uploads to processes
if self.parallel is None:
self.parallel = 1
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment