Commit c8d797eb authored by Markus Scheidgen
Browse files

Merge branch 'bugfixes' into 'v0.9.0'

Bugfixes

See merge request !182
parents 78e43ee2 5d54a7a9
Pipeline #82806 passed with stages
in 30 minutes and 57 seconds
......@@ -316,7 +316,8 @@ class ArchiveQuery(collections.abc.Sequence):
per_page: Determine how many results are downloaded per page (or scroll window).
Default is 10.
max: Optionally determine the maximum amount of downloaded archives. The iteration
will stop if max is surpassed even if more results are available. Default is unlimited.
will stop if max is surpassed even if more results are available. Default is 10000.
A value of None sets it to unlimited.
raise_errors: There are situations where archives for certain entries are unavailable.
If set to True, these cases will raise an Exception. Otherwise, the entries
with missing archives are simply skipped (default).
......@@ -331,7 +332,7 @@ class ArchiveQuery(collections.abc.Sequence):
self,
query: dict = None, required: dict = None,
url: str = None, username: str = None, password: str = None,
parallel: int = 1, per_page: int = 10, max: int = None,
parallel: int = 1, per_page: int = 10, max: int = 10000,
raise_errors: bool = False,
authentication: Union[Dict[str, str], KeycloakAuthenticator] = None):
......@@ -410,8 +411,7 @@ class ArchiveQuery(collections.abc.Sequence):
while True:
response = requests.get(
url if after is None else '%s&after=%s' % (url, after),
# TODO size=1000,
url if after is None else '%s&after=%s&size=1000' % (url, after),
headers=self.authentication)
if response.status_code != 200:
......@@ -432,6 +432,9 @@ class ArchiveQuery(collections.abc.Sequence):
for upload in values.values():
nentries += upload['total']
if self.max is not None and nentries >= self.max:
break
# distribute uploads to processes
if self.parallel is None:
self.parallel = 1
......
......@@ -619,7 +619,7 @@ class SearchRequest:
es.clear_scroll(body={'scroll_id': [scroll_id]}, ignore=(404, )) # pylint: disable=E1123
scroll_id = None
scroll_info = dict(total=total, size=size)
scroll_info = dict(total=total, size=size, scroll=True)
if scroll_id is not None:
scroll_info.update(scroll_id=scroll_id)
......
......@@ -1202,7 +1202,9 @@ class TestRepo():
while scroll_id is not None:
rv = api.get('/repo/?scroll=1m&scroll_id=%s' % scroll_id)
data = json.loads(rv.data)
scroll_id = data.get('scroll', {}).get('scroll_id', None)
scroll_info = data.get('scroll', {})
assert scroll_info['scroll']
scroll_id = scroll_info.get('scroll_id', None)
has_another_page |= len(data.get('results')) > 0
if n_results < 2:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment