Commit e1bcd545 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Sort upload grouped repo results by upload_time. #247

parent 337fb2dc
Pipeline #68306 passed with stages
in 14 minutes and 22 seconds
......@@ -208,9 +208,13 @@ class RepoCalcsResource(Resource):
else:
for group_name, (group_quantity, _) in search.groups.items():
if args.get(group_name, False):
kwargs: Dict[str, Any] = {}
if group_name == 'uploads':
kwargs.update(order_by='upload_time', order='desc')
search_request.quantity(
group_quantity, size=per_page, examples=1,
after=request.args.get('%s_after' % group_name, None))
after=request.args.get('%s_after' % group_name, None),
**kwargs)
results = search_request.execute_paginated(
per_page=per_page, page=page, order=order, order_by=order_by)
......
......@@ -477,7 +477,9 @@ class SearchRequest:
return self
def quantity(self, name, size=100, after=None, examples=0, examples_source=None):
def quantity(
self, name, size=100, after=None, examples=0, examples_source=None,
order_by: str = None, order: str = 'desc'):
"""
Adds a request for values of the given quantity.
It allows to scroll through all values via elasticsearch's
......@@ -501,6 +503,13 @@ class SearchRequest:
size:
The size gives the amount of maximum values in the next scroll window.
If the size is None, a maximum of 100 quantity values will be requested.
examples:
Number of results to return that have each value
order_by:
A sortable quantity that should be used to order. The max of each
value bucket is used.
order:
"desc" or "asc"
"""
if size is None:
size = 100
......@@ -511,17 +520,25 @@ class SearchRequest:
# We are using elasticsearch's 'composite aggregations' here. We do not really
# compose aggregations, but only those pseudo composites allow us to use the
# 'after' feature that allows us to scan through all aggregation values.
composite = dict(sources={name: terms}, size=size)
if order_by is None:
composite = dict(sources={name: terms}, size=size)
else:
sort_terms = A('terms', field=order_by)
composite = dict(sources=[{order_by: sort_terms}, {name: terms}], size=size)
if after is not None:
composite['after'] = {name: after}
if order_by is None:
composite['after'] = {name: after}
else:
composite['after'] = {order_by: after, name: ''}
composite_agg = self._search.aggs.bucket('quantity:%s' % name, 'composite', **composite)
composite = self._search.aggs.bucket('quantity:%s' % name, 'composite', **composite)
if examples > 0:
kwargs = {}
kwargs: Dict[str, Any] = {}
if examples_source is not None:
kwargs.update(_source=dict(includes=examples_source))
composite.metric('examples', A('top_hits', size=examples, **kwargs))
composite_agg.metric('examples', A('top_hits', size=examples, **kwargs))
return self
......@@ -696,7 +713,14 @@ class SearchRequest:
result = dict(values=values)
if 'after_key' in quantity:
result.update(after=quantity['after_key'][quantity_name])
after = quantity['after_key']
if len(after) == 1:
result.update(after=after[quantity_name])
else:
for key in after:
if key != quantity_name:
result.update(after=after[key])
break
return result
......
......@@ -790,7 +790,9 @@ class TestRepo():
uploads = data.get('uploads', None)
assert uploads is not None
values = uploads['values']
assert values['example_upload_id']['total'] == 4
# the 4 uploads have "example upload id", but 3 have an older upload time. Therefore,
# only 1 calc will be in the (today, "example upload id") bucket.
assert values['example_upload_id']['total'] == 1
assert values['example_upload_id']['examples'][0]['upload_id'] == 'example_upload_id'
assert 'after' in uploads
assert 'uploads' in data['statistics']['total']['all']
......
......@@ -137,7 +137,11 @@ def test_search_totals(elastic, example_search_data):
assert 'quantities' not in results
def test_search_quantity(elastic, normalized: parsing.LocalBackend, test_user: datamodel.User, other_test_user: datamodel.User):
@pytest.mark.parametrize("order_by", [None, 'upload_id'])
def test_search_quantity(
elastic, normalized: parsing.LocalBackend, test_user: datamodel.User,
other_test_user: datamodel.User, order_by: str):
calc_with_metadata = datamodel.CalcWithMetadata(upload_id='test upload id', calc_id='test id')
calc_with_metadata.apply_domain_metadata(normalized)
calc_with_metadata.uploader = test_user.user_id
......@@ -148,12 +152,17 @@ def test_search_quantity(elastic, normalized: parsing.LocalBackend, test_user: d
create_entry(calc_with_metadata)
refresh_index()
request = SearchRequest().quantity(name='authors', size=1, examples=1)
request = SearchRequest().quantity(
name='authors', size=1, examples=1, order_by=order_by)
results = request.execute()
assert len(results['quantities']['authors']['values'].keys()) == 1
name = list(results['quantities']['authors']['values'].keys())[0]
assert len(results['quantities']['authors']['values'][name]['examples']) == 1
assert results['quantities']['authors']['after'] == name
if order_by is None:
assert results['quantities']['authors']['after'] == name
else:
assert results['quantities']['authors']['after'] == \
results['quantities']['authors']['values'][name]['examples'][0][order_by]
def refresh_index():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment