Commit 4861d6a4 authored by Markus Scheidgen

Added users metrics and date histogram aggregation to search.

parent b42ef722
Pipeline #53810 passed with stages in 17 minutes and 4 seconds
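The commit touches both the /repo/ API and the search module: it adds a 'users' metric (a cardinality aggregation over uploader names) and an optional histogram over upload time. A minimal sketch of how a client could exercise the new query parameters; the base URL is an assumption, while the endpoint path and parameter names are taken from the changes below:

    import requests

    # hypothetical deployment URL; adjust to the actual NOMAD installation
    BASE_URL = 'http://localhost:8000/nomad/api'

    response = requests.get(
        BASE_URL + '/repo/',
        params={
            'date_histogram': 'true',          # new flag: aggregate over upload_time
            'metrics': ['users', 'datasets'],  # 'users' is the newly added metric
        })
    response.raise_for_status()
    data = response.json()

    # the histogram is returned alongside the other quantity aggregations
    print(data['quantities'].get('date_histogram'))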
@@ -110,6 +110,8 @@ repo_request_parser.add_argument(
    'scroll', type=bool, help='Enable scrolling')
repo_request_parser.add_argument(
    'scroll_id', type=str, help='The id of the current scrolling window to use.')
repo_request_parser.add_argument(
    'date_histogram', type=bool, help='Add an additional aggregation over the upload time')
repo_request_parser.add_argument(
    'metrics', type=str, action='append', help=(
        'Metrics to aggregate over all quantities and their values as comma separated list. '
@@ -199,6 +201,7 @@ class RepoCalcsResource(Resource):
        try:
            scroll = bool(request.args.get('scroll', False))
            date_histogram = bool(request.args.get('date_histogram', False))
            scroll_id = request.args.get('scroll_id', None)
            page = int(request.args.get('page', 1))
            per_page = int(request.args.get('per_page', 10 if not scroll else 1000))
@@ -250,7 +253,8 @@
            else:
                results = search.metrics_search(
                    q=q, per_page=per_page, page=page, order=order, order_by=order_by,
                    time_range=time_range, metrics_to_use=metrics, search_parameters=search_parameters)
                    time_range=time_range, metrics_to_use=metrics, search_parameters=search_parameters,
                    with_date_histogram=date_histogram)

                # TODO just a work around to make things prettier
                quantities = results['quantities']
@@ -219,7 +219,8 @@ search_quantities = datamodel.Domain.instance.search_quantities
metrics = {
    'datasets': ('cardinality', 'datasets.id'),
    'unique_code_runs': ('cardinality', 'calc_hash')
    'unique_code_runs': ('cardinality', 'calc_hash'),
    'users': ('cardinality', 'uploader.name.keyword')
}
"""
The available search metrics. Metrics are integer values given for each entry that can
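The new 'users' metric follows the same pattern as the existing entries: a name mapped to an Elasticsearch aggregation type and field. As a small illustration of what that tuple translates into, built with elasticsearch_dsl the way the rest of the search module does; the field name is taken from the diff, everything else is just a sketch:

    from elasticsearch_dsl import A

    # cardinality aggregation counting distinct uploader names
    users_metric = A('cardinality', field='uploader.name.keyword')
    print(users_metric.to_dict())
    # -> {'cardinality': {'field': 'uploader.name.keyword'}}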
@@ -476,7 +477,7 @@ def quantity_search(
def metrics_search(
        quantities: Dict[str, int] = aggregations, metrics_to_use: List[str] = [],
        with_entries: bool = True, **kwargs) -> Dict[str, Any]:
        with_entries: bool = True, with_date_histogram: bool = False, **kwargs) -> Dict[str, Any]:
    """
    Performs a search like :func:`entry_search`, but instead of entries, returns the given
    metrics aggregated for (a limited set of values) of the given quantities calculated
@@ -524,7 +525,6 @@
        # We are using elastic searchs 'composite aggregations' here. We do not really
        # compose aggregations, but only those pseudo composites allow us to use the
        # 'after' feature that allows to scan through all aggregation values.
        terms: Dict[str, Any] = None
        quantity = search_quantities[quantity_name]
        min_doc_count = 0 if quantity.zero_aggs else 1
        terms = A(
@@ -535,6 +535,10 @@
        if quantity_name not in ['authors']:
            add_metrics(buckets)

    if with_date_histogram:
        histogram = A('date_histogram', field='upload_time', interval='1M', format='yyyy-MM-dd')
        add_metrics(search.aggs.bucket('date_histogram', histogram))

    add_metrics(search.aggs)

    response, entry_results = _execute_paginated_search(search, **kwargs)
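For illustration, the date_histogram branch above attaches a monthly bucket aggregation on upload_time to the Search object before the metrics are added. A standalone sketch of the aggregation body it produces; the index name is a placeholder, while field, interval and format are taken from the code above:

    from elasticsearch_dsl import A, Search

    search = Search(index='calcs')  # placeholder index name
    histogram = A('date_histogram', field='upload_time', interval='1M', format='yyyy-MM-dd')
    search.aggs.bucket('date_histogram', histogram)

    print(search.to_dict()['aggs'])
    # -> {'date_histogram': {'date_histogram': {
    #        'field': 'upload_time', 'interval': '1M', 'format': 'yyyy-MM-dd'}}}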
@@ -557,6 +561,12 @@
        if quantity_name not in metrics_names  # ES aggs for total metrics, and aggs for quantities stand side by side
    }

    if with_date_histogram:
        metrics_results['date_histogram'] = {
            bucket.key_as_string: get_metrics(bucket, bucket.doc_count)
            for bucket in response.aggregations.date_histogram.buckets
        }

    total_metrics_result = get_metrics(response.aggregations, entry_results['pagination']['total'])
    metrics_results['total'] = dict(all=total_metrics_result)
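With the histogram enabled, the metrics results gain one entry per upload-time bucket, keyed by the formatted date and carrying the same metrics as the totals. An illustration of the resulting shape with made-up numbers; only the keys follow from the code above:

    example_metrics_results = {
        'date_histogram': {
            '2019-04-01': {'code_runs': 12, 'users': 3},
            '2019-05-01': {'code_runs': 7, 'users': 2},
        },
        'total': {'all': {'code_runs': 19, 'users': 4}},
    }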
@@ -826,6 +826,14 @@ class TestRepo():
        else:
            assert len(metrics_result) == 1  # code_runs is the only metric for authors

    def test_search_date_histogram(self, client, example_elastic_calcs, no_warn):
        rv = client.get('/repo/?date_histogram=true&metrics=total_energies')
        assert rv.status_code == 200
        data = json.loads(rv.data)
        histogram = data.get('quantities').get('date_histogram')
        print(histogram)
        assert len(histogram) > 0

    @pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
    def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
        rv = client.get('/repo/?page=%d&per_page=%d' % (page, per_page))
@@ -66,9 +66,13 @@ def test_metrics_search(elastic, normalized: parsing.LocalBackend):
    create_entry(calc_with_metadata)
    refresh_index()

    assert 'users' in search.metrics_names
    assert 'datasets' in search.metrics_names
    assert 'unique_code_runs' in search.metrics_names
    use_metrics = search.metrics_names

    results = metrics_search(metrics_to_use=use_metrics, with_entries=True)
    results = metrics_search(metrics_to_use=use_metrics, with_entries=True, with_date_histogram=True)
    quantities = results['quantities']
    hits = results['results']
    assert results['pagination']['total'] == 1