Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
4861d6a4
Commit
4861d6a4
authored
Aug 14, 2019
by
Markus Scheidgen
Browse files
Added users metrics and date histogram aggregation to search.
parent
b42ef722
Pipeline
#53810
passed with stages
in 17 minutes and 4 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
nomad/api/repo.py
View file @
4861d6a4
...
...
@@ -110,6 +110,8 @@ repo_request_parser.add_argument(
'scroll'
,
type
=
bool
,
help
=
'Enable scrolling'
)
repo_request_parser
.
add_argument
(
'scroll_id'
,
type
=
str
,
help
=
'The id of the current scrolling window to use.'
)
repo_request_parser
.
add_argument
(
'date_histogram'
,
type
=
bool
,
help
=
'Add an additional aggregation over the upload time'
)
repo_request_parser
.
add_argument
(
'metrics'
,
type
=
str
,
action
=
'append'
,
help
=
(
'Metrics to aggregate over all quantities and their values as comma separated list. '
...
...
@@ -199,6 +201,7 @@ class RepoCalcsResource(Resource):
try
:
scroll
=
bool
(
request
.
args
.
get
(
'scroll'
,
False
))
date_histogram
=
bool
(
request
.
args
.
get
(
'date_histogram'
,
False
))
scroll_id
=
request
.
args
.
get
(
'scroll_id'
,
None
)
page
=
int
(
request
.
args
.
get
(
'page'
,
1
))
per_page
=
int
(
request
.
args
.
get
(
'per_page'
,
10
if
not
scroll
else
1000
))
...
...
@@ -250,7 +253,8 @@ class RepoCalcsResource(Resource):
else
:
results
=
search
.
metrics_search
(
q
=
q
,
per_page
=
per_page
,
page
=
page
,
order
=
order
,
order_by
=
order_by
,
time_range
=
time_range
,
metrics_to_use
=
metrics
,
search_parameters
=
search_parameters
)
time_range
=
time_range
,
metrics_to_use
=
metrics
,
search_parameters
=
search_parameters
,
with_date_histogram
=
date_histogram
)
# TODO just a work around to make things prettier
quantities
=
results
[
'quantities'
]
...
...
nomad/search.py
View file @
4861d6a4
...
...
@@ -219,7 +219,8 @@ search_quantities = datamodel.Domain.instance.search_quantities
metrics
=
{
'datasets'
:
(
'cardinality'
,
'datasets.id'
),
'unique_code_runs'
:
(
'cardinality'
,
'calc_hash'
)
'unique_code_runs'
:
(
'cardinality'
,
'calc_hash'
),
'users'
:
(
'cardinality'
,
'uploader.name.keyword'
)
}
"""
The available search metrics. Metrics are integer values given for each entry that can
...
...
@@ -476,7 +477,7 @@ def quantity_search(
def
metrics_search
(
quantities
:
Dict
[
str
,
int
]
=
aggregations
,
metrics_to_use
:
List
[
str
]
=
[],
with_entries
:
bool
=
True
,
**
kwargs
)
->
Dict
[
str
,
Any
]:
with_entries
:
bool
=
True
,
with_date_histogram
:
bool
=
False
,
**
kwargs
)
->
Dict
[
str
,
Any
]:
"""
Performs a search like :func:`entry_search`, but instead of entries, returns the given
metrics aggregated for (a limited set of values) of the given quantities calculated
...
...
@@ -524,7 +525,6 @@ def metrics_search(
# We are using elastic searchs 'composite aggregations' here. We do not really
# compose aggregations, but only those pseudo composites allow us to use the
# 'after' feature that allows to scan through all aggregation values.
terms
:
Dict
[
str
,
Any
]
=
None
quantity
=
search_quantities
[
quantity_name
]
min_doc_count
=
0
if
quantity
.
zero_aggs
else
1
terms
=
A
(
...
...
@@ -535,6 +535,10 @@ def metrics_search(
if
quantity_name
not
in
[
'authors'
]:
add_metrics
(
buckets
)
if
with_date_histogram
:
histogram
=
A
(
'date_histogram'
,
field
=
'upload_time'
,
interval
=
'1M'
,
format
=
'yyyy-MM-dd'
)
add_metrics
(
search
.
aggs
.
bucket
(
'date_histogram'
,
histogram
))
add_metrics
(
search
.
aggs
)
response
,
entry_results
=
_execute_paginated_search
(
search
,
**
kwargs
)
...
...
@@ -557,6 +561,12 @@ def metrics_search(
if
quantity_name
not
in
metrics_names
# ES aggs for total metrics, and aggs for quantities stand side by side
}
if
with_date_histogram
:
metrics_results
[
'date_histogram'
]
=
{
bucket
.
key_as_string
:
get_metrics
(
bucket
,
bucket
.
doc_count
)
for
bucket
in
response
.
aggregations
.
date_histogram
.
buckets
}
total_metrics_result
=
get_metrics
(
response
.
aggregations
,
entry_results
[
'pagination'
][
'total'
])
metrics_results
[
'total'
]
=
dict
(
all
=
total_metrics_result
)
...
...
tests/test_api.py
View file @
4861d6a4
...
...
@@ -826,6 +826,14 @@ class TestRepo():
else
:
assert
len
(
metrics_result
)
==
1
# code_runs is the only metric for authors
def test_search_date_histogram(self, client, example_elastic_calcs, no_warn):
    """Requesting /repo/ with date_histogram=true must return a non-empty
    'date_histogram' aggregation under the 'quantities' key.

    Uses the flask test client fixture against the indexed example calcs.
    """
    rv = client.get('/repo/?date_histogram=true&metrics=total_energies')
    assert rv.status_code == 200

    data = json.loads(rv.data)
    # Index directly instead of chained .get(): a missing key fails with a
    # clear KeyError pointing at the absent aggregation, not a TypeError.
    histogram = data['quantities']['date_histogram']
    assert len(histogram) > 0
@
pytest
.
mark
.
parametrize
(
'n_results, page, per_page'
,
[(
2
,
1
,
5
),
(
1
,
1
,
1
),
(
0
,
2
,
3
)])
def
test_search_pagination
(
self
,
client
,
example_elastic_calcs
,
no_warn
,
n_results
,
page
,
per_page
):
rv
=
client
.
get
(
'/repo/?page=%d&per_page=%d'
%
(
page
,
per_page
))
...
...
tests/test_search.py
View file @
4861d6a4
...
...
@@ -66,9 +66,13 @@ def test_metrics_search(elastic, normalized: parsing.LocalBackend):
create_entry
(
calc_with_metadata
)
refresh_index
()
assert
'users'
in
search
.
metrics_names
assert
'datasets'
in
search
.
metrics_names
assert
'unique_code_runs'
in
search
.
metrics_names
use_metrics
=
search
.
metrics_names
results
=
metrics_search
(
metrics_to_use
=
use_metrics
,
with_entries
=
True
)
results
=
metrics_search
(
metrics_to_use
=
use_metrics
,
with_entries
=
True
,
with_date_histogram
=
True
)
quantities
=
results
[
'quantities'
]
hits
=
results
[
'results'
]
assert
results
[
'pagination'
][
'total'
]
==
1
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment