Commit d22785ab authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Configurable metrics.

parent ac1f85da
Pipeline #45422 passed with stages
in 27 minutes and 33 seconds
......@@ -8,7 +8,7 @@ import TablePagination from '@material-ui/core/TablePagination'
import TableRow from '@material-ui/core/TableRow'
import Paper from '@material-ui/core/Paper'
import { TableHead, LinearProgress, FormControl, FormControlLabel, Checkbox, FormGroup,
FormLabel, IconButton, MuiThemeProvider, Typography, Tooltip, TableSortLabel, ExpansionPanelDetails, ExpansionPanelSummary, ExpansionPanel, Grid } from '@material-ui/core'
FormLabel, IconButton, MuiThemeProvider, Typography, Tooltip, TableSortLabel, ExpansionPanelDetails, ExpansionPanelSummary, ExpansionPanel, Grid, CircularProgress } from '@material-ui/core'
import { compose } from 'recompose'
import { withErrors } from './errors'
import AnalyticsIcon from '@material-ui/icons/Settings'
......@@ -160,15 +160,21 @@ class Repo extends React.Component {
update(changes) {
changes = changes || {}
const { page, rowsPerPage, owner, sortedBy, sortOrder, searchValues } = {...this.state, ...changes}
const { page, rowsPerPage, owner, sortedBy, sortOrder, searchValues, metric } = {...this.state, ...changes}
delete changes.metric
this.setState({loading: true, ...changes})
// code_runs is returned anyways
const metrics_to_retrieve = metric === 'code_runs' ? [] : [metric]
this.props.api.search({
page: page,
per_page: rowsPerPage,
owner: owner || 'all',
order_by: sortedBy,
order: (sortOrder === 'asc') ? 1 : -1,
total_metrics: metrics_to_retrieve,
aggregation_metrics: metrics_to_retrieve,
...searchValues
}).then(data => {
const { pagination: { total, page, per_page }, results, aggregations, metrics } = data
......@@ -181,7 +187,8 @@ class Repo extends React.Component {
per_page,
total: total,
loading: false,
owner: owner
owner: owner,
metric: metric
})
}).catch(errors => {
this.setState({data: [], total: 0, loading: false, owner: owner})
......@@ -207,7 +214,7 @@ class Repo extends React.Component {
}
handleMetricChange(metric) {
this.setState({metric: metric})
this.update({metric: metric})
}
handleSort(columnKey) {
......@@ -282,7 +289,8 @@ class Repo extends React.Component {
const metricsLabel = {
code_runs: 'Code runs',
total_energies: 'Total energy calculations',
geometries: 'Unique geometries'
geometries: 'Unique geometries',
datasets: 'Datasets'
}
return (
<div className={classes.root}>
......@@ -315,7 +323,10 @@ class Repo extends React.Component {
<ExpansionPanel>
<ExpansionPanelSummary expandIcon={<ExpandMoreIcon/>} className={classes.searchSummary}>
<Typography variant="h6" style={{textAlign: 'center', width: '100%', fontWeight: 'normal'}}>
Found <b>{metrics.total_energies}</b> total energy calculations in <b>{metrics.code_runs}</b> code runs that simulate <b>{metrics.geometries}</b> unique geometries; data curated in <b>{metrics.datasets}</b> datasets.
Found <b>{metrics.code_runs}</b> code runs
{metric === 'geometries' ? (<span> that simulate <b>{metrics.geometries}</b> unique geometries</span>) : ''}
{metric === 'total_energies' ? (<span> with <b>{metrics.total_energies}</b> total energy calculations</span>) : ''}
{metric === 'datasets' ? (<span> curated in <b>{metrics.datasets}</b> datasets</span>) : ''}.
</Typography>
</ExpansionPanelSummary>
<ExpansionPanelDetails className={classes.searchDetails}>
......@@ -323,7 +334,7 @@ class Repo extends React.Component {
<FormControl>
<FormLabel>Metric used in statistics: </FormLabel>
<FormGroup row>
{['code_runs', 'total_energies', 'geometries'].map(metric => (
{['code_runs', 'total_energies', 'geometries', 'datasets'].map(metric => (
<FormControlLabel key={metric}
control={
<Checkbox checked={this.state.metric === metric} onChange={() => this.handleMetricChange(metric)} value={metric} />
......
......@@ -82,6 +82,14 @@ repo_request_parser.add_argument(
'scroll', type=bool, help='Enable scrolling')
repo_request_parser.add_argument(
'scroll_id', type=str, help='The id of the current scrolling window to use.')
# Request parameters selecting which statistics metrics the search endpoint
# computes. Both take a comma separated list; ``code_runs`` is always included.
# Fix: the original ``total_metrics`` help text concatenated two string literals
# without a separating space ("over.Possible values"), and its phrasing was
# inconsistent with ``aggregation_metrics``.
repo_request_parser.add_argument(
    'total_metrics', type=str, help=(
        'Metrics to aggregate all search results over as comma separated list. '
        'Possible values are total_energies, geometries, and datasets.'))
repo_request_parser.add_argument(
    'aggregation_metrics', type=str, help=(
        'Metrics to aggregate all aggregation buckets over as comma separated list. '
        'Possible values are total_energies, geometries, and datasets.'))
for search_quantity in search.search_quantities.keys():
_, _, description = search.search_quantities[search_quantity]
......@@ -127,6 +135,15 @@ class RepoCalcsResource(Resource):
page = int(request.args.get('page', 1))
per_page = int(request.args.get('per_page', 10 if not scroll else 1000))
order = int(request.args.get('order', -1))
total_metrics_str = request.args.get('total_metrics', '')
aggregation_metrics_str = request.args.get('aggregation_metrics', '')
total_metrics = [
metric for metric in total_metrics_str.split(',')
if metric in ['total_energies', 'geometries', 'datasets']]
aggregation_metrics = [
metric for metric in aggregation_metrics_str.split(',')
if metric in ['total_energies', 'geometries', 'datasets']]
except Exception:
abort(400, message='bad parameter types')
......@@ -166,11 +183,15 @@ class RepoCalcsResource(Resource):
data.pop('page', None)
data.pop('order', None)
data.pop('order_by', None)
data.pop('total_metrics', None)
data.pop('aggregation_metrics', None)
if scroll:
data.update(scroll_id=scroll_id, size=per_page)
else:
data.update(per_page=per_page, page=page, order=order, order_by=order_by)
data.update(
per_page=per_page, page=page, order=order, order_by=order_by,
total_metrics=total_metrics, aggregation_metrics=aggregation_metrics)
try:
if scroll:
......
......@@ -260,6 +260,12 @@ The available search quantities in :func:`aggregate_search` as tuples with *sear
elastic field and description.
"""
# Supported statistics metrics: maps each public metric name to the
# elasticsearch aggregation kind and the index field it is computed on.
# These tuples are unpacked by ``add_metrics`` to build ``A(kind, field=...)``
# metric aggregations.
metrics = {
'total_energies': ('sum', 'n_total_energies'),
'geometries': ('cardinality', 'n_geometries'),
'datasets': ('cardinality', 'datasets.id'),
}
def _construct_search(q: Q = None, **kwargs) -> Search:
search = Search(index=config.elastic.index_name)
......@@ -342,6 +348,8 @@ def scroll_search(
def aggregate_search(
page: int = 1, per_page: int = 10, order_by: str = 'formula', order: int = -1,
q: Q = None, aggregations: Dict[str, int] = aggregations,
aggregation_metrics: List[str] = ['total_energies', 'geometries', 'datasets'],
total_metrics: List[str] = ['total_energies', 'geometries', 'datasets'],
**kwargs) -> Tuple[int, List[dict], Dict[str, Dict[str, Dict[str, int]]], Dict[str, int]]:
"""
Performs a search and returns paginated search results and aggregations. The aggregations
......@@ -354,6 +362,9 @@ def aggregate_search(
q: An *elasticsearch_dsl* query used to further filter the results (via `and`)
aggregations: A customized list of aggregations to perform. Keys are index fields,
and values the amount of buckets to return. Only works on *keyword* field.
aggregation_metrics: The metrics used to aggregate over. Can be `total_energies``,
``geometries``, or ``datasets``. ``code_runs`` is always given.
total_metrics: The metrics used to for total numbers.
**kwargs: Quantity, value pairs to search for.
Returns: A tuple with the total hits, an array with the results, an dictionary with
......@@ -362,10 +373,10 @@ def aggregate_search(
search = _construct_search(q, **kwargs)
def add_metrics(parent):
parent.metric('total_energies', A('sum', field='n_total_energies'))
parent.metric('geometries', A('cardinality', field='geometries'))
parent.metric('datasets', A('cardinality', field='datasets.id'))
def add_metrics(parent, metrics_to_add):
    """Attach one elasticsearch metric aggregation to ``parent`` for each
    requested metric name, looked up in the module level ``metrics`` table."""
    for name in metrics_to_add:
        agg_kind, agg_field = metrics[name]
        parent.metric(name, A(agg_kind, field=agg_field))
for aggregation, size in aggregations.items():
......@@ -375,9 +386,9 @@ def aggregate_search(
a = A('terms', field=aggregation, size=size, min_doc_count=0, order=dict(_key='asc'))
buckets = search.aggs.bucket(aggregation, a)
add_metrics(buckets)
add_metrics(buckets, aggregation_metrics)
add_metrics(search.aggs)
add_metrics(search.aggs, total_metrics)
if order_by not in search_quantities:
raise KeyError('Unknown order quantity %s' % order_by)
......@@ -388,28 +399,26 @@ def aggregate_search(
total_results = response.hits.total
search_results = [hit.to_dict() for hit in response.hits]
def get_metrics(bucket, metrics_to_get, code_runs):
    """Extract the value of each requested metric from an elasticsearch
    aggregation ``bucket`` and add the given ``code_runs`` count.

    Returns a plain dict mapping metric name -> value, always containing
    the ``code_runs`` key (set last, so it wins on a name clash).
    """
    values = {}
    for name in metrics_to_get:
        values[name] = bucket[name]['value']
    values['code_runs'] = code_runs
    return values
aggregation_results = {
aggregation: {
bucket.key: {
'code_runs': bucket.doc_count,
'total_energies': bucket.total_energies.value,
'geometries': bucket.geometries.value,
'datasets': bucket.datasets.value
}
bucket.key: get_metrics(bucket, aggregation_metrics, bucket.doc_count)
for bucket in getattr(response.aggregations, aggregation).buckets
}
for aggregation in aggregations.keys()
if aggregation not in ['total_energies', 'geometries', 'datasets']
}
metrics = {
'code_runs': total_results,
'total_energies': response.aggregations.total_energies.value,
'geometries': response.aggregations.geometries.value,
'datasets': response.aggregations.datasets.value
}
total_metrics_result = get_metrics(response.aggregations, total_metrics, total_results)
return total_results, search_results, aggregation_results, metrics
return total_results, search_results, aggregation_results, total_metrics_result
def authors(per_page: int = 10, after: str = None, prefix: str = None) -> Tuple[Dict[str, int], str]:
......
......@@ -621,6 +621,27 @@ class TestRepo(UploadFilesBasedTests):
assert len(aggregations['system']) == 1
assert value in aggregations['system']
@pytest.mark.parametrize('metrics', [[], ['total_energies'], ['geometries'], ['datasets'], ['total_energies', 'geometries', 'datasets']])
def test_search_total_metrics(self, client, example_elastic_calcs, no_warn, metrics):
    """Each metric requested via ``total_metrics`` shows up in the response's
    ``metrics`` object, and ``code_runs`` is always present."""
    response = client.get('/repo/?total_metrics=%s' % ','.join(metrics))
    assert response.status_code == 200
    payload = json.loads(response.data)
    metrics_result = payload.get('metrics', None)
    assert 'code_runs' in metrics_result
    for metric in metrics:
        assert metric in metrics_result
@pytest.mark.parametrize('metrics', [[], ['total_energies'], ['geometries'], ['datasets'], ['total_energies', 'geometries', 'datasets']])
def test_search_aggregation_metrics(self, client, example_elastic_calcs, no_warn, metrics):
    """Every aggregation bucket in the response carries ``code_runs`` plus all
    metrics that were requested via ``aggregation_metrics``."""
    response = client.get('/repo/?aggregation_metrics=%s' % ','.join(metrics))
    assert response.status_code == 200
    payload = json.loads(response.data)
    for buckets in payload.get('aggregations').values():
        for metrics_result in buckets.values():
            assert 'code_runs' in metrics_result
            for metric in metrics:
                assert metric in metrics_result
@pytest.mark.parametrize('n_results, page, per_page', [(2, 1, 5), (1, 1, 1), (0, 2, 3)])
def test_search_pagination(self, client, example_elastic_calcs, no_warn, n_results, page, per_page):
rv = client.get('/repo/?page=%d&per_page=%d' % (page, per_page))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment