Commit 02ee02c1 authored by Markus Scheidgen

Added time range to search.

parent 27d8e909
Pipeline #46420 passed with stages in 15 minutes and 58 seconds
@@ -145,4 +145,7 @@ class RFC3339DateTime(fields.DateTime):
         if isinstance(value, datetime):
             return super().format(value.replace(tzinfo=pytz.utc))
         else:
-            str(value)
+            return str(value)
+
+
+rfc3339DateTime = RFC3339DateTime()
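For context, a minimal sketch (not part of the commit) of why the added ``return`` matters: without it, ``format`` evaluated ``str(value)`` but returned None for any non-datetime value, so such values were silently dropped during serialization.

```python
# Standalone sketch of the fixed control flow; isoformat() stands in for the
# real super().format() call, which applies the RFC 3339 format in UTC.
from datetime import datetime

def format_value(value):
    if isinstance(value, datetime):
        return value.isoformat()
    else:
        return str(value)  # before this commit the `return` was missing -> None

assert format_value('2019-02-01') == '2019-02-01'
assert format_value(datetime(2019, 2, 1)) == '2019-02-01T00:00:00'
```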
@@ -21,10 +21,11 @@ from flask_restplus import Resource, abort, fields
 from flask import request, g
 from elasticsearch_dsl import Q
 from elasticsearch.exceptions import NotFoundError
+import datetime

 from nomad import search

-from .app import api
+from .app import api, rfc3339DateTime
 from .auth import login_if_available
 from .common import pagination_model, pagination_request_parser, calc_route
@@ -85,6 +86,12 @@ repo_request_parser = pagination_request_parser.copy()
 repo_request_parser.add_argument(
     'owner', type=str,
     help='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``')
+repo_request_parser.add_argument(
+    'from_time', type=lambda x: rfc3339DateTime.parse(x),
+    help='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)')
+repo_request_parser.add_argument(
+    'until_time', type=lambda x: rfc3339DateTime.parse(x),
+    help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
 repo_request_parser.add_argument(
     'scroll', type=bool, help='Enable scrolling')
 repo_request_parser.add_argument(
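To illustrate how a client would use the new parameters, here is a hypothetical request against the repo endpoint (host and route prefix are assumptions, not taken from the commit):

```python
# Hypothetical client call using the new time filters; the base URL is an
# assumption. The 'results' key is the one the API tests below rely on.
import requests

response = requests.get(
    'http://localhost/api/repo/',
    params={
        'from_time': '2019-01-01T00:00:00',    # RFC 3339 minimum upload time
        'until_time': '2019-02-01T00:00:00'})  # RFC 3339 maximum upload time
response.raise_for_status()
print(len(response.json()['results']))
```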
@@ -149,6 +156,11 @@ class RepoCalcsResource(Resource):
        total_metrics_str = request.args.get('total_metrics', '')
        aggregation_metrics_str = request.args.get('aggregation_metrics', '')

+        from_time = rfc3339DateTime.parse(request.args.get('from_time', '2000-01-01'))
+        until_time_str = request.args.get('until_time', None)
+        until_time = rfc3339DateTime.parse(until_time_str) if until_time_str is not None else datetime.datetime.now()
+        time_range = (from_time, until_time)
+
        total_metrics = [
            metric for metric in total_metrics_str.split(',')
            if metric in search.metrics_names]
@@ -198,12 +210,14 @@ class RepoCalcsResource(Resource):
        data.pop('order_by', None)
        data.pop('total_metrics', None)
        data.pop('aggregation_metrics', None)
+        data.pop('from_time', None)
+        data.pop('until_time', None)

        if scroll:
            data.update(scroll_id=scroll_id, size=per_page)
        else:
            data.update(
-                per_page=per_page, page=page, order=order, order_by=order_by,
+                per_page=per_page, page=page, order=order, order_by=order_by, time_range=time_range,
                total_metrics=total_metrics, aggregation_metrics=aggregation_metrics)

        try:
@@ -21,6 +21,7 @@ from elasticsearch_dsl import Document, InnerDoc, Keyword, Text, Date, \
     Object, Boolean, Search, Q, A, analyzer, tokenizer
 from elasticsearch_dsl.document import IndexMeta
 import elasticsearch.helpers
+from datetime import datetime

 from nomad import config, datamodel, infrastructure, datamodel, coe_repo, utils
@@ -214,12 +215,15 @@ for quantity in datamodel.Domain.instance.quantities:
         order_default_quantity = quantity.name


-def _construct_search(q: Q = None, **kwargs) -> Search:
+def _construct_search(q: Q = None, time_range: Tuple[datetime, datetime] = None, **kwargs) -> Search:
     search = Search(index=config.elastic.index_name)

     if q is not None:
         search = search.query(q)

+    if time_range is not None:
+        search = search.query('range', upload_time=dict(gte=time_range[0], lte=time_range[1]))
+
     for key, value in kwargs.items():
         query_type, field, _ = search_quantities.get(key, (None, None, None))
         if query_type is None:
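For illustration, the ``range`` clause added above serializes to a standard Elasticsearch range query; a sketch using elasticsearch_dsl's ``to_dict()`` (dates shown as strings for brevity, while the real code passes the datetime objects from ``time_range``):

```python
# Sketch of the query body produced by the new range clause.
from elasticsearch_dsl import Search

search = Search().query('range', upload_time=dict(gte='2019-01-01', lte='2019-02-01'))
assert search.to_dict() == {
    'query': {'range': {'upload_time': {'gte': '2019-01-01', 'lte': '2019-02-01'}}}}
```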
@@ -294,7 +298,9 @@ def scroll_search(

 def aggregate_search(
         page: int = 1, per_page: int = 10, order_by: str = order_default_quantity, order: int = -1,
-        q: Q = None, aggregations: Dict[str, int] = aggregations,
+        q: Q = None,
+        time_range: Tuple[datetime, datetime] = None,
+        aggregations: Dict[str, int] = aggregations,
         aggregation_metrics: List[str] = [],
         total_metrics: List[str] = [],
         **kwargs) -> Tuple[int, List[dict], Dict[str, Dict[str, Dict[str, int]]], Dict[str, int]]:
@@ -307,7 +313,8 @@ def aggregate_search(
     Arguments:
         page: The page to return starting with page 1
         per_page: Results per page
-        q: An *elasticsearch_dsl* query used to further filter the results (via `and`)
+        q: An *elasticsearch_dsl* query used to further filter the results (via ``and``)
+        time_range: A (start, end) tuple to filter for entries with ``upload_time`` in that range.
         aggregations: A customized list of aggregations to perform. Keys are index fields,
             and values the amount of buckets to return. Only works on *keyword* field.
         aggregation_metrics: The metrics used to aggregate over. Can be ``unique_code_runs``, ``datasets``,
@@ -319,7 +326,7 @@ def aggregate_search(
         the aggregation data, and a dictionary with the overall metrics.
     """
-    search = _construct_search(q, **kwargs)
+    search = _construct_search(q, time_range, **kwargs)

     def add_metrics(parent, metrics_to_add):
         for metric in metrics_to_add:
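A hypothetical call with the new argument might look as follows (values are illustrative and assume a populated index; the four return values follow the signature above):

```python
# Illustrative use of the extended aggregate_search; argument values made up.
from datetime import datetime
from nomad.search import aggregate_search  # as modified by this commit

total, results, aggregations, metrics = aggregate_search(
    page=1, per_page=10,
    time_range=(datetime(2019, 1, 1), datetime(2019, 2, 1)))
```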
@@ -64,3 +64,22 @@ content-type: application/json
     }
   }
 }
+
+###
+# Search migration unpublished calcs
+GET http://localhost:19200/fairdi_nomad_migration/_search HTTP/1.1
+Content-Type: application/json
+
+{
+    "size": 0,
+    "aggs": {
+        "timeline": {
+            "date_histogram": {
+                "field": "upload_time",
+                "interval": "1M",
+                "format": "yyyy-MM-dd"
+            }
+        }
+    }
+}
\ No newline at end of file
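With ``"interval": "1M"`` Elasticsearch buckets the hits by month; a response would carry an aggregation of roughly this shape (a sketch with invented counts, not actual output):

```python
# Illustrative shape of the date_histogram result; dates and counts invented.
example_response_aggregations = {
    'timeline': {
        'buckets': [
            {'key_as_string': '2019-01-01', 'key': 1546300800000, 'doc_count': 123},
            {'key_as_string': '2019-02-01', 'key': 1548979200000, 'doc_count': 45},
        ]
    }
}
```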
@@ -20,8 +20,9 @@ import zipfile
 import io
 import inspect
 from passlib.hash import bcrypt
-from datetime import datetime
+import datetime

+from nomad.api.app import rfc3339DateTime
 from nomad import coe_repo, search, parsing, files, config
 from nomad.files import UploadFiles, PublicUploadFiles
 from nomad.processing import Upload, Calc, SUCCESS
@@ -389,7 +390,7 @@ class TestUploads:
         upload = self.assert_upload(rv.data)
         self.assert_processing(client, test_user_auth, upload['upload_id'])
         metadata = dict(**example_user_metadata)
-        metadata['_upload_time'] = datetime.now().isoformat()
+        metadata['_upload_time'] = datetime.datetime.now().isoformat()
         self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)

     def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
@@ -553,21 +554,27 @@ class TestRepo():
             test_user: coe_repo.User, other_test_user: coe_repo.User):
         clear_elastic(elastic_infra)

-        calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0)
+        calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=datetime.date.today())
         calc_with_metadata.files = ['test/mainfile.txt']
         calc_with_metadata.apply_domain_metadata(normalized)

-        calc_with_metadata.update(calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
+        calc_with_metadata.update(
+            calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

-        calc_with_metadata.update(calc_id='2', uploader=other_test_user.to_popo(), published=True, with_embargo=False)
-        calc_with_metadata.update(atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
+        calc_with_metadata.update(
+            calc_id='2', uploader=other_test_user.to_popo(), published=True, with_embargo=False,
+            upload_time=datetime.date.today() - datetime.timedelta(days=5))
+        calc_with_metadata.update(
+            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

-        calc_with_metadata.update(calc_id='3', uploader=other_test_user.to_popo(), published=False, with_embargo=False)
+        calc_with_metadata.update(
+            calc_id='3', uploader=other_test_user.to_popo(), published=False, with_embargo=False)
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

-        calc_with_metadata.update(calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True)
+        calc_with_metadata.update(
+            calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True)
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)

     def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
@@ -612,6 +619,37 @@ class TestRepo():
             for key in ['uploader', 'calc_id', 'formula', 'upload_id']:
                 assert key in results[0]

+    @pytest.mark.parametrize('calcs, start, end', [
+        (2, datetime.date.today() - datetime.timedelta(days=6), datetime.date.today()),
+        (2, datetime.date.today() - datetime.timedelta(days=5), datetime.date.today()),
+        (1, datetime.date.today() - datetime.timedelta(days=4), datetime.date.today()),
+        (1, datetime.date.today(), datetime.date.today()),
+        (1, datetime.date.today() - datetime.timedelta(days=6), datetime.date.today() - datetime.timedelta(days=5)),
+        (0, datetime.date.today() - datetime.timedelta(days=7), datetime.date.today() - datetime.timedelta(days=6)),
+        (2, None, None),
+        (1, datetime.date.today(), None),
+        (2, None, datetime.date.today())
+    ])
+    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
+        query_string = ''
+        if start is not None:
+            query_string = 'from_time=%s' % rfc3339DateTime.format(start)
+        if end is not None:
+            if query_string != '':
+                query_string += '&'
+            query_string += 'until_time=%s' % rfc3339DateTime.format(end)
+        if query_string != '':
+            query_string = '?%s' % query_string
+
+        rv = client.get('/repo/%s' % query_string)
+        assert rv.status_code == 200
+        data = json.loads(rv.data)
+
+        results = data.get('results', None)
+        assert results is not None
+        assert isinstance(results, list)
+        assert len(results) == calcs
+
     @pytest.mark.parametrize('calcs, quantity, value', [
         (2, 'system', 'bulk'),
         (0, 'system', 'atom'),