diff --git a/nomad/api/app.py b/nomad/api/app.py
index 3630a045bfe7008b4d01cb3338f4aa046c82a0ce..e0dc024e887d61fd59fe8970363b788755a86963 100644
--- a/nomad/api/app.py
+++ b/nomad/api/app.py
@@ -145,4 +145,7 @@ class RFC3339DateTime(fields.DateTime):
         if isinstance(value, datetime):
             return super().format(value.replace(tzinfo=pytz.utc))
         else:
-            str(value)
+            return str(value)
+
+
+rfc3339DateTime = RFC3339DateTime()
diff --git a/nomad/api/repo.py b/nomad/api/repo.py
index 99eecfc0dc72009763fc488fcc7107ec49465982..6cbe695be0fe4e4a9a29ca3260d2227837e6b2bf 100644
--- a/nomad/api/repo.py
+++ b/nomad/api/repo.py
@@ -21,10 +21,11 @@
 from flask_restplus import Resource, abort, fields
 from flask import request, g
 from elasticsearch_dsl import Q
 from elasticsearch.exceptions import NotFoundError
+import datetime
 
 from nomad import search
 
-from .app import api
+from .app import api, rfc3339DateTime
 from .auth import login_if_available
 from .common import pagination_model, pagination_request_parser, calc_route
@@ -85,6 +86,12 @@
 repo_request_parser = pagination_request_parser.copy()
 repo_request_parser.add_argument(
     'owner', type=str, help='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``')
+repo_request_parser.add_argument(
+    'from_time', type=lambda x: rfc3339DateTime.parse(x),
+    help='A yyyy-MM-ddTHH:mm:ss (RFC3339) minimum entry time (e.g. upload time)')
+repo_request_parser.add_argument(
+    'until_time', type=lambda x: rfc3339DateTime.parse(x),
+    help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
 repo_request_parser.add_argument(
     'scroll', type=bool, help='Enable scrolling')
 repo_request_parser.add_argument(
@@ -149,6 +156,11 @@ class RepoCalcsResource(Resource):
         total_metrics_str = request.args.get('total_metrics', '')
         aggregation_metrics_str = request.args.get('aggregation_metrics', '')
 
+        from_time = rfc3339DateTime.parse(request.args.get('from_time', '2000-01-01'))
+        until_time_str = request.args.get('until_time', None)
+        until_time = rfc3339DateTime.parse(until_time_str) if until_time_str is not None else datetime.datetime.now()
+        time_range = (from_time, until_time)
+
         total_metrics = [
             metric for metric in total_metrics_str.split(',')
             if metric in search.metrics_names]
@@ -198,12 +210,14 @@ class RepoCalcsResource(Resource):
             data.pop('order_by', None)
             data.pop('total_metrics', None)
             data.pop('aggregation_metrics', None)
+            data.pop('from_time', None)
+            data.pop('until_time', None)
 
             if scroll:
                 data.update(scroll_id=scroll_id, size=per_page)
             else:
                 data.update(
-                    per_page=per_page, page=page, order=order, order_by=order_by,
+                    per_page=per_page, page=page, order=order, order_by=order_by, time_range=time_range,
                     total_metrics=total_metrics, aggregation_metrics=aggregation_metrics)
 
             try:
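
NOTE: A minimal sketch (not part of the change) of how a client could exercise
the two new query parameters; the base URL and the use of `requests` are
illustrative assumptions, not taken from this repository:

    import requests

    # Fetch only calcs whose upload_time falls in the first half of 2019
    # (assumes a locally running API at this placeholder URL).
    response = requests.get(
        'http://localhost:8000/nomad/api/repo/',
        params=dict(from_time='2019-01-01T00:00:00', until_time='2019-06-30T23:59:59'))
    response.raise_for_status()
    print(len(response.json()['results']))  # entries inside the time window

Both parameters are optional: per the handler above, a missing from_time
defaults to 2000-01-01 and a missing until_time to the current time.
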
diff --git a/nomad/search.py b/nomad/search.py
index c01571f40754aaea6d2fab99a7944a89a189dd42..378e217130519b140ea7e0a065942715fb4200cf 100644
--- a/nomad/search.py
+++ b/nomad/search.py
@@ -21,6 +21,7 @@
 from elasticsearch_dsl import Document, InnerDoc, Keyword, Text, Date, \
     Object, Boolean, Search, Q, A, analyzer, tokenizer
 from elasticsearch_dsl.document import IndexMeta
 import elasticsearch.helpers
+from datetime import datetime
 
 from nomad import config, datamodel, infrastructure, datamodel, coe_repo, utils
@@ -214,12 +215,15 @@ for quantity in datamodel.Domain.instance.quantities:
         order_default_quantity = quantity.name
 
 
-def _construct_search(q: Q = None, **kwargs) -> Search:
+def _construct_search(q: Q = None, time_range: Tuple[datetime, datetime] = None, **kwargs) -> Search:
     search = Search(index=config.elastic.index_name)
 
     if q is not None:
         search = search.query(q)
 
+    if time_range is not None:
+        search = search.query('range', upload_time=dict(gte=time_range[0], lte=time_range[1]))
+
     for key, value in kwargs.items():
         query_type, field, _ = search_quantities.get(key, (None, None, None))
         if query_type is None:
@@ -294,7 +298,9 @@ def scroll_search(
 
 def aggregate_search(
         page: int = 1, per_page: int = 10, order_by: str = order_default_quantity, order: int = -1,
-        q: Q = None, aggregations: Dict[str, int] = aggregations,
+        q: Q = None,
+        time_range: Tuple[datetime, datetime] = None,
+        aggregations: Dict[str, int] = aggregations,
         aggregation_metrics: List[str] = [],
         total_metrics: List[str] = [],
         **kwargs) -> Tuple[int, List[dict], Dict[str, Dict[str, Dict[str, int]]], Dict[str, int]]:
@@ -307,7 +313,8 @@
     Arguments:
         page: The page to return starting with page 1
         per_page: Results per page
-        q: An *elasticsearch_dsl* query used to further filter the results (via `and`)
+        q: An *elasticsearch_dsl* query used to further filter the results (via ``and``)
+        time_range: A (start, end) tuple; only entries with an ``upload_time`` inside it are returned.
         aggregations: A customized list of aggregations to perform. Keys are index fields,
             and values the amount of buckets to return. Only works on *keyword* field.
         aggregation_metrics: The metrics used to aggregate over. Can be ``unique_code_runs``, ``datasets``,
@@ -319,7 +326,7 @@
         the aggregation data, and a dictionary with the overall metrics.
     """
 
-    search = _construct_search(q, **kwargs)
+    search = _construct_search(q, time_range, **kwargs)
 
     def add_metrics(parent, metrics_to_add):
         for metric in metrics_to_add:
diff --git a/ops/scripts/misc.http b/ops/scripts/misc.http
index 16e3f9e81133601b12643a70e38c35c684147065..d40d1416593bebfd2cad8d2769b648fe754c3f4d 100644
--- a/ops/scripts/misc.http
+++ b/ops/scripts/misc.http
@@ -64,3 +64,22 @@ content-type: application/json
         }
     }
 }
+
+###
+# Monthly histogram of upload times in the migration index
+
+GET http://localhost:19200/fairdi_nomad_migration/_search HTTP/1.1
+Content-Type: application/json
+
+{
+    "size": 0,
+    "aggs": {
+        "timeline": {
+            "date_histogram": {
+                "field": "upload_time",
+                "interval": "1M",
+                "format": "yyyy-MM-dd"
+            }
+        }
+    }
+}
\ No newline at end of file
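
NOTE: The range filter added to _construct_search translates into a plain
Elasticsearch range query. A standalone sketch of what it produces (the index
name 'calcs' is a placeholder; only elasticsearch_dsl is assumed):

    from datetime import datetime

    from elasticsearch_dsl import Search

    start, end = datetime(2019, 1, 1), datetime(2019, 6, 30)
    search = Search(index='calcs')
    search = search.query('range', upload_time=dict(gte=start, lte=end))
    print(search.to_dict())
    # {'query': {'range': {'upload_time': {'gte': ..., 'lte': ...}}}}

Both bounds are inclusive (gte/lte), which is what the tests below rely on:
calcs uploaded exactly on a boundary day are still counted.
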
diff --git a/tests/test_api.py b/tests/test_api.py
index 50668087c35f118c4c6a010c5c148bd6f4df671b..ace61e0d19ba500cc25bb781dfb976dc0578cc37 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -20,8 +20,9 @@ import zipfile
 import io
 import inspect
 from passlib.hash import bcrypt
-from datetime import datetime
+import datetime
 
+from nomad.api.app import rfc3339DateTime
 from nomad import coe_repo, search, parsing, files, config
 from nomad.files import UploadFiles, PublicUploadFiles
 from nomad.processing import Upload, Calc, SUCCESS
@@ -389,7 +390,7 @@
         upload = self.assert_upload(rv.data)
         self.assert_processing(client, test_user_auth, upload['upload_id'])
         metadata = dict(**example_user_metadata)
-        metadata['_upload_time'] = datetime.now().isoformat()
+        metadata['_upload_time'] = datetime.datetime.now().isoformat()
         self.assert_published(client, admin_user_auth, upload['upload_id'], proc_infra, metadata)
 
     def test_post_metadata_forbidden(self, client, proc_infra, test_user_auth, no_warn):
@@ -553,21 +554,27 @@ class TestRepo():
             test_user: coe_repo.User, other_test_user: coe_repo.User):
         clear_elastic(elastic_infra)
 
-        calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0)
+        calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0, upload_time=datetime.date.today())
         calc_with_metadata.files = ['test/mainfile.txt']
         calc_with_metadata.apply_domain_metadata(normalized)
 
-        calc_with_metadata.update(calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
+        calc_with_metadata.update(
+            calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)
 
-        calc_with_metadata.update(calc_id='2', uploader=other_test_user.to_popo(), published=True, with_embargo=False)
-        calc_with_metadata.update(atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
+        calc_with_metadata.update(
+            calc_id='2', uploader=other_test_user.to_popo(), published=True, with_embargo=False,
+            upload_time=datetime.date.today() - datetime.timedelta(days=5))
+        calc_with_metadata.update(
+            atoms=['Fe'], comment='this is a specific word', formula='AAA', basis_set='zzz')
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)
 
-        calc_with_metadata.update(calc_id='3', uploader=other_test_user.to_popo(), published=False, with_embargo=False)
+        calc_with_metadata.update(
+            calc_id='3', uploader=other_test_user.to_popo(), published=False, with_embargo=False)
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)
 
-        calc_with_metadata.update(calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True)
+        calc_with_metadata.update(
+            calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True)
         search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True)
 
     def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth):
@@ -612,6 +619,37 @@ class TestRepo():
         for key in ['uploader', 'calc_id', 'formula', 'upload_id']:
             assert key in results[0]
 
+    @pytest.mark.parametrize('calcs, start, end', [
+        (2, datetime.date.today() - datetime.timedelta(days=6), datetime.date.today()),
+        (2, datetime.date.today() - datetime.timedelta(days=5), datetime.date.today()),
+        (1, datetime.date.today() - datetime.timedelta(days=4), datetime.date.today()),
+        (1, datetime.date.today(), datetime.date.today()),
+        (1, datetime.date.today() - datetime.timedelta(days=6), datetime.date.today() - datetime.timedelta(days=5)),
+        (0, datetime.date.today() - datetime.timedelta(days=7), datetime.date.today() - datetime.timedelta(days=6)),
+        (2, None, None),
+        (1, datetime.date.today(), None),
+        (2, None, datetime.date.today())
+    ])
+    def test_search_time(self, client, example_elastic_calcs, no_warn, calcs, start, end):
+        query_string = ''
+        if start is not None:
+            query_string = 'from_time=%s' % rfc3339DateTime.format(start)
+        if end is not None:
+            if query_string != '':
+                query_string += '&'
+            query_string += 'until_time=%s' % rfc3339DateTime.format(end)
+        if query_string != '':
+            query_string = '?%s' % query_string
+
+        rv = client.get('/repo/%s' % query_string)
+        assert rv.status_code == 200
+        data = json.loads(rv.data)
+
+        results = data.get('results', None)
+        assert results is not None
+        assert isinstance(results, list)
+        assert len(results) == calcs
+
     @pytest.mark.parametrize('calcs, quantity, value', [
         (2, 'system', 'bulk'),
         (0, 'system', 'atom'),
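
NOTE: The new test formats plain datetime.date objects with rfc3339DateTime.
A short sketch of the round-trip it relies on (the example date is
illustrative): after the `return str(value)` fix in app.py, a date falls
through to str(), and the resulting yyyy-MM-dd string is accepted by parse()
again, just as repo.py parses its '2000-01-01' default:

    import datetime

    from nomad.api.app import rfc3339DateTime

    today = datetime.date.today()
    formatted = rfc3339DateTime.format(today)  # a date is not a datetime -> str() fallback
    assert formatted == str(today)             # e.g. '2019-01-07'
    parsed = rfc3339DateTime.parse(formatted)  # date-only strings parse again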