diff --git a/README.md b/README.md index 900374c1e466261f55d95cf9ac33b07a224ec8fd..6f3733a28f55abe98d5bb6d7d9fbf332d8304a72 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ Omitted versions are plain bugfix releases with only minor changes and fixes. ### v0.7.5 - AFLOWLIB prototypes (archive) +- primitive label search +- improved search performance based on excluded fields - improved logs - minor bugfixes diff --git a/nomad/app/api/archive.py b/nomad/app/api/archive.py index f5560040dece9d6dde45bbabfb7875a045f4b6cd..e25a4240572e84bc7f8febb05126dfe41ace74e4 100644 --- a/nomad/app/api/archive.py +++ b/nomad/app/api/archive.py @@ -147,6 +147,7 @@ class ArchiveDownloadResource(Resource): search_request = search.SearchRequest() apply_search_parameters(search_request, args) + search_request.include('calc_id', 'upload_id', 'mainfile') calcs = search_request.execute_scan( order_by='upload_id', @@ -273,6 +274,7 @@ class ArchiveQueryResource(Resource): search_request = search.SearchRequest() apply_search_parameters(search_request, args) + search_request.include('calc_id', 'upload_id', 'mainfile') try: if scroll: diff --git a/nomad/app/api/raw.py b/nomad/app/api/raw.py index d34dce7e0e03554852387fe33f8933335adc3c30..cbc3f8856a9c4d8e875083a9c69cd08fe8bd6001 100644 --- a/nomad/app/api/raw.py +++ b/nomad/app/api/raw.py @@ -421,6 +421,7 @@ class RawFileQueryResource(Resource): search_request = search.SearchRequest() apply_search_parameters(search_request, _raw_file_from_query_parser.parse_args()) + search_request.include('calc_id', 'upload_id', 'mainfile') def path(entry): return '%s/%s' % (entry['upload_id'], entry['mainfile']) diff --git a/nomad/app/api/repo.py b/nomad/app/api/repo.py index 04900ddd6ccd6dba6ad0ea2e961065364c2fcf39..23a34b31debf564d996ff12d15664f1457b75d44 100644 --- a/nomad/app/api/repo.py +++ b/nomad/app/api/repo.py @@ -206,7 +206,9 @@ class RepoCalcsResource(Resource): search_request.totals(metrics_to_use=metrics) if 'exclude' in parsed_args: - search_request.exclude(*parsed_args['exclude']) + excludes = parsed_args['exclude'] + if excludes is not None: + search_request.exclude(*excludes) try: if scroll: @@ -303,7 +305,7 @@ _repo_edit_model = api.model('RepoEdit', { def edit(parsed_query: Dict[str, Any], mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]: # get all calculations that have to change with utils.timer(common.logger, 'edit query executed'): - search_request = search.SearchRequest() + search_request = search.SearchRequest().include('calc_id', 'upload_id') apply_search_parameters(search_request, parsed_query) upload_ids = set() calc_ids = [] @@ -695,7 +697,7 @@ class RepoPidResource(Resource): except ValueError: abort(400, 'Wrong PID format') - search_request = search.SearchRequest() + search_request = search.SearchRequest().include('upload_id', 'calc_id') if g.user is not None: search_request.owner('all', user_id=g.user.user_id) diff --git a/nomad/app/optimade/endpoints.py b/nomad/app/optimade/endpoints.py index 27ac0e74c81a258788bcbe453614f8f66e68dca9..d41f16894db942cb80f8166659fc1564e71ce93a 100644 --- a/nomad/app/optimade/endpoints.py +++ b/nomad/app/optimade/endpoints.py @@ -65,7 +65,7 @@ class CalculationList(Resource): except Exception: abort(400, message='bad parameter types') # TODO Specific json API error handling - search_request = base_search_request() + search_request = base_search_request().include('calc_id') if filter is not None: try: diff --git a/nomad/search.py b/nomad/search.py index f44a12d4db402091ca1363af567bb46c54a26b13..39cdbe45275c209bb1f58cc0c0837e85d556a5c9 100644 --- a/nomad/search.py +++ b/nomad/search.py @@ -544,10 +544,15 @@ class SearchRequest: return self def exclude(self, *args): - """ Exclude certain elastic keys from the search results. """ + """ Exclude certain elastic fields from the search results. """ self._search = self._search.source(excludes=args) return self + def include(self, *args): + """ Include only the given fields in the search results. """ + self._search = self._search.source(includes=args) + return self + def execute(self): """ Exectutes without returning actual results. Only makes sense if the request diff --git a/tests/test_search.py b/tests/test_search.py index e538e84cb3f0025f3ad735950bddf9666b170839..c414ead8a95d057a746e4d3aa1bcd775517ab6dc 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -137,7 +137,7 @@ def test_search_totals(elastic, example_search_data): assert 'quantities' not in results -def test_search_excludes(elastic, example_search_data): +def test_search_exclude(elastic, example_search_data): for item in SearchRequest().execute_paginated()['results']: assert 'atoms' in item @@ -145,6 +145,15 @@ def test_search_excludes(elastic, example_search_data): assert 'atoms' not in item +def test_search_include(elastic, example_search_data): + for item in SearchRequest().execute_paginated()['results']: + assert 'atoms' in item + + for item in SearchRequest().include('calc_id').execute_paginated()['results']: + assert 'atoms' not in item + assert 'calc_id' in item + + @pytest.mark.parametrize("order_by", [None, 'upload_id']) def test_search_quantity( elastic, normalized: parsing.LocalBackend, test_user: datamodel.User,