diff --git a/rest-api-service/app/api_v1_0/routes.py b/rest-api-service/app/api_v1_0/routes.py index d21d446f285782d78ef8257af90f84ce8aef79f5..20c0927c3d6cd1cece709a5a088529c083bf03b7 100644 --- a/rest-api-service/app/api_v1_0/routes.py +++ b/rest-api-service/app/api_v1_0/routes.py @@ -24,6 +24,8 @@ from onelogin.saml2.utils import OneLogin_Saml2_Utils from redis import Redis from urllib.parse import urlparse import re +from elasticsearch import TransportError + from app import es_config from . import api @@ -184,6 +186,11 @@ def get_materials(page, per_page): json_data = request.get_json() if not json_data: raise InvalidEncRequest("No input data provided", 400) + + # check for ecxeeded elasticsearch pagination limit + scroll = json_data.get('search_by').get('pagination') == 'scroll' + if page*per_page >= 1e4 and not scroll: + raise InvalidEncRequest('Too many results requested. Limit is 10000, or use "pagination":"scoll" and subsequently scroll_id.', 510) # create the match part match_query = {} @@ -304,11 +311,29 @@ def get_materials(page, per_page): 'from': (page - 1) * per_page, 'query': es_query } - - es_search_results = es.search(index=es_config['index'], doc_type='material', body=es_request_body) + if scroll: + scroll_id = search_by.get('scroll_id') + es_request_body = { + 'size': per_page, + 'query': es_query + } + if scroll_id is None: + es_search_results = es.search(index = es_config['index'], doc_type = 'material', scroll = '1m', body = es_request_body) + else: + es_request_body['scroll_id'] = str(scroll_id) + try: + es_search_results = es.scroll(scroll_id = scroll_id, scroll = '1m') + except TransportError as e: + raise InvalidEncRequest("Scrolling error: %s" % str(e), 510) + materials_found_list = [render_es_result(hit['_source']) for hit in es_search_results['hits']['hits']] + return materials_found_list, -1, { 'es_query': es_query, 'scroll_id': scroll_id } + scroll_id = es_search_results['_scroll_id'] + else: + es_search_results = es.search(index=es_config['index'], doc_type='material', body=es_request_body) total_es_search_results = es_search_results['hits']['total'] materials_found_list = [render_es_result(hit['_source']) for hit in es_search_results['hits']['hits']] - + if scroll: + return materials_found_list, total_es_search_results, { 'es_query': es_query, 'scroll_id': scroll_id } return materials_found_list, total_es_search_results, { 'es_query': es_query }