From e5fda14790da208dc9dabf2885b3e439f26af331 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Thu, 10 Jun 2021 16:45:14 +0200
Subject: [PATCH] Consolidated v1 statistics and aggregations into a single
 model.

---
 nomad/app/v1/models.py                    | 240 +++++++++------
 nomad/app/v1/routers/entries.py           |   1 -
 nomad/app/v1/routers/materials.py         |   1 -
 nomad/datamodel/results.py                |   6 +-
 nomad/metainfo/elasticsearch_extension.py |  14 +-
 nomad/parsing/parsers.py                  |   2 +-
 nomad/search/v1.py                        | 346 +++++++++++-----------
 tests/app/v1/routers/common.py            | 203 +++++++------
 tests/app/v1/routers/test_entries.py      |  85 +++---
 tests/app/v1/routers/test_materials.py    |  64 +---
 tests/utils.py                            |   8 +-
 11 files changed, 495 insertions(+), 475 deletions(-)

diff --git a/nomad/app/v1/models.py b/nomad/app/v1/models.py
index 49fb20a530..40de3ec006 100644
--- a/nomad/app/v1/models.py
+++ b/nomad/app/v1/models.py
@@ -41,15 +41,6 @@
 Value = Union[bool, int, float, datetime.datetime, str]
 ComparableValue = Union[int, float, datetime.datetime, str]
 
-class AggregationOrderType(str, enum.Enum):
-    '''
-    Allows to order statistics or aggregations by either quantity values (`values`) or number
-    of entries (`entries`).
-    '''
-    values = 'values'
-    entries = 'entries'
-
-
 class HTTPExceptionModel(BaseModel):
     detail: str
 
@@ -665,8 +656,18 @@ class AggregationPagination(MetadataBasedPagination):
     order_by: Optional[str] = Field(
         None,  # type: ignore
         description=strip('''
-        The results are ordered by the values of this field. If omitted, default
-        ordering is applied.
+        Either the string "count", "value", or the name of a quantity. If omitted, the
+        buckets will be ordered by their "count".
+
+        If you provide a quantity, all items in a bucket must have the same value for
+        this quantity. For example, aggregating entries on `upload_id` and ordering the
+        buckets by `upload_time` is fine, because all entries in an upload have the same
+        `upload_time`. The API cannot check this rule and the results will be
+        unpredictable.
+
+        If you want to order by the bucket values, you can either use "value" or use
+        the aggregation quantity as `order_by`. The result will be the same, because
+        the bucket values are the quantity values.
         '''))
 
     @validator('page')
@@ -674,11 +675,16 @@ class AggregationPagination(MetadataBasedPagination):
         assert page is None, 'Pagination by `page` is not possible for aggregations, use `page_after_value`'
         return page
 
+    @validator('page_size')
+    def validate_page_size(cls, page_size, values):  # pylint: disable=no-self-argument
+        assert page_size > 0, '0 or smaller page sizes are not allowed for aggregations.'
+        return page_size
+
 
 class AggregatedEntities(BaseModel):
     size: Optional[pydantic.conint(gt=0)] = Field(  # type: ignore
         1, description=strip('''
-        The maximum number of entries that should be returned for each value in the
+        The number of entries that should be returned for each value in the
         aggregation.
         '''))
     required: Optional[MetadataRequired] = Field(
@@ -687,73 +693,69 @@
         None, description=strip('''
         '''))
 
 
-class Aggregation(BaseModel):
+class AggregationBase(BaseModel):
     quantity: str = Field(
         ..., description=strip('''
         The manatory name of the quantity for the aggregation.
         Aggregations can only be computed for those search metadata that have discrete
         values; an aggregation buckets entries that have the same value for this
         quantity.'''))
-    pagination: Optional[AggregationPagination] = Field(
-        AggregationPagination(), description=strip('''
-        Only the data few values are returned for each API call. Pagination allows to
-        get the next set of values based on the last value in the last call.
-        '''))
-    entries: Optional[AggregatedEntities] = Field(
-        None, description=strip('''
-        Optionally, a set of entries can be returned for each value.
-        '''))
-
-
-class StatisticsOrder(BaseModel):
-    type_: Optional[AggregationOrderType] = Field(AggregationOrderType.entries, alias='type')
-    direction: Optional[Direction] = Field(Direction.desc)
 
 
-class Statistic(BaseModel):
-    quantity: str = Field(
-        ..., description=strip('''
-        The manatory name of the quantity that the statistic is calculated for. Statistics
-        can only be computed for those search metadata that have discrete values; a statistics
-        aggregates a certain metric (e.g. the number of entries) over all entries were
-        this quantity has the same value (bucket aggregation, think historgam here).
-
-        There is one except and these are date/time values quantities (most notably `upload_time`).
-        Here each statistic value represents an time interval. The interval can
-        be determined via `datetime_interval`.'''))
+class BucketAggregation(AggregationBase):
     metrics: Optional[List[str]] = Field(
         [], description=strip('''
-        By default the returned statistics will provide the number of entries for each
+        By default the returned aggregations will provide the number of entries for each
         value. You can add more metrics. For each metric an additional number will
         be provided for each value. Metrics are also based on search metadata. Depending
         on the metric the number will represent either a sum (`calculations` for the
         number of individual calculation in each code run) or an amount of different
         values (i.e. `materials` for the amount of different material hashes).'''))
-    datetime_interval: Optional[pydantic.conint(gt=0)] = Field(  # type: ignore
+
+
+class TermsAggregation(BucketAggregation):
+    pagination: Optional[AggregationPagination] = Field(
         None, description=strip('''
-        While statistics in general are only possible for quantities with discrete values,
-        these is one exception. These are date/time values quantities (most notably `upload_time`).
-        Here each statistic value represents an time interval.
+        Only a few values are returned with each API call. Aggregation pagination
+        allows you to retrieve all available values page by page. It also allows
+        you to order the values.
 
-        A date/time interval is a number of seconds greater than 0. This will only be used for
-        date/time valued quantities (e.g. `upload_time`).
+        You can either use pagination (to page through all available values) or size (to
+        get the `size` values with the most available data).
         '''))
+    size: Optional[pydantic.conint(gt=0)] = Field(  # type: ignore
+        None, description=strip('''
+        The maximum number of values to return. Without pagination, only the `size`
+        values with the most entries are returned.
+        '''))
     value_filter: Optional[pydantic.constr(regex=r'^[a-zA-Z0-9_\-\s]+$')] = Field(  # type: ignore
         None, description=strip('''
        An optional filter for values. Only values that contain the filter as substring
        will be part of the statistics.
+
+        This is only available for non-paginated aggregations.
''')) - size: Optional[pydantic.conint(gt=0)] = Field( # type: ignore + entries: Optional[AggregatedEntities] = Field( None, description=strip(''' - An optional maximum size of values in the statistics. The default depends on the - quantity. - ''')) - order: Optional[StatisticsOrder] = Field( - StatisticsOrder(), description=strip(''' - The values in the statistics are either ordered by the entry count or by the - natural ordering of the values. + Optionally, a set of entries can be returned for each value. These are basically + example entries that have the respective bucket value. ''')) +class HistogramAggregation(BucketAggregation): + interval: pydantic.conint(gt=0) # type: ignore + + +class MinMaxAggregation(AggregationBase): + pass + + +class Aggregation(BaseModel): + terms: Optional[TermsAggregation] + histogram: Optional[HistogramAggregation] + date_histogram: Optional[HistogramAggregation] + min_max: Optional[MinMaxAggregation] + + class WithQueryAndPagination(WithQuery): pagination: Optional[MetadataPagination] = Body( None, @@ -769,41 +771,74 @@ class Metadata(WithQueryAndPagination): example={ 'include': ['entry_id', 'mainfile', 'upload_id', 'authors', 'upload_time'] }) - statistics: Optional[Dict[str, Statistic]] = Body( - {}, - description=strip(''' - This allows to define additional statistics that should be returned. - Statistics aggregate entries that show the same quantity values for a given quantity. - A simple example is the number of entries for each `dft.code_name`. These statistics - will be computed only over the query results. This allows to get an overview about - query results. - '''), - example={ - 'by_code_name': { - 'metrics': ['uploads', 'datasets'], - 'quantity': 'dft.code_name' - } - }) aggregations: Optional[Dict[str, Aggregation]] = Body( {}, example={ - 'uploads': { - 'quantity': 'upload_id', - 'pagination': { - 'page_size': 10, - 'order_by': 'upload_time' + 'all_codes': { + 'terms': { + 'quantity': 'results.method.simulation.program_name', + 'entries': { + 'size': 1, + 'required': { + 'include': ['mainfile'] + } + } }, - 'entries': { - 'size': 1, - 'required': { - 'include': ['mainfile'] + }, + 'all_datasets': { + 'terms': { + 'quantity': 'datasets', + 'pagination': { + 'page_size': 100, + 'page_after_value': '<the next_pager_after_value from the last request>' } } } }, description=strip(''' - Defines additional aggregations to return. An aggregation list entries - for the values of a quantity, e.g. to get all uploads and their entries. + Defines additional aggregations to return. There are different types of + aggregations. + + A `terms` aggregation allows to get the values of a quantity that occur in + the search query result data. For each value, a bucket is created with + information about how many entries have the value (or additional metrics). + For example to get all entries that use a certain code, you can use: + ```json + { + "aggregations": { + "all_codes": { + "terms": { + "quantity": "results.method.simulation.program_name" + } + } + } + } + ``` + + Terms aggregations can also be used to paginate though all values of a certain + quantities. Each page will be companied with a `page_after_value` that + can be used to retrieve the next value. 
For example to go through all datasets + available in the search query: + ```json + { + "aggregations": { + "all_datasets": { + "terms": { + "quantity": "datasets", + "pagination": { + "page_size": 100, + "page_after_value": "<the next_pager_after_value from the last request>" + } + } + } + } + } + ``` + + Other aggregation types are `histogram` and `minmax` (comming soon). + + Multiple aggregations can be used by using different user defined names + (`all_codes`, `all_datasets`). ''')) @@ -860,32 +895,44 @@ files_parameters = parameter_dependency_from_model( 'files_parameters', Files) -class StatisticResponse(Statistic): - data: Dict[str, Dict[str, int]] = Field( - None, description=strip(''' - The returned statistics data as dictionary. The key is a string representation of the values. - The concrete type depends on the quantity that was used to create the statistics. - Each dictionary value is a dictionary itself. The keys are the metric names the - values the metric values. The key `entries` that gives the amount of entries with - this value is always returned.''')) - - -class AggregationDataItem(BaseModel): - data: Optional[List[Dict[str, Any]]] = Field( +class Bucket(BaseModel): + entries: Optional[List[Dict[str, Any]]] = Field( None, description=strip('''The entries that were requested for each value.''')) - size: int = Field( + count: int = Field( None, description=strip('''The amount of entries with this value.''')) + metrics: Optional[Dict[str, int]] + +class HistogramBucket(Bucket): + value: float + start: float + end: float -class AggregationResponse(Aggregation): - pagination: PaginationResponse # type: ignore - data: Dict[str, AggregationDataItem] = Field( + +class TermsBucket(Bucket): + value: str + + +class BucketAggregationResponse(BaseModel): + data: List[TermsBucket] = Field( None, description=strip(''' The aggregation data as a dictionary. The key is a string representation of the values. 
The dictionary values contain the aggregated data depending if `entries` where requested.''')) +class TermsAggregationResponse(TermsAggregation, BucketAggregationResponse): + pagination: Optional[PaginationResponse] # type: ignore + + +class HistogramAggregationResponse(HistogramAggregation, BucketAggregationResponse): + pass + + +class MixMaxAggregationResponse(MinMaxAggregation): + data: List[float] + + class CodeResponse(BaseModel): curl: str requests: str @@ -894,8 +941,7 @@ class CodeResponse(BaseModel): class MetadataResponse(Metadata): pagination: PaginationResponse = None # type: ignore - statistics: Optional[Dict[str, StatisticResponse]] # type: ignore - aggregations: Optional[Dict[str, AggregationResponse]] # type: ignore + aggregations: Optional[Dict[str, Union[TermsAggregationResponse]]] # type: ignore data: List[Dict[str, Any]] = Field( None, description=strip(''' diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py index da47c0d734..cd372d5be1 100644 --- a/nomad/app/v1/routers/entries.py +++ b/nomad/app/v1/routers/entries.py @@ -318,7 +318,6 @@ async def post_entries_metadata_query( query=data.query, pagination=data.pagination, required=data.required, - statistics=data.statistics, aggregations=data.aggregations, user_id=user.user_id if user is not None else None) diff --git a/nomad/app/v1/routers/materials.py b/nomad/app/v1/routers/materials.py index 6b9c2c2522..7f5effef47 100644 --- a/nomad/app/v1/routers/materials.py +++ b/nomad/app/v1/routers/materials.py @@ -111,7 +111,6 @@ async def post_entries_metadata_query( query=data.query, pagination=data.pagination, required=data.required, - statistics=data.statistics, aggregations=data.aggregations, user_id=user.user_id if user is not None else None) diff --git a/nomad/datamodel/results.py b/nomad/datamodel/results.py index 66d3afdd7d..556b640d8e 100644 --- a/nomad/datamodel/results.py +++ b/nomad/datamodel/results.py @@ -530,7 +530,7 @@ class Material(MSection): description=""" Classification based on the functional properties. """, - a_elasticsearch=Elasticsearch(material_type, statistics_size=20) + a_elasticsearch=Elasticsearch(material_type, default_aggregation_size=20) ) type_compound = Quantity( type=str, @@ -538,7 +538,7 @@ class Material(MSection): description=""" Classification based on the chemical formula. """, - a_elasticsearch=Elasticsearch(material_type, statistics_size=20) + a_elasticsearch=Elasticsearch(material_type, default_aggregation_size=20) ) elements = Quantity( type=MEnum(chemical_symbols), @@ -647,7 +647,7 @@ class DFT(MSection): type=MEnum(list(xc_treatments.values()) + [unavailable, not_processed]), default=not_processed, description="The libXC based xc functional classification used in the simulation.", - a_elasticsearch=Elasticsearch(material_entry_type, statistics_size=100) + a_elasticsearch=Elasticsearch(material_entry_type, default_aggregation_size=100) ) xc_functional_names = Quantity( type=str, diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index c333fac316..7f2dd48b84 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -496,9 +496,9 @@ class Elasticsearch(DefinitionAnnotation): will only return values that exist in the search results. This allows to create 0 statistic values and return consistent statistics. If the underlying quantity is an Enum, the values are determined automatically. - statistics_size: - The maximum number of values in a statistic. 
Default is 10 or the length of - values. + default_aggregation_size: + The of values to return by default if this quantity is used in aggregation. + If no value is given and there are not fixed value, 10 will be used. metrics: If the quantity is used as a metric for aggregating statistics, this has to be used to define a valid elasticsearch metrics aggregations, e.g. @@ -533,7 +533,7 @@ class Elasticsearch(DefinitionAnnotation): field: str = None, es_field: str = None, value: Callable[[MSection], Any] = None, index: bool = True, - values: List[str] = None, statistics_size: int = None, + values: List[str] = None, default_aggregation_size: int = None, metrics: Dict[str, str] = None, many_all: bool = False, auto_include_subsections: bool = False, @@ -552,15 +552,15 @@ class Elasticsearch(DefinitionAnnotation): self._mapping: Dict[str, Any] = None self.values = values - self.statistics_size = statistics_size + self.default_aggregation_size = default_aggregation_size self.metrics = metrics self.many_all = many_all self.auto_include_subsections = auto_include_subsections self.nested = nested - if self.statistics_size is None: - self.statistics_size = len(self.values) if values is not None else 10 + if self.values is not None: + self.default_aggregation_size = len(self.values) @property def mapping(self) -> Dict[str, Any]: diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 0dc0626bc1..9d59174bb0 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -402,5 +402,5 @@ datamodel.DFTMetadata.code_name.a_search.statistic_values = code_names + [ config.services.unavailable_value, config.services.not_processed_value] results.Simulation.program_name.a_elasticsearch.values = code_names + [ config.services.unavailable_value, config.services.not_processed_value] -results.Simulation.program_name.a_elasticsearch.statistics_size = len( +results.Simulation.program_name.a_elasticsearch.default_aggregation_size = len( results.Simulation.program_name.a_elasticsearch.values) diff --git a/nomad/search/v1.py b/nomad/search/v1.py index 25061d888b..4f97983d5c 100644 --- a/nomad/search/v1.py +++ b/nomad/search/v1.py @@ -28,8 +28,8 @@ from nomad.metainfo.elasticsearch_extension import ( from nomad.app.v1 import models as api_models from nomad.app.v1.models import ( Pagination, PaginationResponse, Query, MetadataRequired, MetadataResponse, Aggregation, - Statistic, StatisticResponse, AggregationOrderType, AggregationResponse, AggregationDataItem, - Value) + Value, AggregationBase, TermsAggregation, BucketAggregation, + TermsAggregationResponse, TermsBucket) from .common import SearchError, _es_to_entry_dict, _owner_es_query @@ -216,158 +216,133 @@ def validate_pagination(pagination: Pagination, doc_type: DocumentType, loc: Lis return order_quantity, page_after_value -def _api_to_es_statistic( - es_search: Search, name: str, statistic: Statistic, doc_type: DocumentType) -> A: +def _api_to_es_aggregation( + es_search: Search, name: str, agg: AggregationBase, doc_type: DocumentType) -> A: ''' - Creates an ES aggregation based on the API's statistic model. + Creates an ES aggregation based on the API's aggregation model. 
''' - quantity = validate_quantity(statistic.quantity, loc=['statistic', 'quantity'], doc_type=doc_type) - - if not quantity.aggregateable: - raise QueryValidationError( - 'the statistic quantity cannot be aggregated', - loc=['statistic', 'quantity']) - - if statistic.size is None: - statistic.size = quantity.statistics_size - - if quantity.values is not None: - statistic.size = len(quantity.values) - - terms_kwargs = {} - if statistic.value_filter is not None: - terms_kwargs['include'] = '.*%s.*' % statistic.value_filter - - aggs = es_search.aggs + quantity = validate_quantity(agg.quantity, doc_type=doc_type, loc=['aggregation', 'quantity']) + es_aggs = es_search.aggs for nested_key in doc_type.nested_object_keys: - if statistic.quantity.startswith(nested_key): - aggs = es_search.aggs.bucket('nested_statistic:%s' % name, 'nested', path=nested_key) - - order_type = '_count' if statistic.order.type_ == AggregationOrderType.entries else '_key' - statistic_agg = aggs.bucket('statistic:%s' % name, A( - 'terms', - field=quantity.search_field, - size=statistic.size, - order={order_type: statistic.order.direction.value}, - **terms_kwargs)) - - for metric_name in statistic.metrics: - metrics = doc_type.metrics - if nested_key == 'entries': - metrics = material_entry_type.metrics - if metric_name not in metrics: - raise QueryValidationError( - 'metric must be the qualified name of a suitable search quantity', - loc=['statistic', 'metrics']) - metric_aggregation, metric_quantity = metrics[metric_name] - statistic_agg.metric('metric:%s' % metric_name, A( - metric_aggregation, - field=metric_quantity.qualified_field)) + if agg.quantity.startswith(nested_key): + es_aggs = es_search.aggs.bucket('nested_agg:%s' % name, 'nested', path=nested_key) + # check if quantity and agg type are compatible + if isinstance(agg, TermsAggregation): + if not quantity.aggregateable: + raise QueryValidationError( + 'the aggregation quantity cannot be terms aggregated', + loc=['aggregation', name, 'terms', 'quantity']) + else: + raise NotImplementedError() + + es_agg = None + if isinstance(agg, TermsAggregation): + if agg.pagination is not None: + if agg.size is not None: + raise QueryValidationError( + f'You cannot paginate and provide an extra size parameter.', + loc=['aggregations', name, 'terms', 'pagination']) + + order_quantity, page_after_value = validate_pagination( + agg.pagination, doc_type=doc_type, loc=['aggregation']) + + # We are using elastic searchs 'composite aggregations' here. We do not really + # compose aggregations, but only those pseudo composites allow us to use the + # 'after' feature that allows to scan through all aggregation values. + terms = A('terms', field=quantity.search_field, order=agg.pagination.order.value) + + if order_quantity is None: + composite = { + 'sources': { + name: terms + }, + 'size': agg.pagination.page_size + } -def _es_to_api_statistics( - es_response, name: str, statistic: Statistic, doc_type: DocumentType) -> StatisticResponse: - ''' - Creates a StatisticResponse from elasticsearch response on a request executed with - the given statistics. 
- ''' - quantity = validate_quantity(statistic.quantity, doc_type=doc_type) + else: + sort_terms = A( + 'terms', + field=order_quantity.search_field, + order=agg.pagination.order.value) + + composite = { + 'sources': [ + {order_quantity.search_field: sort_terms}, + {quantity.search_field: terms} + ], + 'size': agg.pagination.page_size + } + + if page_after_value is not None: + if order_quantity is None: + composite['after'] = {name: page_after_value} + else: + try: + order_value, quantity_value = page_after_value.split(':') + composite['after'] = {quantity.search_field: quantity_value, order_quantity.search_field: order_value} + except Exception: + raise QueryValidationError( + f'The pager_after_value has not the right format.', + loc=['aggregations', name, 'terms', 'pagination', 'page_after_value']) + + es_agg = es_aggs.bucket('agg:%s' % name, 'composite', **composite) + + # additional cardinality to get total + es_aggs.metric('agg:%s:total' % name, 'cardinality', field=quantity.search_field) + else: + if agg.size is None: + if quantity.default_aggregation_size is not None: + agg.size = quantity.default_aggregation_size - es_aggs = es_response.aggs - for nested_key in doc_type.nested_object_keys: - if statistic.quantity.startswith(nested_key): - es_aggs = es_response.aggs[f'nested_statistic:{name}'] + elif quantity.values is not None: + agg.size = len(quantity.values) - es_statistic = es_aggs['statistic:' + name] - statistic_data = {} - for bucket in es_statistic.buckets: - value_data = dict(entries=bucket.doc_count) - for metric in statistic.metrics: - value_data[metric] = bucket['metric:' + metric].value - statistic_data[bucket.key] = value_data + else: + agg.size = 10 - if quantity.values is not None: - for value in quantity.values: - if value not in statistic_data: - statistic_data[value] = dict(entries=0, **{ - metric: 0 for metric in statistic.metrics}) + terms_kwargs = {} + if agg.value_filter is not None: + terms_kwargs['include'] = '.*%s.*' % agg.value_filter - return StatisticResponse(data=statistic_data, **statistic.dict(by_alias=True)) + terms = A('terms', field=quantity.search_field, size=agg.size, **terms_kwargs) + es_agg = es_aggs.bucket('agg:%s' % name, terms) + if agg.entries is not None and agg.entries.size > 0: + kwargs: Dict[str, Any] = {} + if agg.entries.required is not None: + if agg.entries.required.include is not None: + kwargs.update(_source=dict(includes=agg.entries.required.include)) + else: + kwargs.update(_source=dict(excludes=agg.entries.required.exclude)) -def _api_to_es_aggregation( - es_search: Search, name: str, agg: Aggregation, doc_type: DocumentType) -> A: - ''' - Creates an ES aggregation based on the API's aggregation model. - ''' - order_quantity, page_after_value = validate_pagination( - agg.pagination, doc_type=doc_type, loc=['aggration']) + es_agg.metric('entries', A('top_hits', size=agg.entries.size, **kwargs)) - quantity = validate_quantity(agg.quantity, doc_type=doc_type, loc=['aggregation', 'quantity']) - if not quantity.aggregateable: - raise QueryValidationError( - 'the aggregation quantity cannot be aggregated', - - loc=['aggregation', 'quantity']) - - terms = A('terms', field=quantity.search_field, order=agg.pagination.order.value) - - # We are using elastic searchs 'composite aggregations' here. We do not really - # compose aggregations, but only those pseudo composites allow us to use the - # 'after' feature that allows to scan through all aggregation values. 
- if order_quantity is None: - composite = { - 'sources': { - name: terms - }, - 'size': agg.pagination.page_size - } else: - sort_terms = A('terms', field=order_quantity.search_field, order=agg.pagination.order.value) - composite = { - 'sources': [ - {order_quantity.search_field: sort_terms}, - {quantity.search_field: terms} - ], - 'size': agg.pagination.page_size - } - - if page_after_value is not None: - if order_quantity is None: - composite['after'] = {name: page_after_value} - else: - order_value, quantity_value = page_after_value.split(':') - composite['after'] = {quantity.search_field: quantity_value, order_quantity.search_field: order_value} - - aggs = es_search.aggs - for nested_key in doc_type.nested_object_keys: - if agg.quantity.startswith(nested_key): - aggs = es_search.aggs.bucket('nested_agg:%s' % name, 'nested', path=nested_key) - - composite_agg = aggs.bucket('agg:%s' % name, 'composite', **composite) - - if agg.entries is not None and agg.entries.size > 0: - kwargs: Dict[str, Any] = {} - if agg.entries.required is not None: - if agg.entries.required.include is not None: - kwargs.update(_source=dict(includes=agg.entries.required.include)) - else: - kwargs.update(_source=dict(excludes=agg.entries.required.exclude)) - - composite_agg.metric('entries', A('top_hits', size=agg.entries.size, **kwargs)) - - # additional cardinality to get total - aggs.metric('agg:%s:total' % name, 'cardinality', field=quantity.search_field) + raise NotImplementedError() + + if isinstance(agg, BucketAggregation): + for metric_name in agg.metrics: + metrics = doc_type.metrics + if nested_key == 'entries': + metrics = material_entry_type.metrics + if metric_name not in metrics: + raise QueryValidationError( + 'metric must be the qualified name of a suitable search quantity', + loc=['statistic', 'metrics']) + metric_aggregation, metric_quantity = metrics[metric_name] + es_agg.metric('metric:%s' % metric_name, A( + metric_aggregation, + field=metric_quantity.qualified_field)) def _es_to_api_aggregation( - es_response, name: str, agg: Aggregation, doc_type: DocumentType) -> AggregationResponse: + es_response, name: str, agg: AggregationBase, doc_type: DocumentType): ''' Creates a AggregationResponse from elasticsearch response on a request executed with the given aggregation. 
''' - order_by = agg.pagination.order_by quantity = validate_quantity(agg.quantity, doc_type=doc_type) nested = False @@ -379,38 +354,72 @@ def _es_to_api_aggregation( es_agg = es_aggs['agg:' + name] - def get_entries(agg): - if 'entries' in agg: - if nested: - return [{nested_key: item['_source']} for item in agg.entries.hits.hits] + if isinstance(agg, TermsAggregation): + values = set() + + def get_bucket(es_bucket): + if agg.pagination is None: + value = es_bucket['key'] + elif agg.pagination.order_by is None: + value = es_bucket.key[name] else: - return [item['_source'] for item in agg.entries.hits.hits] + value = es_bucket.key[quantity.search_field] + + count = es_bucket.doc_count + metrics = {} + for metric in agg.metrics: + metrics[metric] = es_bucket['metric:' + metric].value + + entries = None + if 'entries' in es_bucket: + if nested: + entries = [{nested_key: item['_source']} for item in es_bucket.entries.hits.hits] + else: + entries = [item['_source'] for item in es_bucket.entries.hits.hits] + + values.add(value) + return TermsBucket(value=value, entries=entries, count=count, metrics=metrics) + + data = [get_bucket(es_bucket) for es_bucket in es_agg.buckets] + aggregation_dict = agg.dict(by_alias=True) + + if agg.pagination is None: + # fill "empty" values + if quantity.values is not None: + for value in quantity.values: + if value not in values: + data.append(TermsBucket( + value=value, count=0, + metrics={metric: 0 for metric in agg.metrics})) + else: - return None + total = es_aggs['agg:%s:total' % name]['value'] + pagination = PaginationResponse(total=total, **aggregation_dict['pagination']) + if pagination.page_after_value is not None and pagination.page_after_value.endswith(':'): + pagination.page_after_value = pagination.page_after_value[0:-1] + + if 'after_key' in es_agg: + after_key = es_agg['after_key'] + if pagination.order_by is None: + pagination.next_page_after_value = after_key[name] + else: + str_values = [str(v) for v in after_key.to_dict().values()] + pagination.next_page_after_value = ':'.join(str_values) + else: + pagination.next_page_after_value = None + + aggregation_dict['pagination'] = pagination - if agg.pagination.order_by is None: - agg_data = { - bucket.key[name]: AggregationDataItem(size=bucket.doc_count, data=get_entries(bucket)) - for bucket in es_agg.buckets} + return TermsAggregationResponse(data=data, **aggregation_dict) else: - agg_data = { - bucket.key[quantity.search_field]: AggregationDataItem(size=bucket.doc_count, data=get_entries(bucket)) - for bucket in es_agg.buckets} - - aggregation_dict = agg.dict(by_alias=True) - pagination = PaginationResponse( - total=es_aggs['agg:%s:total' % name]['value'], - **aggregation_dict.pop('pagination')) - - if 'after_key' in es_agg: - after_key = es_agg['after_key'] - if order_by is None: - pagination.next_page_after_value = after_key[name] - else: - str_values = [str(v) for v in after_key.to_dict().values()] - pagination.next_page_after_value = ':'.join(str_values) + raise NotImplementedError() - return AggregationResponse(data=agg_data, pagination=pagination, **aggregation_dict) + +def _specific_agg(agg: Aggregation) -> Union[TermsAggregation]: + if agg.terms is not None: + return agg.terms + + raise NotImplementedError() def search( @@ -419,7 +428,6 @@ def search( pagination: Pagination = None, required: MetadataRequired = None, aggregations: Dict[str, Aggregation] = {}, - statistics: Dict[str, Statistic] = {}, user_id: str = None, index: Index = entry_index) -> MetadataResponse: @@ -489,13 +497,9 @@ 
def search( search = search.source(includes=required.include, excludes=required.exclude) - # statistics - for name, statistic in statistics.items(): - _api_to_es_statistic(search, name, statistic, doc_type=doc_type) - # aggregations for name, agg in aggregations.items(): - _api_to_es_aggregation(search, name, agg, doc_type=doc_type) + _api_to_es_aggregation(search, name, _specific_agg(agg), doc_type=doc_type) # execute try: @@ -521,17 +525,11 @@ def search( next_page_after_value=next_page_after_value, **pagination.dict()) - # statistics - if len(statistics) > 0: - more_response_data['statistics'] = cast(Dict[str, Any], { - name: _es_to_api_statistics(es_response, name, statistic, doc_type=doc_type) - for name, statistic in statistics.items()}) - # aggregations if len(aggregations) > 0: more_response_data['aggregations'] = cast(Dict[str, Any], { - name: _es_to_api_aggregation(es_response, name, aggregation, doc_type=doc_type) - for name, aggregation in aggregations.items()}) + name: _es_to_api_aggregation(es_response, name, _specific_agg(agg), doc_type=doc_type) + for name, agg in aggregations.items()}) more_response_data['es_query'] = es_query.to_dict() diff --git a/tests/app/v1/routers/common.py b/tests/app/v1/routers/common.py index dd32eee2e8..765fba5d3f 100644 --- a/tests/app/v1/routers/common.py +++ b/tests/app/v1/routers/common.py @@ -22,7 +22,6 @@ import re from devtools import debug from urllib.parse import urlencode -from nomad.metainfo.elasticsearch_extension import DocumentType from nomad.datamodel import results from tests.utils import assert_at_least, assert_url_query_args @@ -153,37 +152,100 @@ def pagination_test_parameters(elements: str, n_elements: str, crystal_system: s ] -def aggregation_test_parameters(material_prefix: str, entry_prefix: str): - return [ - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}uploader.user_id'}}, 3, 3, 200, id='order-str'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}upload_time'}}, 3, 3, 200, id='order-date'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}results.properties.n_calculations'}}, 3, 3, 200, id='order-int'), - pytest.param({'quantity': f'{material_prefix}symmetry.structure_name'}, 0, 0, 200, id='no-results'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'page_after_value': 'id_published'}}, 3, 1, 200, id='after'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}uploader.name', 'page_after_value': 'Sheldon Cooper:id_published'}}, 3, 1, 200, id='after-order'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 10}}, 3, 3, 200, id='entries'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 1}}, 3, 3, 200, id='entries-size'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 0}}, -1, -1, 422, id='bad-entries'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 10, 'required': {'include': [f'{entry_prefix}entry_id', f'{entry_prefix}uploader.*']}}}, 3, 3, 200, id='entries-include') - ] - - -def statistic_test_parameters(entity_id: str, entry_prefix: str, total: int): - n_code_names = results.Simulation.program_name.a_elasticsearch.statistics_size +def aggregation_test_parameters(entity_id: str, material_prefix: str, entry_prefix: str, total: int): + n_code_names = 
results.Simulation.program_name.a_elasticsearch.default_aggregation_size program_name = f'{entry_prefix}results.method.simulation.program_name' return [ - pytest.param({'quantity': program_name}, n_code_names, 200, None, id='fixed-values'), - pytest.param({'quantity': program_name, 'metrics': ['uploads']}, n_code_names, 200, None, id='metrics'), - pytest.param({'quantity': program_name, 'metrics': ['does not exist']}, -1, 422, None, id='bad-metric'), - pytest.param({'quantity': entity_id, 'size': 1000}, total, 200, None, id='size-to-large'), - pytest.param({'quantity': entity_id, 'size': 5}, 5, 200, None, id='size'), - pytest.param({'quantity': entity_id, 'size': -1}, -1, 422, None, id='bad-size-1'), - pytest.param({'quantity': entity_id, 'size': 0}, -1, 422, None, id='bad-size-2'), - pytest.param({'quantity': entity_id}, 10 if total > 10 else total, 200, None, id='size-default'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'order': {'type': 'values'}}, 3, 200, 'test_user', id='order-type'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'order': {'direction': 'asc'}}, 3, 200, 'test_user', id='order-direction'), - pytest.param({'quantity': 'does not exist'}, -1, 422, None, id='bad-quantity') + pytest.param( + {'quantity': f'{entry_prefix}upload_id'}, + 3, 3, 200, 'test_user', id='default'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'order_by': f'{entry_prefix}uploader.user_id'} + }, + 3, 3, 200, 'test_user', id='order-str'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'order_by': f'{entry_prefix}upload_time'} + }, + 3, 3, 200, 'test_user', id='order-date'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'order_by': f'{entry_prefix}results.properties.n_calculations'} + }, + 3, 3, 200, 'test_user', id='order-int'), + pytest.param( + {'quantity': f'{material_prefix}symmetry.structure_name'}, + 0, 0, 200, 'test_user', id='no-results'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'page_after_value': 'id_published'} + }, + 3, 1, 200, 'test_user', id='after'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': { + 'order_by': f'{entry_prefix}uploader.name', + 'page_after_value': 'Sheldon Cooper:id_published' + } + }, + 3, 1, 200, 'test_user', id='after-order'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 10}}, + 3, 3, 200, 'test_user', id='entries'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 1}}, + 3, 3, 200, 'test_user', id='entries-size'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 0}}, + -1, -1, 422, 'test_user', id='bad-entries'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'entries': { + 'size': 10, + 'required': { + 'include': [f'{entry_prefix}entry_id', f'{entry_prefix}uploader.*'] + } + } + }, + 3, 3, 200, 'test_user', id='entries-include'), + pytest.param( + {'quantity': program_name}, + n_code_names, n_code_names, 200, None, id='fixed-values'), + pytest.param( + {'quantity': program_name, 'metrics': ['uploads']}, + n_code_names, n_code_names, 200, None, id='metrics'), + pytest.param( + {'quantity': program_name, 'metrics': ['does not exist']}, + -1, -1, 422, None, id='bad-metric'), + pytest.param( + {'quantity': entity_id, 'size': 1000}, + total, total, 200, None, id='size-to-large'), + pytest.param( + {'quantity': entity_id, 'size': 5}, + total, 5, 200, None, id='size'), + 
pytest.param( + {'quantity': entity_id, 'size': -1}, + -1, -1, 422, None, id='bad-size-1'), + pytest.param( + {'quantity': entity_id, 'size': 0}, + -1, -1, 422, None, id='bad-size-2'), + pytest.param( + {'quantity': entity_id}, + total, 10 if total > 10 else total, 200, None, id='size-default'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'pagination': {'order': 'asc'}}, + 3, 3, 200, 'test_user', id='order-direction'), + pytest.param( + {'quantity': 'does not exist'}, + -1, -1, 422, None, id='bad-quantity') ] @@ -244,46 +306,6 @@ def assert_metadata_response(response, status_code=None): return response_json -def assert_statistic(response_json, name, statistic, doc_type: DocumentType, size=-1): - assert 'statistics' in response_json - assert name in response_json['statistics'] - statistic_response = response_json['statistics'][name] - for key in ['data', 'size', 'order', 'quantity']: - assert key in statistic_response - - assert_at_least(statistic, statistic_response) - - default_size = doc_type.quantities[statistic['quantity']].statistics_size - assert statistic.get('size', default_size) >= len(statistic_response['data']) - - if size != -1: - assert len(statistic_response['data']) == size - - values = list(statistic_response['data'].keys()) - for index, value in enumerate(values): - data = statistic_response['data'][value] - assert 'entries' in data - for metric in statistic.get('metrics', []): - assert metric in data - - if index < len(values) - 1: - - def order_value(value, data): - if statistic_response['order']['type'] == 'entries': - return data['entries'] - else: - return value - - if statistic_response['order']['direction'] == 'asc': - assert order_value(value, data) <= order_value(values[index + 1], statistic_response['data'][values[index + 1]]) - else: - assert order_value(value, data) >= order_value(values[index + 1], statistic_response['data'][values[index + 1]]) - - if 'order' in statistic: - assert statistic_response['order']['type'] == statistic['order'].get('type', 'entries') - assert statistic_response['order']['direction'] == statistic['order'].get('direction', 'desc') - - def assert_required(data, required, default_key: str): # We flat out all keys in data and then make sure that the full qualified keys in the # data are consistent with the keys given in the required include and exclude. 
@@ -323,44 +345,39 @@ def assert_required(data, required, default_key: str): assert found_exclude is None, f'{exclude} excluded but found {found_exclude}' -def assert_aggregations(response_json, name, agg, total: int, size: int, default_key: str): +def assert_aggregations( + response_json, name, agg, + total: int = -1, size: int = -1, default_key: str = None): assert 'aggregations' in response_json assert name in response_json['aggregations'] agg_response = response_json['aggregations'][name] - for key in ['data', 'pagination', 'quantity']: + for key in ['data', 'quantity']: assert key in agg_response assert_at_least(agg, agg_response) - n_data = len(agg_response['data']) - assert agg.get('pagination', {}).get('page_size', 10) >= n_data - assert agg_response['pagination']['total'] >= n_data - for item in agg_response['data'].values(): - for key in ['size']: - assert key in item - assert item['size'] > 0 - if size >= 0: - assert n_data == size - if total >= 0: - assert agg_response['pagination']['total'] == total - - if 'entries' in agg: - agg_data = [item['data'][0] for item in agg_response['data'].values()] - else: - agg_data = [{agg['quantity']: value} for value in agg_response['data']] + buckets = agg_response['data'] + n_data = len(buckets) if 'pagination' in agg: - assert_pagination(agg['pagination'], agg_response['pagination'], agg_data, is_get=False) + assert agg_response['pagination']['total'] >= n_data + if size >= 0: + assert n_data == size + if total >= 0: + assert agg_response['pagination']['total'] == total + + assert_pagination(agg.get('pagination', {}), agg_response['pagination'], buckets, is_get=False) else: - assert_pagination({}, agg_response['pagination'], agg_data, order_by=agg['quantity'], is_get=False) + assert total == -1 or total >= n_data + assert size == -1 or size == n_data if 'entries' in agg: - for item in agg_response['data'].values(): - assert 'data' in item - assert agg['entries'].get(size, 10) >= len(item['data']) > 0 + for bucket in agg_response['data']: + assert 'entries' in bucket + assert agg['entries'].get('size', 10) >= len(bucket['entries']) > 0 if 'required' in agg['entries']: - for entry in item['data']: + for entry in bucket['entries']: assert_required(entry, agg['entries']['required'], default_key=default_key) diff --git a/tests/app/v1/routers/test_entries.py b/tests/app/v1/routers/test_entries.py index 85fd89577c..0f4ae6de16 100644 --- a/tests/app/v1/routers/test_entries.py +++ b/tests/app/v1/routers/test_entries.py @@ -30,10 +30,10 @@ from tests.test_files import example_mainfile_contents, append_raw_files # pyli from .common import ( assert_response, assert_base_metadata_response, assert_metadata_response, - assert_statistic, assert_required, assert_aggregations, assert_pagination, + assert_required, assert_aggregations, assert_pagination, perform_metadata_test, post_query_test_parameters, get_query_test_parameters, perform_owner_test, owner_test_parameters, pagination_test_parameters, - aggregation_test_parameters, statistic_test_parameters) + aggregation_test_parameters) from ..conftest import example_data as data # pylint: disable=unused-import ''' @@ -308,59 +308,50 @@ def assert_archive(archive, required=None): assert key in archive -n_code_names = results.Simulation.program_name.a_elasticsearch.statistics_size +n_code_names = results.Simulation.program_name.a_elasticsearch.default_aggregation_size program_name = 'results.method.simulation.program_name' -@pytest.mark.parametrize( - 'statistic, size, status_code, user', - 
statistic_test_parameters(entity_id='entry_id', entry_prefix='', total=23) + [ - pytest.param({'quantity': 'entry_id', 'value_filter': '_0'}, 9, 200, None, id='filter'), - pytest.param({'quantity': 'entry_id', 'value_filter': '.*_0.*'}, -1, 422, None, id='bad-filter')]) -def test_entries_statistics(client, data, test_user_auth, statistic, size, status_code, user): - statistics = {'test_statistic': statistic} - headers = {} - if user == 'test_user': - headers = test_user_auth - - response_json = perform_entries_metadata_test( - client, headers=headers, owner='visible', statistics=statistics, - status_code=status_code, http_method='post') - - if response_json is None: - return - - assert_statistic(response_json, 'test_statistic', statistic, size=size, doc_type=entry_type) - - -# TODO is this really the desired behavior -def test_entries_statistics_ignore_size(client, data): - statistic = {'quantity': program_name, 'size': 10} - statistics = {'test_statistic': statistic} - response_json = perform_entries_metadata_test( - client, statistics=statistics, status_code=200, http_method='post') - statistic.update(size=n_code_names) - assert_statistic(response_json, 'test_statistic', statistic, size=n_code_names, doc_type=entry_type) - - def test_entries_all_statistics(client, data): - statistics = { - quantity: {'quantity': quantity, 'metrics': [metric for metric in entry_type.metrics]} + aggregations = { + quantity: { + 'terms': { + 'quantity': quantity, 'metrics': [metric for metric in entry_type.metrics] + } + } for quantity in entry_type.quantities if entry_type.quantities[quantity].aggregateable} response_json = perform_entries_metadata_test( - client, statistics=statistics, status_code=200, http_method='post') - for name, statistic in statistics.items(): - assert_statistic(response_json, name, statistic, doc_type=entry_type) + client, aggregations=aggregations, status_code=200, http_method='post') + for name, agg in aggregations.items(): + assert_aggregations(response_json, name, agg['terms']) @pytest.mark.parametrize( - 'aggregation, total, size, status_code', - aggregation_test_parameters(material_prefix='results.material.', entry_prefix='') + [ - pytest.param({'quantity': 'upload_id', 'entries': {'size': 10, 'required': {'exclude': ['files', 'mainfile']}}}, 3, 3, 200, id='entries-exclude') + 'aggregation, total, size, status_code, user', + aggregation_test_parameters(entity_id='entry_id', material_prefix='results.material.', entry_prefix='', total=23) + [ + pytest.param( + { + 'quantity': 'upload_id', + 'entries': { + 'size': 10, + 'required': {'exclude': ['files', 'mainfile']} + } + }, + 3, 3, 200, 'test_user', id='entries-exclude'), + pytest.param( + {'quantity': 'entry_id', 'value_filter': '_0'}, + 9, 9, 200, None, id='filter'), + pytest.param( + {'quantity': 'entry_id', 'value_filter': '.*_0.*'}, + -1, -1, 422, None, id='bad-filter') ]) -def test_entries_aggregations(client, data, test_user_auth, aggregation, total, size, status_code): - headers = test_user_auth - aggregations = {'test_agg_name': aggregation} +def test_entries_aggregations(client, data, test_user_auth, aggregation, total, size, status_code, user): + headers = {} + if user == 'test_user': + headers = test_user_auth + + aggregations = {'test_agg_name': {'terms': aggregation}} + response_json = perform_entries_metadata_test( client, headers=headers, owner='visible', aggregations=aggregations, pagination=dict(page_size=0), @@ -369,7 +360,9 @@ def test_entries_aggregations(client, data, test_user_auth, aggregation, total, if 
response_json is None: return - assert_aggregations(response_json, 'test_agg_name', aggregation, total=total, size=size, default_key='entry_id') + assert_aggregations( + response_json, 'test_agg_name', aggregation, total=total, size=size, + default_key='entry_id') @pytest.mark.parametrize('required, status_code', [ diff --git a/tests/app/v1/routers/test_materials.py b/tests/app/v1/routers/test_materials.py index c64544ba4b..4f2374401c 100644 --- a/tests/app/v1/routers/test_materials.py +++ b/tests/app/v1/routers/test_materials.py @@ -19,16 +19,15 @@ import pytest from urllib.parse import urlencode -from nomad.metainfo.elasticsearch_extension import material_entry_type, material_type +from nomad.metainfo.elasticsearch_extension import material_entry_type from tests.test_files import example_mainfile_contents # pylint: disable=unused-import from .common import ( assert_pagination, assert_metadata_response, assert_required, assert_aggregations, - assert_statistic, perform_metadata_test, perform_owner_test, owner_test_parameters, post_query_test_parameters, get_query_test_parameters, pagination_test_parameters, - aggregation_test_parameters, statistic_test_parameters) + aggregation_test_parameters) from ..conftest import example_data as data # pylint: disable=unused-import ''' @@ -50,32 +49,17 @@ def perform_materials_metadata_test(*args, **kwargs): program_name = 'entries.results.method.simulation.program_name' -# # TODO is this really the desired behavior -# def test_entries_statistics_ignore_size(client, data): -# statistic = {'quantity': program_name, 'size': 10} -# statistics = {'test_statistic': statistic} -# response_json = perform_materials_metadata_test( -# client, statistics=statistics, status_code=200, http_method='post') -# statistic.update(size=n_code_names) -# assert_statistic(response_json, 'test_statistic', statistic, size=n_code_names) - - -# def test_entries_all_statistics(client, data): -# statistics = { -# quantity: {'quantity': quantity, 'metrics': [metric for metric in entry_type.metrics]} -# for quantity in entry_type.quantities if entry_type.quantities[quantity].aggregateable} -# response_json = perform_materials_metadata_test( -# client, statistics=statistics, status_code=200, http_method='post') -# for name, statistic in statistics.items(): -# assert_statistic(response_json, name, statistic) +@pytest.mark.parametrize( + 'aggregation, total, size, status_code, user', + aggregation_test_parameters( + entity_id='material_id', material_prefix='', entry_prefix='entries.', total=6)) +def test_materials_aggregations(client, data, test_user_auth, aggregation, total, size, status_code, user): + headers = {} + if user == 'test_user': + headers = test_user_auth + aggregations = {'test_agg_name': {'terms': aggregation}} -@pytest.mark.parametrize( - 'aggregation, total, size, status_code', - aggregation_test_parameters(material_prefix='', entry_prefix='entries.')) -def test_materials_aggregations(client, data, test_user_auth, aggregation, total, size, status_code): - headers = test_user_auth - aggregations = {'test_agg_name': aggregation} response_json = perform_materials_metadata_test( client, headers=headers, owner='visible', aggregations=aggregations, pagination=dict(page_size=0), @@ -85,30 +69,8 @@ def test_materials_aggregations(client, data, test_user_auth, aggregation, total return assert_aggregations( - response_json, 'test_agg_name', aggregation, total=total, size=size, default_key='material_id') - - -@pytest.mark.parametrize( - 'statistic, size, status_code, user', - 
statistic_test_parameters(entity_id='material_id', entry_prefix='entries.', total=6)) -def test_materials_statistics(client, data, test_user_auth, statistic, size, status_code, user): - statistics = {'test_statistic': statistic} - headers = {} - if user == 'test_user': - headers = test_user_auth - - response_json = perform_materials_metadata_test( - client, headers=headers, owner='visible', statistics=statistics, - pagination=dict(page_size=0), status_code=status_code, http_method='post') - - if response_json is None: - return - - if statistic['quantity'].startswith('entries.'): - doc_type = material_entry_type - else: - doc_type = material_type - assert_statistic(response_json, 'test_statistic', statistic, size=size, doc_type=doc_type) + response_json, 'test_agg_name', aggregation, total=total, size=size, + default_key='material_id') @pytest.mark.parametrize('required, status_code', [ diff --git a/tests/utils.py b/tests/utils.py index 0bd1aeeaee..30d5bbacbe 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -236,13 +236,19 @@ class ExampleData: if entry_metadata is None: entry_metadata = entry_archive.m_create(EntryMetadata) + upload_time = None + if upload_id in self.uploads: + upload_time = self.uploads[upload_id].get('upload_time') + if upload_time is None: + upload_time = self._next_time_stamp() + entry_metadata.m_update( calc_id=entry_id, upload_id=upload_id, mainfile=mainfile, calc_hash='dummy_hash_' + entry_id, domain='dft', - upload_time=self._next_time_stamp(), + upload_time=upload_time, published=True, processed=True, with_embargo=False, -- GitLab
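
The following is a minimal sketch, not part of the patch, of how the consolidated aggregation model can be driven from Python. It assumes a configured NOMAD installation with an indexed entry store; the function and model names (`search`, `Aggregation`, `TermsAggregation`, `AggregatedEntities`) are the ones introduced above, everything else is illustrative.

```python
# Illustrative sketch only: exercises the consolidated aggregation model from this patch.
# Assumes a configured NOMAD installation with an Elasticsearch entry index.
from nomad.app.v1.models import Aggregation, TermsAggregation, AggregatedEntities
from nomad.search.v1 import search

# One terms aggregation, addressed by a user defined name ('all_codes'), as in the
# API examples documented in the new Metadata.aggregations field.
response = search(
    aggregations={
        'all_codes': Aggregation(terms=TermsAggregation(
            quantity='results.method.simulation.program_name',
            metrics=['uploads'],
            entries=AggregatedEntities(size=1)))
    })

# Each bucket carries the value, the entry count, the requested metrics, and
# (because of `entries`) one example entry per value.
for bucket in response.aggregations['all_codes'].data:
    print(bucket.value, bucket.count, bucket.metrics)
```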
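A second sketch, also illustrative rather than part of the patch, pages through all values of a terms aggregation over HTTP, following the `page_after_value` flow described in the `Metadata.aggregations` documentation. The base URL and the `/entries/query` path are assumptions about the deployment.

```python
# Illustrative sketch only: page through all values of a terms aggregation over HTTP.
# The request/response shapes follow the models in this patch; host and path are placeholders.
import requests

base_url = 'http://localhost:8000/api/v1'  # placeholder
page_after_value = None
while True:
    pagination = {'page_size': 100}
    if page_after_value is not None:
        pagination['page_after_value'] = page_after_value

    rsp = requests.post(f'{base_url}/entries/query', json={
        'owner': 'public',
        'pagination': {'page_size': 0},  # no entry data needed, only the aggregation
        'aggregations': {
            'all_datasets': {'terms': {'quantity': 'datasets', 'pagination': pagination}}
        }
    }).json()

    agg = rsp['aggregations']['all_datasets']
    for bucket in agg['data']:
        print(bucket['value'], bucket['count'])

    # The response pagination carries the cursor for the next page, if any.
    page_after_value = agg['pagination'].get('next_page_after_value')
    if page_after_value is None:
        break
```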