diff --git a/nomad/app/v1/models.py b/nomad/app/v1/models.py index 49fb20a5309dcd68608c82941d004b5ee4ffbbf7..40de3ec00602bbf57ee0fbdda32aad10cc5fa0d9 100644 --- a/nomad/app/v1/models.py +++ b/nomad/app/v1/models.py @@ -41,15 +41,6 @@ Value = Union[bool, int, float, datetime.datetime, str] ComparableValue = Union[int, float, datetime.datetime, str] -class AggregationOrderType(str, enum.Enum): - ''' - Allows to order statistics or aggregations by either quantity values (`values`) or number - of entries (`entries`). - ''' - values = 'values' - entries = 'entries' - - class HTTPExceptionModel(BaseModel): detail: str @@ -665,8 +656,18 @@ class AggregationPagination(MetadataBasedPagination): order_by: Optional[str] = Field( None, # type: ignore description=strip(''' - The results are ordered by the values of this field. If omitted, default - ordering is applied. + Either the string "count", "value", or the name of a quantity. If omitted the buckets + will be ordered by the item "count". + + If you provide a quantity, all items + in a bucket must have the same value for this quantity. For example, aggregating + entries on `upload_id` and ordering with the buckets by `upload_time` is fine, + because all entries in an upload have the same `upload_time`. The API cannot + check this rule and the results will be unpredictable. + + If you want to order by the bucket values, you can either use "value" or use + the aggregation quantity to `order_by`. The result will be the same, because + the bucket values are the quantity values. ''')) @validator('page') @@ -674,11 +675,16 @@ class AggregationPagination(MetadataBasedPagination): assert page is None, 'Pagination by `page` is not possible for aggregations, use `page_after_value`' return page + @validator('page_size') + def validate_page_size(cls, page_size, values): # pylint: disable=no-self-argument + assert page_size > 0, '0 or smaller page sizes are not allowed for aggregations.' + return page_size + class AggregatedEntities(BaseModel): size: Optional[pydantic.conint(gt=0)] = Field( # type: ignore 1, description=strip(''' - The maximum number of entries that should be returned for each value in the + The number of entries that should be returned for each value in the aggregation. ''')) required: Optional[MetadataRequired] = Field( @@ -687,73 +693,69 @@ class AggregatedEntities(BaseModel): ''')) -class Aggregation(BaseModel): +class AggregationBase(BaseModel): quantity: str = Field( ..., description=strip(''' The manatory name of the quantity for the aggregation. Aggregations can only be computed for those search metadata that have discrete values; an aggregation buckets entries that have the same value for this quantity.''')) - pagination: Optional[AggregationPagination] = Field( - AggregationPagination(), description=strip(''' - Only the data few values are returned for each API call. Pagination allows to - get the next set of values based on the last value in the last call. - ''')) - entries: Optional[AggregatedEntities] = Field( - None, description=strip(''' - Optionally, a set of entries can be returned for each value. - ''')) - - -class StatisticsOrder(BaseModel): - type_: Optional[AggregationOrderType] = Field(AggregationOrderType.entries, alias='type') - direction: Optional[Direction] = Field(Direction.desc) -class Statistic(BaseModel): - quantity: str = Field( - ..., description=strip(''' - The manatory name of the quantity that the statistic is calculated for. 
Statistics
-            can only be computed for those search metadata that have discrete values; a statistics
-            aggregates a certain metric (e.g. the number of entries) over all entries were
-            this quantity has the same value (bucket aggregation, think historgam here).
-
-            There is one except and these are date/time values quantities (most notably `upload_time`).
-            Here each statistic value represents an time interval. The interval can
-            be determined via `datetime_interval`.'''))
+class BucketAggregation(AggregationBase):
     metrics: Optional[List[str]] = Field(
         [], description=strip('''
-            By default the returned statistics will provide the number of entries for each
+            By default the returned aggregations will provide the number of entries for each
             value. You can add more metrics. For each metric an additional number will be
             provided for each value. Metrics are also based on search metadata. Depending on
             the metric the number will represent either a sum (`calculations` for the number
             of individual calculation in each code run) or an amount of different values
             (i.e. `materials` for the amount of different material hashes).'''))
-    datetime_interval: Optional[pydantic.conint(gt=0)] = Field(  # type: ignore
+
+
+class TermsAggregation(BucketAggregation):
+    pagination: Optional[AggregationPagination] = Field(
         None, description=strip('''
-            While statistics in general are only possible for quantities with discrete values,
-            these is one exception. These are date/time values quantities (most notably `upload_time`).
-            Here each statistic value represents an time interval.
+            Only a few values are returned with each API call. Aggregation
+            pagination allows to retrieve all available values page by page. It also
+            allows to order the values.
 
-            A date/time interval is a number of seconds greater than 0. This will only be used for
-            date/time valued quantities (e.g. `upload_time`).
+            You can only use either pagination (to page through all available values) or
+            size (to get the `size` values with the most available data).
+        '''))
+    size: Optional[pydantic.conint(gt=0)] = Field(  # type: ignore
+        None, description=strip('''
+            The maximum number of values to return. If omitted, a default that depends
+            on the quantity (usually 10) is used.
         '''))
     value_filter: Optional[pydantic.constr(regex=r'^[a-zA-Z0-9_\-\s]+$')] = Field(  # type: ignore
         None, description=strip('''
             An optional filter for values. Only values that contain the filter as
             substring will be part of the statistics.
+
+            This is only available for non-paginated aggregations.
         '''))
-    size: Optional[pydantic.conint(gt=0)] = Field(  # type: ignore
+    entries: Optional[AggregatedEntities] = Field(
         None, description=strip('''
-            An optional maximum size of values in the statistics. The default depends on the
-            quantity.
-        '''))
-    order: Optional[StatisticsOrder] = Field(
-        StatisticsOrder(), description=strip('''
-            The values in the statistics are either ordered by the entry count or by the
-            natural ordering of the values.
+            Optionally, a set of entries can be returned for each value. These are
+            example entries that have the respective bucket value.
        '''))
+
+
+class HistogramAggregation(BucketAggregation):
+    interval: pydantic.conint(gt=0)  # type: ignore
+
+
+class MinMaxAggregation(AggregationBase):
+    pass
+
+
+class Aggregation(BaseModel):
+    terms: Optional[TermsAggregation]
+    histogram: Optional[HistogramAggregation]
+    date_histogram: Optional[HistogramAggregation]
+    min_max: Optional[MinMaxAggregation]
+
+
 class WithQueryAndPagination(WithQuery):
     pagination: Optional[MetadataPagination] = Body(
         None,
@@ -769,41 +771,74 @@ class Metadata(WithQueryAndPagination):
         example={
             'include': ['entry_id', 'mainfile', 'upload_id', 'authors', 'upload_time']
         })
-    statistics: Optional[Dict[str, Statistic]] = Body(
-        {},
-        description=strip('''
-            This allows to define additional statistics that should be returned.
-            Statistics aggregate entries that show the same quantity values for a given quantity.
-            A simple example is the number of entries for each `dft.code_name`. These statistics
-            will be computed only over the query results. This allows to get an overview about
-            query results.
-        '''),
-        example={
-            'by_code_name': {
-                'metrics': ['uploads', 'datasets'],
-                'quantity': 'dft.code_name'
-            }
-        })
     aggregations: Optional[Dict[str, Aggregation]] = Body(
         {},
         example={
-            'uploads': {
-                'quantity': 'upload_id',
-                'pagination': {
-                    'page_size': 10,
-                    'order_by': 'upload_time'
+            'all_codes': {
+                'terms': {
+                    'quantity': 'results.method.simulation.program_name',
+                    'entries': {
+                        'size': 1,
+                        'required': {
+                            'include': ['mainfile']
+                        }
+                    }
                 },
-                'entries': {
-                    'size': 1,
-                    'required': {
-                        'include': ['mainfile']
+            },
+            'all_datasets': {
+                'terms': {
+                    'quantity': 'datasets',
+                    'pagination': {
+                        'page_size': 100,
+                        'page_after_value': '<the next_page_after_value from the last request>'
                     }
                 }
             }
         },
         description=strip('''
-            Defines additional aggregations to return. An aggregation list entries
-            for the values of a quantity, e.g. to get all uploads and their entries.
+            Defines additional aggregations to return. There are different types of
+            aggregations.
+
+            A `terms` aggregation allows to get the values of a quantity that occur in
+            the search query result data. For each value, a bucket is created with
+            information about how many entries have the value (or additional metrics).
+            For example to get all entries that use a certain code, you can use:
+            ```json
+            {
+                "aggregations": {
+                    "all_codes": {
+                        "terms": {
+                            "quantity": "results.method.simulation.program_name"
+                        }
+                    }
+                }
+            }
+            ```
+
+            Terms aggregations can also be used to paginate through all values of a certain
+            quantity. Each page will be accompanied by a `page_after_value` that
+            can be used to retrieve the next page. For example to go through all datasets
+            available in the search query:
+            ```json
+            {
+                "aggregations": {
+                    "all_datasets": {
+                        "terms": {
+                            "quantity": "datasets",
+                            "pagination": {
+                                "page_size": 100,
+                                "page_after_value": "<the next_page_after_value from the last request>"
+                            }
+                        }
+                    }
+                }
+            }
+            ```
+
+            Other aggregation types are `histogram` and `min_max` (coming soon).
+
+            Multiple aggregations can be requested by using different user-defined names
+            (`all_codes`, `all_datasets`).
         '''))
@@ -860,32 +895,44 @@ files_parameters = parameter_dependency_from_model(
     'files_parameters', Files)
-class StatisticResponse(Statistic):
-    data: Dict[str, Dict[str, int]] = Field(
-        None, description=strip('''
-            The returned statistics data as dictionary. The key is a string representation of the values.
-            The concrete type depends on the quantity that was used to create the statistics.
- Each dictionary value is a dictionary itself. The keys are the metric names the - values the metric values. The key `entries` that gives the amount of entries with - this value is always returned.''')) - - -class AggregationDataItem(BaseModel): - data: Optional[List[Dict[str, Any]]] = Field( +class Bucket(BaseModel): + entries: Optional[List[Dict[str, Any]]] = Field( None, description=strip('''The entries that were requested for each value.''')) - size: int = Field( + count: int = Field( None, description=strip('''The amount of entries with this value.''')) + metrics: Optional[Dict[str, int]] + +class HistogramBucket(Bucket): + value: float + start: float + end: float -class AggregationResponse(Aggregation): - pagination: PaginationResponse # type: ignore - data: Dict[str, AggregationDataItem] = Field( + +class TermsBucket(Bucket): + value: str + + +class BucketAggregationResponse(BaseModel): + data: List[TermsBucket] = Field( None, description=strip(''' The aggregation data as a dictionary. The key is a string representation of the values. The dictionary values contain the aggregated data depending if `entries` where requested.''')) +class TermsAggregationResponse(TermsAggregation, BucketAggregationResponse): + pagination: Optional[PaginationResponse] # type: ignore + + +class HistogramAggregationResponse(HistogramAggregation, BucketAggregationResponse): + pass + + +class MixMaxAggregationResponse(MinMaxAggregation): + data: List[float] + + class CodeResponse(BaseModel): curl: str requests: str @@ -894,8 +941,7 @@ class CodeResponse(BaseModel): class MetadataResponse(Metadata): pagination: PaginationResponse = None # type: ignore - statistics: Optional[Dict[str, StatisticResponse]] # type: ignore - aggregations: Optional[Dict[str, AggregationResponse]] # type: ignore + aggregations: Optional[Dict[str, Union[TermsAggregationResponse]]] # type: ignore data: List[Dict[str, Any]] = Field( None, description=strip(''' diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py index da47c0d7341da238402ad5dc1f5e99eb13eef8c3..cd372d5be157395b0da16e65e9eff45bde6255b3 100644 --- a/nomad/app/v1/routers/entries.py +++ b/nomad/app/v1/routers/entries.py @@ -318,7 +318,6 @@ async def post_entries_metadata_query( query=data.query, pagination=data.pagination, required=data.required, - statistics=data.statistics, aggregations=data.aggregations, user_id=user.user_id if user is not None else None) diff --git a/nomad/app/v1/routers/materials.py b/nomad/app/v1/routers/materials.py index 6b9c2c252275e47bc798d373b368e34207c9f906..7f5effef47cf366204c7f9ad45346553cfd2258d 100644 --- a/nomad/app/v1/routers/materials.py +++ b/nomad/app/v1/routers/materials.py @@ -111,7 +111,6 @@ async def post_entries_metadata_query( query=data.query, pagination=data.pagination, required=data.required, - statistics=data.statistics, aggregations=data.aggregations, user_id=user.user_id if user is not None else None) diff --git a/nomad/datamodel/results.py b/nomad/datamodel/results.py index 66d3afdd7d7b45a2d17b34e2f8cf510eda0b8aa2..556b640d8e27ae1624b559121bea51a9934f6b5a 100644 --- a/nomad/datamodel/results.py +++ b/nomad/datamodel/results.py @@ -530,7 +530,7 @@ class Material(MSection): description=""" Classification based on the functional properties. 
""", - a_elasticsearch=Elasticsearch(material_type, statistics_size=20) + a_elasticsearch=Elasticsearch(material_type, default_aggregation_size=20) ) type_compound = Quantity( type=str, @@ -538,7 +538,7 @@ class Material(MSection): description=""" Classification based on the chemical formula. """, - a_elasticsearch=Elasticsearch(material_type, statistics_size=20) + a_elasticsearch=Elasticsearch(material_type, default_aggregation_size=20) ) elements = Quantity( type=MEnum(chemical_symbols), @@ -647,7 +647,7 @@ class DFT(MSection): type=MEnum(list(xc_treatments.values()) + [unavailable, not_processed]), default=not_processed, description="The libXC based xc functional classification used in the simulation.", - a_elasticsearch=Elasticsearch(material_entry_type, statistics_size=100) + a_elasticsearch=Elasticsearch(material_entry_type, default_aggregation_size=100) ) xc_functional_names = Quantity( type=str, diff --git a/nomad/metainfo/elasticsearch_extension.py b/nomad/metainfo/elasticsearch_extension.py index c333fac316c583973eec2c690d187565e9a24167..7f2dd48b84c311fc10fa4839a7bcdf7444c43a95 100644 --- a/nomad/metainfo/elasticsearch_extension.py +++ b/nomad/metainfo/elasticsearch_extension.py @@ -496,9 +496,9 @@ class Elasticsearch(DefinitionAnnotation): will only return values that exist in the search results. This allows to create 0 statistic values and return consistent statistics. If the underlying quantity is an Enum, the values are determined automatically. - statistics_size: - The maximum number of values in a statistic. Default is 10 or the length of - values. + default_aggregation_size: + The of values to return by default if this quantity is used in aggregation. + If no value is given and there are not fixed value, 10 will be used. metrics: If the quantity is used as a metric for aggregating statistics, this has to be used to define a valid elasticsearch metrics aggregations, e.g. 
@@ -533,7 +533,7 @@ class Elasticsearch(DefinitionAnnotation): field: str = None, es_field: str = None, value: Callable[[MSection], Any] = None, index: bool = True, - values: List[str] = None, statistics_size: int = None, + values: List[str] = None, default_aggregation_size: int = None, metrics: Dict[str, str] = None, many_all: bool = False, auto_include_subsections: bool = False, @@ -552,15 +552,15 @@ class Elasticsearch(DefinitionAnnotation): self._mapping: Dict[str, Any] = None self.values = values - self.statistics_size = statistics_size + self.default_aggregation_size = default_aggregation_size self.metrics = metrics self.many_all = many_all self.auto_include_subsections = auto_include_subsections self.nested = nested - if self.statistics_size is None: - self.statistics_size = len(self.values) if values is not None else 10 + if self.values is not None: + self.default_aggregation_size = len(self.values) @property def mapping(self) -> Dict[str, Any]: diff --git a/nomad/parsing/parsers.py b/nomad/parsing/parsers.py index 0dc0626bc19c73716bf1bbac270c9a009250918f..9d59174bb0e955b0976e3c7fea6f103331e2378a 100644 --- a/nomad/parsing/parsers.py +++ b/nomad/parsing/parsers.py @@ -402,5 +402,5 @@ datamodel.DFTMetadata.code_name.a_search.statistic_values = code_names + [ config.services.unavailable_value, config.services.not_processed_value] results.Simulation.program_name.a_elasticsearch.values = code_names + [ config.services.unavailable_value, config.services.not_processed_value] -results.Simulation.program_name.a_elasticsearch.statistics_size = len( +results.Simulation.program_name.a_elasticsearch.default_aggregation_size = len( results.Simulation.program_name.a_elasticsearch.values) diff --git a/nomad/search/v1.py b/nomad/search/v1.py index 25061d888be1e616d30cb9cead4bed0379c85273..4f97983d5ca4becfb9e5e33fbfc2262c2a4a3c07 100644 --- a/nomad/search/v1.py +++ b/nomad/search/v1.py @@ -28,8 +28,8 @@ from nomad.metainfo.elasticsearch_extension import ( from nomad.app.v1 import models as api_models from nomad.app.v1.models import ( Pagination, PaginationResponse, Query, MetadataRequired, MetadataResponse, Aggregation, - Statistic, StatisticResponse, AggregationOrderType, AggregationResponse, AggregationDataItem, - Value) + Value, AggregationBase, TermsAggregation, BucketAggregation, + TermsAggregationResponse, TermsBucket) from .common import SearchError, _es_to_entry_dict, _owner_es_query @@ -216,158 +216,133 @@ def validate_pagination(pagination: Pagination, doc_type: DocumentType, loc: Lis return order_quantity, page_after_value -def _api_to_es_statistic( - es_search: Search, name: str, statistic: Statistic, doc_type: DocumentType) -> A: +def _api_to_es_aggregation( + es_search: Search, name: str, agg: AggregationBase, doc_type: DocumentType) -> A: ''' - Creates an ES aggregation based on the API's statistic model. + Creates an ES aggregation based on the API's aggregation model. 
''' - quantity = validate_quantity(statistic.quantity, loc=['statistic', 'quantity'], doc_type=doc_type) - - if not quantity.aggregateable: - raise QueryValidationError( - 'the statistic quantity cannot be aggregated', - loc=['statistic', 'quantity']) - - if statistic.size is None: - statistic.size = quantity.statistics_size - - if quantity.values is not None: - statistic.size = len(quantity.values) - - terms_kwargs = {} - if statistic.value_filter is not None: - terms_kwargs['include'] = '.*%s.*' % statistic.value_filter - - aggs = es_search.aggs + quantity = validate_quantity(agg.quantity, doc_type=doc_type, loc=['aggregation', 'quantity']) + es_aggs = es_search.aggs for nested_key in doc_type.nested_object_keys: - if statistic.quantity.startswith(nested_key): - aggs = es_search.aggs.bucket('nested_statistic:%s' % name, 'nested', path=nested_key) - - order_type = '_count' if statistic.order.type_ == AggregationOrderType.entries else '_key' - statistic_agg = aggs.bucket('statistic:%s' % name, A( - 'terms', - field=quantity.search_field, - size=statistic.size, - order={order_type: statistic.order.direction.value}, - **terms_kwargs)) - - for metric_name in statistic.metrics: - metrics = doc_type.metrics - if nested_key == 'entries': - metrics = material_entry_type.metrics - if metric_name not in metrics: - raise QueryValidationError( - 'metric must be the qualified name of a suitable search quantity', - loc=['statistic', 'metrics']) - metric_aggregation, metric_quantity = metrics[metric_name] - statistic_agg.metric('metric:%s' % metric_name, A( - metric_aggregation, - field=metric_quantity.qualified_field)) + if agg.quantity.startswith(nested_key): + es_aggs = es_search.aggs.bucket('nested_agg:%s' % name, 'nested', path=nested_key) + # check if quantity and agg type are compatible + if isinstance(agg, TermsAggregation): + if not quantity.aggregateable: + raise QueryValidationError( + 'the aggregation quantity cannot be terms aggregated', + loc=['aggregation', name, 'terms', 'quantity']) + else: + raise NotImplementedError() + + es_agg = None + if isinstance(agg, TermsAggregation): + if agg.pagination is not None: + if agg.size is not None: + raise QueryValidationError( + f'You cannot paginate and provide an extra size parameter.', + loc=['aggregations', name, 'terms', 'pagination']) + + order_quantity, page_after_value = validate_pagination( + agg.pagination, doc_type=doc_type, loc=['aggregation']) + + # We are using elastic searchs 'composite aggregations' here. We do not really + # compose aggregations, but only those pseudo composites allow us to use the + # 'after' feature that allows to scan through all aggregation values. + terms = A('terms', field=quantity.search_field, order=agg.pagination.order.value) + + if order_quantity is None: + composite = { + 'sources': { + name: terms + }, + 'size': agg.pagination.page_size + } -def _es_to_api_statistics( - es_response, name: str, statistic: Statistic, doc_type: DocumentType) -> StatisticResponse: - ''' - Creates a StatisticResponse from elasticsearch response on a request executed with - the given statistics. 
- ''' - quantity = validate_quantity(statistic.quantity, doc_type=doc_type) + else: + sort_terms = A( + 'terms', + field=order_quantity.search_field, + order=agg.pagination.order.value) + + composite = { + 'sources': [ + {order_quantity.search_field: sort_terms}, + {quantity.search_field: terms} + ], + 'size': agg.pagination.page_size + } + + if page_after_value is not None: + if order_quantity is None: + composite['after'] = {name: page_after_value} + else: + try: + order_value, quantity_value = page_after_value.split(':') + composite['after'] = {quantity.search_field: quantity_value, order_quantity.search_field: order_value} + except Exception: + raise QueryValidationError( + f'The pager_after_value has not the right format.', + loc=['aggregations', name, 'terms', 'pagination', 'page_after_value']) + + es_agg = es_aggs.bucket('agg:%s' % name, 'composite', **composite) + + # additional cardinality to get total + es_aggs.metric('agg:%s:total' % name, 'cardinality', field=quantity.search_field) + else: + if agg.size is None: + if quantity.default_aggregation_size is not None: + agg.size = quantity.default_aggregation_size - es_aggs = es_response.aggs - for nested_key in doc_type.nested_object_keys: - if statistic.quantity.startswith(nested_key): - es_aggs = es_response.aggs[f'nested_statistic:{name}'] + elif quantity.values is not None: + agg.size = len(quantity.values) - es_statistic = es_aggs['statistic:' + name] - statistic_data = {} - for bucket in es_statistic.buckets: - value_data = dict(entries=bucket.doc_count) - for metric in statistic.metrics: - value_data[metric] = bucket['metric:' + metric].value - statistic_data[bucket.key] = value_data + else: + agg.size = 10 - if quantity.values is not None: - for value in quantity.values: - if value not in statistic_data: - statistic_data[value] = dict(entries=0, **{ - metric: 0 for metric in statistic.metrics}) + terms_kwargs = {} + if agg.value_filter is not None: + terms_kwargs['include'] = '.*%s.*' % agg.value_filter - return StatisticResponse(data=statistic_data, **statistic.dict(by_alias=True)) + terms = A('terms', field=quantity.search_field, size=agg.size, **terms_kwargs) + es_agg = es_aggs.bucket('agg:%s' % name, terms) + if agg.entries is not None and agg.entries.size > 0: + kwargs: Dict[str, Any] = {} + if agg.entries.required is not None: + if agg.entries.required.include is not None: + kwargs.update(_source=dict(includes=agg.entries.required.include)) + else: + kwargs.update(_source=dict(excludes=agg.entries.required.exclude)) -def _api_to_es_aggregation( - es_search: Search, name: str, agg: Aggregation, doc_type: DocumentType) -> A: - ''' - Creates an ES aggregation based on the API's aggregation model. - ''' - order_quantity, page_after_value = validate_pagination( - agg.pagination, doc_type=doc_type, loc=['aggration']) + es_agg.metric('entries', A('top_hits', size=agg.entries.size, **kwargs)) - quantity = validate_quantity(agg.quantity, doc_type=doc_type, loc=['aggregation', 'quantity']) - if not quantity.aggregateable: - raise QueryValidationError( - 'the aggregation quantity cannot be aggregated', - - loc=['aggregation', 'quantity']) - - terms = A('terms', field=quantity.search_field, order=agg.pagination.order.value) - - # We are using elastic searchs 'composite aggregations' here. We do not really - # compose aggregations, but only those pseudo composites allow us to use the - # 'after' feature that allows to scan through all aggregation values. 
- if order_quantity is None: - composite = { - 'sources': { - name: terms - }, - 'size': agg.pagination.page_size - } else: - sort_terms = A('terms', field=order_quantity.search_field, order=agg.pagination.order.value) - composite = { - 'sources': [ - {order_quantity.search_field: sort_terms}, - {quantity.search_field: terms} - ], - 'size': agg.pagination.page_size - } - - if page_after_value is not None: - if order_quantity is None: - composite['after'] = {name: page_after_value} - else: - order_value, quantity_value = page_after_value.split(':') - composite['after'] = {quantity.search_field: quantity_value, order_quantity.search_field: order_value} - - aggs = es_search.aggs - for nested_key in doc_type.nested_object_keys: - if agg.quantity.startswith(nested_key): - aggs = es_search.aggs.bucket('nested_agg:%s' % name, 'nested', path=nested_key) - - composite_agg = aggs.bucket('agg:%s' % name, 'composite', **composite) - - if agg.entries is not None and agg.entries.size > 0: - kwargs: Dict[str, Any] = {} - if agg.entries.required is not None: - if agg.entries.required.include is not None: - kwargs.update(_source=dict(includes=agg.entries.required.include)) - else: - kwargs.update(_source=dict(excludes=agg.entries.required.exclude)) - - composite_agg.metric('entries', A('top_hits', size=agg.entries.size, **kwargs)) - - # additional cardinality to get total - aggs.metric('agg:%s:total' % name, 'cardinality', field=quantity.search_field) + raise NotImplementedError() + + if isinstance(agg, BucketAggregation): + for metric_name in agg.metrics: + metrics = doc_type.metrics + if nested_key == 'entries': + metrics = material_entry_type.metrics + if metric_name not in metrics: + raise QueryValidationError( + 'metric must be the qualified name of a suitable search quantity', + loc=['statistic', 'metrics']) + metric_aggregation, metric_quantity = metrics[metric_name] + es_agg.metric('metric:%s' % metric_name, A( + metric_aggregation, + field=metric_quantity.qualified_field)) def _es_to_api_aggregation( - es_response, name: str, agg: Aggregation, doc_type: DocumentType) -> AggregationResponse: + es_response, name: str, agg: AggregationBase, doc_type: DocumentType): ''' Creates a AggregationResponse from elasticsearch response on a request executed with the given aggregation. 
''' - order_by = agg.pagination.order_by quantity = validate_quantity(agg.quantity, doc_type=doc_type) nested = False @@ -379,38 +354,72 @@ def _es_to_api_aggregation( es_agg = es_aggs['agg:' + name] - def get_entries(agg): - if 'entries' in agg: - if nested: - return [{nested_key: item['_source']} for item in agg.entries.hits.hits] + if isinstance(agg, TermsAggregation): + values = set() + + def get_bucket(es_bucket): + if agg.pagination is None: + value = es_bucket['key'] + elif agg.pagination.order_by is None: + value = es_bucket.key[name] else: - return [item['_source'] for item in agg.entries.hits.hits] + value = es_bucket.key[quantity.search_field] + + count = es_bucket.doc_count + metrics = {} + for metric in agg.metrics: + metrics[metric] = es_bucket['metric:' + metric].value + + entries = None + if 'entries' in es_bucket: + if nested: + entries = [{nested_key: item['_source']} for item in es_bucket.entries.hits.hits] + else: + entries = [item['_source'] for item in es_bucket.entries.hits.hits] + + values.add(value) + return TermsBucket(value=value, entries=entries, count=count, metrics=metrics) + + data = [get_bucket(es_bucket) for es_bucket in es_agg.buckets] + aggregation_dict = agg.dict(by_alias=True) + + if agg.pagination is None: + # fill "empty" values + if quantity.values is not None: + for value in quantity.values: + if value not in values: + data.append(TermsBucket( + value=value, count=0, + metrics={metric: 0 for metric in agg.metrics})) + else: - return None + total = es_aggs['agg:%s:total' % name]['value'] + pagination = PaginationResponse(total=total, **aggregation_dict['pagination']) + if pagination.page_after_value is not None and pagination.page_after_value.endswith(':'): + pagination.page_after_value = pagination.page_after_value[0:-1] + + if 'after_key' in es_agg: + after_key = es_agg['after_key'] + if pagination.order_by is None: + pagination.next_page_after_value = after_key[name] + else: + str_values = [str(v) for v in after_key.to_dict().values()] + pagination.next_page_after_value = ':'.join(str_values) + else: + pagination.next_page_after_value = None + + aggregation_dict['pagination'] = pagination - if agg.pagination.order_by is None: - agg_data = { - bucket.key[name]: AggregationDataItem(size=bucket.doc_count, data=get_entries(bucket)) - for bucket in es_agg.buckets} + return TermsAggregationResponse(data=data, **aggregation_dict) else: - agg_data = { - bucket.key[quantity.search_field]: AggregationDataItem(size=bucket.doc_count, data=get_entries(bucket)) - for bucket in es_agg.buckets} - - aggregation_dict = agg.dict(by_alias=True) - pagination = PaginationResponse( - total=es_aggs['agg:%s:total' % name]['value'], - **aggregation_dict.pop('pagination')) - - if 'after_key' in es_agg: - after_key = es_agg['after_key'] - if order_by is None: - pagination.next_page_after_value = after_key[name] - else: - str_values = [str(v) for v in after_key.to_dict().values()] - pagination.next_page_after_value = ':'.join(str_values) + raise NotImplementedError() - return AggregationResponse(data=agg_data, pagination=pagination, **aggregation_dict) + +def _specific_agg(agg: Aggregation) -> Union[TermsAggregation]: + if agg.terms is not None: + return agg.terms + + raise NotImplementedError() def search( @@ -419,7 +428,6 @@ def search( pagination: Pagination = None, required: MetadataRequired = None, aggregations: Dict[str, Aggregation] = {}, - statistics: Dict[str, Statistic] = {}, user_id: str = None, index: Index = entry_index) -> MetadataResponse: @@ -489,13 +497,9 @@ 
def search( search = search.source(includes=required.include, excludes=required.exclude) - # statistics - for name, statistic in statistics.items(): - _api_to_es_statistic(search, name, statistic, doc_type=doc_type) - # aggregations for name, agg in aggregations.items(): - _api_to_es_aggregation(search, name, agg, doc_type=doc_type) + _api_to_es_aggregation(search, name, _specific_agg(agg), doc_type=doc_type) # execute try: @@ -521,17 +525,11 @@ def search( next_page_after_value=next_page_after_value, **pagination.dict()) - # statistics - if len(statistics) > 0: - more_response_data['statistics'] = cast(Dict[str, Any], { - name: _es_to_api_statistics(es_response, name, statistic, doc_type=doc_type) - for name, statistic in statistics.items()}) - # aggregations if len(aggregations) > 0: more_response_data['aggregations'] = cast(Dict[str, Any], { - name: _es_to_api_aggregation(es_response, name, aggregation, doc_type=doc_type) - for name, aggregation in aggregations.items()}) + name: _es_to_api_aggregation(es_response, name, _specific_agg(agg), doc_type=doc_type) + for name, agg in aggregations.items()}) more_response_data['es_query'] = es_query.to_dict() diff --git a/tests/app/v1/routers/common.py b/tests/app/v1/routers/common.py index dd32eee2e84be90ed5dc92f0a454ce687effc2ca..765fba5d3f202e59b39bb3206c2ffd5bbb1f47b9 100644 --- a/tests/app/v1/routers/common.py +++ b/tests/app/v1/routers/common.py @@ -22,7 +22,6 @@ import re from devtools import debug from urllib.parse import urlencode -from nomad.metainfo.elasticsearch_extension import DocumentType from nomad.datamodel import results from tests.utils import assert_at_least, assert_url_query_args @@ -153,37 +152,100 @@ def pagination_test_parameters(elements: str, n_elements: str, crystal_system: s ] -def aggregation_test_parameters(material_prefix: str, entry_prefix: str): - return [ - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}uploader.user_id'}}, 3, 3, 200, id='order-str'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}upload_time'}}, 3, 3, 200, id='order-date'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}results.properties.n_calculations'}}, 3, 3, 200, id='order-int'), - pytest.param({'quantity': f'{material_prefix}symmetry.structure_name'}, 0, 0, 200, id='no-results'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'page_after_value': 'id_published'}}, 3, 1, 200, id='after'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'pagination': {'order_by': f'{entry_prefix}uploader.name', 'page_after_value': 'Sheldon Cooper:id_published'}}, 3, 1, 200, id='after-order'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 10}}, 3, 3, 200, id='entries'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 1}}, 3, 3, 200, id='entries-size'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 0}}, -1, -1, 422, id='bad-entries'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 10, 'required': {'include': [f'{entry_prefix}entry_id', f'{entry_prefix}uploader.*']}}}, 3, 3, 200, id='entries-include') - ] - - -def statistic_test_parameters(entity_id: str, entry_prefix: str, total: int): - n_code_names = results.Simulation.program_name.a_elasticsearch.statistics_size +def aggregation_test_parameters(entity_id: str, material_prefix: str, entry_prefix: str, total: 
int): + n_code_names = results.Simulation.program_name.a_elasticsearch.default_aggregation_size program_name = f'{entry_prefix}results.method.simulation.program_name' return [ - pytest.param({'quantity': program_name}, n_code_names, 200, None, id='fixed-values'), - pytest.param({'quantity': program_name, 'metrics': ['uploads']}, n_code_names, 200, None, id='metrics'), - pytest.param({'quantity': program_name, 'metrics': ['does not exist']}, -1, 422, None, id='bad-metric'), - pytest.param({'quantity': entity_id, 'size': 1000}, total, 200, None, id='size-to-large'), - pytest.param({'quantity': entity_id, 'size': 5}, 5, 200, None, id='size'), - pytest.param({'quantity': entity_id, 'size': -1}, -1, 422, None, id='bad-size-1'), - pytest.param({'quantity': entity_id, 'size': 0}, -1, 422, None, id='bad-size-2'), - pytest.param({'quantity': entity_id}, 10 if total > 10 else total, 200, None, id='size-default'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'order': {'type': 'values'}}, 3, 200, 'test_user', id='order-type'), - pytest.param({'quantity': f'{entry_prefix}upload_id', 'order': {'direction': 'asc'}}, 3, 200, 'test_user', id='order-direction'), - pytest.param({'quantity': 'does not exist'}, -1, 422, None, id='bad-quantity') + pytest.param( + {'quantity': f'{entry_prefix}upload_id'}, + 3, 3, 200, 'test_user', id='default'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'order_by': f'{entry_prefix}uploader.user_id'} + }, + 3, 3, 200, 'test_user', id='order-str'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'order_by': f'{entry_prefix}upload_time'} + }, + 3, 3, 200, 'test_user', id='order-date'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'order_by': f'{entry_prefix}results.properties.n_calculations'} + }, + 3, 3, 200, 'test_user', id='order-int'), + pytest.param( + {'quantity': f'{material_prefix}symmetry.structure_name'}, + 0, 0, 200, 'test_user', id='no-results'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': {'page_after_value': 'id_published'} + }, + 3, 1, 200, 'test_user', id='after'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'pagination': { + 'order_by': f'{entry_prefix}uploader.name', + 'page_after_value': 'Sheldon Cooper:id_published' + } + }, + 3, 1, 200, 'test_user', id='after-order'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 10}}, + 3, 3, 200, 'test_user', id='entries'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 1}}, + 3, 3, 200, 'test_user', id='entries-size'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'entries': {'size': 0}}, + -1, -1, 422, 'test_user', id='bad-entries'), + pytest.param( + { + 'quantity': f'{entry_prefix}upload_id', + 'entries': { + 'size': 10, + 'required': { + 'include': [f'{entry_prefix}entry_id', f'{entry_prefix}uploader.*'] + } + } + }, + 3, 3, 200, 'test_user', id='entries-include'), + pytest.param( + {'quantity': program_name}, + n_code_names, n_code_names, 200, None, id='fixed-values'), + pytest.param( + {'quantity': program_name, 'metrics': ['uploads']}, + n_code_names, n_code_names, 200, None, id='metrics'), + pytest.param( + {'quantity': program_name, 'metrics': ['does not exist']}, + -1, -1, 422, None, id='bad-metric'), + pytest.param( + {'quantity': entity_id, 'size': 1000}, + total, total, 200, None, id='size-to-large'), + pytest.param( + {'quantity': entity_id, 'size': 5}, + total, 5, 200, 
None, id='size'), + pytest.param( + {'quantity': entity_id, 'size': -1}, + -1, -1, 422, None, id='bad-size-1'), + pytest.param( + {'quantity': entity_id, 'size': 0}, + -1, -1, 422, None, id='bad-size-2'), + pytest.param( + {'quantity': entity_id}, + total, 10 if total > 10 else total, 200, None, id='size-default'), + pytest.param( + {'quantity': f'{entry_prefix}upload_id', 'pagination': {'order': 'asc'}}, + 3, 3, 200, 'test_user', id='order-direction'), + pytest.param( + {'quantity': 'does not exist'}, + -1, -1, 422, None, id='bad-quantity') ] @@ -244,46 +306,6 @@ def assert_metadata_response(response, status_code=None): return response_json -def assert_statistic(response_json, name, statistic, doc_type: DocumentType, size=-1): - assert 'statistics' in response_json - assert name in response_json['statistics'] - statistic_response = response_json['statistics'][name] - for key in ['data', 'size', 'order', 'quantity']: - assert key in statistic_response - - assert_at_least(statistic, statistic_response) - - default_size = doc_type.quantities[statistic['quantity']].statistics_size - assert statistic.get('size', default_size) >= len(statistic_response['data']) - - if size != -1: - assert len(statistic_response['data']) == size - - values = list(statistic_response['data'].keys()) - for index, value in enumerate(values): - data = statistic_response['data'][value] - assert 'entries' in data - for metric in statistic.get('metrics', []): - assert metric in data - - if index < len(values) - 1: - - def order_value(value, data): - if statistic_response['order']['type'] == 'entries': - return data['entries'] - else: - return value - - if statistic_response['order']['direction'] == 'asc': - assert order_value(value, data) <= order_value(values[index + 1], statistic_response['data'][values[index + 1]]) - else: - assert order_value(value, data) >= order_value(values[index + 1], statistic_response['data'][values[index + 1]]) - - if 'order' in statistic: - assert statistic_response['order']['type'] == statistic['order'].get('type', 'entries') - assert statistic_response['order']['direction'] == statistic['order'].get('direction', 'desc') - - def assert_required(data, required, default_key: str): # We flat out all keys in data and then make sure that the full qualified keys in the # data are consistent with the keys given in the required include and exclude. 
@@ -323,44 +345,39 @@ def assert_required(data, required, default_key: str): assert found_exclude is None, f'{exclude} excluded but found {found_exclude}' -def assert_aggregations(response_json, name, agg, total: int, size: int, default_key: str): +def assert_aggregations( + response_json, name, agg, + total: int = -1, size: int = -1, default_key: str = None): assert 'aggregations' in response_json assert name in response_json['aggregations'] agg_response = response_json['aggregations'][name] - for key in ['data', 'pagination', 'quantity']: + for key in ['data', 'quantity']: assert key in agg_response assert_at_least(agg, agg_response) - n_data = len(agg_response['data']) - assert agg.get('pagination', {}).get('page_size', 10) >= n_data - assert agg_response['pagination']['total'] >= n_data - for item in agg_response['data'].values(): - for key in ['size']: - assert key in item - assert item['size'] > 0 - if size >= 0: - assert n_data == size - if total >= 0: - assert agg_response['pagination']['total'] == total - - if 'entries' in agg: - agg_data = [item['data'][0] for item in agg_response['data'].values()] - else: - agg_data = [{agg['quantity']: value} for value in agg_response['data']] + buckets = agg_response['data'] + n_data = len(buckets) if 'pagination' in agg: - assert_pagination(agg['pagination'], agg_response['pagination'], agg_data, is_get=False) + assert agg_response['pagination']['total'] >= n_data + if size >= 0: + assert n_data == size + if total >= 0: + assert agg_response['pagination']['total'] == total + + assert_pagination(agg.get('pagination', {}), agg_response['pagination'], buckets, is_get=False) else: - assert_pagination({}, agg_response['pagination'], agg_data, order_by=agg['quantity'], is_get=False) + assert total == -1 or total >= n_data + assert size == -1 or size == n_data if 'entries' in agg: - for item in agg_response['data'].values(): - assert 'data' in item - assert agg['entries'].get(size, 10) >= len(item['data']) > 0 + for bucket in agg_response['data']: + assert 'entries' in bucket + assert agg['entries'].get('size', 10) >= len(bucket['entries']) > 0 if 'required' in agg['entries']: - for entry in item['data']: + for entry in bucket['entries']: assert_required(entry, agg['entries']['required'], default_key=default_key) diff --git a/tests/app/v1/routers/test_entries.py b/tests/app/v1/routers/test_entries.py index 85fd89577c0a1aee82252d395434113e51f8d399..0f4ae6de165dcf33138e7238eb4aae5ace314ed8 100644 --- a/tests/app/v1/routers/test_entries.py +++ b/tests/app/v1/routers/test_entries.py @@ -30,10 +30,10 @@ from tests.test_files import example_mainfile_contents, append_raw_files # pyli from .common import ( assert_response, assert_base_metadata_response, assert_metadata_response, - assert_statistic, assert_required, assert_aggregations, assert_pagination, + assert_required, assert_aggregations, assert_pagination, perform_metadata_test, post_query_test_parameters, get_query_test_parameters, perform_owner_test, owner_test_parameters, pagination_test_parameters, - aggregation_test_parameters, statistic_test_parameters) + aggregation_test_parameters) from ..conftest import example_data as data # pylint: disable=unused-import ''' @@ -308,59 +308,50 @@ def assert_archive(archive, required=None): assert key in archive -n_code_names = results.Simulation.program_name.a_elasticsearch.statistics_size +n_code_names = results.Simulation.program_name.a_elasticsearch.default_aggregation_size program_name = 'results.method.simulation.program_name' -@pytest.mark.parametrize( 
- 'statistic, size, status_code, user', - statistic_test_parameters(entity_id='entry_id', entry_prefix='', total=23) + [ - pytest.param({'quantity': 'entry_id', 'value_filter': '_0'}, 9, 200, None, id='filter'), - pytest.param({'quantity': 'entry_id', 'value_filter': '.*_0.*'}, -1, 422, None, id='bad-filter')]) -def test_entries_statistics(client, data, test_user_auth, statistic, size, status_code, user): - statistics = {'test_statistic': statistic} - headers = {} - if user == 'test_user': - headers = test_user_auth - - response_json = perform_entries_metadata_test( - client, headers=headers, owner='visible', statistics=statistics, - status_code=status_code, http_method='post') - - if response_json is None: - return - - assert_statistic(response_json, 'test_statistic', statistic, size=size, doc_type=entry_type) - - -# TODO is this really the desired behavior -def test_entries_statistics_ignore_size(client, data): - statistic = {'quantity': program_name, 'size': 10} - statistics = {'test_statistic': statistic} - response_json = perform_entries_metadata_test( - client, statistics=statistics, status_code=200, http_method='post') - statistic.update(size=n_code_names) - assert_statistic(response_json, 'test_statistic', statistic, size=n_code_names, doc_type=entry_type) - - def test_entries_all_statistics(client, data): - statistics = { - quantity: {'quantity': quantity, 'metrics': [metric for metric in entry_type.metrics]} + aggregations = { + quantity: { + 'terms': { + 'quantity': quantity, 'metrics': [metric for metric in entry_type.metrics] + } + } for quantity in entry_type.quantities if entry_type.quantities[quantity].aggregateable} response_json = perform_entries_metadata_test( - client, statistics=statistics, status_code=200, http_method='post') - for name, statistic in statistics.items(): - assert_statistic(response_json, name, statistic, doc_type=entry_type) + client, aggregations=aggregations, status_code=200, http_method='post') + for name, agg in aggregations.items(): + assert_aggregations(response_json, name, agg['terms']) @pytest.mark.parametrize( - 'aggregation, total, size, status_code', - aggregation_test_parameters(material_prefix='results.material.', entry_prefix='') + [ - pytest.param({'quantity': 'upload_id', 'entries': {'size': 10, 'required': {'exclude': ['files', 'mainfile']}}}, 3, 3, 200, id='entries-exclude') + 'aggregation, total, size, status_code, user', + aggregation_test_parameters(entity_id='entry_id', material_prefix='results.material.', entry_prefix='', total=23) + [ + pytest.param( + { + 'quantity': 'upload_id', + 'entries': { + 'size': 10, + 'required': {'exclude': ['files', 'mainfile']} + } + }, + 3, 3, 200, 'test_user', id='entries-exclude'), + pytest.param( + {'quantity': 'entry_id', 'value_filter': '_0'}, + 9, 9, 200, None, id='filter'), + pytest.param( + {'quantity': 'entry_id', 'value_filter': '.*_0.*'}, + -1, -1, 422, None, id='bad-filter') ]) -def test_entries_aggregations(client, data, test_user_auth, aggregation, total, size, status_code): - headers = test_user_auth - aggregations = {'test_agg_name': aggregation} +def test_entries_aggregations(client, data, test_user_auth, aggregation, total, size, status_code, user): + headers = {} + if user == 'test_user': + headers = test_user_auth + + aggregations = {'test_agg_name': {'terms': aggregation}} + response_json = perform_entries_metadata_test( client, headers=headers, owner='visible', aggregations=aggregations, pagination=dict(page_size=0), @@ -369,7 +360,9 @@ def test_entries_aggregations(client, 
data, test_user_auth, aggregation, total, if response_json is None: return - assert_aggregations(response_json, 'test_agg_name', aggregation, total=total, size=size, default_key='entry_id') + assert_aggregations( + response_json, 'test_agg_name', aggregation, total=total, size=size, + default_key='entry_id') @pytest.mark.parametrize('required, status_code', [ diff --git a/tests/app/v1/routers/test_materials.py b/tests/app/v1/routers/test_materials.py index c64544ba4b7def6098da74b944045f0164ce7d1f..4f2374401cd407c26123fcd9ccac2ad174d6b2a1 100644 --- a/tests/app/v1/routers/test_materials.py +++ b/tests/app/v1/routers/test_materials.py @@ -19,16 +19,15 @@ import pytest from urllib.parse import urlencode -from nomad.metainfo.elasticsearch_extension import material_entry_type, material_type +from nomad.metainfo.elasticsearch_extension import material_entry_type from tests.test_files import example_mainfile_contents # pylint: disable=unused-import from .common import ( assert_pagination, assert_metadata_response, assert_required, assert_aggregations, - assert_statistic, perform_metadata_test, perform_owner_test, owner_test_parameters, post_query_test_parameters, get_query_test_parameters, pagination_test_parameters, - aggregation_test_parameters, statistic_test_parameters) + aggregation_test_parameters) from ..conftest import example_data as data # pylint: disable=unused-import ''' @@ -50,32 +49,17 @@ def perform_materials_metadata_test(*args, **kwargs): program_name = 'entries.results.method.simulation.program_name' -# # TODO is this really the desired behavior -# def test_entries_statistics_ignore_size(client, data): -# statistic = {'quantity': program_name, 'size': 10} -# statistics = {'test_statistic': statistic} -# response_json = perform_materials_metadata_test( -# client, statistics=statistics, status_code=200, http_method='post') -# statistic.update(size=n_code_names) -# assert_statistic(response_json, 'test_statistic', statistic, size=n_code_names) - - -# def test_entries_all_statistics(client, data): -# statistics = { -# quantity: {'quantity': quantity, 'metrics': [metric for metric in entry_type.metrics]} -# for quantity in entry_type.quantities if entry_type.quantities[quantity].aggregateable} -# response_json = perform_materials_metadata_test( -# client, statistics=statistics, status_code=200, http_method='post') -# for name, statistic in statistics.items(): -# assert_statistic(response_json, name, statistic) +@pytest.mark.parametrize( + 'aggregation, total, size, status_code, user', + aggregation_test_parameters( + entity_id='material_id', material_prefix='', entry_prefix='entries.', total=6)) +def test_materials_aggregations(client, data, test_user_auth, aggregation, total, size, status_code, user): + headers = {} + if user == 'test_user': + headers = test_user_auth + aggregations = {'test_agg_name': {'terms': aggregation}} -@pytest.mark.parametrize( - 'aggregation, total, size, status_code', - aggregation_test_parameters(material_prefix='', entry_prefix='entries.')) -def test_materials_aggregations(client, data, test_user_auth, aggregation, total, size, status_code): - headers = test_user_auth - aggregations = {'test_agg_name': aggregation} response_json = perform_materials_metadata_test( client, headers=headers, owner='visible', aggregations=aggregations, pagination=dict(page_size=0), @@ -85,30 +69,8 @@ def test_materials_aggregations(client, data, test_user_auth, aggregation, total return assert_aggregations( - response_json, 'test_agg_name', aggregation, total=total, 
size=size, default_key='material_id') - - -@pytest.mark.parametrize( - 'statistic, size, status_code, user', - statistic_test_parameters(entity_id='material_id', entry_prefix='entries.', total=6)) -def test_materials_statistics(client, data, test_user_auth, statistic, size, status_code, user): - statistics = {'test_statistic': statistic} - headers = {} - if user == 'test_user': - headers = test_user_auth - - response_json = perform_materials_metadata_test( - client, headers=headers, owner='visible', statistics=statistics, - pagination=dict(page_size=0), status_code=status_code, http_method='post') - - if response_json is None: - return - - if statistic['quantity'].startswith('entries.'): - doc_type = material_entry_type - else: - doc_type = material_type - assert_statistic(response_json, 'test_statistic', statistic, size=size, doc_type=doc_type) + response_json, 'test_agg_name', aggregation, total=total, size=size, + default_key='material_id') @pytest.mark.parametrize('required, status_code', [ diff --git a/tests/utils.py b/tests/utils.py index 0bd1aeeaee1bd93070ff64589bccebdd775255c2..30d5bbacbe95fd8e935931163afa594276fdf009 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -236,13 +236,19 @@ class ExampleData: if entry_metadata is None: entry_metadata = entry_archive.m_create(EntryMetadata) + upload_time = None + if upload_id in self.uploads: + upload_time = self.uploads[upload_id].get('upload_time') + if upload_time is None: + upload_time = self._next_time_stamp() + entry_metadata.m_update( calc_id=entry_id, upload_id=upload_id, mainfile=mainfile, calc_hash='dummy_hash_' + entry_id, domain='dft', - upload_time=self._next_time_stamp(), + upload_time=upload_time, published=True, processed=True, with_embargo=False,
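
Reviewer note (not part of the patch): a minimal sketch of how a client could exercise the new `terms` aggregation through the entries metadata query endpoint. The base URL and the `/entries/query` path are assumptions inferred from the router module names; the request body follows the `aggregations` model added in `models.py`, and the response handling follows the new `TermsAggregationResponse`/`TermsBucket` models.

```python
# Illustrative sketch only -- not part of this patch. Assumes a NOMAD v1 API
# reachable at `base_url` and that the entries metadata query is served under
# `/entries/query` (inferred from the router module, not confirmed here).
import requests

base_url = 'http://localhost:8000/api/v1'  # hypothetical deployment

request_body = {
    'owner': 'visible',
    # page_size=0: only the aggregation buckets are of interest, no entry data
    'pagination': {'page_size': 0},
    'aggregations': {
        'all_codes': {
            'terms': {
                'quantity': 'results.method.simulation.program_name',
                # optionally return one example entry per bucket, reduced to its mainfile
                'entries': {'size': 1, 'required': {'include': ['mainfile']}}
            }
        }
    }
}

response = requests.post(f'{base_url}/entries/query', json=request_body)
response.raise_for_status()

# Based on TermsAggregationResponse/TermsBucket: each bucket carries `value`,
# `count`, optional `metrics`, and the requested example `entries`.
for bucket in response.json()['aggregations']['all_codes']['data']:
    print(bucket['value'], bucket['count'])
```

Paginating through all values would work the same way, except that the request passes `pagination` with a `page_size` and the `next_page_after_value` from the previous response instead of `size`.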