Commit 0c5f2446 authored by Markus Scheidgen

Merge branch 'refactor-pagination' into 'v0.10.1'

Refactored pagination models

See merge request !292
parents f1d6df72 0b74c8b8
Pipeline #97685 passed with stages in 27 minutes
......@@ -31,7 +31,7 @@ from nomad.utils import strip
from nomad.metainfo import Datetime, MEnum
from nomad.metainfo.search_extension import metrics, search_quantities, search_sub_sections
from .utils import parameter_dependency_from_model, update_url_query_arguments
User = datamodel.User.m_def.a_pydantic.model
......@@ -438,34 +438,198 @@ metadata_required_parameters = parameter_dependency_from_model(
class Pagination(BaseModel):
''' Defines the order, size, and page of results. '''
page_size: Optional[int] = Field(
10, description=strip('''
The page size, e.g. the maximum number of items contained in one response.
A `page_size` of 0 will return no results.
'''))
order_by: Optional[str] = Field(
None, # type: ignore
description=strip('''
The results are ordered by the values of this field. If omitted, default
ordering is applied.
'''))
order: Optional[Direction] = Field(
Direction.asc, description=strip('''
The ordering direction of the results, based on `order_by`. It is either
ascending (`asc`) or descending (`desc`). Default is `asc`.
'''))
page_after_value: Optional[str] = Field(
None, description=strip('''
This attribute defines the position after which the page begins, and is used
to navigate through the total list of results.
When requesting the first page, no value should be provided for
`page_after_value`. Each response will contain a value `next_page_after_value`,
which can be used to obtain the next page (by setting `page_after_value` in
your next request to this value).
The field is encoded as a string, and the format of `page_after_value` and
`next_page_after_value` depends on which API method is used.
Some API functions additionally allow simplified navigation by specifying
the page number in the key `page`. It is, however, always possible to use
`page_after_value` and `next_page_after_value` to iterate through the results.
'''))
page: Optional[int] = Field(
None, description=strip('''
The number of the page (1-based). When provided in a request, this attribute
can be used instead of `page_after_value` to jump to a particular results page.
**NOTE #1**: the option to request pages by submitting the `page` number is
limited. There are API calls where this attribute cannot be used for indexing,
or where it can only be used partially. **If you just want to iterate through
all the results, always use `page_after_value` and `next_page_after_value`!**
**NOTE #2**: In a request, you should never specify *both* `page` and
`page_after_value` (at most one index can be provided).
'''))
@validator('page_size')
def validate_page_size(cls, page_size): # pylint: disable=no-self-argument
assert page_size >= 0, 'page_size must be >= 0'
return page_size
@validator('order_by')
def validate_order_by(cls, order_by): # pylint: disable=no-self-argument
'''
Override this in your Pagination class to ensure that a valid attribute is selected.
This method has to be implemented!
'''
raise NotImplementedError('Validation of `order_by` not implemented!')
@validator('page_after_value')
def validate_page_after_value(cls, page_after_value, values): # pylint: disable=no-self-argument
'''
Override this in your Pagination class to implement validation of the
`page_after_value` value.
This method has to be implemented!
'''
raise NotImplementedError('Validation of `page_after_value` not implemented!')
@validator('page')
def validate_page(cls, page, values): # pylint: disable=no-self-argument
if page is not None:
assert page >= 1, 'page must be >= 1'
return page
@root_validator(skip_on_failure=True)
def validate_values(cls, values): # pylint: disable=no-self-argument
page = values.get('page')
page_after_value = values.get('page_after_value')
page_size = values.get('page_size')
assert page is None or page_after_value is None, 'Cannot specify both `page` and `page_after_value`'
if page_size == 0:
assert page is None, 'Cannot specify `page` when `page_size` is set to 0'
assert page_after_value is None, 'Cannot specify `page_after_value` when `page_size` is set to 0'
return values
def get_simple_index(self):
'''
If simple, index-based pagination is used, this method can be used to get the
corresponding index (0-based). It looks at either `page` or `page_after_value`.
If neither index is provided, we return 0 (i.e. the first index).
'''
if self.page is None and self.page_after_value is None:
return 0
if self.page is not None:
return (self.page - 1) * self.page_size
rv = int(self.page_after_value) + 1
assert rv >= 0
return rv
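As a usage illustration of the cursor fields described above, here is a minimal client-side sketch (not part of this merge request) that iterates through all results. The endpoint URL is illustrative; any GET endpoint that returns this pagination model behaves the same way.

import requests

def iterate_results(url='http://localhost/api/v1/entries', page_size=100):
    # request pages until the response no longer contains a next_page_after_value
    page_after_value = None
    while True:
        params = {'page_size': page_size}
        if page_after_value is not None:
            params['page_after_value'] = page_after_value
        body = requests.get(url, params=params).json()
        yield from body.get('data', [])
        page_after_value = body['pagination'].get('next_page_after_value')
        if page_after_value is None:
            break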
class PaginationResponse(Pagination):
total: int = Field(
..., description=strip('''
The total number of results that fit the given query. This is independent of
any pagination and aggregations.
'''))
next_page_after_value: Optional[str] = Field(
None, description=strip('''
The *next* value to be used as `page_after_value` in a follow-up request, to get
the next page of results. If no more results are available, `next_page_after_value`
will not be set.
'''))
page_url: Optional[str] = Field(
None, description=strip('''
The url of the current page. Only applicable for GET requests.
'''))
next_page_url: Optional[str] = Field(
None, description=strip('''
The url to get the next page. Only applicable for GET requests.
'''))
prev_page_url: Optional[str] = Field(
None, description=strip('''
The url to get the previous page. **NOTE:** Only applicable for some API methods
(namely, where indexing by `page` is possible), and only for GET requests.
'''))
first_page_url: Optional[str] = Field(
None, description=strip('''
The url to get the first page. Only applicable for GET requests.
'''))
@validator('order_by')
def validate_order_by(cls, order_by): # pylint: disable=no-self-argument
# No validation - behaviour of this field depends on api method
return order_by
@validator('page_after_value')
def validate_page_after_value(cls, page_after_value, values): # pylint: disable=no-self-argument
# No validation - behaviour of this field depends on api method
return page_after_value
@root_validator(skip_on_failure=True)
def validate_values(cls, values): # pylint: disable=no-self-argument
# No validation
return values
def populate_urls(self, request: Request):
'''
Populates the urls (`page_url`, `next_page_url`, `prev_page_url`, `first_page_url`)
from the request and `next_page_after_value`. Only applicable for GET requests.
'''
assert request.method.upper() == 'GET', 'Trying to populate urls, but method is not GET.'
original_url = str(request.url)
self.page_url = original_url
if self.page_size:
self.first_page_url = update_url_query_arguments(
original_url, page=None, page_after_value=None)
if self.next_page_after_value:
self.next_page_url = update_url_query_arguments(
original_url, page=None, page_after_value=self.next_page_after_value)
if self.page and self.page > 1:
self.prev_page_url = update_url_query_arguments(
original_url, page=self.page - 1, page_after_value=None)
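The helper `update_url_query_arguments` is imported from `.utils` and not shown in this diff. Based on how it is called above, a plausible sketch could look like the following; it assumes that keyword arguments set to None remove the corresponding query parameter, while all other values overwrite it. The real helper may differ.

from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

def update_url_query_arguments_sketch(original_url: str, **kwargs) -> str:
    # rewrite the query string of original_url: None removes a key, other values overwrite it
    scheme, netloc, path, query, fragment = urlsplit(original_url)
    args = parse_qs(query)
    for key, value in kwargs.items():
        if value is None:
            args.pop(key, None)
        else:
            args[key] = [str(value)]
    return urlunsplit((scheme, netloc, path, urlencode(args, doseq=True), fragment))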
def populate_simple_index_and_urls(self, request: Request):
'''
If simple, index-based pagination is used, this method can be used to populate
the `page`, `page_after_value` and urls (if it is a GET request) automatically.
Assumes that the field `total` is populated.
'''
if not self.page_size:
self.page = 1
self.page_after_value = None
self.next_page_after_value = None
else:
ind = self.get_simple_index()
self.page = ind // self.page_size + 1
self.page_after_value = None if self.page == 1 else str(ind - 1)
if self.page_size * self.page >= self.total:
self.next_page_after_value = None
else:
self.next_page_after_value = str(ind + self.page_size - 1)
if request.method.upper() == 'GET':
self.populate_urls(request)
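A worked example of the index arithmetic above (editor's sketch): with `total=35`, `page_size=10` and an incoming `page_after_value='9'`:

page_size, total = 10, 35
ind = int('9') + 1                                 # get_simple_index() -> 10
page = ind // page_size + 1                        # -> 2
page_after_value = str(ind - 1)                    # -> '9' (page > 1)
next_page_after_value = str(ind + page_size - 1)   # -> '19', since page_size * page < total
assert (page, page_after_value, next_page_after_value) == (2, '9', '19')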
class EntryBasedPagination(Pagination):
order_by: Optional[str] = Field(
calc_id, # type: ignore
description=strip('''
The results are ordered by the values of this field. If omitted, default
ordering is applied.
'''))
@validator('order_by')
......@@ -478,30 +642,52 @@ class Pagination(BaseModel):
assert quantity.definition.is_scalar, 'the order_by quantity must be a scalar'
return order_by
@validator('page_after_value')
def validate_page_after_value(cls, page_after_value, values): # pylint: disable=no-self-argument
order_by = values.get('order_by', calc_id)
if page_after_value is not None and order_by is not None and order_by != calc_id:
if ':' not in page_after_value:
page_after_value = '%s:' % page_after_value
return page_after_value
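For non-unique `order_by` quantities the cursor carries the entry id as a *tie breaker* after a `:` separator. A standalone reproduction of the normalization above (the `_normalize_cursor` helper is hypothetical, for illustration only):

def _normalize_cursor(page_after_value, order_by, calc_id='calc_id'):
    # mirrors the validator above: append ':' when the tie-breaker slot is missing
    if page_after_value is not None and order_by is not None and order_by != calc_id:
        if ':' not in page_after_value:
            page_after_value = '%s:' % page_after_value
    return page_after_value

assert _normalize_cursor('2021-03-01', order_by='upload_time') == '2021-03-01:'
assert _normalize_cursor('2021-03-01:abc', order_by='upload_time') == '2021-03-01:abc'
assert _normalize_cursor('abc', order_by='calc_id') == 'abc'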
pagination_parameters = parameter_dependency_from_model(
'pagination_parameters', Pagination)
class EntryPagination(EntryBasedPagination):
page: Optional[int] = Field(
None, description=strip('''
For simple, index-based pagination, this should contain the number of the
requested page (1-based). When provided in a request, this attribute can be
used instead of `page_after_value` to jump to a particular results page.
However, you can only retrieve up to the 10,000th entry with a page number.
Only one of `page_after_value` and `page` can be provided.
@validator('page')
def validate_page(cls, page, values): # pylint: disable=no-self-argument
if page is not None:
assert page > 0, 'page must be >= 1'
assert page * values.get('page_size', 10) < 10000, 'Pagination by `page` is limited to 10,000 entries.'
return page
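A small sketch (not part of the diff) of the 10,000-entry limit enforced above:

from pydantic import ValidationError

EntryPagination(page=5, page_size=10)         # fine: reaches at most the 50th entry
try:
    EntryPagination(page=1001, page_size=10)  # would reach past the 10,000th entry
except ValidationError as error:
    print(error)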
entry_pagination_parameters = parameter_dependency_from_model(
'entry_pagination_parameters', EntryPagination)
class AggregationPagination(EntryBasedPagination):
order_by: Optional[str] = Field(
None, # type: ignore
description=strip('''
The results are ordered by the values of this field. If omitted, default
ordering is applied.
'''))
@validator('page')
def validate_page(cls, page, values): # pylint: disable=no-self-argument
assert page is None, 'Pagination by `page` is not possible for aggregations, use `page_after_value`'
return page
class AggregatedEntities(BaseModel):
size: Optional[pydantic.conint(gt=0)] = Field( # type: ignore
......@@ -590,10 +776,10 @@ class Statistic(BaseModel):
class WithQueryAndPagination(WithQuery):
pagination: Optional[EntryPagination] = Body(
None,
example={
'page_size': 5,
'order_by': 'upload_time'
})
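For illustration, the example body above could be posted to the metadata query endpoint like this (URL and query values are illustrative):

import requests

response = requests.post(
    'http://localhost/api/v1/entries/query',
    json={
        'query': {'dft.code_name': 'VASP'},
        'pagination': {'page_size': 5, 'order_by': 'upload_time'}})
print(response.json()['pagination'])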
......@@ -625,7 +811,7 @@ class EntriesMetadata(WithQueryAndPagination):
'uploads': {
'quantity': 'upload_id',
'pagination': {
'page_size': 10,
'order_by': 'upload_time'
},
'entries': {
......@@ -773,7 +959,7 @@ class EntriesArchiveDownload(WithQuery):
class EntriesRaw(WithQuery):
pagination: Optional[EntryPagination] = Body(None)
class EntriesRawDownload(WithQuery):
......@@ -784,17 +970,6 @@ class EntriesRawDownload(WithQuery):
})
class StatisticResponse(Statistic):
data: Dict[str, Dict[str, int]] = Field(
None, description=strip('''
......@@ -828,7 +1003,7 @@ class CodeResponse(BaseModel):
class EntriesMetadataResponse(EntriesMetadata):
pagination: PaginationResponse # type: ignore
statistics: Optional[Dict[str, StatisticResponse]] # type: ignore
aggregations: Optional[Dict[str, AggregationResponse]] # type: ignore
data: List[Dict[str, Any]] = Field(
......@@ -851,7 +1026,7 @@ class EntryRaw(BaseModel):
class EntriesRawResponse(EntriesRaw):
pagination: PaginationResponse = Field(None) # type: ignore
data: List[EntryRaw] = Field(None)
......@@ -875,7 +1050,7 @@ class EntryArchive(BaseModel):
class EntriesArchiveResponse(EntriesArchive):
pagination: PaginationResponse = Field(None) # type: ignore
data: List[EntryArchive] = Field(None)
......
......@@ -16,9 +16,11 @@
# limitations under the License.
#
import re
from typing import cast, Optional, List
from fastapi import (
APIRouter, Request, Depends, Query as FastApiQuery, Path, HTTPException, status)
from pydantic import BaseModel, Field, validator
from datetime import datetime
import enum
......@@ -30,10 +32,10 @@ from nomad.doi import DOI
from .auth import get_required_user
from .entries import _do_exaustive_search
from ..utils import create_responses, parameter_dependency_from_model
from ..models import (
Pagination, PaginationResponse, Query, HTTPExceptionModel, User,
Direction, Owner, Any_)
router = APIRouter()
......@@ -77,6 +79,25 @@ _dataset_is_fixed_response = status.HTTP_400_BAD_REQUEST, {
Dataset = datamodel.Dataset.m_def.a_pydantic.model
class DatasetPagination(Pagination):
@validator('order_by')
def validate_order_by(cls, order_by): # pylint: disable=no-self-argument
# TODO: need real validation
if order_by is None:
return order_by
assert re.match('^[a-zA-Z0-9_]+$', order_by), 'order_by must be alphanumeric'
return order_by
@validator('page_after_value')
def validate_page_after_value(cls, page_after_value, values): # pylint: disable=no-self-argument
# Validation handled elsewhere
return page_after_value
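A quick sketch (not part of the diff) of the `order_by` check above; 'name' is used as an illustrative Dataset field:

from pydantic import ValidationError

DatasetPagination(order_by='name')             # accepted by the regex
try:
    DatasetPagination(order_by='name; drop')   # rejected: not alphanumeric
except ValidationError as error:
    print(error)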
dataset_pagination_parameters = parameter_dependency_from_model(
'dataset_pagination_parameters', DatasetPagination)
class DatasetsResponse(BaseModel):
pagination: PaginationResponse = Field(None)
data: List[Dataset] = Field(None) # type: ignore
......@@ -106,11 +127,12 @@ class DatasetCreate(BaseModel): # type: ignore
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def get_datasets(
request: Request,
dataset_id: str = FastApiQuery(None),
name: str = FastApiQuery(None),
user_id: str = FastApiQuery(None),
dataset_type: str = FastApiQuery(None),
pagination: DatasetPagination = Depends(dataset_pagination_parameters)):
'''
Retrieves all datasets that match the given criteria.
'''
......@@ -125,15 +147,11 @@ async def get_datasets(
mongodb_query = mongodb_query.order_by(order_by)
start = pagination.get_simple_index()
end = start + pagination.page_size
pagination_response = PaginationResponse(total=mongodb_query.count(), **pagination.dict())
pagination_response.populate_simple_index_and_urls(request)
return {
'pagination': pagination_response,
......
......@@ -17,7 +17,7 @@
#
from typing import Dict, Iterator, Any, List, Set, cast
from fastapi import APIRouter, Request, Depends, Path, status, HTTPException
from fastapi.responses import StreamingResponse
import os.path
import io
......@@ -33,9 +33,9 @@ from nomad.archive import (
from .auth import get_optional_user
from ..utils import create_streamed_zipfile, File, create_responses
from ..models import (
EntryPagination, WithQuery, MetadataRequired, EntriesMetadataResponse, EntriesMetadata,
EntryMetadataResponse, query_parameters, metadata_required_parameters, Files, Query,
entry_pagination_parameters, files_parameters, User, Owner, HTTPExceptionModel, EntriesRaw,
EntriesRawResponse, EntriesRawDownload, EntryRaw, EntryRawFile, EntryRawResponse,
EntriesArchiveDownload, EntryArchiveResponse, EntriesArchive, EntriesArchiveResponse,
ArchiveRequired)
......@@ -101,6 +101,7 @@ def perform_search(*args, **kwargs):
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def post_entries_metadata_query(
request: Request,
data: EntriesMetadata,
user: User = Depends(get_optional_user)):
......@@ -139,8 +140,9 @@ async def post_entries_metadata_query(
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def get_entries_metadata(
request: Request,
with_query: WithQuery = Depends(query_parameters),
pagination: EntryPagination = Depends(entry_pagination_parameters),
required: MetadataRequired = Depends(metadata_required_parameters),
user: User = Depends(get_optional_user)):
'''
......@@ -155,27 +157,29 @@ async def get_entries_metadata(
`gt`, `lt`, `lte`.
'''
res = perform_search(
owner=with_query.owner, query=with_query.query,
pagination=pagination, required=required,
user_id=user.user_id if user is not None else None)
res.pagination.populate_urls(request)
return res
def _do_exaustive_search(owner: Owner, query: Query, include: List[str], user: User) -> Iterator[Dict[str, Any]]:
page_after_value = None
while True:
response = perform_search(
owner=owner, query=query,
pagination=EntryPagination(page_size=100, page_after_value=page_after_value, order_by='upload_id'),
required=MetadataRequired(include=include),
user_id=user.user_id if user is not None else None)
page_after_value = response.pagination.next_page_after_value
for result in response.data:
yield result
if page_after_value is None or len(response.data) == 0:
break
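As a usage sketch (owner and query values are illustrative), the exhaustive iterator above can collect a field over all matching entries:

upload_ids = {
    result['upload_id']
    for result in _do_exaustive_search(
        owner=Owner.public, query={}, include=['upload_id'], user=None)}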
......@@ -218,7 +222,7 @@ def _create_entry_raw(entry_metadata: Dict[str, Any], uploads: _Uploads):
def _answer_entries_raw_request(
owner: Owner, query: Query, pagination: EntryPagination, user: User):
if owner == Owner.all_:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=strip('''
......@@ -256,7 +260,7 @@ def _answer_entries_raw_download_request(owner: Owner, query: Query, files: File
response = perform_search(
owner=owner, query=query,
pagination=EntryPagination(page_size=0),
required=MetadataRequired(include=[]),
user_id=user.user_id if user is not None else None)
......@@ -349,7 +353,8 @@ _entries_raw_query_docstring = strip('''
responses=create_responses(_bad_owner_response),
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def post_entries_raw_query(
request: Request, data: EntriesRaw, user: User = Depends(get_optional_user)):
return _answer_entries_raw_request(
owner=data.owner, query=data.query, pagination=data.pagination, user=user)
......@@ -365,12 +370,15 @@ async def post_entries_raw_query(data: EntriesRaw, user: User = Depends(get_opti
response_model_exclude_none=True,
responses=create_responses(_bad_owner_response))
async def get_entries_raw(
request: Request,
with_query: WithQuery = Depends(query_parameters),
pagination: EntryPagination = Depends(entry_pagination_parameters),
user: User = Depends(get_optional_user)):
res = _answer_entries_raw_request(
owner=with_query.owner, query=with_query.query, pagination=pagination, user=user)
res.pagination.populate_urls(request)
return res
_entries_raw_download_query_docstring = strip('''
......@@ -440,7 +448,7 @@ def _read_archive(entry_metadata, uploads, required):
def _answer_entries_archive_request(
owner: Owner, query: Query, pagination: EntryPagination, required: ArchiveRequired,
user: User):
if owner == Owner.all_:
......@@ -527,7 +535,7 @@ _entries_archive_docstring = strip('''
response_model_exclude_none=True,
responses=create_responses(_bad_owner_response, _bad_archive_required_response))
async def post_entries_archive_query(
request: Request, data: EntriesArchive, user: User = Depends(get_optional_user)):
return _answer_entries_archive_request(
owner=data.owner, query=data.query, pagination=data.pagination,
......@@ -544,13 +552,16 @@ async def post_entries_archive_query(
response_model_exclude_none=True,
responses=create_responses(_bad_owner_response, _bad_archive_required_response))
async def get_entries_archive_query(
request: Request,
with_query: WithQuery = Depends(query_parameters),
pagination: EntryPagination = Depends(entry_pagination_parameters),
user: User = Depends(get_optional_user)):
res = _answer_entries_archive_request(
owner=with_query.owner, query=with_query.query, pagination=pagination,
required=None, user=user)
res.pagination.populate_urls(request)
return res
def _answer_entries_archive_download_request(
......@@ -566,7 +577,7 @@ def _answer_entries_archive_download_request(
response = perform_search(
owner=owner, query=query,
pagination=EntryPagination(page_size=0),