Commit f7688734 authored by Markus Scheidgen

Added dataset operations to fastapi api.

parent 5654950a
@@ -23,7 +23,7 @@ import traceback
from nomad import config, utils
from nomad.app import app as flask_app
from nomad.app_fastapi.routers import users, entries, auth
from nomad.app_fastapi.routers import users, entries, auth, datasets
logger = utils.get_logger(__name__)
@@ -144,5 +144,6 @@ async def unicorn_exception_handler(request: Request, e: Exception):
app.include_router(auth.router, prefix='/api/v1/auth')
app.include_router(users.router, prefix='/api/v1/users')
app.include_router(entries.router, prefix='/api/v1/entries')
app.include_router(datasets.router, prefix='/api/v1/datasets')
app.mount('/', WSGIMiddleware(flask_app))
@@ -33,15 +33,11 @@ from nomad.app_fastapi.utils import parameter_dependency_from_model
from nomad.metainfo.search_extension import metrics, search_quantities
class User(BaseModel):
user_id: str
email: Optional[str] = None
first_name: Optional[str] = None
last_name: Optional[str] = None
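# the User model is now generated from the metainfo section definition via the
# PydanticModel annotation, replacing the hand-written pydantic class above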
User = datamodel.User.m_def.a_pydantic.model
calc_id = 'calc_id'
Metric = enum.Enum('Metric', {name: name for name in metrics}) # type: ignore
Quantity = enum.Enum('Quantity', {name: name for name in search_quantities}) # type: ignore
AggregateableQuantity = enum.Enum('AggregateableQuantity', { # type: ignore
name: name for name in search_quantities
if search_quantities[name].aggregateable})
@@ -441,33 +437,32 @@ class Pagination(BaseModel):
size: Optional[int] = Field(
10, description=strip('''
The page size, e.g. the maximum number of entries contained in one response.
A `size` of 0 will omit any results; this is useful, when there is only
interest in other data, e.g. `aggregations` or `statistics`.
The page size, i.e. the maximum number of items contained in one response.
A `size` of 0 will omit any results.
'''))
order_by: Optional[Quantity] = Field(
Quantity.calc_id, # type: ignore
order_by: Optional[str] = Field(
calc_id, # type: ignore
description=strip('''
The search results are ordered by the values of this quantity. The response
The results are ordered by the values of this field. The response
either contains the first `size` value or the next `size` values after `after`.
'''))
order: Optional[Direction] = Field(
Direction.asc, description=strip('''
The order direction of the search results based on `order_by`. Its either
The order direction of the results based on `order_by`. It's either
ascending `asc` or descending `desc`.
'''))
after: Optional[str] = Field(
None, description=strip('''
A request for the page after this value, i.e. the next `size` values behind `after`.
This depends on the `order_by` and the potentially used aggregation.
This depends on the `order_by`.
Each response contains the `after` value for the *next* request following
the defined order.
The after value and its type depends on the `order_by` quantity and its type.
The after value will always be a string encoded value. The after value will
also contain the entry id as a *tie breaker*, if
`order_by` is not the entry's id. The *tie breaker* will be `:` separated, e.g.
`<value>:<id>`.
The after value and its type depend on the API operation and potentially on
the `order_by` field and its type.
The after value will always be a string encoded value. It might be an `order_by` value, or an index.
The after value might contain an id as a *tie breaker*, if `order_by` is not unique.
The *tie breaker* will be `:` separated, e.g. `<value>:<id>`.
'''))
@validator('order_by')
@@ -475,8 +470,8 @@ class Pagination(BaseModel):
if order_by is None:
return order_by
assert order_by.value in search_quantities, 'order_by must be a valid search quantity'
quantity = search_quantities[order_by.value]
assert order_by in search_quantities, 'order_by must be a valid search quantity'
quantity = search_quantities[order_by]
assert quantity.definition.is_scalar, 'the order_by quantity must be a scalar'
return order_by
@@ -487,8 +482,8 @@ class Pagination(BaseModel):
@validator('after')
def validate_after(cls, after, values): # pylint: disable=no-self-argument
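# if `order_by` is not the unique calc_id and `after` lacks a tie breaker,
# normalize it to the '<value>:' form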
order_by = values.get('order_by', Quantity.calc_id)
if after is not None and order_by is not None and order_by != Quantity.calc_id and ':' not in after:
order_by = values.get('order_by', calc_id)
if after is not None and order_by is not None and order_by != calc_id and ':' not in after:
after = '%s:' % after
return after
@@ -498,7 +493,7 @@ pagination_parameters = parameter_dependency_from_model(
class AggregationPagination(Pagination):
order_by: Optional[Quantity] = Field(
order_by: Optional[str] = Field(
None, description=strip('''
The search results are ordered by the values of this quantity. The response
either contains the first `size` value or the next `size` values after `after`.
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Optional, List
from fastapi import APIRouter, Depends, Query as FastApiQuery, Path, HTTPException, status
from pydantic import BaseModel, Field
from datetime import datetime
import enum
from nomad import utils, datamodel
from nomad.utils import strip, create_uuid
from nomad.datamodel import Dataset as DatasetDefinitionCls
from nomad.search import search
from nomad.doi import DOI
from nomad.app_fastapi.routers.auth import get_required_user
from nomad.app_fastapi.utils import create_responses
from nomad.app_fastapi.models import (
pagination_parameters, Pagination, PaginationResponse, Query,
HTTPExceptionModel, User, Direction, MetadataRequired)
from .entries import _do_exaustive_search
router = APIRouter()
default_tag = 'datasets'
logger = utils.get_logger(__name__)
_bad_id_response = status.HTTP_404_NOT_FOUND, {
'model': HTTPExceptionModel,
'description': strip('''
Dataset not found. The given id does not match any dataset.''')}
_bad_user_response = status.HTTP_401_UNAUTHORIZED, {
'model': HTTPExceptionModel,
'description': strip('''
The dataset can only be edited by the user who created the dataset.''')}
_bad_dataset_type_response = status.HTTP_400_BAD_REQUEST, {
'model': HTTPExceptionModel,
'description': strip('''
Only datasets with type 'foreign' can be edited. To edit 'owned' datasets,
edit the entries in the dataset.''')}
_bad_owned_dataset_response = status.HTTP_400_BAD_REQUEST, {
'model': HTTPExceptionModel,
'description': strip('''
The requested action cannot be performed for this type of dataset.
Owned datasets can only have entries that were uploaded by the user who
created the dataset.
''')}
_existing_name_response = status.HTTP_400_BAD_REQUEST, {
'model': HTTPExceptionModel,
'description': strip('''
The chosen dataset name is already taken. Datasets of the same user must have a
unique name.
''')}
_dataset_is_fixed_response = status.HTTP_400_BAD_REQUEST, {
'model': HTTPExceptionModel,
'description': strip('''
The dataset already has a DOI and cannot be changed anymore.
''')}
Dataset = datamodel.Dataset.m_def.a_pydantic.model
class DatasetsResponse(BaseModel):
pagination: PaginationResponse = Field(None)
data: List[Dataset] = Field(None) # type: ignore
class DatasetResponse(BaseModel):
dataset_id: str = Field(..., description=strip('''The unique dataset id. '''))
data: Dataset = Field(...) # type: ignore
class DatasetType(str, enum.Enum):
owned = 'owned'
foreign = 'foreign'
class DatasetCreate(BaseModel): # type: ignore
name: Optional[str] = Field(None, description='The name for the new dataset.')
dataset_type: Optional[DatasetType] = Field(None, description='The type of the new dataset: `owned` (default) or `foreign`.')
query: Optional[Query] = Field(None, description='A query; all matching public entries are added to the dataset.')
entries: Optional[List[str]] = Field(None, description='An explicit list of entry ids to add to the dataset.')
@router.get(
'/', tags=[default_tag],
summary='Get a list of datasets',
response_model=DatasetsResponse,
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def get_datasets(
name: str = FastApiQuery(None),
user_id: str = FastApiQuery(None),
dataset_type: str = FastApiQuery(None),
pagination: Pagination = Depends(pagination_parameters)):
'''
Retrieves all datasets that match the given criteria.
'''
mongodb_objects = DatasetDefinitionCls.m_def.a_mongo.objects
query_params = dict(name=name, user_id=user_id, dataset_type=dataset_type)
query_params = {k: v for k, v in query_params.items() if v is not None}
mongodb_query = mongodb_objects(**query_params)
order_by = pagination.order_by if pagination.order_by is not None else 'dataset_id'
if pagination.order == Direction.desc:
order_by = '-' + order_by
mongodb_query = mongodb_query.order_by(order_by)
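# this endpoint pages by plain index: `after` is a stringified offset into
# the ordered mongodb result and `next_after` points at the next page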
start = 0
if pagination.after is not None:
start = int(pagination.after)
end = start + pagination.size
pagination_response = PaginationResponse(
total=mongodb_query.count(),
next_after=str(end),
**pagination.dict()) # type: ignore
return {
'pagination': pagination_response,
'data': list(mongodb_query[start:end])}
@router.get(
'/{dataset_id}', tags=[default_tag],
summary='Get a single dataset',
response_model=DatasetResponse,
responses=create_responses(_bad_id_response),
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def get_dataset(
dataset_id: str = Path(..., description='The unique dataset id of the dataset to retrieve.')):
'''
Retrieves the dataset with the given id.
'''
mongodb_objects = DatasetDefinitionCls.m_def.a_mongo.objects
dataset = mongodb_objects(dataset_id=dataset_id).first()
if dataset is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail='The dataset with the given id does not exist.')
return {
'dataset_id': dataset_id,
'data': dataset}
@router.post(
'/', tags=[default_tag],
summary='Create a new dataset',
response_model=DatasetResponse,
responses=create_responses(_existing_name_response, _bad_dataset_type_response),
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def post_datasets(
create: DatasetCreate, user: User = Depends(get_required_user)):
'''
Create a new dataset.
'''
now = datetime.now()
dataset_type = create.dataset_type if create.dataset_type is not None else 'owned'
# check if name already exists
existing_dataset = DatasetDefinitionCls.m_def.a_mongo.objects(
user_id=user.user_id, name=create.name).first()
if existing_dataset is not None:
raise HTTPException(
status_code=_existing_name_response[0],
detail=_existing_name_response[1]['description'])
# create dataset
dataset = DatasetDefinitionCls(
dataset_id=create_uuid(),
name=create.name,
user_id=user.user_id,
created=now,
modified=now,
dataset_type=dataset_type)
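# only 'foreign' datasets can be created via this endpoint; 'owned' requests
# are rejected (cf. _bad_dataset_type_response above)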
if dataset_type == 'owned':
raise HTTPException(
status_code=_bad_dataset_type_response[0],
detail=_bad_dataset_type_response[1]['description'])
# resolve the requested entries: either from a query or an explicit id list
if create.query is not None:
entries = _do_exaustive_search(
owner='public', query=create.query, user=user,
include=['calc_id'])
dataset.entries = [entry['calc_id'] for entry in entries]
elif create.entries is not None:
dataset.entries = create.entries
dataset.a_mongo.create()
return {
'dataset_id': dataset.dataset_id,
'data': dataset}
@router.delete(
'/{dataset_id}', tags=[default_tag],
summary='Delete a dataset',
response_model=DatasetResponse,
responses=create_responses(_bad_id_response, _dataset_is_fixed_response, _bad_user_response, _bad_dataset_type_response),
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def delete_dataset(
dataset_id: str = Path(..., description='The unique dataset id of the dataset to delete.'),
user: User = Depends(get_required_user)):
'''
Delete a dataset.
'''
dataset = DatasetDefinitionCls.m_def.a_mongo.objects(dataset_id=dataset_id).first()
if dataset is None:
raise HTTPException(
status_code=_bad_id_response[0],
detail=_bad_id_response[1]['description'])
if dataset.doi is not None:
raise HTTPException(
status_code=_dataset_is_fixed_response[0],
detail=_dataset_is_fixed_response[1]['description'])
if dataset.user_id != user.user_id:
raise HTTPException(
status_code=_bad_user_response[0],
detail=_bad_user_response[1]['description'])
if dataset.dataset_type == 'owned':
raise HTTPException(
status_code=_bad_dataset_type_response[0],
detail=_bad_dataset_type_response[1]['description'])
dataset.delete()
return {
'dataset_id': dataset.dataset_id,
'data': dataset}
@router.post(
'/{dataset_id}/doi', tags=[default_tag],
summary='Assign a DOI to a dataset',
response_model=DatasetResponse,
responses=create_responses(_bad_id_response, _dataset_is_fixed_response, _bad_user_response),
response_model_exclude_unset=True,
response_model_exclude_none=True)
async def assign_doi(
dataset_id: str = Path(..., description='The unique dataset id of the dataset to assign a DOI to.'),
user: User = Depends(get_required_user)):
'''
Assign a DOI to a dataset.
'''
dataset = DatasetDefinitionCls.m_def.a_mongo.objects(dataset_id=dataset_id).first()
if dataset is None:
raise HTTPException(
status_code=_bad_id_response[0],
detail=_bad_id_response[1]['description'])
if dataset.doi is not None:
raise HTTPException(
status_code=_dataset_is_fixed_response[0],
detail=_dataset_is_fixed_response[1]['description'])
if dataset.user_id != user.user_id:
raise HTTPException(
status_code=_bad_user_response[0],
detail=_bad_user_response[1]['description'])
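# mint the DOI: create a draft with the DOI provider first, then promote it
# to the 'findable' state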
doi = DOI.create(title='NOMAD dataset: %s' % dataset.name, user=user)
doi.create_draft()
doi.make_findable()
dataset.doi = doi.doi
dataset.save()
return {
'dataset_id': dataset.dataset_id,
'data': dataset}
@@ -28,6 +28,7 @@ from nomad.metainfo.search_extension import Search
from nomad.metainfo.elastic_extension import ElasticDocument
from nomad.metainfo.mongoengine_extension import Mongo, MongoDocument
from nomad.datamodel.metainfo.common_dft import FastAccess
from nomad.metainfo.pydantic_extension import PydanticModel
from .dft import DFTMetadata
from .ems import EMSMetadata
@@ -91,6 +92,8 @@ class User(Author):
is_admin: Bool that indicates whether the user is an admin user
'''
m_def = metainfo.Section(a_pydantic=PydanticModel())
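# the PydanticModel annotation generates a pydantic model for this section,
# which the new fastapi routers use as their User response model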
user_id = metainfo.Quantity(
type=str,
a_search=Search())
@@ -183,8 +186,14 @@ class Dataset(metainfo.MSection):
pid: The original NOMAD CoE Repository dataset PID. Old DOIs still reference
datasets based on this id. Is not used for new datasets.
created: The date when the dataset was first created.
modified: The date when the dataset was last modified. An owned dataset can only
be extended after a DOI was assigned. A foreign dataset cannot be changed
once a DOI was assigned.
dataset_type: The type determines if a dataset is owned, i.e. was created by
the uploader/owner of the contained entries; or if a dataset is foreign,
i.e. it was created by someone not necessarily related to the entries.
'''
m_def = metainfo.Section(a_mongo=MongoDocument())
m_def = metainfo.Section(a_mongo=MongoDocument(), a_pydantic=PydanticModel())
dataset_id = metainfo.Quantity(
type=str,
@@ -208,6 +217,14 @@ class Dataset(metainfo.MSection):
type=metainfo.Datetime,
a_mongo=Mongo(index=True),
a_search=Search())
modified = metainfo.Quantity(
type=metainfo.Datetime,
a_mongo=Mongo(index=True),
a_search=Search())
dataset_type = metainfo.Quantity(
type=metainfo.MEnum('owned', 'foreign'),
a_mongo=Mongo(index=True),
a_search=Search())
class DatasetReference(metainfo.Reference):
@@ -20,7 +20,7 @@ from flask_restplus import fields
from nomad.app.common import RFC3339DateTime
from .metainfo import Section, Quantity, Datetime, Capitalized
from .metainfo import Section, Quantity, Datetime, Capitalized, MEnum
def field(quantity: Quantity):
@@ -36,6 +36,8 @@ def field(quantity: Quantity):
field = fields.Boolean
elif quantity.type == Datetime:
field = RFC3339DateTime
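# quantities with MEnum type are rendered as plain strings in the API models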
elif isinstance(quantity.type, MEnum):
field = fields.String
else:
raise NotImplementedError
@@ -35,7 +35,9 @@ sections from mongoengine. The annotation key is 'mongo'.
from typing import Any, Dict, List
from .metainfo import DefinitionAnnotation, SectionAnnotation, Annotation, MSection, Datetime, Quantity
from .metainfo import (
DefinitionAnnotation, SectionAnnotation, Annotation, MSection, Datetime, Quantity,
MEnum)
class Mongo(DefinitionAnnotation):
@@ -99,6 +101,8 @@ class MongoDocument(SectionAnnotation):
field = me.BooleanField
elif quantity.type == Datetime:
field = me.DateTimeField
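# MEnum values are persisted as plain strings in mongodb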
elif isinstance(quantity.type, MEnum):
field = me.StringField
else:
raise NotImplementedError
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
'''
Allows the creation of pydantic models from section definitions.
'''
from typing import cast
from pydantic import create_model, Field, BaseConfig
from datetime import datetime
from .metainfo import DefinitionAnnotation, Definition, Section, Quantity, Datetime, MEnum, Capitalized
class _OrmConfig(BaseConfig):
orm_mode = True
class PydanticModel(DefinitionAnnotation):
'''
This annotation class can be used to extend metainfo sections. It will create a
pydantic model from the section definition. It is a SectionAnnotation and allows
the creation of pydantic model instances from section instances.
Attributes:
model: The pydantic model that represents the section definition.
'''
def __init__(self):
self.model = None
def to_pydantic(self, section):
''' Returns the pydantic model instance for the given section. '''
return self.model.from_orm(section)
def init_annotation(self, definition: Definition):
section_definition = cast(Section, definition)
name = section_definition.name
def create_field(quantity: Quantity):
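# map metainfo quantity types to types pydantic can handle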
pydantic_type: type = None
if quantity.type == Datetime:
pydantic_type = datetime
elif isinstance(quantity.type, MEnum):
pydantic_type = str
elif quantity.type == Capitalized:
pydantic_type = str
else:
pydantic_type = quantity.type
return pydantic_type, Field(quantity.default, description=quantity.description)
fields = {
name: create_field(quantity)
for name, quantity in section_definition.all_quantities.items()
}
self.model = create_model(name, __config__=_OrmConfig, **fields)
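# A minimal usage sketch (the `Sample` section below is illustrative, not part
# of this commit): annotate a section definition with PydanticModel and the
# generated model becomes available via the annotation.
#
#     from nomad.metainfo import MSection, Section, Quantity
#
#     class Sample(MSection):
#         m_def = Section(a_pydantic=PydanticModel())
#         name = Quantity(type=str)
#
#     SampleModel = Sample.m_def.a_pydantic.model  # the generated pydantic model
#     pydantic_obj = Sample.m_def.a_pydantic.to_pydantic(Sample(name='example'))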
@@ -956,9 +956,9 @@ def _api_to_es_aggregation(es_search: Search, name: str, agg: Aggregation) -> A:
if order_by is None:
composite = dict(sources={name: terms}, size=agg.pagination.size)
else:
order_quantity = search_quantities[order_by.value]
order_quantity = search_quantities[order_by]
sort_terms = A('terms', field=order_quantity.search_field, order=agg.pagination.order.value)
composite = dict(sources=[{order_by.value: sort_terms}, {quantity.name: terms}], size=agg.pagination.size)
composite = dict(sources=[{order_by: sort_terms}, {quantity.name: terms}], size=agg.pagination.size)
if agg.pagination.after is not None:
if order_by is None:
@@ -1047,7 +1047,7 @@ def search(
search = Search(