Commit 436c9373 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added dataset models and API. #196

parent 4edf939e
Pipeline #62120 failed with stages
in 15 minutes and 22 seconds
......@@ -26,4 +26,4 @@ There is a separate documentation for the API endpoints from a client perspectiv
"""
from .api import blueprint
from . import info, auth, admin, upload, repo, archive, raw, mirror
from . import info, auth, admin, upload, repo, archive, raw, mirror, dataset
from typing import Dict, Any
from flask import request, g
from flask_restplus import Resource, fields, abort
import mongoengine as me
from nomad import utils
from nomad.metainfo import MSection, Quantity, Section
from nomad.app.utils import with_logger
from .api import api
from .auth import authenticate
from .common import pagination_model, pagination_request_parser
# Flask-RESTPlus namespace: groups all dataset endpoints under /datasets.
ns = api.namespace(
    'datasets',
    description='Datasets allow to create sets of related data.')
class Dataset(MSection):
    """ A Dataset is attached to one or many entries to form a set of data.
    Args:
        dataset_id: The unique identifier for this dataset as a string. It should be
            a randomly generated UUID, similar to other nomad ids.
        name: The human readable name of the dataset as string. The dataset name must be
            unique for the user.
        user_id: The unique user_id of the owner and creator of this dataset. The owner
            must not change after creation.
        doi: The optional Document Object Identifier (DOI) associated with this dataset.
            Nomad can register DOIs that link back to the respective representation of
            the dataset in the nomad UI. This quantity holds the string representation of
            this DOI. There is only one per dataset.
    """
    # The ``a_me`` annotations are mongoengine hints consumed by
    # ``generate_mongoengine`` below: ``primary_key`` marks the document's
    # primary key, ``index`` requests a mongo index on the field.
    dataset_id = Quantity(type=str, a_me=dict(primary_key=True))
    name = Quantity(type=str, a_me=dict(index=True))
    user_id = Quantity(type=str, a_me=dict(index=True))
    doi = Quantity(type=str, a_me=dict(index=True))
def generate_flask_restplus_model(section_def: Section):
    """ Derives a flask-restplus API model from a metainfo section definition.

    Each quantity becomes a restplus field of the matching primitive type;
    quantities with a one-dimensional shape become lists of that field.
    Unsupported types or shapes raise :class:`NotImplementedError`.
    """
    # ordered (python type, restplus field) pairs; compared with ``==`` to
    # mirror how metainfo quantity types relate to builtin types
    type_field_pairs = (
        (int, fields.Integer),
        (float, fields.Float),
        (str, fields.String),
        (bool, fields.Boolean))

    def generate_field(quantity: Quantity):
        for py_type, field_cls in type_field_pairs:
            if quantity.type == py_type:
                break
        else:
            raise NotImplementedError

        scalar_field = field_cls(description=quantity.description)
        rank = len(quantity.shape)
        if rank == 0:
            return scalar_field
        if rank == 1:
            return fields.List(scalar_field)
        raise NotImplementedError

    model_fields = {
        name: generate_field(quantity)
        for name, quantity in section_def.all_quantities.items()}
    return api.model(section_def.name, model_fields)
# Restplus model for a single dataset, derived from the metainfo definition.
dataset_model = generate_flask_restplus_model(Dataset.m_def)
# Paginated list-of-datasets response model used by the GET /datasets/ endpoint.
dataset_list_model = api.model('DatasetList', {
    'pagination': fields.Nested(model=pagination_model),
    'results': fields.List(fields.Nested(model=dataset_model, skip_none=True))
})
def generate_mongoengine(section_def: Section):
    """ Derives a mongoengine document class from a metainfo section definition.

    Quantity types are mapped to the respective mongoengine fields; quantities
    with a one-dimensional shape become list fields. The ``a_me`` annotation of
    each quantity is passed as field constructor arguments (e.g.
    ``primary_key``), except for its ``index`` key which is collected
    separately into the document's ``meta.indexes``.

    Raises:
        NotImplementedError: for quantity types other than int/float/str/bool
            or shapes with more than one dimension.
    """
    def generate_field(quantity: Quantity):
        # Copy the annotation dict before popping: the dict returned by
        # m_annotations is the one stored on the quantity definition itself.
        # Mutating it would destroy the 'index' annotation for any later call
        # (a second generate_mongoengine would silently produce no indexes).
        annotation = dict(quantity.m_annotations.get('me', {}))
        annotation.pop('index', None)

        field = None
        if quantity.type == int:
            field = me.IntField
        elif quantity.type == float:
            field = me.FloatField
        elif quantity.type == str:
            field = me.StringField
        elif quantity.type == bool:
            field = me.BooleanField
        else:
            raise NotImplementedError

        result = field(default=quantity.default, **annotation)

        if len(quantity.shape) == 0:
            return result
        elif len(quantity.shape) == 1:
            return me.ListField(result)
        else:
            raise NotImplementedError

    # collect the names of all quantities annotated with index=True
    indexes = [
        quantity.name
        for quantity in section_def.all_quantities.values()
        if quantity.m_annotations.get('me', {}).get('index', False)]

    dct: Dict[str, Any] = dict()
    if len(indexes) > 0:
        dct.update(meta=dict(indexes=indexes))
    dct.update(**{
        name: generate_field(quantity)
        for name, quantity in section_def.all_quantities.items()})
    # dynamically create the document class named after the section
    return type(section_def.name, (me.Document,), dct)


DatasetME = generate_mongoengine(Dataset.m_def)
@ns.route('/')
class DatasetListResource(Resource):
    @api.doc('list_datasets')
    @api.marshal_with(dataset_list_model, skip_none=True, code=200, description='Dataset send')
    @api.expect(pagination_request_parser)
    @authenticate(required=True)
    def get(self):
        """ Retrieve a list of all datasets of the authenticated user. """
        try:
            page = int(request.args.get('page', 1))
            per_page = int(request.args.get('per_page', 10))
        except Exception:
            abort(400, message='bad parameter types')

        # page=0 or negative values would produce wrong (negative-index)
        # slices below; reject them explicitly
        if page < 1 or per_page < 1:
            abort(400, message='invalid pagination')

        result_query = DatasetME.objects(user_id=g.user.user_id)
        return dict(
            pagination=dict(total=result_query.count(), page=page, per_page=per_page),
            results=result_query[(page - 1) * per_page: page * per_page]), 200

    @api.doc('create_dataset')
    @api.response(400, 'The provided data is malformed or a dataset with the name already exists')
    @api.marshal_with(dataset_model, skip_none=True, code=200, description='Dataset send')
    @api.expect(dataset_model)
    @authenticate(required=True)
    def put(self):
        """ Creates a new dataset owned by the authenticated user.

        A unique ``name`` is required. Only admins may set ``user_id``,
        ``doi``, or ``dataset_id``; for everyone else these are generated.
        """
        data = request.get_json()
        if data is None:
            data = {}

        # dataset names must be unique per user
        name = data.get('name', None)
        if name is None:
            abort(400, 'Must provide a dataset name.')
        if DatasetME.objects(user_id=g.user.user_id, name=name).count() > 0:
            abort(400, 'A dataset with name %s does already exist for the current user.' % name)

        # only admin can set user or doi
        if any(key in data for key in ['user_id', 'doi', 'dataset_id']):
            if not g.user.is_admin():
                abort(400, 'The dataset contains information you are not allowed to set.')

        # reject keys that are not dataset quantities
        if any(key not in Dataset.m_def.all_quantities for key in data):
            abort(400, 'The dataset contains unknown keys.')

        if 'user_id' not in data:
            data['user_id'] = g.user.user_id
        # admins may supply a dataset_id; otherwise generate a fresh UUID
        dataset_id = data.pop('dataset_id', utils.create_uuid())
        return DatasetME(dataset_id=dataset_id, **data).save(), 200
@ns.route('/<string:name>')
@api.doc(params=dict(name='The name of the requested dataset.'))
class DatasetResource(Resource):
    @api.doc('get_dataset')
    @api.response(404, 'The dataset does not exist')
    @api.marshal_with(dataset_model, skip_none=True, code=200, description='Dataset send')
    @authenticate(required=True)
    def get(self, name: str):
        """ Retrieve a dataset by name. """
        result = DatasetME.objects(user_id=g.user.user_id, name=name).first()
        if result is None:
            abort(404, 'Dataset with name %s does not exist for current user' % name)
        return result

    @api.doc('assign_doi')
    @api.response(404, 'The dataset does not exist')
    @api.response(400, 'The dataset already has a DOI')
    @api.marshal_with(dataset_model, skip_none=True, code=200, description='DOI assigned')
    @authenticate(required=True)
    @with_logger
    def post(self, name: str, logger):
        """ Assign a DOI to the dataset. """
        result = DatasetME.objects(user_id=g.user.user_id, name=name).first()
        if result is None:
            abort(404, 'Dataset with name %s does not exist for current user' % name)
        # TODO DOI registration is not implemented yet; log loudly and return
        # the dataset unchanged so the endpoint contract is already in place
        logger.error('assign datasets is not implemented', user_id=g.user.user_id)
        return result

    @api.doc('delete_dataset')
    @api.response(404, 'The dataset does not exist')
    @api.response(400, 'The dataset has a DOI and cannot be deleted')
    @api.marshal_with(dataset_model, skip_none=True, code=200, description='Dataset deleted')
    @authenticate(required=True)
    def delete(self, name: str):
        """ Delete the dataset. Datasets with an assigned DOI cannot be deleted. """
        result = DatasetME.objects(user_id=g.user.user_id, name=name).first()
        if result is None:
            abort(404, 'Dataset with name %s does not exist for current user' % name)
        if result.doi is not None:
            abort(400, 'Dataset with name %s has a DOI and cannot be deleted' % name)
        result.delete()
        return result
......@@ -30,6 +30,7 @@ from nomad import search, parsing, files, config, utils, infrastructure
from nomad.files import UploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc, SUCCESS
from nomad.datamodel import UploadWithMetadata, CalcWithMetadata, User
from nomad.app.api.dataset import DatasetME
from tests.conftest import create_auth_headers, clear_elastic
from tests.test_files import example_file, example_file_mainfile, example_file_contents
......@@ -1151,3 +1152,80 @@ class TestMirror:
data = json.loads(rv.data)
assert data[0]['upload_id'] == published.upload_id
class TestDataset:
    """ Tests for the /datasets API endpoints. """

    @pytest.fixture()
    def example_datasets(self, mongo, test_user):
        # ds2 carries a DOI so delete/DOI behavior can be exercised
        DatasetME(dataset_id='1', user_id=test_user.user_id, name='ds1').save()
        DatasetME(dataset_id='2', user_id=test_user.user_id, name='ds2', doi='test_doi').save()

    def assert_dataset(self, dataset, name: str = None, doi: bool = False):
        """ Asserts the marshalled dataset dict has the expected keys/values. """
        assert 'dataset_id' in dataset
        assert 'user_id' in dataset
        # skip_none marshalling means 'doi' is present iff it was set
        assert ('doi' in dataset) == doi
        assert dataset.get('name') is not None
        if name is not None:
            assert dataset.get('name') == name

    def test_create_dataset(self, api, test_user_auth):
        rv = api.put(
            '/datasets/', headers=test_user_auth,
            data=json.dumps(dict(name='test_dataset')),
            content_type='application/json')
        assert rv.status_code == 200
        data = json.loads(rv.data)
        self.assert_dataset(data, 'test_dataset')

    @pytest.mark.parametrize('data', [
        dict(name='test_name', doi='something'),
        dict(name='test_name', dataset_id='something'),
        dict(name='test_name', user_id='something'),
        dict(name='test_name', unknown_key='something'),
        dict()])
    def test_create_dataset_bad_data(self, api, test_user_auth, data):
        rv = api.put(
            '/datasets/', headers=test_user_auth,
            data=json.dumps(data),
            content_type='application/json')
        assert rv.status_code >= 400

    def test_get_datasets(self, api, test_user_auth, example_datasets):
        rv = api.get('/datasets/', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)
        assert 'pagination' in data
        assert data['pagination']['total'] == 2
        assert len(data['results']) == 2
        for dataset in data['results']:
            if dataset['name'] == 'ds2':
                self.assert_dataset(dataset, doi=True)
            else:
                self.assert_dataset(dataset)

    def test_get_dataset(self, api, test_user_auth, example_datasets):
        rv = api.get('/datasets/ds1', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)
        self.assert_dataset(data, name='ds1')

    def test_get_dataset_missing(self, api, other_test_user_auth, example_datasets):
        # datasets are scoped per user: another user must not see ds1
        rv = api.get('/datasets/ds1', headers=other_test_user_auth)
        assert rv.status_code == 404

    def test_post_dataset(self, api, test_user_auth, example_datasets):
        rv = api.post('/datasets/ds1', headers=test_user_auth)
        # TODO the actual DOI part needs to be implemented
        assert rv.status_code == 200

    def test_delete_dataset(self, api, test_user_auth, example_datasets):
        rv = api.delete('/datasets/ds1', headers=test_user_auth)
        assert rv.status_code == 200
        data = json.loads(rv.data)
        self.assert_dataset(data, name='ds1')
        # the original version was missing the assert here, so the 404
        # check on the deleted dataset was silently a no-op
        assert api.get('/datasets/ds1', headers=test_user_auth).status_code == 404

    def test_get_dataset_with_doi(self, api, test_user_auth, example_datasets):
        # NOTE(review): despite the name this tests *delete* of a DOI'd
        # dataset, which must be rejected
        rv = api.delete('/datasets/ds2', headers=test_user_auth)
        assert rv.status_code == 400
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment