Commit 6eb62cd8 authored by Markus Scheidgen

Dataset-edit-related checks and improvements.

parent e26b46a4
Pipeline #65183 passed with stages in 19 minutes and 12 seconds
@@ -301,7 +301,7 @@ export class EntryListUnstyled extends React.Component {
{moreActions}
</React.Fragment>
const selectActions = createActions({query: selectQuery, buttonProps: {color: 'secondary'}})
const allActions = createActions({query: query}, actions)
const allActions = actions
return (
<div className={classes.root}>
@@ -21,7 +21,11 @@ class SearchContext extends React.Component {
}
static emptyResponse = {
statistics: {}
statistics: {
total: {
all: {}
}
}
}
static type = React.createContext()
@@ -16,7 +16,7 @@ from flask import request, g
from flask_restplus import Resource, fields, abort
import re
from nomad import utils
from nomad import utils, processing as proc
from nomad.app.utils import with_logger
from nomad.datamodel import Dataset
from nomad.metainfo.flask_restplus import generate_flask_restplus_model
@@ -136,6 +136,15 @@ class DatasetResource(Resource):
if result.doi is not None:
abort(400, 'Dataset with name %s already has a DOI' % name)
# check if the DOI can be created
published_values = proc.Calc.objects(metadata__datasets=result.dataset_id).distinct('metadata.published')
if False in published_values:
abort(400, 'Dataset must not contain unpublished entries.')
if True not in published_values:
abort(400, 'Dataset must not be empty.')
# set the DOI
doi = DOI.create(title='NOMAD dataset: %s' % result.name, user=g.user)
doi.create_draft()
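The new pre-check gets everything it needs from a single MongoDB round trip: distinct() on the published flag returns the set of values present across the dataset's entries, which answers both "are there unpublished entries?" and "are there any entries at all?". A minimal sketch of the pattern with a toy MongoEngine document (Calc here is a stand-in for proc.Calc, and the connection is illustrative):

import mongoengine as me

me.connect('sketch_db')  # illustrative connection

class Calc(me.Document):
    # toy stand-in for proc.Calc: metadata carries the published flag and dataset ids
    calc_id = me.StringField(primary_key=True)
    metadata = me.DictField()

def doi_precheck(dataset_id: str) -> str:
    # distinct() returns the set of values found, e.g. [], [True] or [False, True]
    published = Calc.objects(metadata__datasets=dataset_id).distinct('metadata.published')
    if False in published:
        return 'dataset contains unpublished entries'
    if True not in published:
        return 'dataset is empty'
    return 'ok'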
@@ -161,7 +170,7 @@ class DatasetResource(Resource):
@authenticate(required=True)
@with_logger
def delete(self, name: str, logger):
""" Assign a DOI to the dataset. """
""" Delete the dataset. """
try:
result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
except KeyError:
@@ -20,6 +20,7 @@ meta-data.
from typing import List, Dict, Any
from flask_restplus import Resource, abort, fields
from flask import request, g
from elasticsearch_dsl import Q
from elasticsearch.exceptions import NotFoundError
import elasticsearch.helpers
@@ -344,43 +345,48 @@ repo_edit_model = api.model('RepoEdit', {
quantity.name: repo_edit_action_field(quantity)
for quantity in UserMetadata.m_def.all_quantities.values()
}), skip_none=True,
description='Each action specifies a single value (even for multi-valued quantities).')
description='Each action specifies a single value (even for multi-valued quantities).'),
'success': fields.Boolean(description='If the overall edit can/could be done. Only in API response.'),
'message': fields.String(description='A message that details the overall edit result. Only in API response.')
})
def edit(parsed_query: Dict[str, Any], logger, mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
# get all calculations that have to change
search_request = search.SearchRequest()
add_query(search_request, parsed_query)
upload_ids = set()
calc_ids = []
for hit in search_request.execute_scan():
calc_ids.append(hit['calc_id'])
upload_ids.add(hit['upload_id'])
with utils.timer(logger, 'edit query executed'):
search_request = search.SearchRequest()
add_query(search_request, parsed_query)
upload_ids = set()
calc_ids = []
for hit in search_request.execute_scan():
calc_ids.append(hit['calc_id'])
upload_ids.add(hit['upload_id'])
# perform the update on the mongo db
if mongo_update is not None:
n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
if n_updated != len(calc_ids):
logger.error('edit repo did not update all entries', payload=mongo_update)
with utils.timer(logger, 'edit mongo update executed', size=len(calc_ids)):
if mongo_update is not None:
n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
if n_updated != len(calc_ids):
logger.error('edit repo did not update all entries', payload=mongo_update)
# re-index the affected entries in elastic search
if re_index:
def elastic_updates():
for calc in proc.Calc.objects(calc_id__in=calc_ids):
entry = search.Entry.from_calc_with_metadata(
datamodel.CalcWithMetadata(**calc['metadata']))
entry = entry.to_dict(include_meta=True)
entry['_op_type'] = 'index'
yield entry
_, failed = elasticsearch.helpers.bulk(
infrastructure.elastic_client, elastic_updates(), stats_only=True)
search.refresh()
if failed > 0:
logger.error(
'edit repo with failed elastic updates',
payload=mongo_update, nfailed=failed)
with utils.timer(logger, 'edit elastic update executed', size=len(calc_ids)):
if re_index:
def elastic_updates():
for calc in proc.Calc.objects(calc_id__in=calc_ids):
entry = search.Entry.from_calc_with_metadata(
datamodel.CalcWithMetadata(**calc['metadata']))
entry = entry.to_dict(include_meta=True)
entry['_op_type'] = 'index'
yield entry
_, failed = elasticsearch.helpers.bulk(
infrastructure.elastic_client, elastic_updates(), stats_only=True)
search.refresh()
if failed > 0:
logger.error(
'edit repo with failed elastic updates',
payload=mongo_update, nfailed=failed)
return list(upload_ids)
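Two patterns in this hunk are worth isolating: the utils.timer blocks that attach a duration to each phase's log entry, and the generator that streams actions into elasticsearch.helpers.bulk. A condensed sketch of both (the timer is a guessed stand-in for utils.timer, the index name is made up; with stats_only=True the helper returns plain integer counts, so the failure count can be logged directly):

import logging
import time
from contextlib import contextmanager

import elasticsearch
import elasticsearch.helpers

logger = logging.getLogger(__name__)

@contextmanager
def timer(log, event):
    # minimal stand-in for utils.timer: log the wall-clock duration of a block
    start = time.time()
    try:
        yield
    finally:
        log.info('%s in %.3fs', event, time.time() - start)

def reindex(client: elasticsearch.Elasticsearch, entries):
    def elastic_updates():
        for entry in entries:
            doc = dict(entry)
            doc['_op_type'] = 'index'  # overwrite existing documents in place
            doc['_index'] = 'calcs'    # assumed index name
            yield doc

    with timer(logger, 'elastic update executed'):
        # stats_only=True returns (succeeded, failed) as integers, not error dicts
        _, failed = elasticsearch.helpers.bulk(client, elastic_updates(), stats_only=True)
        if failed > 0:
            logger.error('%d elastic updates failed', failed)

Streaming a generator into bulk keeps memory flat no matter how many entries the edit touches; the helper chunks the actions internally.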
@@ -431,91 +437,127 @@ class EditRepoCalcsResource(Resource):
parsed_query['owner'] = owner
# checking the edit actions and preparing a mongo update on the fly
json_data['success'] = True
mongo_update = {}
uploader_ids = None
lift_embargo = False
for action_quantity_name, quantity_actions in actions.items():
quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name)
if quantity is None:
abort(400, 'Unknown quantity %s' % action_quantity_name)
quantity_flask = quantity.m_x('flask', {})
if quantity_flask.get('admin_only', False):
if not g.user.is_admin():
abort(404, 'Only the admin user can set %s' % quantity.name)
if isinstance(quantity_actions, list) == quantity.is_scalar:
abort(400, 'Wrong shape for quantity %s' % action_quantity_name)
if not isinstance(quantity_actions, list):
quantity_actions = [quantity_actions]
flask_verify = quantity_flask.get('verify', None)
mongo_key = 'metadata__%s' % quantity.name
has_error = False
for action in quantity_actions:
action['success'] = True
action['message'] = None
action_value = action.get('value')
action_value = action_value if action_value is None else action_value.strip()
if action_value is None:
mongo_value = None
elif action_value == '':
mongo_value = None
elif flask_verify == datamodel.User:
try:
mongo_value = User.get(user_id=action_value).user_id
except KeyError:
action['success'] = False
has_error = True
action['message'] = 'User does not exist'
continue
removed_datasets = None
with utils.timer(logger, 'edit verified'):
for action_quantity_name, quantity_actions in actions.items():
quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name)
if quantity is None:
abort(400, 'Unknown quantity %s' % action_quantity_name)
quantity_flask = quantity.m_x('flask', {})
if quantity_flask.get('admin_only', False):
if not g.user.is_admin():
abort(404, 'Only the admin user can set %s' % quantity.name)
if isinstance(quantity_actions, list) == quantity.is_scalar:
abort(400, 'Wrong shape for quantity %s' % action_quantity_name)
if not isinstance(quantity_actions, list):
quantity_actions = [quantity_actions]
flask_verify = quantity_flask.get('verify', None)
mongo_key = 'metadata__%s' % quantity.name
has_error = False
for action in quantity_actions:
action['success'] = True
action['message'] = None
action_value = action.get('value')
action_value = action_value if action_value is None else action_value.strip()
if action_value is None:
mongo_value = None
if uploader_ids is None:
uploader_ids = get_uploader_ids(parsed_query)
if action_value in uploader_ids:
action['success'] = False
has_error = True
action['message'] = 'This user is already an uploader of one entry in the query'
continue
elif flask_verify == datamodel.Dataset:
try:
mongo_value = Dataset.m_def.m_x('me').get(
user_id=g.user.user_id, name=action_value).dataset_id
except KeyError:
action['message'] = 'Dataset does not exist and will be created'
elif action_value == '':
mongo_value = None
if not verify:
dataset = Dataset(
dataset_id=utils.create_uuid(), user_id=g.user.user_id,
name=action_value)
dataset.m_x('me').create()
mongo_value = dataset.dataset_id
elif action_quantity_name == 'with_embargo':
# ignore the actual value ... just lift the embargo
mongo_value = False
lift_embargo = True
else:
mongo_value = action_value
if len(quantity.shape) == 0:
mongo_update[mongo_key] = mongo_value
else:
mongo_values = mongo_update.setdefault(mongo_key, [])
if mongo_value is not None:
if mongo_value in mongo_values:
elif flask_verify == datamodel.User:
try:
mongo_value = User.get(user_id=action_value).user_id
except KeyError:
action['success'] = False
has_error = True
action['message'] = 'Duplicate values are not allowed'
action['message'] = 'User does not exist'
continue
mongo_values.append(mongo_value)
if len(quantity_actions) == 0 and len(quantity.shape) > 0:
mongo_update[mongo_key] = []
if uploader_ids is None:
uploader_ids = get_uploader_ids(parsed_query)
if action_value in uploader_ids:
action['success'] = False
has_error = True
action['message'] = 'This user is already an uploader of one entry in the query'
continue
elif flask_verify == datamodel.Dataset:
try:
mongo_value = Dataset.m_def.m_x('me').get(
user_id=g.user.user_id, name=action_value).dataset_id
except KeyError:
action['message'] = 'Dataset does not exist and will be created'
mongo_value = None
if not verify:
dataset = Dataset(
dataset_id=utils.create_uuid(), user_id=g.user.user_id,
name=action_value)
dataset.m_x('me').create()
mongo_value = dataset.dataset_id
elif action_quantity_name == 'with_embargo':
# ignore the actual value ... just lift the embargo
mongo_value = False
lift_embargo = True
# check if necessary
search_request = search.SearchRequest()
add_query(search_request, parsed_query)
search_request.q = search_request.q & Q('term', with_embargo=True)
if search_request.execute()['total'] == 0:
action['success'] = False
has_error = True
action['message'] = 'There is no embargo to lift'
continue
else:
mongo_value = action_value
if len(quantity.shape) == 0:
mongo_update[mongo_key] = mongo_value
else:
mongo_values = mongo_update.setdefault(mongo_key, [])
if mongo_value is not None:
if mongo_value in mongo_values:
action['success'] = False
has_error = True
action['message'] = 'Duplicate values are not allowed'
continue
mongo_values.append(mongo_value)
if len(quantity_actions) == 0 and len(quantity.shape) > 0:
mongo_update[mongo_key] = []
if action_quantity_name == 'datasets':
# check if datasets edit is allowed and if datasets have to be removed
search_request = search.SearchRequest()
add_query(search_request, parsed_query)
search_request.quantity(name='dataset_id')
old_datasets = list(
search_request.execute()['quantities']['dataset_id']['values'].keys())
removed_datasets = []
for dataset_id in old_datasets:
if dataset_id not in mongo_update.get(mongo_key, []):
removed_datasets.append(dataset_id)
doi_ds = Dataset.m_def.m_x('me').objects(
dataset_id__in=removed_datasets, doi__ne=None).first()
if doi_ds is not None:
json_data['success'] = False
json_data['message'] = json_data.get('message', '') + \
'Edit would remove entries from a dataset with DOI (%s) ' % doi_ds.name
has_error = True
# stop here, if client just wants to verify its actions
if verify:
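The "There is no embargo to lift" guard is a cheap existence check: the already-parsed user query is ANDed with a term filter on with_embargo, and only the hit count matters. The same idea in plain elasticsearch-dsl (client and index name are illustrative; NOMAD's SearchRequest composes its q attribute the same way):

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Q, Search

def has_embargo_to_lift(client: Elasticsearch, base_query: Q) -> bool:
    # narrow the user's query to embargoed entries and ask only for the count
    search = Search(using=client, index='calcs')  # assumed index name
    search = search.query(base_query & Q('term', with_embargo=True))
    return search.count() > 0

If this returns False, the action is flagged as unsuccessful instead of silently writing with_embargo=False to entries that were never embargoed.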
@@ -534,6 +576,10 @@ class EditRepoCalcsResource(Resource):
upload = proc.Upload.get(upload_id)
upload.re_pack()
# remove old datasets
if removed_datasets is not None:
Dataset.m_def.m_x('me').objects(dataset_id__in=removed_datasets).delete()
return json_data, 200
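The deferred cleanup pairs with the DOI guard from the verify phase above: datasets that the edit detached from all entries are deleted in one bulk call, and the earlier check guarantees none of them carries a DOI. The pair, reduced to a sketch (toy Dataset model):

import mongoengine as me

class Dataset(me.Document):
    # toy stand-in for the real Dataset mongo document
    dataset_id = me.StringField(primary_key=True)
    name = me.StringField()
    doi = me.StringField()

def remove_stale_datasets(old_ids, kept_ids):
    removed = [d for d in old_ids if d not in kept_ids]
    # refuse the edit if any dataset about to disappear already has a DOI
    if Dataset.objects(dataset_id__in=removed, doi__ne=None).count() > 0:
        raise ValueError('edit would remove entries from a dataset with a DOI')
    Dataset.objects(dataset_id__in=removed).delete()
    return removed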
@@ -74,7 +74,9 @@ class Calc(Proc):
('upload_id', 'parser'),
('upload_id', 'tasks_status'),
('upload_id', 'process_status'),
('upload_id', 'metadata.nomad_version')
('upload_id', 'metadata.nomad_version'),
'metadata.published',
'metadata.datasets'
]
}
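MongoEngine reads index declarations from the document's meta dict: tuples declare compound indexes, bare strings single-field indexes, and dotted paths reach into embedded or dict fields (indexing a list value such as metadata.datasets produces a multikey index). The shape of the declaration, reduced to a toy model:

import mongoengine as me

class Calc(me.Document):
    upload_id = me.StringField()
    parser = me.StringField()
    metadata = me.DictField()

    meta = {
        'indexes': [
            ('upload_id', 'parser'),  # compound index
            'metadata.published',     # single-field index into the dict
            'metadata.datasets',      # multikey index over the list value
        ]
    }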
@@ -36,6 +36,7 @@ from tests.test_files import example_file, example_file_mainfile, example_file_c
from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
from tests.test_search import assert_search_upload
from tests.processing import test_data as test_processing
from tests.utils import assert_exception
from tests.app.test_app import BlueprintClient
@@ -1189,6 +1190,28 @@ class TestEditRepo():
self.assert_edit(rv, quantity='datasets', success=True, message=False)
assert self.mongo(1, datasets=[self.example_dataset.dataset_id])
def test_edit_ds_remove_doi(self):
rv = self.perform_edit(
datasets=[self.example_dataset.name], query=dict(upload_id='upload_1'))
assert rv.status_code == 200
rv = self.api.post('/datasets/%s' % self.example_dataset.name, headers=self.test_user_auth)
assert rv.status_code == 200
rv = self.perform_edit(datasets=[], query=dict(upload_id='upload_1'))
assert rv.status_code == 400
data = json.loads(rv.data)
assert not data['success']
assert self.example_dataset.name in data['message']
assert Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id) is not None
def test_edit_ds_remove(self):
rv = self.perform_edit(
datasets=[self.example_dataset.name], query=dict(upload_id='upload_1'))
assert rv.status_code == 200
rv = self.perform_edit(datasets=[], query=dict(upload_id='upload_1'))
assert rv.status_code == 200
with assert_exception(KeyError):
assert Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id) is None
def test_edit_ds_user_namespace(self, test_user):
assert Dataset.m_def.m_x('me').objects(
name=self.other_example_dataset.name).first() is not None
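test_edit_ds_remove uses the imported tests.utils.assert_exception to assert that the dataset is gone. For readers without the NOMAD test tree, an equivalent helper fits in a few lines (an illustrative reimplementation; pytest.raises does the same job):

from contextlib import contextmanager

@contextmanager
def assert_exception(exception_cls=Exception):
    # fail the test unless the with-block raises the expected exception
    try:
        yield
    except exception_cls:
        return
    assert False, 'expected %s to be raised' % exception_cls.__name__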
@@ -1246,6 +1269,24 @@ def test_edit_lift_embargo(api, published, other_test_user_auth):
f.read()
@pytest.mark.timeout(config.tests.default_timeout)
def test_edit_lift_embargo_unnecessary(api, published_wo_user_metadata, other_test_user_auth):
example_calc = Calc.objects(upload_id=published_wo_user_metadata.upload_id).first()
assert not example_calc.metadata['with_embargo']
rv = api.post(
'/repo/edit', headers=other_test_user_auth, content_type='application/json',
data=json.dumps({
'actions': {
'with_embargo': {
'value': 'lift'
}
}
}))
assert rv.status_code == 400
data = json.loads(rv.data)
assert not data['actions']['with_embargo']['success']
class TestRaw(UploadFilesBasedTests):
def assert_zip_file(self, rv, files: int = -1, basename: bool = None):
@@ -1607,7 +1648,20 @@ class TestDataset:
self.assert_dataset(data, name='ds1', doi=True)
self.assert_dataset_entry(api, '1', True, True, headers=test_user_auth)
def test_resolve_doi(self, api, example_dataset_with_entry):
def test_assign_doi_empty(self, api, test_user_auth, example_datasets):
rv = api.post('/datasets/ds1', headers=test_user_auth)
assert rv.status_code == 400
def test_assign_doi_unpublished(self, api, test_user_auth, example_datasets):
calc = CalcWithMetadata(
calc_id='1', upload_id='1', published=False, with_embargo=False, datasets=['1'])
Calc(
calc_id='1', upload_id='1', create_time=datetime.datetime.now(),
metadata=calc.to_dict()).save()
rv = api.post('/datasets/ds1', headers=test_user_auth)
assert rv.status_code == 400
def test_resolve_doi(self, api, example_datasets):
rv = api.get('/datasets/doi/test_doi')
assert rv.status_code == 200
data = json.loads(rv.data)
@@ -581,6 +581,21 @@ def published(non_empty_processed: processing.Upload, example_user_metadata) ->
return non_empty_processed
@pytest.mark.timeout(config.tests.default_timeout)
@pytest.fixture(scope='function')
def published_wo_user_metadata(non_empty_processed: processing.Upload) -> processing.Upload:
"""
Provides a processed upload. Upload was uploaded with test_user.
"""
non_empty_processed.publish_upload()
try:
non_empty_processed.block_until_complete(interval=.01)
except Exception:
pass
return non_empty_processed
@pytest.fixture
def reset_config():
""" Fixture that resets the log-level after test. """