From 6eb62cd8309abc9f7f2b678ad43bb388ca0f1499 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Mon, 9 Dec 2019 13:40:08 +0100
Subject: [PATCH] Dataset edit related checks and improvements.

---
 gui/src/components/search/EntryList.js     |   2 +-
 gui/src/components/search/SearchContext.js |   6 +-
 nomad/app/api/dataset.py                   |  13 +-
 nomad/app/api/repo.py                      | 254 ++++++++++++---------
 nomad/processing/data.py                   |   4 +-
 tests/app/test_api.py                      |  56 ++++-
 tests/conftest.py                          |  15 ++
 7 files changed, 240 insertions(+), 110 deletions(-)

diff --git a/gui/src/components/search/EntryList.js b/gui/src/components/search/EntryList.js
index 3804969535..3d18297641 100644
--- a/gui/src/components/search/EntryList.js
+++ b/gui/src/components/search/EntryList.js
@@ -301,7 +301,7 @@ export class EntryListUnstyled extends React.Component {
       {moreActions}
     </React.Fragment>
     const selectActions = createActions({query: selectQuery, buttonProps: {color: 'secondary'}})
-    const allActions = createActions({query: query}, actions)
+    const allActions = actions
 
     return (
       <div className={classes.root}>
diff --git a/gui/src/components/search/SearchContext.js b/gui/src/components/search/SearchContext.js
index 50ec415092..3c3c9b7396 100644
--- a/gui/src/components/search/SearchContext.js
+++ b/gui/src/components/search/SearchContext.js
@@ -21,7 +21,11 @@ class SearchContext extends React.Component {
   }
 
   static emptyResponse = {
-    statistics: {}
+    statistics: {
+      total: {
+        all: {}
+      }
+    }
   }
 
   static type = React.createContext()
diff --git a/nomad/app/api/dataset.py b/nomad/app/api/dataset.py
index aec01cf5fa..0876b390b7 100644
--- a/nomad/app/api/dataset.py
+++ b/nomad/app/api/dataset.py
@@ -16,7 +16,7 @@ from flask import request, g
 from flask_restplus import Resource, fields, abort
 import re
 
-from nomad import utils
+from nomad import utils, processing as proc
 from nomad.app.utils import with_logger
 from nomad.datamodel import Dataset
 from nomad.metainfo.flask_restplus import generate_flask_restplus_model
@@ -136,6 +136,15 @@ class DatasetResource(Resource):
         if result.doi is not None:
             abort(400, 'Dataset with name %s already has a DOI' % name)
 
+        # check if the DOI can be created
+        published_values = proc.Calc.objects(metadata__datasets=result.dataset_id).distinct('metadata.published')
+
+        if False in published_values:
+            abort(400, 'Dataset must not contain unpublished entries.')
+
+        if True not in published_values:
+            abort(400, 'Dataset must not be empty.')
+
         # set the DOI
         doi = DOI.create(title='NOMAD dataset: %s' % result.name, user=g.user)
         doi.create_draft()
@@ -161,7 +170,7 @@ class DatasetResource(Resource):
     @authenticate(required=True)
     @with_logger
     def delete(self, name: str, logger):
-        """ Assign a DOI to the dataset. """
+        """ Delete the dataset. """
         try:
             result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
         except KeyError:
diff --git a/nomad/app/api/repo.py b/nomad/app/api/repo.py
index 78a8bc269e..0ca5280c25 100644
--- a/nomad/app/api/repo.py
+++ b/nomad/app/api/repo.py
@@ -20,6 +20,7 @@ meta-data.
 from typing import List, Dict, Any
 from flask_restplus import Resource, abort, fields
 from flask import request, g
+from elasticsearch_dsl import Q
 from elasticsearch.exceptions import NotFoundError
 import elasticsearch.helpers
 
@@ -344,43 +345,48 @@ repo_edit_model = api.model('RepoEdit', {
         quantity.name: repo_edit_action_field(quantity)
         for quantity in UserMetadata.m_def.all_quantities.values()
     }), skip_none=True,
-        description='Each action specifies a single value (even for multi valued quantities).')
+        description='Each action specifies a single value (even for multi-valued quantities).'),
+    'success': fields.Boolean(description='Whether the overall edit can be (or was) performed. Only used in API responses.'),
+    'message': fields.String(description='A message detailing the overall edit result. Only used in API responses.')
 })
 
 
 def edit(parsed_query: Dict[str, Any], logger, mongo_update: Dict[str, Any] = None, re_index=True) -> List[str]:
     # get all calculations that have to change
-    search_request = search.SearchRequest()
-    add_query(search_request, parsed_query)
-    upload_ids = set()
-    calc_ids = []
-    for hit in search_request.execute_scan():
-        calc_ids.append(hit['calc_id'])
-        upload_ids.add(hit['upload_id'])
+    with utils.timer(logger, 'edit query executed'):
+        search_request = search.SearchRequest()
+        add_query(search_request, parsed_query)
+        upload_ids = set()
+        calc_ids = []
+        for hit in search_request.execute_scan():
+            calc_ids.append(hit['calc_id'])
+            upload_ids.add(hit['upload_id'])
 
     # perform the update on the mongo db
-    if mongo_update is not None:
-        n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
-        if n_updated != len(calc_ids):
-            logger.error('edit repo did not update all entries', payload=mongo_update)
+    with utils.timer(logger, 'edit mongo update executed', size=len(calc_ids)):
+        if mongo_update is not None:
+            n_updated = proc.Calc.objects(calc_id__in=calc_ids).update(multi=True, **mongo_update)
+            if n_updated != len(calc_ids):
+                logger.error('edit repo did not update all entries', payload=mongo_update)
 
     # re-index the affected entries in elastic search
-    if re_index:
-        def elastic_updates():
-            for calc in proc.Calc.objects(calc_id__in=calc_ids):
-                entry = search.Entry.from_calc_with_metadata(
-                    datamodel.CalcWithMetadata(**calc['metadata']))
-                entry = entry.to_dict(include_meta=True)
-                entry['_op_type'] = 'index'
-                yield entry
-
-        _, failed = elasticsearch.helpers.bulk(
-            infrastructure.elastic_client, elastic_updates(), stats_only=True)
-        search.refresh()
-        if failed > 0:
-            logger.error(
-                'edit repo with failed elastic updates',
-                payload=mongo_update, nfailed=len(failed))
+    with utils.timer(logger, 'edit elastic update executed', size=len(calc_ids)):
+        if re_index:
+            def elastic_updates():
+                for calc in proc.Calc.objects(calc_id__in=calc_ids):
+                    entry = search.Entry.from_calc_with_metadata(
+                        datamodel.CalcWithMetadata(**calc['metadata']))
+                    entry = entry.to_dict(include_meta=True)
+                    entry['_op_type'] = 'index'
+                    yield entry
+
+            _, failed = elasticsearch.helpers.bulk(
+                infrastructure.elastic_client, elastic_updates(), stats_only=True)
+            search.refresh()
+            if failed > 0:
+                logger.error(
+                    'edit repo with failed elastic updates',
+                    payload=mongo_update, nfailed=failed)
 
     return list(upload_ids)
@@ -431,91 +437,127 @@ class EditRepoCalcsResource(Resource):
             parsed_query['owner'] = owner
 
         # checking the edit actions and preparing a mongo update on the fly
+        json_data['success'] = True
         mongo_update = {}
         uploader_ids = None
         lift_embargo = False
-        for action_quantity_name, quantity_actions in actions.items():
-            quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name)
-            if quantity is None:
-                abort(400, 'Unknown quantity %s' % action_quantity_name)
-
-            quantity_flask = quantity.m_x('flask', {})
-            if quantity_flask.get('admin_only', False):
-                if not g.user.is_admin():
-                    abort(404, 'Only the admin user can set %s' % quantity.name)
-
-            if isinstance(quantity_actions, list) == quantity.is_scalar:
-                abort(400, 'Wrong shape for quantity %s' % action_quantity_name)
-
-            if not isinstance(quantity_actions, list):
-                quantity_actions = [quantity_actions]
-
-            flask_verify = quantity_flask.get('verify', None)
-            mongo_key = 'metadata__%s' % quantity.name
-            has_error = False
-            for action in quantity_actions:
-                action['success'] = True
-                action['message'] = None
-                action_value = action.get('value')
-                action_value = action_value if action_value is None else action_value.strip()
-
-                if action_value is None:
-                    mongo_value = None
-
-                elif action_value == '':
-                    mongo_value = None
-
-                elif flask_verify == datamodel.User:
-                    try:
-                        mongo_value = User.get(user_id=action_value).user_id
-                    except KeyError:
-                        action['success'] = False
-                        has_error = True
-                        action['message'] = 'User does not exist'
-                        continue
+        removed_datasets = None
+
+        with utils.timer(logger, 'edit verified'):
+            for action_quantity_name, quantity_actions in actions.items():
+                quantity = UserMetadata.m_def.all_quantities.get(action_quantity_name)
+                if quantity is None:
+                    abort(400, 'Unknown quantity %s' % action_quantity_name)
+
+                quantity_flask = quantity.m_x('flask', {})
+                if quantity_flask.get('admin_only', False):
+                    if not g.user.is_admin():
+                        abort(404, 'Only the admin user can set %s' % quantity.name)
+
+                if isinstance(quantity_actions, list) == quantity.is_scalar:
+                    abort(400, 'Wrong shape for quantity %s' % action_quantity_name)
+
+                if not isinstance(quantity_actions, list):
+                    quantity_actions = [quantity_actions]
+
+                flask_verify = quantity_flask.get('verify', None)
+                mongo_key = 'metadata__%s' % quantity.name
+                has_error = False
+                for action in quantity_actions:
+                    action['success'] = True
+                    action['message'] = None
+                    action_value = action.get('value')
+                    action_value = action_value if action_value is None else action_value.strip()
+
+                    if action_value is None:
+                        mongo_value = None
-                    if uploader_ids is None:
-                        uploader_ids = get_uploader_ids(parsed_query)
-                    if action_value in uploader_ids:
-                        action['success'] = False
-                        has_error = True
-                        action['message'] = 'This user is already an uploader of one entry in the query'
-                        continue
-
-                elif flask_verify == datamodel.Dataset:
-                    try:
-                        mongo_value = Dataset.m_def.m_x('me').get(
-                            user_id=g.user.user_id, name=action_value).dataset_id
-                    except KeyError:
-                        action['message'] = 'Dataset does not exist and will be created'
+                    elif action_value == '':
                         mongo_value = None
-                        if not verify:
-                            dataset = Dataset(
-                                dataset_id=utils.create_uuid(), user_id=g.user.user_id,
-                                name=action_value)
-                            dataset.m_x('me').create()
-                            mongo_value = dataset.dataset_id
-                elif action_quantity_name == 'with_embargo':
-                    # ignore the actual value ... just lift the embargo
-                    mongo_value = False
-                    lift_embargo = True
-                else:
-                    mongo_value = action_value
-
-                if len(quantity.shape) == 0:
-                    mongo_update[mongo_key] = mongo_value
-                else:
-                    mongo_values = mongo_update.setdefault(mongo_key, [])
-                    if mongo_value is not None:
-                        if mongo_value in mongo_values:
+
+                    elif flask_verify == datamodel.User:
+                        try:
+                            mongo_value = User.get(user_id=action_value).user_id
+                        except KeyError:
                             action['success'] = False
                             has_error = True
-                            action['message'] = 'Duplicate values are not allowed'
+                            action['message'] = 'User does not exist'
                             continue
-                        mongo_values.append(mongo_value)
 
-            if len(quantity_actions) == 0 and len(quantity.shape) > 0:
-                mongo_update[mongo_key] = []
+                        if uploader_ids is None:
+                            uploader_ids = get_uploader_ids(parsed_query)
+                        if action_value in uploader_ids:
+                            action['success'] = False
+                            has_error = True
+                            action['message'] = 'This user is already an uploader of one entry in the query'
+                            continue
+
+                    elif flask_verify == datamodel.Dataset:
+                        try:
+                            mongo_value = Dataset.m_def.m_x('me').get(
+                                user_id=g.user.user_id, name=action_value).dataset_id
+                        except KeyError:
+                            action['message'] = 'Dataset does not exist and will be created'
+                            mongo_value = None
+                            if not verify:
+                                dataset = Dataset(
+                                    dataset_id=utils.create_uuid(), user_id=g.user.user_id,
+                                    name=action_value)
+                                dataset.m_x('me').create()
+                                mongo_value = dataset.dataset_id
+
+                    elif action_quantity_name == 'with_embargo':
+                        # ignore the actual value ... just lift the embargo
+                        mongo_value = False
+                        lift_embargo = True
+
+                        # check if necessary
+                        search_request = search.SearchRequest()
+                        add_query(search_request, parsed_query)
+                        search_request.q = search_request.q & Q('term', with_embargo=True)
+                        if search_request.execute()['total'] == 0:
+                            action['success'] = False
+                            has_error = True
+                            action['message'] = 'There is no embargo to lift'
+                            continue
+                    else:
+                        mongo_value = action_value
+
+                    if len(quantity.shape) == 0:
+                        mongo_update[mongo_key] = mongo_value
+                    else:
+                        mongo_values = mongo_update.setdefault(mongo_key, [])
+                        if mongo_value is not None:
+                            if mongo_value in mongo_values:
+                                action['success'] = False
+                                has_error = True
+                                action['message'] = 'Duplicate values are not allowed'
+                                continue
+                            mongo_values.append(mongo_value)
+
+                if len(quantity_actions) == 0 and len(quantity.shape) > 0:
+                    mongo_update[mongo_key] = []
+
+                if action_quantity_name == 'datasets':
+                    # check if datasets edit is allowed and if datasets have to be removed
+                    search_request = search.SearchRequest()
+                    add_query(search_request, parsed_query)
+                    search_request.quantity(name='dataset_id')
+                    old_datasets = list(
+                        search_request.execute()['quantities']['dataset_id']['values'].keys())
+
+                    removed_datasets = []
+                    for dataset_id in old_datasets:
+                        if dataset_id not in mongo_update.get(mongo_key, []):
+                            removed_datasets.append(dataset_id)
+
+                    doi_ds = Dataset.m_def.m_x('me').objects(
+                        dataset_id__in=removed_datasets, doi__ne=None).first()
+                    if doi_ds is not None:
+                        json_data['success'] = False
+                        json_data['message'] = json_data.get('message', '') + \
+                            'Edit would remove entries from a dataset with DOI (%s) ' % doi_ds.name
+                        has_error = True
 
         # stop here, if client just wants to verify its actions
         if verify:
@@ -534,6 +576,10 @@ class EditRepoCalcsResource(Resource):
                 upload = proc.Upload.get(upload_id)
                 upload.re_pack()
 
+        # remove old datasets
+        if removed_datasets is not None:
+            Dataset.m_def.m_x('me').objects(dataset_id__in=removed_datasets).delete()
+
         return json_data, 200
 
 
diff --git a/nomad/processing/data.py b/nomad/processing/data.py
index 39f6950d8d..1e534737e2 100644
--- a/nomad/processing/data.py
+++ b/nomad/processing/data.py
@@ -74,7 +74,9 @@ class Calc(Proc):
             ('upload_id', 'parser'),
             ('upload_id', 'tasks_status'),
            ('upload_id', 'process_status'),
-            ('upload_id', 'metadata.nomad_version')
+            ('upload_id', 'metadata.nomad_version'),
+            'metadata.published',
+            'metadata.datasets'
         ]
     }
 
diff --git a/tests/app/test_api.py b/tests/app/test_api.py
index 856afb3545..b4e83b0ed3 100644
--- a/tests/app/test_api.py
+++ b/tests/app/test_api.py
@@ -36,6 +36,7 @@ from tests.test_files import example_file, example_file_mainfile, example_file_c
 from tests.test_files import create_staging_upload, create_public_upload, assert_upload_files
 from tests.test_search import assert_search_upload
 from tests.processing import test_data as test_processing
+from tests.utils import assert_exception
 from tests.app.test_app import BlueprintClient
 
 
@@ -1189,6 +1190,28 @@ class TestEditRepo():
         self.assert_edit(rv, quantity='datasets', success=True, message=False)
         assert self.mongo(1, datasets=[self.example_dataset.dataset_id])
 
+    def test_edit_ds_remove_doi(self):
+        rv = self.perform_edit(
+            datasets=[self.example_dataset.name], query=dict(upload_id='upload_1'))
+        assert rv.status_code == 200
+        rv = self.api.post('/datasets/%s' % self.example_dataset.name, headers=self.test_user_auth)
+        assert rv.status_code == 200
+        rv = self.perform_edit(datasets=[], query=dict(upload_id='upload_1'))
+        assert rv.status_code == 400
+        data = json.loads(rv.data)
+        assert not data['success']
+        assert self.example_dataset.name in data['message']
+        assert Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id) is not None
+
+    def test_edit_ds_remove(self):
+        rv = self.perform_edit(
+            datasets=[self.example_dataset.name], query=dict(upload_id='upload_1'))
+        assert rv.status_code == 200
+        rv = self.perform_edit(datasets=[], query=dict(upload_id='upload_1'))
+        assert rv.status_code == 200
+        with assert_exception(KeyError):
+            Dataset.m_def.m_x('me').get(dataset_id=self.example_dataset.dataset_id)
+
     def test_edit_ds_user_namespace(self, test_user):
         assert Dataset.m_def.m_x('me').objects(
             name=self.other_example_dataset.name).first() is not None
@@ -1246,6 +1269,24 @@ def test_edit_lift_embargo(api, published, other_test_user_auth):
             f.read()
 
 
+@pytest.mark.timeout(config.tests.default_timeout)
+def test_edit_lift_embargo_unnecessary(api, published_wo_user_metadata, other_test_user_auth):
+    example_calc = Calc.objects(upload_id=published_wo_user_metadata.upload_id).first()
+    assert not example_calc.metadata['with_embargo']
+
+    rv = api.post(
+        '/repo/edit', headers=other_test_user_auth, content_type='application/json',
+        data=json.dumps({
+            'actions': {
+                'with_embargo': {
+                    'value': 'lift'
+                }
+            }
+        }))
+    assert rv.status_code == 400
+    data = json.loads(rv.data)
+    assert not data['actions']['with_embargo']['success']
+
 class TestRaw(UploadFilesBasedTests):
 
     def assert_zip_file(self, rv, files: int = -1, basename: bool = None):
@@ -1607,7 +1648,20 @@ class TestDataset:
         self.assert_dataset(data, name='ds1', doi=True)
         self.assert_dataset_entry(api, '1', True, True, headers=test_user_auth)
 
-    def test_resolve_doi(self, api, example_dataset_with_entry):
+    def test_assign_doi_empty(self, api, test_user_auth, example_datasets):
+        rv = api.post('/datasets/ds1', headers=test_user_auth)
+        assert rv.status_code == 400
+
+    def test_assign_doi_unpublished(self, api, test_user_auth, example_datasets):
+        calc = CalcWithMetadata(
+            calc_id='1', upload_id='1', published=False, with_embargo=False, datasets=['1'])
+        Calc(
+            calc_id='1', upload_id='1', create_time=datetime.datetime.now(),
+            metadata=calc.to_dict()).save()
+        rv = api.post('/datasets/ds1', headers=test_user_auth)
+        assert rv.status_code == 400
+
+    def test_resolve_doi(self, api, example_datasets):
         rv = api.get('/datasets/doi/test_doi')
         assert rv.status_code == 200
         data = json.loads(rv.data)
diff --git a/tests/conftest.py b/tests/conftest.py
index 96bf7d06d3..17bfbba924 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -581,6 +581,21 @@ def published(non_empty_processed: processing.Upload, example_user_metadata) ->
     return non_empty_processed
 
 
+@pytest.mark.timeout(config.tests.default_timeout)
+@pytest.fixture(scope='function')
+def published_wo_user_metadata(non_empty_processed: processing.Upload) -> processing.Upload:
+    """
+    Provides a published upload without user metadata. Uploaded with test_user.
+    """
+    non_empty_processed.publish_upload()
+    try:
+        non_empty_processed.block_until_complete(interval=.01)
+    except Exception:
+        pass
+
+    return non_empty_processed
+
+
 @pytest.fixture
 def reset_config():
     """ Fixture that resets the log-level after test. """
-- 
GitLab