diff --git a/nomad/app/api/mirror.py b/nomad/app/api/mirror.py index 6fbb737b79002b696770ea10ba5f7b9415b2dc05..23d9c2104dc20d00b44b70e03d233f5111cb1295 100644 --- a/nomad/app/api/mirror.py +++ b/nomad/app/api/mirror.py @@ -20,6 +20,8 @@ from flask import request from flask_restplus import Resource, abort, fields from nomad import processing as proc +from nomad.datamodel import Dataset +from nomad.doi import DOI from .api import api from .auth import authenticate @@ -32,6 +34,8 @@ mirror_upload_model = api.model('MirrorUpload', { 'upload_id': fields.String(description='The id of the exported upload'), 'upload': fields.String(description='The upload metadata as mongoengine json string'), 'calcs': fields.List(fields.Raw, description='All upload calculation metadata as mongo SON'), + 'datasets': fields.Raw(description='All upload datasets as dict id->mongo SON'), + 'dois': fields.Raw(description='All upload dois as dict id->mongo SON'), 'upload_files_path': fields.String(description='The path to the local uploads file folder') }) @@ -40,6 +44,8 @@ mirror_query_model = api.model('MirrorQuery', { description='Mongoengine query that is used to search for uploads to mirror.') }) +_Dataset = Dataset.m_def.m_x('me').me_cls + @ns.route('/') class MirrorUploadsResource(Resource): @@ -87,9 +93,23 @@ class MirrorUploadResource(Resource): if upload.tasks_running or upload.process_running: abort(400, message='Only non processing uploads can be exported') + calcs = [] + datasets = {} + dois = {} + for calc in proc.Calc._get_collection().find(dict(upload_id=upload_id)): + calcs.append(calc) + for dataset in calc['metadata'].get('datasets', []): + if dataset not in datasets: + datasets[dataset] = _Dataset._get_collection().find_one(dict(_id=dataset)) + doi = datasets[dataset].get('doi', None) + if doi is not None: + dois[doi] = DOI._get_collection().find_one(dict(_id=doi)) + return { 'upload_id': upload_id, 'upload': upload.to_json(), - 'calcs': [calc for calc in proc.Calc._get_collection().find(dict(upload_id=upload_id))], + 'calcs': calcs, + 'datasets': datasets, + 'dois': dois, 'upload_files_path': upload.upload_files.os_path }, 200 diff --git a/nomad/cli/client/mirror.py b/nomad/cli/client/mirror.py index 2878eb9de1b7e4ad98f46e844df1a2cafc109787..daa8d6db2cc3ec67da00ef207bb86fa5a5fad142 100644 --- a/nomad/cli/client/mirror.py +++ b/nomad/cli/client/mirror.py @@ -23,6 +23,7 @@ import datetime from nomad import utils, processing as proc, search, config, files, infrastructure from nomad.datamodel import Dataset, User +from nomad.doi import DOI from nomad.cli.admin.uploads import delete_upload from .client import client @@ -31,7 +32,7 @@ from .client import client __in_test = False """ Will be monkeypatched by tests to alter behavior for testing. """ -__Dataset = Dataset.m_def.m_x('me').me_cls +_Dataset = Dataset.m_def.m_x('me').me_cls __logger = utils.get_logger(__name__) @@ -42,46 +43,44 @@ def fix_time(data, keys): data[key] = datetime.datetime.utcfromtimestamp(time) -def tarnsform_user_id(source_user_id): - target_user = User.repo_users().get(str(source_user_id)) - if target_user is None: - __logger.error('user does not exist in target', source_user_id=source_user_id) - raise KeyError - - return target_user.user_id - +def v0Dot6(upload_data): + """ Inplace transforms v0.6.x upload data into v0.7.x upload data. """ -def transform_dataset(source_dataset): - pid = str(source_dataset['id']) - target_dataset = __Dataset.objects(pid=pid).first() - if target_dataset is not None: - return target_dataset.dataset_id + def tarnsform_user_id(source_user_id): + target_user = User.repo_users().get(str(source_user_id)) + if target_user is None: + __logger.error('user does not exist in target', source_user_id=source_user_id) + raise KeyError - target_dataset = __Dataset( - dataset_id=utils.create_uuid(), - pid=pid, - name=source_dataset['name']) + return target_user.user_id - if 'doi' in source_dataset and source_dataset['doi'] is not None: - source_doi = source_dataset['doi'] + def transform_dataset(source_dataset): + pid = str(source_dataset['id']) + target_dataset = _Dataset.objects(pid=pid).first() + if target_dataset is not None: + return target_dataset.dataset_id - if isinstance(source_doi, dict): - source_doi = source_doi['value'] + target_dataset = _Dataset( + dataset_id=utils.create_uuid(), + pid=pid, + name=source_dataset['name']) - if source_doi is not None: - target_dataset.doi = source_doi.replace('http://dx.doi.org/', '') + if 'doi' in source_dataset and source_dataset['doi'] is not None: + source_doi = source_dataset['doi'] - target_dataset.save() + if isinstance(source_doi, dict): + source_doi = source_doi['value'] - return target_dataset.dataset_id + if source_doi is not None: + target_dataset.doi = source_doi.replace('http://dx.doi.org/', '') + target_dataset.save() -def transform_reference(reference): - return reference['value'] + return target_dataset.dataset_id + def transform_reference(reference): + return reference['value'] -def v0Dot6(upload_data): - """ Inplace transforms v0.6.x upload data into v0.7.x upload data. """ upload = json.loads(upload_data.upload) upload['user_id'] = tarnsform_user_id(upload['user_id']) upload_data.upload = json.dumps(upload) @@ -248,6 +247,8 @@ def mirror( # In tests, we mirror from our selves, remove it so it is not there for import proc.Calc.objects(upload_id=upload_id).delete() proc.Upload.objects(upload_id=upload_id).delete() + _Dataset.objects().delete() + DOI.objects().delete() search.delete_upload(upload_id) else: n_calcs = 0 @@ -300,6 +301,12 @@ def mirror( if not files_only: # create mongo upload = proc.Upload.from_json(upload_data.upload, created=True).save() + for dataset in upload_data.datasets.values(): + fix_time(dataset, ['created']) + _Dataset._get_collection().insert(dataset) + for doi in upload_data.dois.values(): + fix_time(doi, ['create_time']) + DOI._get_collection().insert(doi) for calc in upload_data.calcs: fix_time(calc, ['create_time', 'complete_time']) fix_time(calc['metadata'], ['upload_time', 'last_processing']) diff --git a/tests/app/test_api.py b/tests/app/test_api.py index 4ef6deb3acc34f19045ff42add31ac14b352b489..b1918f1a8d9ebfbfd8922bc9e293981331935805 100644 --- a/tests/app/test_api.py +++ b/tests/app/test_api.py @@ -1624,6 +1624,36 @@ class TestMirror: data = json.loads(rv.data) assert data[0]['upload_id'] == published.upload_id + def test_dataset(self, api, published_wo_user_metadata, admin_user_auth, test_user_auth): + rv = api.post( + '/repo/edit', headers=test_user_auth, content_type='application/json', + data=json.dumps({ + 'actions': { + 'datasets': [{ + 'value': 'test_dataset' + }] + } + })) + assert rv.status_code == 200 + + rv = api.post('/datasets/test_dataset', headers=test_user_auth) + assert rv.status_code == 200 + + rv = api.post( + '/mirror/', + content_type='application/json', data='{"query":{}}', headers=admin_user_auth) + assert rv.status_code == 200, rv.data + + url = '/mirror/%s' % published_wo_user_metadata.upload_id + rv = api.get(url, headers=admin_user_auth) + assert rv.status_code == 200 + data = json.loads(rv.data) + assert len(data['datasets']) == 1 + dataset = data['calcs'][0]['metadata']['datasets'][0] + assert dataset in data['datasets'] + assert data['datasets'][dataset]['doi'] is not None + assert data['datasets'][dataset]['doi'] in data['dois'] + class TestDataset: diff --git a/tests/test_cli.py b/tests/test_cli.py index dd083ecd3a611f22005318c086c5219038534a04..f5b530314421eee957e714acdba0fad98f1aba4d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -22,6 +22,8 @@ from nomad import utils, search, processing as proc, files from nomad.cli import cli from nomad.processing import Upload, Calc +from tests.app.test_app import BlueprintClient + # TODO there is much more to test @@ -322,6 +324,35 @@ class TestClient: published.upload_files.exists + def test_mirror_datasets(self, client, published_wo_user_metadata, test_user_auth, admin_user_bravado_client, monkeypatch): + # use the API to create dataset and DOI + api = BlueprintClient(client, '/api') + rv = api.post( + '/repo/edit', headers=test_user_auth, content_type='application/json', + data=json.dumps({ + 'actions': { + 'datasets': [{ + 'value': 'test_dataset' + }] + } + })) + assert rv.status_code == 200 + + rv = api.post('/datasets/test_dataset', headers=test_user_auth) + assert rv.status_code == 200 + + # perform the mirror + monkeypatch.setattr('nomad.cli.client.mirror.__in_test', True) + + result = click.testing.CliRunner().invoke( + cli, ['client', 'mirror'], catch_exceptions=False, obj=utils.POPO()) + + assert result.exit_code == 0, result.output + assert published_wo_user_metadata.upload_id in result.output + assert published_wo_user_metadata.upload_files.os_path in result.output + + published_wo_user_metadata.upload_files.exists + def test_statistics(self, client, proc_infra, admin_user_bravado_client): result = click.testing.CliRunner().invoke(