Commit e721afd1 authored by Markus Scheidgen

Mirror functions support datasets and DOIs.

parent 71ec4508
Pipeline #69263 failed with stages in 13 minutes and 43 seconds
@@ -20,6 +20,8 @@ from flask import request
from flask_restplus import Resource, abort, fields
from nomad import processing as proc
from nomad.datamodel import Dataset
from nomad.doi import DOI
from .api import api
from .auth import authenticate
@@ -32,6 +34,8 @@ mirror_upload_model = api.model('MirrorUpload', {
    'upload_id': fields.String(description='The id of the exported upload'),
    'upload': fields.String(description='The upload metadata as a mongoengine JSON string'),
    'calcs': fields.List(fields.Raw, description='All upload calculation metadata as mongo SON'),
    'datasets': fields.Raw(description='All upload datasets as a dict (id -> mongo SON)'),
    'dois': fields.Raw(description='All upload DOIs as a dict (id -> mongo SON)'),
    'upload_files_path': fields.String(description='The path to the local upload files folder')
})
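For illustration, the extended payload now bundles everything a mirror target needs in one response. A minimal sketch with invented ids and paths (not taken from the commit):

```python
# Hypothetical MirrorUpload payload; all ids and paths are invented.
example_payload = {
    'upload_id': 'upload-1',
    'upload': '{...}',  # the Upload document as a mongoengine JSON string
    'calcs': [{'_id': 'calc-1', 'metadata': {'datasets': ['ds-1']}}],
    'datasets': {'ds-1': {'_id': 'ds-1', 'doi': 'doi-1'}},  # id -> mongo SON
    'dois': {'doi-1': {'_id': 'doi-1'}},  # id -> mongo SON
    'upload_files_path': '/fs/public/upload-1',
}
```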
@@ -40,6 +44,8 @@ mirror_query_model = api.model('MirrorQuery', {
        description='Mongoengine query that is used to search for uploads to mirror.')
})
_Dataset = Dataset.m_def.m_x('me').me_cls
@ns.route('/')
class MirrorUploadsResource(Resource):
@@ -87,9 +93,23 @@ class MirrorUploadResource(Resource):
        if upload.tasks_running or upload.process_running:
            abort(400, message='Only non-processing uploads can be exported')

        calcs = []
        datasets = {}
        dois = {}
        for calc in proc.Calc._get_collection().find(dict(upload_id=upload_id)):
            calcs.append(calc)
            for dataset in calc['metadata'].get('datasets', []):
                if dataset not in datasets:
                    datasets[dataset] = _Dataset._get_collection().find_one(dict(_id=dataset))
                    doi = datasets[dataset].get('doi', None)
                    if doi is not None:
                        dois[doi] = DOI._get_collection().find_one(dict(_id=doi))

        return {
            'upload_id': upload_id,
            'upload': upload.to_json(),
-           'calcs': [calc for calc in proc.Calc._get_collection().find(dict(upload_id=upload_id))],
+           'calcs': calcs,
            'datasets': datasets,
            'dois': dois,
            'upload_files_path': upload.upload_files.os_path
        }, 200
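A client-side sketch of consuming this endpoint; the base URL, token, and upload id are assumptions for illustration, since the commit itself only changes the server side:

```python
import requests

base_url = 'http://localhost/api'  # hypothetical deployment
headers = {'Authorization': 'Bearer <admin-token>'}  # placeholder token

rv = requests.get('%s/mirror/some_upload_id' % base_url, headers=headers)
data = rv.json()

# datasets and DOIs now arrive alongside the calcs, keyed by their mongo ids
for dataset_id, dataset in data['datasets'].items():
    doi = dataset.get('doi')
    if doi is not None:
        print(dataset_id, '->', data['dois'][doi])
```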
@@ -23,6 +23,7 @@ import datetime
from nomad import utils, processing as proc, search, config, files, infrastructure
from nomad.datamodel import Dataset, User
from nomad.doi import DOI
from nomad.cli.admin.uploads import delete_upload
from .client import client
@@ -31,7 +32,7 @@ from .client import client

__in_test = False
""" Will be monkeypatched by tests to alter behavior for testing. """

-__Dataset = Dataset.m_def.m_x('me').me_cls
+_Dataset = Dataset.m_def.m_x('me').me_cls

__logger = utils.get_logger(__name__)
@@ -42,46 +43,44 @@ def fix_time(data, keys):
            data[key] = datetime.datetime.utcfromtimestamp(time)


def transform_user_id(source_user_id):
    target_user = User.repo_users().get(str(source_user_id))
    if target_user is None:
        __logger.error('user does not exist in target', source_user_id=source_user_id)
        raise KeyError

    return target_user.user_id


def transform_dataset(source_dataset):
    pid = str(source_dataset['id'])
    target_dataset = _Dataset.objects(pid=pid).first()
    if target_dataset is not None:
        return target_dataset.dataset_id

    target_dataset = _Dataset(
        dataset_id=utils.create_uuid(),
        pid=pid,
        name=source_dataset['name'])

    if 'doi' in source_dataset and source_dataset['doi'] is not None:
        source_doi = source_dataset['doi']

        if isinstance(source_doi, dict):
            source_doi = source_doi['value']

        if source_doi is not None:
            target_dataset.doi = source_doi.replace('http://dx.doi.org/', '')

    target_dataset.save()

    return target_dataset.dataset_id


def transform_reference(reference):
    return reference['value']


def v0Dot6(upload_data):
    """ Transforms v0.6.x upload data into v0.7.x upload data in place. """
    upload = json.loads(upload_data.upload)
    upload['user_id'] = transform_user_id(upload['user_id'])
    upload_data.upload = json.dumps(upload)
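To make the v0.6.x quirks concrete: the `doi` field of a source dataset can be either a plain string or a `{'value': ...}` dict, and DOIs were stored as resolver URLs. A hypothetical input (values invented for illustration) and the resulting transformation:

```python
# Hypothetical v0.6.x dataset record as it might appear in mirrored data.
source_dataset = {
    'id': 42,
    'name': 'my dataset',
    'doi': {'value': 'http://dx.doi.org/10.17172/NOMAD/2019.01.01-1'},
}

# transform_dataset(source_dataset) would create a target dataset with a
# fresh uuid, pid '42', and the DOI normalized to '10.17172/NOMAD/2019.01.01-1'.
```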
@@ -248,6 +247,8 @@ def mirror(
            # In tests, we mirror from ourselves; remove the data so it is not
            # already there for the import
            proc.Calc.objects(upload_id=upload_id).delete()
            proc.Upload.objects(upload_id=upload_id).delete()
            _Dataset.objects().delete()
            DOI.objects().delete()
            search.delete_upload(upload_id)
        else:
            n_calcs = 0
@@ -300,6 +301,12 @@ def mirror(
        if not files_only:
            # create the mongo documents
            upload = proc.Upload.from_json(upload_data.upload, created=True).save()

            for dataset in upload_data.datasets.values():
                fix_time(dataset, ['created'])
                _Dataset._get_collection().insert(dataset)

            for doi in upload_data.dois.values():
                fix_time(doi, ['create_time'])
                DOI._get_collection().insert(doi)

            for calc in upload_data.calcs:
                fix_time(calc, ['create_time', 'complete_time'])
                fix_time(calc['metadata'], ['upload_time', 'last_processing'])
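The mirrored SON carries timestamps as epoch seconds; `fix_time` (its conversion line is visible in an earlier hunk) turns the listed keys back into `datetime` objects before the insert. A sketch of the idea, reconstructed under that assumption; the actual body may differ:

```python
import datetime

def fix_time(data, keys):
    # convert epoch-second floats back to datetime for the given keys
    for key in keys:
        time = data.get(key)
        if time is not None:
            data[key] = datetime.datetime.utcfromtimestamp(time)
```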
......
@@ -1624,6 +1624,36 @@ class TestMirror:
        data = json.loads(rv.data)
        assert data[0]['upload_id'] == published.upload_id

    def test_dataset(self, api, published_wo_user_metadata, admin_user_auth, test_user_auth):
        rv = api.post(
            '/repo/edit', headers=test_user_auth, content_type='application/json',
            data=json.dumps({
                'actions': {
                    'datasets': [{
                        'value': 'test_dataset'
                    }]
                }
            }))
        assert rv.status_code == 200

        rv = api.post('/datasets/test_dataset', headers=test_user_auth)
        assert rv.status_code == 200

        rv = api.post(
            '/mirror/',
            content_type='application/json', data='{"query":{}}', headers=admin_user_auth)
        assert rv.status_code == 200, rv.data

        url = '/mirror/%s' % published_wo_user_metadata.upload_id
        rv = api.get(url, headers=admin_user_auth)
        assert rv.status_code == 200

        data = json.loads(rv.data)
        assert len(data['datasets']) == 1
        dataset = data['calcs'][0]['metadata']['datasets'][0]
        assert dataset in data['datasets']
        assert data['datasets'][dataset]['doi'] is not None
        assert data['datasets'][dataset]['doi'] in data['dois']


class TestDataset:
......
@@ -22,6 +22,8 @@ from nomad import utils, search, processing as proc, files
from nomad.cli import cli
from nomad.processing import Upload, Calc
from tests.app.test_app import BlueprintClient
# TODO there is much more to test
@@ -322,6 +324,35 @@ class TestClient:
        assert published.upload_files.exists

    def test_mirror_datasets(self, client, published_wo_user_metadata, test_user_auth, admin_user_bravado_client, monkeypatch):
        # use the API to create a dataset and DOI
        api = BlueprintClient(client, '/api')
        rv = api.post(
            '/repo/edit', headers=test_user_auth, content_type='application/json',
            data=json.dumps({
                'actions': {
                    'datasets': [{
                        'value': 'test_dataset'
                    }]
                }
            }))
        assert rv.status_code == 200

        rv = api.post('/datasets/test_dataset', headers=test_user_auth)
        assert rv.status_code == 200

        # perform the mirror
        monkeypatch.setattr('nomad.cli.client.mirror.__in_test', True)
        result = click.testing.CliRunner().invoke(
            cli, ['client', 'mirror'], catch_exceptions=False, obj=utils.POPO())
        assert result.exit_code == 0, result.output
        assert published_wo_user_metadata.upload_id in result.output
        assert published_wo_user_metadata.upload_files.os_path in result.output
        assert published_wo_user_metadata.upload_files.exists

    def test_statistics(self, client, proc_infra, admin_user_bravado_client):
        result = click.testing.CliRunner().invoke(
......