Commit 5a69c6ef authored by Markus Scheidgen

Added support for datasets to oasis uploads.

parent ccf8fcd4
Pipeline #89308 passed with stages in 24 minutes and 31 seconds
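In short: the metadata file that an oasis ships with a published upload now carries an `oasis_datasets` section, keyed by dataset name, next to the existing `entries` section; the receiving deployment creates or joins these datasets before parsing the entries. A minimal sketch of such a file, with values borrowed from the test fixture in this commit (the `upload_time` value is made up):

```python
oasis_metadata = {
    'upload_id': 'oasis_upload_id',
    'upload_time': '2021-01-01 00:00:00',  # made-up value
    'published': True,
    'entries': {
        'examples_template/template.json': {
            'calc_id': 'test_calc_id',
            'datasets': ['oasis_dataset_1', 'oasis_dataset_2'],
        },
    },
    'oasis_datasets': {
        'dataset_1_name': {
            'dataset_id': 'oasis_dataset_1',
            'user_id': '<uploading user id>',
            'name': 'dataset_1_name',
        },
    },
}
```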
@@ -558,6 +558,8 @@ class Calc(Proc):
metadata_part = self.upload.metadata_file_cached(
os.path.join(metadata_dir, metadata_file))
for key, val in metadata_part.items():
if key in ['entries', 'oasis_datasets']:
continue
metadata.setdefault(key, val)
if metadata_dir == self.upload_files.os_path:
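The hunk above keeps the new upload-level sections out of per-entry metadata: when upload metadata is merged into an entry via `setdefault`, the structural `entries` and `oasis_datasets` keys are skipped. A toy illustration of that merge (all values made up):

```python
metadata = {'comment': 'entry-level value wins'}
metadata_part = {'comment': 'upload-level value', 'entries': {}, 'oasis_datasets': {}}

for key, val in metadata_part.items():
    if key in ['entries', 'oasis_datasets']:
        continue  # structural sections of the metadata file, not entry metadata
    metadata.setdefault(key, val)

assert metadata == {'comment': 'entry-level value wins'}
```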
@@ -912,6 +914,7 @@ class Upload(Proc):
# compile oasis metadata for the upload
upload_metadata = dict(upload_time=str(self.upload_time))
upload_metadata_entries = {}
upload_metadata_datasets = {}
for calc in self.calcs:
entry_metadata = dict(**{
key: str(value) if isinstance(value, datetime) else value
@@ -921,7 +924,17 @@
if entry_metadata.get('with_embargo'):
continue
upload_metadata_entries[calc.mainfile] = entry_metadata
if 'datasets' in entry_metadata:
for dataset_id in entry_metadata['datasets']:
if dataset_id in upload_metadata_datasets:
continue
dataset = datamodel.Dataset.m_def.a_mongo.get(dataset_id=dataset_id)
upload_metadata_datasets[dataset_id] = dataset.m_to_dict()
upload_metadata['entries'] = upload_metadata_entries
upload_metadata['oasis_datasets'] = {
dataset['name']: dataset for dataset in upload_metadata_datasets.values()}
oasis_upload_id, upload_metadata = _normalize_oasis_upload_metadata(
self.upload_id, upload_metadata)
@@ -1043,6 +1056,39 @@
def process_upload(self):
''' A *process* that performs the initial upload processing. '''
self.extracting()
if self.from_oasis:
# we might need to add datasets from the oasis before processing and
# adding the entries
oasis_metadata_file = os.path.join(self.upload_files.os_path, 'raw', config.metadata_file_name + '.json')
with open(oasis_metadata_file, 'rt') as f:
oasis_metadata = json.load(f)
oasis_datasets = oasis_metadata.get('oasis_datasets', {})
metadata_was_changed = False
for oasis_dataset in oasis_datasets.values():
try:
existing_dataset = datamodel.Dataset.m_def.a_mongo.get(
user_id=self.user_id, name=oasis_dataset['name'])
except KeyError:
datamodel.Dataset(**oasis_dataset).a_mongo.save()
else:
oasis_dataset_id = oasis_dataset['dataset_id']
if existing_dataset.dataset_id != oasis_dataset_id:
# A dataset for the same user with the same name was created
# in both deployments. We consider this to be the "same" dataset.
# These datasets have different ids and we need to migrate the provided
# dataset ids:
for entry in oasis_metadata['entries'].values():
entry_datasets = entry.get('datasets', [])
for index, dataset_id in enumerate(entry_datasets):
if dataset_id == oasis_dataset_id:
entry_datasets[index] = existing_dataset.dataset_id
metadata_was_changed = True
if metadata_was_changed:
with open(oasis_metadata_file, 'wt') as f:
json.dump(oasis_metadata, f)
self.parse_all()
@task
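The dataset handling added to `process_upload` above is the core of the commit. Read as a self-contained sketch (plain dicts instead of the mongoengine-backed `Dataset` documents; function and parameter names hypothetical), it does this: create every shipped dataset that is unknown locally, and when a dataset with the same user and name already exists under a different id, rewrite the entry metadata to reference the existing id:

```python
def join_oasis_datasets(oasis_metadata, existing_by_user_and_name, save_dataset):
    '''Create unknown datasets; remap entry dataset ids onto existing ones.

    existing_by_user_and_name: (user_id, name) -> dataset dict of this deployment
    save_dataset: callable that persists a new dataset dict
    Returns True if the entry metadata was changed and needs to be re-written.
    '''
    changed = False
    for oasis_dataset in oasis_metadata.get('oasis_datasets', {}).values():
        key = (oasis_dataset['user_id'], oasis_dataset['name'])
        existing = existing_by_user_and_name.get(key)
        if existing is None:
            # dataset is new to this deployment
            save_dataset(oasis_dataset)
        elif existing['dataset_id'] != oasis_dataset['dataset_id']:
            # same user and name in both deployments: considered the "same"
            # dataset, so entries must point at the locally existing id
            for entry in oasis_metadata['entries'].values():
                datasets = entry.get('datasets', [])
                for index, dataset_id in enumerate(datasets):
                    if dataset_id == oasis_dataset['dataset_id']:
                        datasets[index] = existing['dataset_id']
                        changed = True
    return changed
```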
@@ -587,7 +587,7 @@ def non_empty_uploaded(non_empty_example_upload: str, raw_files) -> Tuple[str, s
@pytest.fixture(scope='function')
def oasis_example_upload(non_empty_example_upload: str, raw_files) -> str:
def oasis_example_upload(non_empty_example_upload: str, test_user, raw_files) -> str:
processing.Upload.metadata_file_cached.cache_clear()
uploaded_path = non_empty_example_upload
@@ -601,7 +601,20 @@ def oasis_example_upload(non_empty_example_upload: str, raw_files) -> str:
'published': True,
'entries': {
'examples_template/template.json': {
'calc_id': 'test_calc_id'
'calc_id': 'test_calc_id',
'datasets': ['oasis_dataset_1', 'oasis_dataset_2']
}
},
'oasis_datasets': {
'dataset_1_name': {
'dataset_id': 'oasis_dataset_1',
'user_id': test_user.user_id,
'name': 'dataset_1_name'
},
'dataset_2_name': {
'dataset_id': 'oasis_dataset_2',
'user_id': test_user.user_id,
'name': 'dataset_2_name'
}
}
}
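The fixture above, together with the `cn_dataset_2` dataset created at the start of `test_oasis_upload_processing` below, exercises both branches of the join: `oasis_dataset_1` is unknown and gets created as-is, while `oasis_dataset_2` collides by name (`dataset_2_name`) and its id is remapped. The resulting per-entry dataset list can be expressed as a one-line mapping (hypothetical helper, not part of the commit):

```python
def migrate_dataset_ids(entry_datasets, id_mapping):
    # id_mapping: oasis-provided dataset id -> id of the existing local dataset
    return [id_mapping.get(dataset_id, dataset_id) for dataset_id in entry_datasets]

# mirrors the assertion in test_oasis_upload_processing below
assert migrate_dataset_ids(
    ['oasis_dataset_1', 'oasis_dataset_2'],
    {'oasis_dataset_2': 'cn_dataset_2'}) == ['oasis_dataset_1', 'cn_dataset_2']
```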
@@ -23,7 +23,7 @@ import os.path
import re
import shutil
from nomad import utils, infrastructure, config
from nomad import utils, infrastructure, config, datamodel
from nomad.archive import read_partial_archive_from_mongo
from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles
from nomad.processing import Upload, Calc
@@ -208,6 +208,12 @@ def test_publish_failed(
def test_oasis_upload_processing(proc_infra, oasis_example_uploaded: Tuple[str, str], test_user, no_warn):
uploaded_id, uploaded_path = oasis_example_uploaded
# create a dataset to force dataset joining of one of the datasets in the example
# upload
datamodel.Dataset(
dataset_id='cn_dataset_2', name='dataset_2_name',
user_id=test_user.user_id).a_mongo.save()
upload = Upload.create(
upload_id=uploaded_id, user=test_user, upload_path=uploaded_path)
upload.from_oasis = True
@@ -227,6 +233,7 @@ def test_oasis_upload_processing(proc_infra, oasis_example_uploaded: Tuple[str,
calc = Calc.objects(upload_id='oasis_upload_id').first()
assert calc.calc_id == 'test_calc_id'
assert calc.metadata['published']
assert calc.metadata['datasets'] == ['oasis_dataset_1', 'cn_dataset_2']
@pytest.mark.timeout(config.tests.default_timeout)
@@ -238,9 +245,17 @@ def test_publish_from_oasis(
upload.publish_upload()
upload.block_until_complete(interval=.01)
# create a dataset to also test this aspect of oasis uploads
calc = Calc.objects(upload_id=upload.upload_id).first()
datamodel.Dataset(
dataset_id='dataset_id', name='dataset_name',
user_id=other_test_user.user_id).a_mongo.save()
calc.metadata['datasets'] = ['dataset_id']
calc.save()
cn_upload_id = 'cn_' + upload.upload_id
# We need to alter the ids, because we this by uploading to the same NOMAD
# We need to alter the ids, because we do this test by uploading to the same NOMAD
def normalize_oasis_upload_metadata(upload_id, metadata):
for entry in metadata['entries'].values():
entry['calc_id'] = utils.create_uuid()
@@ -271,6 +286,10 @@ def test_publish_from_oasis(
assert cn_upload.from_oasis
assert cn_upload.oasis_deployment_id == config.meta.deployment_id
assert upload.published_to[0] == config.oasis.central_nomad_deployment_id
cn_calc = Calc.objects(upload_id=cn_upload_id).first()
assert cn_calc.calc_id != calc.calc_id
assert cn_calc.metadata['datasets'] == ['dataset_id']
assert datamodel.Dataset.m_def.a_mongo.objects().count() == 1
@pytest.mark.timeout(config.tests.default_timeout)