Commit 6704581c authored by David Sikter

Adding options upload-update and entry-update

parent 9a78848f
Pipeline #115582 passed with stages in 29 minutes and 19 seconds
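The new options take their values as raw JSON strings on the command line and apply them as top-level dict updates to every migrated upload and entry. A minimal usage sketch, assuming the command is exposed as `migrate-mongo` under the `upgrade` click group shown in the diff (the exact invocation path and the field values are assumptions for illustration):

```python
# Hypothetical invocation (command path assumed, adjust to the real CLI):
#
#   nomad admin upgrade migrate-mongo \
#       --upload-update '{"license": "CC BY 4.0"}' \
#       --entry-update '{"nomad_version": "1.1.0"}'
#
# Both values must be JSON objects; the command parses them with json.loads
# and merges them into each record with dict.update (a shallow merge).
import json

upload_update = json.loads('{"license": "CC BY 4.0"}')
upload_dict = {'_id': 'some_upload', 'license': 'Unspecified'}
upload_dict.update(upload_update)
assert upload_dict['license'] == 'CC BY 4.0'
```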
@@ -346,6 +346,12 @@ def upgrade():
         This can for example be used to subsequently retry just the uploads that failed
         (as these ids can be loaded from file using --ids-from-file). You can specify both
         --ids-from-file and --failed-ids-to-file at the same time with the same file name.''')
+@click.option(
+    '--upload-update', type=str,
+    help='json with updates to apply to all converted uploads')
+@click.option(
+    '--entry-update', type=str,
+    help='json with updates to apply to all converted entries')
 @click.option(
     '--fix-problems', is_flag=True,
     help='''If a minor, fixable problem is encountered, fixes it automatically; otherwise fails.''')
@@ -354,7 +360,7 @@ def upgrade():
     help='Dry run (not writing anything to the destination database).')
 def migrate_mongo(
         host, port, src_db_name, dst_db_name, query, ids_from_file, failed_ids_to_file,
-        fix_problems, dry):
+        upload_update, entry_update, fix_problems, dry):
     import json
     from pymongo.database import Database
     from nomad import utils, infrastructure
@@ -386,8 +392,13 @@ def migrate_mongo(
     elif query:
         query = json.loads(query)
+    if upload_update:
+        upload_update = json.loads(upload_update)
+    if entry_update:
+        entry_update = json.loads(entry_update)
     logger.info('Querying uploads...')
     uploads = db_src.upload.find(query)
     migrate_mongo_uploads(
-        db_src, db_dst, uploads, failed_ids_to_file, fix_problems, dry, logger)
+        db_src, db_dst, uploads, failed_ids_to_file, upload_update, entry_update,
+        fix_problems, dry, logger)
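Note that both options are declared as plain strings, so malformed JSON is only caught here by `json.loads`, which raises a `json.JSONDecodeError` rather than a click usage error. A small illustration of that failure mode:

```python
import json

try:
    # Single quotes are not valid JSON, so this mimics a bad --upload-update value:
    json.loads("{'license': 'CC BY 4.0'}")
except json.JSONDecodeError as exc:
    print(f'invalid --upload-update value: {exc}')
```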
@@ -71,6 +71,7 @@ def create_collections_if_needed(db_dst: Database):
 def migrate_mongo_uploads(
         db_src: Database, db_dst: Database, uploads: Cursor, failed_ids_to_file: bool,
+        upload_update: Dict[str, Any], entry_update: Dict[str, Any],
         fix_problems: bool, dry: bool, logger):
     ''' Converts and/or migrates an upload and all related entries and datasets. '''
     number_of_uploads = uploads.count()
@@ -92,7 +93,8 @@ def migrate_mongo_uploads(
                 failed_and_skipped.append(upload_id)
             else:
                 entry_dicts, dataset_dicts, doi_dicts = _convert_mongo_upload(
-                    db_src, upload_dict, fix_problems, dataset_cache, stats, logger)
+                    db_src, upload_dict, upload_update, entry_update, fix_problems,
+                    dataset_cache, stats, logger)
                 if not dry:
                     _commit_upload(upload_dict, entry_dicts, dataset_dicts, doi_dicts, db_dst, stats)
                 del entry_dicts, dataset_dicts  # To free up memory immediately
@@ -138,7 +140,8 @@ def migrate_mongo_uploads(
 def _convert_mongo_upload(
-        db_src: Database, upload_dict: Dict[str, Any], fix_problems: bool,
+        db_src: Database, upload_dict: Dict[str, Any],
+        upload_update: Dict[str, Any], entry_update: Dict[str, Any], fix_problems: bool,
         dataset_cache: Dict[str, _DatasetCacheItem], stats: _UpgradeStatistics, logger):
     '''
     Converts (upgrades) an upload_dict and all related records. If successful,
@@ -230,6 +233,9 @@ def _convert_mongo_upload(
     for field in ('_id', 'upload_create_time', 'main_author', 'embargo_length', 'license'):
         assert upload_dict.get(field) is not None, f'Missing required upload field: {field}'
+    if upload_update:
+        upload_dict.update(upload_update)
+
     # migrate entries
     newly_encountered_dataset_ids: Set[str] = set()
     for entry_dict in entry_dicts:
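Since `dict.update` performs a shallow merge, a key given in --upload-update replaces the entire top-level value, including any nested document; it does not deep-merge. A quick illustration (field names are made up for the example):

```python
upload_dict = {'metadata': {'comment': 'old', 'references': ['a']}}
upload_dict.update({'metadata': {'comment': 'new'}})
# The nested dict is replaced wholesale; 'references' is gone:
assert upload_dict == {'metadata': {'comment': 'new'}}
```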
@@ -268,6 +274,9 @@ def _convert_mongo_upload(
                 converted_datasets.append(dataset_id)
             entry_dict['datasets'] = converted_datasets
+        if entry_update:
+            entry_dict.update(entry_update)
+
     # All conversion successful! Ready to migrate
     dataset_dicts: List[Dict[str, Any]] = []
     doi_dicts: List[Dict[str, Any]] = []
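Also worth noting from the ordering above: the entry update is applied after the datasets are converted, so an --entry-update containing a 'datasets' key overrides the freshly converted value. A sketch of that behavior (ids are made up):

```python
entry_dict = {'_id': 'entry_1'}
entry_dict['datasets'] = ['converted-dataset-id']  # set by the migration
entry_dict.update({'datasets': []})                # entry_update applied afterwards wins
assert entry_dict['datasets'] == []
```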