Commit f834c633 authored by Markus Scheidgen
Browse files

Merge branch 'refactor' of gitlab.mpcdf.mpg.de:nomad-lab/nomad-FAIR into refactor

parents c1290ac1 ddcfd699
Pipeline #52030 passed with stages
in 19 minutes and 40 seconds
......@@ -74,8 +74,8 @@ def chown(ctx, user, uploads):
print('%d uploads selected, changing its owner ...' % uploads.count())
user_id = str(user)
user = coe_repo.User.from_user_id(user_id)
user_id = user
user = coe_repo.User.from_user_id(int(user_id))
for upload in uploads:
upload.user_id = user_id
......
......@@ -3,11 +3,14 @@ import json
from nomad import infrastructure
from nomad import processing
infrastructure.setup_logging()
calcs = infrastructure.setup_mongo().fairdi_nomad_migration.calc
uploads = infrastructure.setup_mongo().fairdi_nomad_migration.upload
packages = infrastructure.setup_mongo().coe_migration.package
def retrieve_remote_data():
infrastructure.setup_logging()
calcs = infrastructure.setup_mongo().fairdi_nomad_migration.calc
def retrieve_remote_data():
count = 0
pid_dict = {}
......@@ -52,6 +55,7 @@ def remove_upload(upload):
def calc_dups():
upload_dict = {}
for _, uploads in pid_dict.items():
uploads = list(set(uploads))
for upload in uploads:
dup, single = upload_dict.get(upload, (0, 0))
if len(uploads) >= 2:
......@@ -67,7 +71,7 @@ more = False
while True:
upload_dict = calc_dups()
for upload, (dup, single) in upload_dict.items():
if singe == 0:
if single == 0:
print('full: ' + upload)
remove_upload(upload)
more = True
......@@ -75,5 +79,10 @@ while True:
if not more:
for upload, (dup, single) in upload_dict.items():
print('partial: %s (%d vs %d)' % (upload, dup, single))
if dup > 0:
package_id = uploads.find_one({'_id': upload})['name']
pkg = packages.find_one({'_id': package_id})
source_upload_id = pkg['upload_id']
pkg_path = pkg['package_path']
print('%s, %s, %s, %s (%d vs %d)' % (source_upload_id, package_id, pkg_path, upload, dup, single))
break
......@@ -11,24 +11,17 @@ infrastructure.setup_elastic()
def check_and_fix(upload):
example = calcs.find_one({'upload_id': upload, 'metadata.pid': {'$exists': True}})
if example is None:
# can happen on multi package uploads
return
pid = example['metadata']['pid']
truth = index.find_one({'_id': pid})
if truth['metadata']['with_embargo'] != example['metadata']['with_embargo']:
print('need to fix %s' % upload)
calcs.update_many(
{'upload_id': upload, 'metadata.with_embargo': True},
{'$set': {'changed': True, 'metadata.with_embargo': False}})
calcs.update_many(
{'upload_id': upload, 'metadata.with_embargo': False, 'changed': True},
{'$unset': {'changed': 1}, '$set': {'metadata.with_embargo': True}})
u = uploads_col.find_one({'_id': upload})
print('need to fix from user %d, %s package id %s' % (example['metadata']['uploader']['id'], upload, u['name']))
upload_proc = processing.Upload.get_by_id(upload)
upload_with_metadata = upload_proc.to_upload_with_metadata(upload_proc.metadata)
calcs_with_metadata = upload_with_metadata.calcs
search.publish(calcs_with_metadata)
for upload in calcs.distinct('upload_id'):
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment