Commit 0ee08ff0 authored by Markus Scheidgen
Browse files

Merge branch 'reprocess-match' into 'v0.10.4'

Reprocess match

See merge request !334
parents e7ffd694 45f8fb9f
Pipeline #107205 canceled with stages
in 2 minutes and 35 seconds
......@@ -192,6 +192,49 @@ def ls(ctx, uploads, calculations, ids, json):
headers=headers))
# CLI command `edit` of the `uploads` command group.
#
# Overwrites the `metadata.published` and `metadata.with_embargo` attributes on
# all Calc documents of the selected uploads and then re-indexes the entries of
# each upload in search. As the help text states, only these attributes are
# edited; nothing else about the upload is changed by this command.
#
# Options / arguments:
#   --publish [with-embargo|no-embargo]  set published to True; with_embargo is
#                                        True only for 'with-embargo'
#   --unpublish                          set published to False and with_embargo
#                                        to False
#   UPLOADS                              zero or more values handed to
#                                        query_uploads to select the uploads
#
# --publish and --unpublish are mutually exclusive; giving both, or neither,
# prints a message and returns without touching any data.
@uploads.command(help=(
'Allows to edit metadata attribute of all entries in uploads. Be aware that this '
'only edits the attributes. E.g. if you set publish true, it won\'t publish the '
'upload, pack its files, change the upload metadata, etc.'))
@click.option(
'--publish', type=click.Choice(['with-embargo', 'no-embargo']),
help='Set the publish attribute true and change with_embargo attribute.')
@click.option(
'--unpublish', is_flag=True, help='Set the publish attribute to false.')
@click.argument('UPLOADS', nargs=-1)
@click.pass_context
def edit(ctx, uploads, publish: str, unpublish: bool):
# query_uploads returns a pair; only the second element (the selected uploads)
# is used here. It is iterated below and must support .count().
_, uploads = query_uploads(ctx, uploads)
# Reject contradictory options before building any update.
if publish and unpublish:
print('You can only publish or unpublish, not both.')
return
# Build the `$set` payload; keys use dotted paths into the `metadata` sub-document.
if publish:
update = {
'metadata.published': True,
'metadata.with_embargo': publish == 'with-embargo'}
elif unpublish:
# Unpublishing also clears the embargo flag.
update = {
'metadata.published': False,
'metadata.with_embargo': False}
else:
print('You have not give any attributes to edit.')
return
print('%d uploads selected, editing ...' % uploads.count())
for upload in uploads:
# Apply the same update to every Calc document that belongs to this upload.
proc.Calc._get_collection().update_many(
{'upload_id': upload.upload_id},
{'$set': update})
# Re-index the upload's entries so search reflects the edited attributes.
# The index refresh is deferred (do_refresh=False) and triggered explicitly
# by the search.refresh() call that follows.
with upload.entries_metadata() as calcs:
search.index_all(calcs, do_refresh=False)
# NOTE(review): confirm whether this refresh is meant to run once per upload
# or once after all uploads have been re-indexed.
search.refresh()
@uploads.command(help='Change the owner of the upload and all its calcs.')
@click.argument('USERNAME', nargs=1)
@click.argument('UPLOADS', nargs=-1)
......
......@@ -1061,6 +1061,10 @@ class Upload(Proc):
try:
if config.reprocess_match:
with utils.timer(logger, 'calcs match on reprocess'):
with_embargo_values = Calc._get_collection().distinct(
'metadata.with_embargo', dict(upload_id=self.upload_id))
with_embargo = with_embargo_values != [False]
for filename, parser in self.match_mainfiles():
calc_id = staging_upload_files.calc_id(filename)
try:
......@@ -1070,8 +1074,13 @@ class Upload(Proc):
calc_id=calc_id,
mainfile=filename, parser=parser.name,
worker_hostname=self.worker_hostname,
create_time=self.upload_time,
upload_id=self.upload_id)
calc.metadata.update(
published=self.published,
with_embargo=with_embargo)
calc.save()
with utils.timer(logger, 'calcs resetted'):
......@@ -1221,7 +1230,7 @@ class Upload(Proc):
stripped_f.write('Stripped POTCAR file. Checksum of original file (sha224): %s\n' % checksum)
os.system(
'''
awk < %s >> %s '
awk < '%s' >> '%s' '
BEGIN { dump=1 }
/End of Dataset/ { dump=1 }
dump==1 { print }
......
......@@ -1080,7 +1080,8 @@ def search(
if query is None:
query = {}
es_query = _api_to_es_query(query)
es_query &= _owner_es_query(owner=owner, user_id=user_id)
if owner is not None:
es_query &= _owner_es_query(owner=owner, user_id=user_id)
# pagination
if pagination is None:
......
......@@ -461,6 +461,7 @@ def test_re_process_match(non_empty_processed, published, monkeypatch, no_warn):
upload: Upload = non_empty_processed
if published:
upload.embargo_length = 0
upload.publish_upload()
try:
upload.block_until_complete(interval=.01)
......@@ -471,7 +472,14 @@ def test_re_process_match(non_empty_processed, published, monkeypatch, no_warn):
assert upload.total_calcs == 1, upload.total_calcs
monkeypatch.setattr('nomad.config.reprocess_match', True)
if not published:
if published:
import zipfile
upload_files = UploadFiles.get(upload.upload_id)
zip_path = upload_files._raw_file_object(access='public').os_path
with zipfile.ZipFile(zip_path, mode='a') as zf:
zf.write('tests/data/parsers/vasp/vasp.xml', 'vasp.xml')
else:
upload_files = UploadFiles.get(upload.upload_id).to_staging_upload_files()
upload_files.add_rawfiles('tests/data/parsers/vasp/vasp.xml')
......@@ -481,10 +489,10 @@ def test_re_process_match(non_empty_processed, published, monkeypatch, no_warn):
except Exception:
pass
if published:
assert upload.total_calcs == 1
else:
assert upload.total_calcs == 2
assert upload.total_calcs == 2
for calc in upload.calcs:
assert calc.metadata['published'] == published
assert not calc.metadata['with_embargo']
@pytest.mark.timeout(config.tests.default_timeout)
......
......@@ -270,6 +270,39 @@ class TestAdminUploads:
assert upload.user_id == test_user.user_id
assert calc.metadata['uploader'] == test_user.user_id
def test_edit(self, published):
    """Exercise ``admin uploads edit`` and verify the publish/embargo flags.

    After every CLI invocation the ``published`` and ``with_embargo`` values
    are checked both on the stored ``Calc`` objects of the upload and on the
    entries returned by ``search.search`` for the same upload.
    """
    upload_id = published.upload_id

    def assert_calcs(publish, with_embargo):
        # Stored Calc objects of the upload.
        for entry in Calc.objects(upload_id=upload_id):
            assert entry.metadata['published'] == publish
            assert entry.metadata['with_embargo'] == with_embargo

        # Entries as returned by search; owner=None is passed explicitly.
        hits = search.search(owner=None, query=dict(upload_id=upload_id)).data
        for hit in hits:
            assert hit['published'] == publish
            assert hit['with_embargo'] == with_embargo

    # State expected from the fixture before any edit is performed.
    assert_calcs(True, True)

    def perform_test(publish, with_embargo):
        if not publish:
            # Unpublishing always clears the embargo, so no other combination
            # can be requested through the CLI.
            assert not with_embargo
            params = ['--unpublish']
        else:
            embargo_choice = 'with-embargo' if with_embargo else 'no-embargo'
            params = ['--publish', embargo_choice]

        runner = click.testing.CliRunner()
        result = runner.invoke(
            cli, ['admin', 'uploads', 'edit'] + params, catch_exceptions=False)

        assert result.exit_code == 0
        assert 'editing' in result.stdout
        assert_calcs(publish, with_embargo)

    # Unpublish, publish without embargo, then publish with embargo.
    for publish, with_embargo in ((False, False), (True, False), (True, True)):
        perform_test(publish, with_embargo)
def test_reset(self, non_empty_processed):
upload_id = non_empty_processed.upload_id
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment