Skip to content
Snippets Groups Projects
Commit 190c31f8 authored by Markus Scheidgen
Browse files

Added command to clean publish raw .zip files. #590

parent d72f5f9c
No related branches found
No related tags found
3 merge requests: !530 Draft: Resolve "Workflow search", !443 Release v0.10.8, !402 Added command to clean publish raw .zip files. #590
Pipeline #111997 passed
......@@ -400,6 +400,71 @@ def re_pack(ctx, uploads, parallel: int):
wait_for_tasks=False)
@uploads.command(help='Prepares files for being used in the upcoming NOMAD v1.0.')
@click.argument('UPLOADS', nargs=-1)
@click.option('--dry', is_flag=True, help='Just check, do nothing.')
@click.pass_context
def prepare_migration(ctx, uploads, dry):
    '''
    Removes one of the raw files, either public or restricted depending on the embargo.
    Files that need to be removed are saved as `quarantined` in the upload folder.
    Only works on published uploads.

    Every selected upload is handled independently: an upload that is skipped
    (not published, inconsistent, or without entries) does not stop the
    remaining uploads from being processed.

    Arguments:
        ctx: The click context, passed on to ``query_uploads``.
        uploads: The upload selection given on the command line.
        dry: If true, only report what would be done; no file is deleted or moved.
    '''
    import os

    # A zip archive without any member consists solely of the 22 byte
    # "end of central directory" record.
    empty_zip_size = 22

    _, uploads = query_uploads(ctx, uploads)
    for upload in uploads:
        print(f'Preparing {upload.upload_id} for migration ...')

        if not upload.published:
            print(' upload is not published, nothing to do')
            continue

        # Determine which embargo values actually occur among the upload's entries.
        with_embargo_values: typing.List[bool] = []
        for with_embargo_value in [True, False]:
            search_request = search.SearchRequest().search_parameters(
                upload_id=upload.upload_id, with_embargo=with_embargo_value)
            if search_request.execute()['total'] > 0:
                with_embargo_values.append(with_embargo_value)

        if len(with_embargo_values) > 1:
            print(' !!! inconsistent upload !!!')
            continue

        if not with_embargo_values:
            # Without any entry in the search index the embargo cannot be
            # determined, so it is unknown which raw file is the obsolete one.
            print(' !!! upload has no entries, cannot determine embargo !!!')
            continue

        with_embargo = with_embargo_values[0]

        upload_files = files.PublicUploadFiles(
            upload.upload_id, is_authorized=lambda *args, **kwargs: True, create=False)

        # An embargoed upload keeps its restricted raw file, a public upload its
        # public one; the respective other file is the obsolete one.
        obsolete_access = 'public' if with_embargo else 'restricted'
        access = 'restricted' if with_embargo else 'public'
        to_move = upload_files._raw_file_object(obsolete_access)
        to_stay = upload_files._raw_file_object(access)

        if not to_move.exists():
            print(' obsolute raw.zip was already removed', upload.upload_id, to_move.os_path)

        elif to_stay.size < to_move.size:
            print(' !!! likely inconsistent pack !!!')

        elif to_move.size == empty_zip_size:
            if not dry:
                to_move.delete()
            print(' removed empty zip', upload.upload_id, to_move.os_path)

        elif with_embargo:
            print(' !!! embargo upload with non empty public file !!!')

        else:
            if not dry:
                target = upload_files._raw_file_object('quarantined')
                # Never overwrite a file that was quarantined by an earlier run.
                assert not target.exists()
                os.rename(to_move.os_path, target.os_path)
            print(' quarantined', upload.upload_id, to_move.os_path)
@uploads.command(help='Attempt to abort the processing of uploads.')
@click.argument('UPLOADS', nargs=-1)
@click.option('--calcs', is_flag=True, help='Only stop calculation processing.')
......
......@@ -17,13 +17,17 @@
# limitations under the License.
#
from typing import cast
import pytest
import click.testing
import json
import datetime
import time
import zipfile
import os.path
import re
from nomad import search, processing as proc, files
from nomad import search, processing as proc, files, config
from nomad.cli import cli
from nomad.cli.cli import POPO
from nomad.processing import Upload, Calc
......@@ -33,6 +37,7 @@ from tests.app.flask.test_app import BlueprintClient
from tests.app.flask.conftest import ( # pylint: disable=unused-import
test_user_bravado_client, client, session_client, admin_user_bravado_client) # pylint: disable=unused-import
from tests.app.conftest import test_user_auth, admin_user_auth # pylint: disable=unused-import
from tests.processing.test_data import run_processing
# TODO there is much more to test
......@@ -341,6 +346,84 @@ class TestAdminUploads:
else:
assert calc.tasks_status == expected_state
@pytest.mark.parametrize('entries,with_empty,results', [
    pytest.param(['embargo', 'embargo'], True, [r'removed empty zip.*public'], id='embargo'),
    pytest.param(['public', 'public'], True, [r'removed empty zip.*restricted'], id='public'),
    pytest.param(['embargo'], False, [r'non empty public file'], id='non-empty-public'),
    pytest.param(['embargo', 'embargo'], None, [r'was already removed.*public'], id='embargo-wo-empty'),
    pytest.param(['public', 'public'], None, [r'was already removed.*restricted'], id='public-wo-empty'),
    pytest.param(['public', 'embargo'], None, [r'inconsistent upload'], id='mixed'),
    pytest.param(['public', 'public', 'other'], True, [r'quarantined'], id='mixed-other'),
    pytest.param(['public', 'public', 'other-large'], True, [r'inconsistent pack'], id='mixed-other-large')
])
def test_prepare_migration(self, entries, results, with_empty, proc_infra, test_user):
    '''
    Runs the ``prepare-migration`` admin command on a freshly published upload
    and checks the command output as well as the presence of a quarantined file.

    Parameters (via parametrize):
        entries: One item per entry. ``'embargo'`` and ``'public'`` become
            mainfiles with the respective ``with_embargo`` value; ``'other'``
            and ``'other-large'`` become non mainfiles that are written to the
            restricted raw zip after publishing.
        with_empty: ``None`` leaves missing raw zips missing, ``True`` creates
            missing raw zips without members, ``False`` creates them with one
            small file.
        results: Regular expressions that all have to match the command output.
    '''
    other_kinds = ('other', 'other-large')

    # Assemble the upload archive: one mainfile per regular entry plus a
    # nomad.json that carries the embargo flag of each mainfile.
    upload_path = os.path.join(config.fs.tmp, 'upload.zip')
    entries_json = {}
    nomad_json = dict(entries=entries_json)
    with zipfile.ZipFile(upload_path, 'w') as upload_zip:
        for index, kind in enumerate(entries):
            if kind not in other_kinds:
                mainfile = f'{index}/archive.json'
                upload_zip.write('tests/data/parsers/archive.json', mainfile)
                entries_json[mainfile] = {'with_embargo': kind == 'embargo'}

        with upload_zip.open('nomad.json', 'w') as nomad_file:
            nomad_file.write(json.dumps(nomad_json).encode())

    # Process and publish the upload; errors while waiting are ignored on purpose.
    upload = run_processing(('test_upload_id', upload_path), test_user)
    upload.publish_upload()
    try:
        upload.block_until_complete(interval=.01)
    except Exception:
        pass

    public_upload_files = cast(files.PublicUploadFiles, upload.upload_files)

    # Replace the restricted raw zip with one that only holds the "other" files.
    has_other = any(kind in other_kinds for kind in entries)
    if has_other:
        restricted_zip = public_upload_files._raw_file_object('restricted')
        with zipfile.ZipFile(restricted_zip.os_path, 'w') as other_zip:
            for index, kind in enumerate(entries):
                if kind in other_kinds:
                    repeat = 256 if kind == 'other-large' else 1
                    with other_zip.open(f'{index}/not_nomad.txt', 'w') as other_file:
                        other_file.write(b'I am not a nomad mainfile.' * repeat)

    # Create the missing public or restricted raw zip. Processing is not doing
    # that anymore, but it did in the past.
    if with_empty is not None:
        for access in ['public', 'restricted']:
            raw_zip = public_upload_files._raw_file_object(access)
            if not raw_zip.exists():
                with zipfile.ZipFile(raw_zip.os_path, 'w') as created_zip:
                    if not with_empty:
                        with created_zip.open('file.txt', 'w') as content_file:
                            content_file.write(b'Content')

    # Run the command.
    exec_results = click.testing.CliRunner().invoke(
        cli, ['admin', 'uploads', 'prepare-migration', '--', 'test_upload_id'],
        catch_exceptions=False)
    output = exec_results.output

    # Every expected pattern has to show up in the output.
    for pattern in results:
        assert re.search(pattern, output) is not None, output

    # A quarantined file exists exactly when the command reported one.
    has_quarantined = re.search(r'quarantined', output) is not None
    quarantined_file = public_upload_files._raw_file_object('quarantined')
    assert quarantined_file.exists() == has_quarantined
@pytest.mark.usefixtures('reset_config')
class TestClient:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment