Commit 8f4dcc50 authored by Markus Scheidgen
Browse files

Add cli command to quarantine certain raw files. #590

parent 060a622a
Pipeline #112453 passed with stages
in 27 minutes and 24 seconds
......@@ -111,7 +111,7 @@ RUN rm -f /encyclopedia/.babelrc
# Third, create a slim final image
FROM final
RUN apt-get update && apt-get install -y --no-install-recommends libgomp1 && apt-get install -y libmagic-dev curl vim
RUN apt-get update && apt-get install -y --no-install-recommends libgomp1 && apt-get install -y libmagic-dev curl vim zip unzip
# copy the sources for tests, coverage, qa, etc.
COPY . /app
......
......@@ -16,6 +16,7 @@
# limitations under the License.
#
from subprocess import PIPE
import typing
import click
import tabulate
......@@ -419,7 +420,7 @@ def prepare_migration(ctx, uploads, dry):
if not upload.published:
print(' upload is not published, nothing to do')
return
break
with_embargo_values: typing.List[bool] = []
for with_embargo_value in [True, False]:
......@@ -452,7 +453,6 @@ def prepare_migration(ctx, uploads, dry):
if not dry:
to_move.delete()
print(' removed empty zip', upload.upload_id, to_move.os_path)
return
elif with_embargo:
print(' !!! embargo upload with non empty public file !!!')
......@@ -465,6 +465,50 @@ def prepare_migration(ctx, uploads, dry):
print(' quarantined', upload.upload_id, to_move.os_path)
@uploads.command(help='Moves certain files from public or restricted to quarantine in published uploads.')
@click.argument('UPLOADS', nargs=-1)
@click.option(
    '--file-pattern', type=str, multiple=True,
    help=('The files as .zip patterns, e.g. "*/POTCAR". Default is all possible POTCAR patterns.'))
@click.option('--dry', is_flag=True, help='Just check, do nothing.')
@click.pass_context
def quarantine_raw_files(ctx, uploads, dry, file_pattern):
    """Run the quarantine shell script inside the directory of each selected upload.

    For every upload returned by ``query_uploads`` that is published, the script
    ``ops/scripts/quarantine-raw-files.sh`` (resolved relative to the directory
    the CLI was started from) is executed with the file patterns as arguments
    and the upload's public files directory as its working directory.

    Args:
        ctx: The click context, passed through to ``query_uploads``.
        uploads: The upload selection given on the command line.
        dry: If set, only report what would be processed; the script is not run.
        file_pattern: Zip member patterns to quarantine. If empty, a default
            set of POTCAR patterns is used.

    Failures of the script are reported on stdout and do not stop the
    processing of the remaining uploads.
    """
    import os.path
    import subprocess

    if not file_pattern:
        file_pattern = [
            '*/POTCAR', '*/POTCAR.gz', '*/POTCAR.xz', 'POTCAR', 'POTCAR.gz', 'POTCAR.xz']

    # Resolve once, before any child process runs in a different directory.
    sh_script = os.path.abspath('ops/scripts/quarantine-raw-files.sh')

    _, uploads = query_uploads(ctx, uploads)
    for upload in uploads:
        print(f'Moving {" ".join(file_pattern)} to quarantine in {upload.upload_id} ...')

        if not upload.published:
            print(' upload is not published, nothing to do')
            # Skip only this upload; the remaining ones still need to be handled.
            continue

        if dry:
            # Honor --dry: never touch the archives of this upload.
            print(' dry run, nothing moved')
            continue

        upload_files = files.PublicUploadFiles(
            upload.upload_id, is_authorized=lambda *args, **kwargs: True, create=False)

        try:
            # Use cwd= instead of os.chdir so the working directory of the CLI
            # process itself is never changed.
            result = subprocess.run(
                ['sh', sh_script, *file_pattern],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=upload_files.os_path)
            # A negative return code means the child was killed by a signal.
            if result.returncode != 0:
                err = result.stderr.decode(errors='replace').strip()
                print(f' !!! could not move files: script has error output {err} !!!')
        except Exception as e:
            # Best effort: report and continue with the next upload.
            print(f' !!! could not move files: {e} !!!')
@uploads.command(help='Attempt to abort the processing of uploads.')
@click.argument('UPLOADS', nargs=-1)
@click.option('--calcs', is_flag=True, help='Only stop calculation processing.')
......
#!/bin/sh
# Moves the archive members matching the given patterns out of
# raw-restricted.plain.zip and raw-public.plain.zip (both looked up in the
# current directory) into raw-quarantined.plain.zip.
#
# Usage: quarantine-raw-files.sh PATTERN...

# Without patterns unzip would extract every member; refuse to run.
if [ "$#" -eq 0 ]
then
    echo "usage: $0 PATTERN..." >&2
    exit 2
fi

dir="quarantined"
pwd=$(pwd)

# Start from a clean staging directory so leftovers of an aborted run are not
# added to the quarantine archive.
rm -rf "$dir"

# "$@" is quoted so the patterns reach unzip/zip verbatim instead of being
# glob-expanded or word-split by the shell.
[ -e raw-restricted.plain.zip ] && unzip -d "$dir" -o raw-restricted.plain.zip "$@" > /dev/null
[ -e raw-public.plain.zip ] && unzip -d "$dir" -o raw-public.plain.zip "$@" > /dev/null

if [ -e "$dir" ]
then
    # Archive "." rather than "*" so dot-entries are included; the subshell
    # keeps the working directory of the script unchanged.
    (cd "$dir" && zip -rq "$pwd/raw-quarantined.plain.zip" .)
fi

# Remove the quarantined members from the original archives.
[ -e raw-restricted.plain.zip ] && zip -dq raw-restricted.plain.zip "$@"
[ -e raw-public.plain.zip ] && zip -dq raw-public.plain.zip "$@"

[ -e "$dir" ] && rm -rf "$dir"

echo "done"
......@@ -25,6 +25,7 @@ import datetime
import time
import zipfile
import os.path
import os
import re
from nomad import search, processing as proc, files, config
......@@ -346,6 +347,54 @@ class TestAdminUploads:
else:
assert calc.tasks_status == expected_state
@pytest.mark.parametrize('paths,entries', [
    pytest.param(['0/POTCAR'], ['public'], id='single'),
    pytest.param(['0/POTCAR.gz', '0/POTCAR.xz', '0/POTCAR'], ['public'], id='multiple'),
    pytest.param(['0/POTCAR', 'POTCAR.xz'], ['public'], id='root'),
    pytest.param(['0/POTCAR'], ['embargo'], id='embargo')
])
def test_quarantine_raw_files(self, paths, entries, test_user, proc_infra):
    """Publish an upload containing the given raw files and run the
    ``quarantine-raw-files`` command on it; afterwards every given path must
    be a member of the upload's 'quarantined' raw file archive."""
    # Only entries marked 'embargo' get metadata; without any, nomad.json
    # stays an empty object.
    embargoed = {
        f'{index}/archive.json': dict(with_embargo=True)
        for index, kind in enumerate(entries) if kind == 'embargo'}
    nomad_json = dict(entries=embargoed) if embargoed else {}

    # build the upload file: raw files, one archive.json per entry, nomad.json
    upload_path = os.path.join(config.fs.tmp, 'upload.zip')
    with zipfile.ZipFile(upload_path, 'w') as archive:
        for raw_path in paths:
            with archive.open(raw_path, 'w') as member:
                member.write(b'content')

        for index in range(len(entries)):
            archive.write('tests/data/parsers/archive.json', f'{index}/archive.json')

        with archive.open('nomad.json', 'w') as member:
            member.write(json.dumps(nomad_json, indent=2).encode())

    # process and publish the upload
    upload = run_processing(('test_upload_id', upload_path), test_user)
    upload.publish_upload()
    try:
        upload.block_until_complete(interval=.01)
    except Exception:
        pass

    public_upload_files = cast(files.PublicUploadFiles, upload.upload_files)

    # run the command
    cli_args = ['admin', 'uploads', 'quarantine-raw-files', '--', 'test_upload_id']
    exec_results = click.testing.CliRunner().invoke(
        cli, cli_args, catch_exceptions=False)

    # the command must report its work and no failure
    output = exec_results.output
    assert re.search(r'Moving .* to quarantine', output) is not None, output
    assert re.search(r'could not move files', output) is None, output

    # every raw file must have ended up in the quarantine archive
    quarantined_zip = public_upload_files._raw_file_object('quarantined')
    with zipfile.ZipFile(quarantined_zip.os_path, 'r') as archive:
        quarantined_names = archive.namelist()
        for raw_path in paths:
            assert raw_path in quarantined_names
@pytest.mark.parametrize('entries,with_empty,results', [
pytest.param(['embargo', 'embargo'], True, [r'removed empty zip.*public'], id='embargo'),
pytest.param(['public', 'public'], True, [r'removed empty zip.*restricted'], id='public'),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment