diff --git a/nomad/api.py b/nomad/api.py
index fe25cd9d44794b159c34aa6f15caf564085f312d..ff40fed963260c17b39b348e0c14a3f41ec662bd 100644
--- a/nomad/api.py
+++ b/nomad/api.py
@@ -1,3 +1,4 @@
+from typing import Optional, Tuple
 from flask import Flask, request, redirect
 from flask_restful import Resource, Api, abort
 from datetime import datetime
@@ -39,11 +40,11 @@ def _external_objects_url(url):
         '%s%s%s' % (config.services.objects_host, port_with_colon, config.services.objects_base_path))
 
 
-def _update_and_render(upload: users.Upload):
-    """
-    If the given upload as a processing state attached, it will attempt to update this
-    state and store the results, before the upload is rendered for the client.
-    """
+def _updated_proc(upload: users.Upload) -> Tuple[Optional[UploadProc], bool]:
+    """
+    Attempts to update the processing state attached to the given upload, if any.
+    Returns the state (None if there is none) and whether it is considered stale.
+    """
     is_stale = False
 
     if upload.proc:
@@ -58,6 +59,11 @@ def _update_and_render(upload: users.Upload):
     else:
         proc = None
 
+    return proc, is_stale
+
+
+def _render(upload: users.Upload, proc: Optional[UploadProc], is_stale: bool) -> dict:
+    """ Renders the upload for the client; entries without a value are left out. """
     data = {
         'name': upload.name,
         'upload_id': upload.upload_id,
@@ -73,6 +79,15 @@ def _update_and_render(upload: users.Upload):
     return {key: value for key, value in data.items() if value is not None}
 
 
+def _update_and_render(upload: users.Upload) -> dict:
+    """
+    If the given upload has a processing state attached, it will attempt to update this
+    state and store the results, before the upload is rendered for the client.
+    """
+    proc, is_stale = _updated_proc(upload)
+    return _render(upload, proc, is_stale)
+
+
 class Uploads(Resource):
 
     def get(self):
@@ -106,6 +121,47 @@ class Upload(Resource):
 
         return _update_and_render(upload), 200
 
+    def delete(self, upload_id):
+        """
+        Deletes the upload with the given id together with its file, its archives,
+        and its indexed calcs. Aborts with 400 for invalid ids or while the upload is
+        still processing, and with 404 if the upload does not exist.
+        """
+        try:
+            upload = users.Upload.objects(id=upload_id).first()
+        except mongoengine.errors.ValidationError:
+            abort(400, message='%s is not a valid upload id.' % upload_id)
+
+        if upload is None:
+            abort(404, message='Upload with id %s does not exist.' % upload_id)
+
+        # proc is None if the upload has no processing state attached
+        proc, is_stale = _updated_proc(upload)
+        if proc is not None:
+            deletable = proc.ready() or is_stale or proc.current_task_name == 'uploading'
+            if not deletable:
+                abort(400, message='%s has not finished processing.' % upload_id)
+
+        logger = get_logger(__name__, upload_id=upload_id)
+        with logger.lnr_error('Delete upload file'):
+            try:
+                files.Upload(upload.upload_id).delete()
+            except KeyError:
+                logger.error('Upload exist, but file does not exist.')
+
+        if proc is not None and proc.upload_hash is not None:
+            with logger.lnr_error('Deleting archives.'):
+                files.delete_archives(proc.upload_hash)
+
+            with logger.lnr_error('Deleting indexed calcs.'):
+                for obj in search.Calc.search_objs(upload_hash=proc.upload_hash):
+                    obj.delete()
+
+        with logger.lnr_error('Deleting user upload.'):
+            upload.delete()
+
+        return _render(upload, proc, is_stale), 200
+
 
 class RepoCalc(Resource):
     @staticmethod
diff --git a/nomad/files.py b/nomad/files.py
index 496b838dc41064424c066a022fc02e76e3310b99..b99cf07e38502e5872e1ccd65754a85da0de0b5e 100644
--- a/nomad/files.py
+++ b/nomad/files.py
@@ -268,6 +268,13 @@ class Upload():
         """ Returns the tmp directory relative version of a filename. """
         return os.path.join(self.upload_extract_dir, filename)
 
+    def delete(self):
+        """ Delete the file from the store. Must not be open. """
+        try:
+            _client.remove_object(config.files.uploads_bucket, self.upload_id)
+        except minio.error.NoSuchKey:
+            raise KeyError(self.upload_id)
+
 
 @contextmanager
 def write_archive_json(archive_id) -> Generator[TextIO, None, None]:
@@ -317,3 +324,13 @@ def open_archive_json(archive_id) -> IO:
         return _client.get_object(config.files.archive_bucket, archive_id)
     except minio.error.NoSuchKey:
         raise KeyError()
+
+
+def delete_archives(upload_hash: str):
+    """ Deletes all archive files for this upload_hash. """
+    bucket = config.files.archive_bucket
+    prefix = '%s/' % upload_hash
+    to_remove = [obj.object_name for obj in _client.list_objects(bucket, prefix)]
+    for _ in _client.remove_objects(bucket, to_remove):
+        # TODO handle potential errors
+        pass
diff --git a/nomad/search.py b/nomad/search.py
index e2f4c0a56a7a4c3d3fee733d48ebc47b2592f26b..b84f0f7c96ca8e0227c9b5ddc2cb2254ccb40394 100644
--- a/nomad/search.py
+++ b/nomad/search.py
@@ -71,6 +71,13 @@ class Calc(Document):
     def search(body):
         return client.search(index=config.elastic.calc_index, body=body)
 
+    @staticmethod
+    def search_objs(**kwargs):
+        """ Iterates over all index entries that match the given field values. """
+        return Search(using=client, index=config.elastic.calc_index) \
+            .query('match', **kwargs) \
+            .scan()
+
     @staticmethod
     def upload_exists(upload_hash):
         """ Returns true if there are already calcs from the given upload. """
diff --git a/tests/test_api.py b/tests/test_api.py
index 3e619a341744a500d751abebce5b02e62cbdea7d..9ae9cefb9cc6d9a3bf71536474306f183b1db302 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -119,6 +119,19 @@ def test_create_upload_with_name(client):
     assert upload['name'] == 'test_name'
 
 
+def test_delete_empty_upload(client):
+    rv = client.post('/uploads')
+
+    assert rv.status_code == 200
+    upload_id = assert_upload(rv.data)['upload_id']
+
+    rv = client.delete('/uploads/%s' % upload_id)
+    assert rv.status_code == 200
+
+    rv = client.get('/uploads/%s' % upload_id)
+    assert rv.status_code == 404
+
+
 @pytest.mark.parametrize("file", example_files)
 @pytest.mark.timeout(30)
 def test_upload_to_upload(client, file):
diff --git a/tests/test_files.py b/tests/test_files.py
index 862b6eef9b41421845cb63004b8444a48c787d0d..f7921f7a3f5e9617887c3bbfd8721abb0437ae6d 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -116,6 +116,13 @@ def test_upload(uploaded_id: str):
                 break
 
 
+def test_delete_upload(uploaded_id: str):
+    files.Upload(uploaded_id).delete()
+
+    with pytest.raises(KeyError):
+        files.Upload(uploaded_id)
+
+
 @pytest.mark.timeout(10)
 def test_upload_notification(upload_id):
     @files.upload_put_handler
@@ -162,3 +169,9 @@ def test_archive(archive_id: str):
 
     assert 'test' in result
     assert result['test'] == 'value'
+
+
+def test_delete_archives(archive_id: str):
+    files.delete_archives(archive_id.split('/')[0])
+    with pytest.raises(KeyError):
+        files.archive_url(archive_id)