diff --git a/nomad/api/raw.py b/nomad/api/raw.py index 73385723c8ad642786a8acb93e2fa91fe03a5466..7ef42eee2d2e97d0e4145d504d0bbaf19fe25902 100644 --- a/nomad/api/raw.py +++ b/nomad/api/raw.py @@ -35,7 +35,7 @@ from .app import app, base_path @app.route('%s/raw/<string:upload_hash>/<path:upload_filepath>' % base_path, methods=['GET']) def get_raw_file(upload_hash, upload_filepath): """ - Get a single raw calculation file from a given upload. + Get a single raw calculation file from a given upload (or many files via wildcard). .. :quickref: raw; Get single raw calculation file. @@ -47,17 +47,28 @@ def get_raw_file(upload_hash, upload_filepath): Accept: application/gz :param string upload_hash: the hash based identifier of the upload - :param path upload_filepath: the path to the desired file within the upload + :param path upload_filepath: the path to the desired file within the upload; + can also contain a wildcard * at the end to denote all files with path as prefix :resheader Content-Type: application/gz :status 200: calc raw data successfully retrieved :status 404: upload with given hash does not exist or the given file does not exist - :returns: the gzipped raw data in the body + :returns: the gzipped raw data in the body or a zip file when wildcard was used """ repository_file = RepositoryFile(upload_hash) if not repository_file.exists(): abort(404, message='The upload with hash %s does not exist.' % upload_hash) + if upload_filepath[-1:] == '*': + upload_filepath = upload_filepath[0:-1] + files = list( + file for file in repository_file.manifest + if file.startswith(upload_filepath)) + if len(files) == 0: + abort(404, message='There are no files for %s.' % upload_filepath) + else: + return respond_to_get_raw_files(upload_hash, files) + try: the_file = repository_file.get_file(upload_filepath) with the_file.open() as f: diff --git a/nomad/api/upload.py b/nomad/api/upload.py index c66f441b88854582790e415ba1175d04ccc91210..ec387fc4623f80d88b13dab59eb296124ad49142 100644 --- a/nomad/api/upload.py +++ b/nomad/api/upload.py @@ -473,6 +473,7 @@ class UploadFileRes(Resource): else: # simple streaming data in HTTP body, e.g. with curl "url" -T local_file try: + uploadFile.create_dirs() with uploadFile.open('wb') as f: while not request.stream.is_exhausted: f.write(request.stream.read(1024)) diff --git a/nomad/files.py b/nomad/files.py index 8867adfec81d20f3bea050e5f38981823e179540..8831619132e8bb0449ab934f1a8644cdb3aee5c0 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -77,11 +77,13 @@ class File: return logger.bind(path=self.os_path) @contextmanager - def open(self, *args, **kwargs) -> Generator[IO, None, None]: + def open(self, mode: str = 'r', *args, **kwargs) -> Generator[IO, None, None]: """ Opens the object with he given mode, etc. """ self.logger.debug('open file') try: - with open(self.os_path, *args, **kwargs) as f: + if mode.startswith('w'): + self.create_dirs() + with open(self.os_path, mode, *args, **kwargs) as f: yield f except FileNotFoundError: raise KeyError() @@ -107,6 +109,11 @@ class File: def path(self) -> str: return self.os_path + def create_dirs(self) -> None: + directory = os.path.dirname(self.os_path) + if not os.path.isdir(directory): + os.makedirs(directory) + class ZippedFile(File): """ A file contained in a .zip archive. """ @@ -163,10 +170,6 @@ class Objects: path_segments = file_name.split('/') path = os.path.join(*([config.fs.objects, bucket] + path_segments)) - directory = os.path.dirname(path) - - if not os.path.isdir(directory): - os.makedirs(directory) return os.path.abspath(path) @@ -346,6 +349,9 @@ class UploadFile(ObjectFile): object_id = self.upload_hash() target = Objects._os_path(config.files.raw_bucket, object_id, 'zip') + directory = os.path.dirname(target) + if not os.path.isdir(directory): + os.makedirs(directory) return ZippedDataContainer.create(self._extract_dir, target=target) diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index a2dd113be86fc73e940f65732e1663305f7ffcfc..77815b905739f4a37ece4f5cea6f199d164c362c 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -48,8 +48,9 @@ def mocks_forall(mocksearch, mockmongo): def uploaded_id(request, clear_files) -> Generator[str, None, None]: example_file = request.param example_upload_id = os.path.basename(example_file).replace('.zip', '') - upload_file = UploadFile(example_upload_id).os_path - shutil.copyfile(example_file, upload_file) + upload_file = UploadFile(example_upload_id) + upload_file.create_dirs() + shutil.copyfile(example_file, upload_file.os_path) yield example_upload_id @@ -58,8 +59,9 @@ def uploaded_id(request, clear_files) -> Generator[str, None, None]: def uploaded_id_with_warning(request, clear_files) -> Generator[str, None, None]: example_file = 'tests/data/proc/examples_with_warning_template.zip' example_upload_id = os.path.basename(example_file).replace('.zip', '') - upload_file = UploadFile(example_upload_id).os_path - shutil.copyfile(example_file, upload_file) + upload_file = UploadFile(example_upload_id) + upload_file.create_dirs() + shutil.copyfile(example_file, upload_file.os_path) yield example_upload_id diff --git a/tests/test_api.py b/tests/test_api.py index da7a5e11667916f7a272d478d8c5a47bbc32419f..421f16a83423405220efc3785808c078d7761f5b 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -411,6 +411,30 @@ class TestRaw: url = '/raw/%s/does/not/exist' % example_upload_hash rv = client.get(url) assert rv.status_code == 404 + data = json.loads(rv.data) + assert 'files' not in data + + def test_raw_file_listing(self, client, example_upload_hash): + url = '/raw/%s/examples' % example_upload_hash + rv = client.get(url) + assert rv.status_code == 404 + data = json.loads(rv.data) + assert len(data['files']) == 5 + + def test_raw_file_wildcard(self, client, example_upload_hash): + url = '/raw/%s/examples*' % example_upload_hash + rv = client.get(url) + + assert rv.status_code == 200 + assert len(rv.data) > 0 + with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file: + assert zip_file.testzip() is None + assert len(zip_file.namelist()) == len(example_file_contents) + + def test_raw_file_wildcard_missing(self, client, example_upload_hash): + url = '/raw/%s/does/not/exist*' % example_upload_hash + rv = client.get(url) + assert rv.status_code == 404 def test_raw_file_missing_upload(self, client, example_upload_hash): url = '/raw/doesnotexist/%s' % example_file_mainfile diff --git a/tests/test_files.py b/tests/test_files.py index 693ed254d5588a49c530a008aff824d2e25aa4d1..8475dd5d3dd03319c6604e25d96aef8c9c742040 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -205,6 +205,7 @@ class TestUploadFile: @pytest.fixture() def upload(self, clear_files): upload = UploadFile('__test_upload_id') + upload.create_dirs() shutil.copyfile(example_file, upload.os_path) yield upload