diff --git a/gui/src/components/archive/ArchiveBrowser.js b/gui/src/components/archive/ArchiveBrowser.js
index dc5b1c718950786005b66e798a83c48ef28c0b5e..57f7f04fe1f78cc4d516ea77dd89bf35c6239fd7 100644
--- a/gui/src/components/archive/ArchiveBrowser.js
+++ b/gui/src/components/archive/ArchiveBrowser.js
@@ -281,7 +281,7 @@ const ArchiveConfigForm = React.memo(function ArchiveConfigForm({searchOptions,
       </Tooltip>
       {entryId && <Download
         tooltip="download the archive"
-        url={`entries/${entryId}/archive/download?ignore_mime_type=true`}
+        url={`entries/${entryId}/archive/download`}
         component={IconButton}
       >
         <DownloadIcon/>
diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py
index 53ebf6d4f2f685c41cfe8b6b19cb59863163978d..23f0f108b9facf718b7628774a1e79baf23dd60d 100644
--- a/nomad/app/v1/routers/entries.py
+++ b/nomad/app/v1/routers/entries.py
@@ -29,7 +29,7 @@ from fastapi import (
     Query as QueryParameter,
     Body,
 )
-from fastapi.responses import StreamingResponse
+from fastapi.responses import StreamingResponse, ORJSONResponse
 from fastapi.exceptions import RequestValidationError
 from pydantic import BaseModel, Field, validator
 import os.path
@@ -46,7 +46,7 @@ from nomad.config import config
 from nomad.config.models.config import Reprocess
 from nomad.datamodel import EditableUserMetadata
 from nomad.datamodel.context import ServerContext
-from nomad.files import StreamedFile, create_zipstream
+from nomad.files import StreamedFile, create_zipstream_async
 from nomad.processing.data import Upload
 from nomad.utils import strip
 from nomad.archive import RequiredReader, RequiredValidationError, ArchiveQueryError
@@ -726,16 +726,14 @@ def _answer_entries_raw_request(owner: Owner, query: Query, files: Files, user:
                 entry_metadata=entry_metadata,
             )
 
-    # create the streaming response with zip file contents
-    content = create_download_stream_zipped(
-        download_items=download_items_generator(),
-        re_pattern=files_params.re_pattern,
-        recursive=False,
-        create_manifest_file=True,
-        compress=files_params.compress,
-    )
     return StreamingResponse(
-        content,
+        create_download_stream_zipped(
+            download_items=download_items_generator(),
+            re_pattern=files_params.re_pattern,
+            recursive=False,
+            create_manifest_file=True,
+            compress=files_params.compress,
+        ),
         headers=browser_download_headers(
             filename='raw_files.zip', media_type='application/zip'
         ),
@@ -1104,10 +1102,8 @@ def _answer_entries_archive_download_request(
     )
 
     with _Uploads() as uploads:
-        # create the streaming response with zip file contents
-        content = create_zipstream(streamed_files(), compress=files_params.compress)
         return StreamingResponse(
-            content,
+            create_zipstream_async(streamed_files(), compress=files_params.compress),
            headers=browser_download_headers(
                 filename='archives.zip', media_type='application/zip'
             ),
@@ -1361,14 +1357,14 @@ async def get_entry_raw_file(
     if offset == 0 and length < 0:
         mime_type = upload_files.raw_file_mime_type(path)
 
-    raw_file_content = create_download_stream_raw_file(
-        upload_files, path, offset, length, decompress
+    return StreamingResponse(
+        create_download_stream_raw_file(upload_files, path, offset, length, decompress),
+        media_type=mime_type,
     )
-    return StreamingResponse(raw_file_content, media_type=mime_type)
 
 
 def answer_entry_archive_request(
-    query: Dict[str, Any], required: ArchiveRequired, user: User, entry_metadata=None
+    query: dict, required: ArchiveRequired, user: User, entry_metadata=None
 ):
     required_reader = _validate_required(required, user)
 
@@ -1392,26 +1388,24 @@ def answer_entry_archive_request(
 
     with _Uploads() as uploads:
         try:
-            archive_data = _read_archive(entry_metadata, uploads, required_reader)[
-                'archive'
-            ]
+            return {
+                'entry_id': entry_id,
+                'required': required,
+                'data': {
+                    'entry_id': entry_id,
+                    'upload_id': entry_metadata['upload_id'],
+                    'parser_name': entry_metadata['parser_name'],
+                    'archive': _read_archive(entry_metadata, uploads, required_reader)[
+                        'archive'
+                    ],
+                },
+            }
         except KeyError:
             raise HTTPException(
                 status.HTTP_404_NOT_FOUND,
                 detail='The entry does exist, but it has no archive.',
             )
 
-    return {
-        'entry_id': entry_id,
-        'required': required,
-        'data': {
-            'entry_id': entry_id,
-            'upload_id': entry_metadata['upload_id'],
-            'parser_name': entry_metadata['parser_name'],
-            'archive': archive_data,
-        },
-    }
-
 
 @router.post(
     '/{entry_id}/edit',
@@ -1563,8 +1557,8 @@ async def get_entry_archive(
     """
    Returns the full archive for the given `entry_id`.
     """
-    return answer_entry_archive_request(
-        dict(entry_id=entry_id), required='*', user=user
+    return ORJSONResponse(
+        answer_entry_archive_request(dict(entry_id=entry_id), required='*', user=user)
     )
 
 
@@ -1579,14 +1573,6 @@ async def get_entry_archive_download(
         ...,
         description='The unique entry id of the entry to retrieve archive data from.',
     ),
-    ignore_mime_type: bool = QueryParameter(
-        False,
-        description=strip(
-            """
-            Sets the mime type specified in the response headers to `application/octet-stream`
-            instead of the actual mime type (i.e. `application/json`)."""
-        ),
-    ),
     user: User = Depends(create_user_dependency(signature_token_auth_allowed=True)),
 ):
     """
@@ -1595,16 +1581,7 @@ async def get_entry_archive_download(
     response = answer_entry_archive_request(
         dict(entry_id=entry_id), required='*', user=user
     )
-    archive = response['data']['archive']
-    return StreamingResponse(
-        io.BytesIO(json.dumps(archive, indent=2).encode()),
-        headers=browser_download_headers(
-            filename=f'{entry_id}.json',
-            media_type='application/octet-stream'
-            if ignore_mime_type
-            else 'application/json',
-        ),
-    )
+    return ORJSONResponse(response['data']['archive'])
 
 
 @router.post(
@@ -1628,8 +1605,10 @@ async def post_entry_archive_query(
     Returns a partial archive for the given `entry_id` based on the `required`
     specified in the body.
     """
-    return answer_entry_archive_request(
-        dict(entry_id=entry_id), required=data.required, user=user
+    return ORJSONResponse(
+        answer_entry_archive_request(
+            dict(entry_id=entry_id), required=data.required, user=user
+        )
     )
 
 
diff --git a/nomad/app/v1/routers/uploads.py b/nomad/app/v1/routers/uploads.py
index de1deabdbf690694f884d3b9ec49662947c26edd..f1368d14b56caf82ccd213e0eb4bd5ce4f63d5f8 100644
--- a/nomad/app/v1/routers/uploads.py
+++ b/nomad/app/v1/routers/uploads.py
@@ -1170,11 +1170,14 @@ async def get_upload_raw_path(
         # File
         if files_params.compress:
             media_type = 'application/zip'
-            download_item = DownloadItem(
-                upload_id=upload_id, raw_path=path, zip_path=os.path.basename(path)
-            )
             content = create_download_stream_zipped(
-                download_item, upload_files, compress=True
+                DownloadItem(
+                    upload_id=upload_id,
+                    raw_path=path,
+                    zip_path=os.path.basename(path),
+                ),
+                upload_files,
+                compress=True,
             )
         else:
             if offset < 0:
@@ -1220,19 +1223,15 @@
                 ),
             )
         # Stream directory content, compressed.
-        download_item = DownloadItem(
-            upload_id=upload_id, raw_path=path, zip_path=''
-        )
-        content = create_download_stream_zipped(
-            download_item,
-            upload_files,
-            re_pattern=files_params.re_pattern,
-            recursive=True,
-            create_manifest_file=False,
-            compress=True,
-        )
         return StreamingResponse(
-            content,
+            create_download_stream_zipped(
+                DownloadItem(upload_id=upload_id, raw_path=path, zip_path=''),
+                upload_files,
+                re_pattern=files_params.re_pattern,
+                recursive=True,
+                create_manifest_file=False,
+                compress=True,
+            ),
             headers=browser_download_headers(
                 (
                     upload.upload_id
diff --git a/nomad/app/v1/utils.py b/nomad/app/v1/utils.py
index 9dcfa771ef95316510ecc07711b20e7924bdf366..ee71017f556452040c4da333152c399f07b32697 100644
--- a/nomad/app/v1/utils.py
+++ b/nomad/app/v1/utils.py
@@ -94,14 +94,14 @@ class DownloadItem(BaseModel):
     entry_metadata: Optional[Dict[str, Any]]
 
 
-def create_download_stream_zipped(
+async def create_download_stream_zipped(
     download_items: Union[DownloadItem, Iterator[DownloadItem]],
     upload_files: UploadFiles = None,
     re_pattern: Any = None,
     recursive: bool = False,
     create_manifest_file: bool = False,
     compress: bool = True,
-) -> Iterator[bytes]:
+):
     """
     Creates a zip-file stream for downloading raw data with ``StreamingResponse``.
 
@@ -184,16 +184,17 @@
         if upload_files:
             upload_files.close()
 
-    return create_zipstream(streamed_files(upload_files), compress=compress)
+    for x in create_zipstream(streamed_files(upload_files), compress=compress):
+        yield x
 
 
-def create_download_stream_raw_file(
+async def create_download_stream_raw_file(
     upload_files: UploadFiles,
     path: str,
     offset: int = 0,
     length: int = -1,
     decompress=False,
-) -> Iterator[bytes]:
+):
     """
     Creates a file stream for downloading raw data with ``StreamingResponse``.
 
@@ -222,20 +223,10 @@
 
     if length > 0:
         # Read up to a certain number of bytes
-        remaining = length
-        while remaining:
-            content = raw_file.read(remaining)
-            content_length = len(content)
-            remaining -= content_length
-            if content_length == 0:
-                break  # No more bytes
-            yield content
+        yield raw_file.read(length)
     else:
         # Read until the end of the file.
-        while True:
-            content = raw_file.read(1024 * 64)
-            if not content:
-                break  # No more bytes
+        while content := raw_file.read(1024 * 1024):
             yield content
 
     raw_file.close()
diff --git a/nomad/files.py b/nomad/files.py
index 7a4a4677b143864f323297bd426c921ae9686cb8..8993e428a66d0c804b12ae1eefb4f592a9552f1a 100644
--- a/nomad/files.py
+++ b/nomad/files.py
@@ -588,34 +588,39 @@ def create_zipstream_content(streamed_files: Iterable[StreamedFile]) -> Iterable
     the form which is required by the `zipstream` library, i.e. dictionaries with
     keys `arcname`, `iterable` and `buffer_size`. Useful for generating zipstreams.
     """
-    for streamed_file in streamed_files:
-        def content_generator():
-            with streamed_file.f as f:
-                while True:
-                    data = f.read(1024 * 64)
-                    if not data:
-                        break
-                    yield data
+    def content_generator(file):
+        with file.f as f:
+            while data := f.read(1024 * 1024):
+                yield data
 
+    for streamed_file in streamed_files:
         yield dict(
             arcname=streamed_file.path,
-            iterable=content_generator(),
+            iterable=content_generator(streamed_file),
             buffer_size=streamed_file.size,
         )
 
 
-def create_zipstream(
-    streamed_files: Iterable[StreamedFile], compress: bool = False
-) -> Iterator[bytes]:
+def create_zipstream(streamed_files: Iterable[StreamedFile], compress: bool = False):
     """
     Creates a zip stream, i.e. a streamed zip file.
     """
-    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
-    zip_stream = zipstream.ZipFile(mode='w', compression=compression, allowZip64=True)
+    zip_stream = zipstream.ZipFile(
+        mode='w',
+        compression=zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED,
+        allowZip64=True,
+    )
     zip_stream.paths_to_write = create_zipstream_content(streamed_files)
 
-    return iter(zip_stream)
+    yield from zip_stream
+
+
+async def create_zipstream_async(
+    streamed_files: Iterable[StreamedFile], compress: bool = False
+):
+    for x in create_zipstream(streamed_files, compress):
+        yield x
 
 
 def _versioned_archive_file_object(
@@ -662,6 +667,12 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta):
 
         self.upload_id = upload_id
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
     @classmethod
     def file_area(cls):
         """
diff --git a/tests/app/v1/routers/test_entries.py b/tests/app/v1/routers/test_entries.py
index c17be92099b18a141e192f5c45f5ca9a578be3e1..d0adb39e13cee9f0cabbe8b0298faca9a49df47a 100644
--- a/tests/app/v1/routers/test_entries.py
+++ b/tests/app/v1/routers/test_entries.py
@@ -1081,33 +1081,25 @@ def test_entry_archive(auth_headers, client, example_data, user, entry_id, statu
 
 
 @pytest.mark.parametrize(
-    'user, entry_id, ignore_mime_type, status_code',
+    'user, entry_id, status_code',
     [
-        pytest.param(None, 'id_01', False, 200, id='id'),
-        pytest.param(None, 'id_01', True, 200, id='id'),
-        pytest.param('user1', 'id_child_entries_child1', False, 200, id='child-entry'),
-        pytest.param('user1', 'id_child_entries_child1', True, 200, id='child-entry'),
-        pytest.param(None, 'id_02', True, 404, id='404-not-visible'),
-        pytest.param(None, 'doesnotexist', False, 404, id='404-does-not-exist'),
+        pytest.param(None, 'id_01', 200, id='id'),
+        pytest.param(None, 'id_01', 200, id='id'),
+        pytest.param('user1', 'id_child_entries_child1', 200, id='child-entry'),
+        pytest.param('user1', 'id_child_entries_child1', 200, id='child-entry'),
+        pytest.param(None, 'id_02', 404, id='404-not-visible'),
+        pytest.param(None, 'doesnotexist', 404, id='404-does-not-exist'),
     ],
 )
 def test_entry_archive_download(
-    auth_headers, client, example_data, user, entry_id, ignore_mime_type, status_code
+    auth_headers, client, example_data, user, entry_id, status_code
 ):
     response = client.get(
-        f'entries/{entry_id}/archive/download'
-        + ('?ignore_mime_type=true' if ignore_mime_type else ''),
+        f'entries/{entry_id}/archive/download',
         headers=auth_headers[user],
     )
     assert_response(response, status_code)
     if status_code == 200:
-        assert_browser_download_headers(
-            response,
-            media_type='application/octet-stream'
-            if ignore_mime_type
-            else 'application/json',
-            filename=entry_id + '.json',
-        )
         archive = response.json()
         assert 'metadata' in archive
         assert 'run' in archive