# Patch summary (free-text preamble; everything before the first "diff --git" header is commentary).
#
# nomad/config.py
#   * The FilesConfig namedtuple's typename changes from 'S3' to 'FilesConfig' and it gains a
#     fourth field, 'compress_archive'.
#   * The module-level `files` instance sets compress_archive=False.
#
# nomad/files.py (write_archive_json)
#   * When config.files.compress_archive is truthy, the yielded writer is the gzip text wrapper
#     and the object is stored with metadata {'Content-Encoding': 'gzip'}.
#   * Otherwise the yielded writer is an io.TextIOWrapper (encoding='utf-8') over the same
#     BytesIO buffer and the metadata dict is empty.
#   * In both cases the writer is flushed, the buffer is rewound and passed to
#     _client.put_object, and only afterwards are the writer and the buffer closed.
#
# tests/test_processing.py
#   * Adds a comment above the removal of the uploaded object; no code change.
#
# NOTE(review): on the gzip branch the buffer is uploaded after out.flush() but before
#   out.close(). Presumably the gzip end-of-stream trailer is only written on close(), in which
#   case the stored object would be an incomplete gzip stream -- confirm against the gzip module
#   documentation and consider closing the gzip writer before reading the buffer.
# NOTE(review): on the plain-text branch, out.close() on the TextIOWrapper presumably also
#   closes binary_out, which would make the following binary_out.close() redundant (harmless)
#   -- confirm.
# NOTE(review): the docstring typo "yiels" (for "yields") is in an unchanged context line; it has
#   to be fixed in nomad/files.py itself, because editing it here would make the context stop
#   matching the target file.
diff --git a/nomad/config.py b/nomad/config.py index cf40de0e5d0887a9a16bdeb6d25d4ce612237679..f9871e0be88634a22d76a19d19cb9be973e51564 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -20,7 +20,8 @@ This module is used to store all configuration values. It makes use of import os from collections import namedtuple -FilesConfig = namedtuple('S3', ['uploads_bucket', 'repository_bucket', 'archive_bucket']) +FilesConfig = namedtuple( + 'FilesConfig', ['uploads_bucket', 'repository_bucket', 'archive_bucket', 'compress_archive']) """ API independent configuration for the object storage. """ CeleryConfig = namedtuple('Celery', [ @@ -39,7 +40,8 @@ LogstashConfig = namedtuple('LogstashConfig', ['enabled', 'host', 'tcp_port']) files = FilesConfig( uploads_bucket='uploads', repository_bucket='repository', - archive_bucket='archive' + archive_bucket='archive', + compress_archive=False ) celery = CeleryConfig( rabbit_host=os.environ.get('NOMAD_RABBITMQ_HOST', 'localhost'), diff --git a/nomad/files.py b/nomad/files.py index 7bcc826424b0e550840f5aa8c308ce4908b8435c..1a15734da8f1e2d1c47c1e90fe3acb486a404551 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -271,21 +271,28 @@ class Upload(): def write_archive_json(archive_id) -> Generator[TextIO, None, None]: """ Context manager that yiels a file-like to write the archive json. 
""" binary_out = io.BytesIO() - gzip_wrapper = cast(TextIO, gzip.open(binary_out, 'wt')) + if config.files.compress_archive: + gzip_wrapper = cast(TextIO, gzip.open(binary_out, 'wt')) + out = gzip_wrapper + metadata = {'Content-Encoding': 'gzip'} + else: + text_wrapper = io.TextIOWrapper(binary_out, encoding='utf-8') + out = text_wrapper + metadata = {} try: - yield gzip_wrapper + yield out finally: - gzip_wrapper.flush() + out.flush() binary_out.seek(0) length = len(binary_out.getvalue()) _client.put_object( config.files.archive_bucket, archive_id, binary_out, length=length, content_type='application/json', - metadata={'Content-Encoding': 'gzip'}) + metadata=metadata) - gzip_wrapper.close() + out.close() binary_out.close() diff --git a/tests/test_processing.py b/tests/test_processing.py index 6516ada7db2b3e57fdce1edd4fb88bd173a33b0a..745e10c67d05c77a37e3100e047376651944dba2 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -33,6 +33,7 @@ def uploaded_id(request) -> Generator[str, None, None]: yield example_upload_id try: + # remove the created uploads files._client.remove_object(config.files.uploads_bucket, example_upload_id) # remove all the created archive files