From 9dbb3bda92ac37828c4be0d40dd9be5d6035ad94 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Thu, 16 Aug 2018 10:46:19 +0200 Subject: [PATCH] Optional compression on archive. --- nomad/config.py | 6 ++++-- nomad/files.py | 17 ++++++++++++----- tests/test_processing.py | 1 + 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/nomad/config.py b/nomad/config.py index cf40de0e5d..f9871e0be8 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -20,7 +20,8 @@ This module is used to store all configuration values. It makes use of import os from collections import namedtuple -FilesConfig = namedtuple('S3', ['uploads_bucket', 'repository_bucket', 'archive_bucket']) +FilesConfig = namedtuple( + 'FilesConfig', ['uploads_bucket', 'repository_bucket', 'archive_bucket', 'compress_archive']) """ API independent configuration for the object storage. """ CeleryConfig = namedtuple('Celery', [ @@ -39,7 +40,8 @@ LogstashConfig = namedtuple('LogstashConfig', ['enabled', 'host', 'tcp_port']) files = FilesConfig( uploads_bucket='uploads', repository_bucket='repository', - archive_bucket='archive' + archive_bucket='archive', + compress_archive=False ) celery = CeleryConfig( rabbit_host=os.environ.get('NOMAD_RABBITMQ_HOST', 'localhost'), diff --git a/nomad/files.py b/nomad/files.py index 7bcc826424..1a15734da8 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -271,21 +271,28 @@ class Upload(): def write_archive_json(archive_id) -> Generator[TextIO, None, None]: """ Context manager that yiels a file-like to write the archive json. """ binary_out = io.BytesIO() - gzip_wrapper = cast(TextIO, gzip.open(binary_out, 'wt')) + if config.files.compress_archive: + gzip_wrapper = cast(TextIO, gzip.open(binary_out, 'wt')) + out = gzip_wrapper + metadata = {'Content-Encoding': 'gzip'} + else: + text_wrapper = io.TextIOWrapper(binary_out, encoding='utf-8') + out = text_wrapper + metadata = {} try: - yield gzip_wrapper + yield out finally: - gzip_wrapper.flush() + out.flush() binary_out.seek(0) length = len(binary_out.getvalue()) _client.put_object( config.files.archive_bucket, archive_id, binary_out, length=length, content_type='application/json', - metadata={'Content-Encoding': 'gzip'}) + metadata=metadata) - gzip_wrapper.close() + out.close() binary_out.close() diff --git a/tests/test_processing.py b/tests/test_processing.py index 6516ada7db..745e10c67d 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -33,6 +33,7 @@ def uploaded_id(request) -> Generator[str, None, None]: yield example_upload_id try: + # remove the created uploads files._client.remove_object(config.files.uploads_bucket, example_upload_id) # remove all the created archive files -- GitLab