From c99669a24284eebb81f1d711a5454e8d90222322 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Wed, 18 Dec 2019 13:31:07 +0100 Subject: [PATCH] Fixed broken zipfile cache. --- nomad/app/api/archive.py | 37 +++++++++++--------- nomad/app/api/raw.py | 75 +++++++++++++++++++++------------------- nomad/files.py | 22 ++++++------ 3 files changed, 72 insertions(+), 62 deletions(-) diff --git a/nomad/app/api/archive.py b/nomad/app/api/archive.py index 57484680af..19713b7061 100644 --- a/nomad/app/api/archive.py +++ b/nomad/app/api/archive.py @@ -149,25 +149,27 @@ class ArchiveQueryResource(Resource): def generator(): manifest = {} + upload_files = None for entry in calcs: upload_id = entry['upload_id'] calc_id = entry['calc_id'] - upload_files = UploadFiles.get( - upload_id, create_authorization_predicate(upload_id)) - if upload_files is None: - utils.get_logger(__name__).error('upload files do not exist', upload_id=upload_id) - continue - - if hasattr(upload_files, 'zipfile_cache'): - zipfile_cache = upload_files.zipfile_cache() - else: - zipfile_cache = contextlib.suppress() - - with zipfile_cache: - yield ( - '%s.%s' % (calc_id, upload_files._archive_ext), calc_id, - lambda calc_id: upload_files.archive_file(calc_id, 'rb'), - lambda calc_id: upload_files.archive_file_size(calc_id)) + if upload_files is None or upload_files.upload_id != upload_id: + if upload_files is not None: + upload_files.close_zipfile_cache() + + upload_files = UploadFiles.get( + upload_id, create_authorization_predicate(upload_id)) + + if upload_files is None: + utils.get_logger(__name__).error('upload files do not exist', upload_id=upload_id) + continue + + upload_files.open_zipfile_cache() + + yield ( + '%s.%s' % (calc_id, upload_files._archive_ext), calc_id, + lambda calc_id: upload_files.archive_file(calc_id, 'rb'), + lambda calc_id: upload_files.archive_file_size(calc_id)) manifest[calc_id] = { key: entry[key] @@ -175,6 +177,9 @@ class ArchiveQueryResource(Resource): if entry.get(key) is not None } + if upload_files is not None: + upload_files.close_zipfile_cache() + try: manifest_contents = json.dumps(manifest).encode('utf-8') except Exception as e: diff --git a/nomad/app/api/raw.py b/nomad/app/api/raw.py index a0a761c4fe..3ae09689d2 100644 --- a/nomad/app/api/raw.py +++ b/nomad/app/api/raw.py @@ -430,40 +430,44 @@ class RawFileQueryResource(Resource): def generator(): manifest = {} + upload_files = None for entry in calcs: upload_id = entry['upload_id'] mainfile = entry['mainfile'] - upload_files = UploadFiles.get( - upload_id, create_authorization_predicate(upload_id)) - if upload_files is None: - utils.get_logger(__name__).error('upload files do not exist', upload_id=upload_id) - continue - - if hasattr(upload_files, 'zipfile_cache'): - zipfile_cache = upload_files.zipfile_cache() - else: - zipfile_cache = contextlib.suppress() - - with zipfile_cache: - filenames = upload_files.raw_file_manifest( - path_prefix=os.path.dirname(mainfile)) - for filename in filenames: - filename_w_upload = os.path.join(upload_files.upload_id, filename) - filename_wo_prefix = filename_w_upload[common_prefix_len:] - if len(patterns) == 0 or any( - fnmatch.fnmatchcase(os.path.basename(filename_wo_prefix), pattern) - for pattern in patterns): - - yield ( - filename_wo_prefix, filename, - lambda upload_filename: upload_files.raw_file(upload_filename, 'rb'), - lambda upload_filename: upload_files.raw_file_size(upload_filename)) - - manifest[path(entry)] = { - key: entry[key] - for key in RawFileQueryResource.manifest_quantities - if entry.get(key) is not None - } + if upload_files is None or upload_files.upload_id != upload_id: + if upload_files is not None: + upload_files.close_zipfile_cache() + + upload_files = UploadFiles.get( + upload_id, create_authorization_predicate(upload_id)) + + if upload_files is None: + utils.get_logger(__name__).error('upload files do not exist', upload_id=upload_id) + continue + + upload_files.open_zipfile_cache() + + filenames = upload_files.raw_file_manifest(path_prefix=os.path.dirname(mainfile)) + for filename in filenames: + filename_w_upload = os.path.join(upload_files.upload_id, filename) + filename_wo_prefix = filename_w_upload[common_prefix_len:] + if len(patterns) == 0 or any( + fnmatch.fnmatchcase(os.path.basename(filename_wo_prefix), pattern) + for pattern in patterns): + + yield ( + filename_wo_prefix, filename, + lambda upload_filename: upload_files.raw_file(upload_filename, 'rb'), + lambda upload_filename: upload_files.raw_file_size(upload_filename)) + + manifest[path(entry)] = { + key: entry[key] + for key in RawFileQueryResource.manifest_quantities + if entry.get(key) is not None + } + + if upload_files is not None: + upload_files.close_zipfile_cache() try: manifest_contents = json.dumps(manifest).encode('utf-8') @@ -490,17 +494,14 @@ def respond_to_get_raw_files(upload_id, files, compress=False, strip=False): # the zipfile cache allows to access many raw-files from public upload files without # having to re-open the underlying zip files all the time - if hasattr(upload_files, 'zipfile_cache'): - zipfile_cache = upload_files.zipfile_cache() - else: - zipfile_cache = contextlib.suppress() + upload_files.open_zipfile_cache() if strip: common_prefix_len = len(utils.common_prefix(files)) else: common_prefix_len = 0 - with zipfile_cache: + try: return streamed_zipfile( [( filename[common_prefix_len:], filename, @@ -508,3 +509,5 @@ def respond_to_get_raw_files(upload_id, files, compress=False, strip=False): lambda upload_filename: upload_files.raw_file_size(upload_filename) ) for filename in files], zipfile_name='%s.zip' % upload_id, compress=compress) + finally: + upload_files.close_zipfile_cache() diff --git a/nomad/files.py b/nomad/files.py index aa94329961..9c26a21901 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -58,7 +58,6 @@ import tarfile import hashlib import io import pickle -from contextlib import contextmanager from nomad import config, utils from nomad.datamodel import UploadWithMetadata @@ -313,6 +312,13 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): """ raise NotImplementedError() + def open_zipfile_cache(self): + """ Allows to reuse the same zipfile for multiple file operations. Must be closed. """ + pass + + def close_zipfile_cache(self): + pass + class StagingUploadFiles(UploadFiles): def __init__( @@ -916,16 +922,12 @@ class PublicUploadFiles(UploadFiles): repacked_file.os_path, public_file.os_path) - @contextmanager - def zipfile_cache(self): - """ - Context that allows to read files while caching zipfiles without reopening them - all the time. - """ + def open_zipfile_cache(self): if self._zipfile_cache is None: self._zipfile_cache = {} - try: - yield - finally: + + def close_zipfile_cache(self): + if self._zipfile_cache is not None: for zip_file in self._zipfile_cache.values(): zip_file.close() + self._zipfile_cache = None -- GitLab