Commit c99669a2 authored by Markus Scheidgen

Fixed broken zipfile cache.

parent d509f37e
Pipeline #65724 failed with stages in 15 minutes and 58 seconds

@@ -149,21 +149,23 @@ class ArchiveQueryResource(Resource):
         def generator():
             manifest = {}
             upload_files = None
             for entry in calcs:
                 upload_id = entry['upload_id']
                 calc_id = entry['calc_id']
                 if upload_files is None or upload_files.upload_id != upload_id:
+                    if upload_files is not None:
+                        upload_files.close_zipfile_cache()
+
                     upload_files = UploadFiles.get(
                         upload_id, create_authorization_predicate(upload_id))
 
                     if upload_files is None:
                         utils.get_logger(__name__).error('upload files do not exist', upload_id=upload_id)
                         continue
 
-                if hasattr(upload_files, 'zipfile_cache'):
-                    zipfile_cache = upload_files.zipfile_cache()
-                else:
-                    zipfile_cache = contextlib.suppress()
+                    upload_files.open_zipfile_cache()
 
-                with zipfile_cache:
-                    yield (
+                yield (
                     '%s.%s' % (calc_id, upload_files._archive_ext), calc_id,
                     lambda calc_id: upload_files.archive_file(calc_id, 'rb'),
@@ -175,6 +177,9 @@ class ArchiveQueryResource(Resource):
                     if entry.get(key) is not None
                 }
 
+            if upload_files is not None:
+                upload_files.close_zipfile_cache()
+
             try:
                 manifest_contents = json.dumps(manifest).encode('utf-8')
             except Exception as e:
@@ -430,23 +430,24 @@ class RawFileQueryResource(Resource):
         def generator():
             manifest = {}
             upload_files = None
             for entry in calcs:
                 upload_id = entry['upload_id']
                 mainfile = entry['mainfile']
                 if upload_files is None or upload_files.upload_id != upload_id:
+                    if upload_files is not None:
+                        upload_files.close_zipfile_cache()
+
                     upload_files = UploadFiles.get(
                         upload_id, create_authorization_predicate(upload_id))
 
                     if upload_files is None:
                         utils.get_logger(__name__).error('upload files do not exist', upload_id=upload_id)
                         continue
 
-                if hasattr(upload_files, 'zipfile_cache'):
-                    zipfile_cache = upload_files.zipfile_cache()
-                else:
-                    zipfile_cache = contextlib.suppress()
+                    upload_files.open_zipfile_cache()
 
-                with zipfile_cache:
-                    filenames = upload_files.raw_file_manifest(
-                        path_prefix=os.path.dirname(mainfile))
+                filenames = upload_files.raw_file_manifest(path_prefix=os.path.dirname(mainfile))
 
                 for filename in filenames:
                     filename_w_upload = os.path.join(upload_files.upload_id, filename)
                     filename_wo_prefix = filename_w_upload[common_prefix_len:]
@@ -465,6 +466,9 @@ class RawFileQueryResource(Resource):
                     if entry.get(key) is not None
                 }
 
+            if upload_files is not None:
+                upload_files.close_zipfile_cache()
+
             try:
                 manifest_contents = json.dumps(manifest).encode('utf-8')
             except Exception as e:
@@ -490,17 +494,14 @@ def respond_to_get_raw_files(upload_id, files, compress=False, strip=False):
     # the zipfile cache allows to access many raw-files from public upload files without
     # having to re-open the underlying zip files all the time
-    if hasattr(upload_files, 'zipfile_cache'):
-        zipfile_cache = upload_files.zipfile_cache()
-    else:
-        zipfile_cache = contextlib.suppress()
+    upload_files.open_zipfile_cache()
 
     if strip:
         common_prefix_len = len(utils.common_prefix(files))
     else:
         common_prefix_len = 0
 
-    with zipfile_cache:
+    try:
         return streamed_zipfile(
             [(
                 filename[common_prefix_len:], filename,
@@ -508,3 +509,5 @@ def respond_to_get_raw_files(upload_id, files, compress=False, strip=False):
                 lambda upload_filename: upload_files.raw_file_size(upload_filename)
             ) for filename in files],
             zipfile_name='%s.zip' % upload_id, compress=compress)
+    finally:
+        upload_files.close_zipfile_cache()
@@ -58,7 +58,6 @@ import tarfile
 import hashlib
 import io
 import pickle
-from contextlib import contextmanager
 
 from nomad import config, utils
 from nomad.datamodel import UploadWithMetadata
@@ -313,6 +312,13 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta):
         """
         raise NotImplementedError()
 
+    def open_zipfile_cache(self):
+        """ Allows to reuse the same zipfile for multiple file operations. Must be closed. """
+        pass
+
+    def close_zipfile_cache(self):
+        pass
+
 
 class StagingUploadFiles(UploadFiles):
     def __init__(
@@ -916,16 +922,12 @@ class PublicUploadFiles(UploadFiles):
                 repacked_file.os_path,
                 public_file.os_path)
 
-    @contextmanager
-    def zipfile_cache(self):
-        """
-        Context that allows to read files while caching zipfiles without reopening them
-        all the time.
-        """
+    def open_zipfile_cache(self):
         if self._zipfile_cache is None:
             self._zipfile_cache = {}
-        try:
-            yield
-        finally:
+
+    def close_zipfile_cache(self):
+        if self._zipfile_cache is not None:
             for zip_file in self._zipfile_cache.values():
                 zip_file.close()
             self._zipfile_cache = None
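
For orientation, below is a minimal, self-contained sketch of the open/close caching pattern this commit switches to, in place of the generator-based zipfile_cache context manager. The ZipReader class, stream_members helper, and read method are illustrative stand-ins, not NOMAD's API; only the open_zipfile_cache/close_zipfile_cache method names and the cache dict come from the diff above.

import zipfile


class ZipReader:
    # Toy stand-in mirroring PublicUploadFiles' cache: maps zip paths to open
    # zipfile.ZipFile handles so repeated reads skip re-opening the archive.

    def __init__(self):
        self._zipfile_cache = None

    def open_zipfile_cache(self):
        if self._zipfile_cache is None:
            self._zipfile_cache = {}

    def close_zipfile_cache(self):
        if self._zipfile_cache is not None:
            for zip_file in self._zipfile_cache.values():
                zip_file.close()
            self._zipfile_cache = None

    def read(self, zip_path, member):
        # With the cache open, reuse one ZipFile handle per archive;
        # without it, fall back to open-read-close per access.
        if self._zipfile_cache is not None:
            zip_file = self._zipfile_cache.get(zip_path)
            if zip_file is None:
                zip_file = zipfile.ZipFile(zip_path)
                self._zipfile_cache[zip_path] = zip_file
            return zip_file.read(member)
        with zipfile.ZipFile(zip_path) as zip_file:
            return zip_file.read(member)


def stream_members(requests):
    # requests: iterable of (zip_path, member_name) pairs, grouped by archive.
    reader = ZipReader()
    reader.open_zipfile_cache()
    try:
        for zip_path, member in requests:
            yield reader.read(zip_path, member)
    finally:
        # Runs when iteration finishes or the consumer abandons the
        # generator, so no zipfile handle is leaked.
        reader.close_zipfile_cache()

Splitting the context manager into an explicit open/close pair lets a long-lived streaming generator keep the cache open across many yields and still release every zipfile handle deterministically, even if the HTTP client aborts the download mid-stream.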