From e1c3785b37be9d3d97d58ed8c85fe70af63303fe Mon Sep 17 00:00:00 2001 From: Theodore Chang <tlcfem@gmail.com> Date: Sat, 24 Aug 2024 18:49:44 +0200 Subject: [PATCH] Add size limit --- nomad/archive/converter.py | 15 +++++++++++++++ nomad/cli/admin/uploads.py | 13 ++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/nomad/archive/converter.py b/nomad/archive/converter.py index c4e7357d98..77e2cf944e 100644 --- a/nomad/archive/converter.py +++ b/nomad/archive/converter.py @@ -58,6 +58,7 @@ def convert_archive( delete_old: bool = False, counter: Counter = None, force_repack: bool = False, + size_limit: int = 4, ): """ Convert an archive of the old format to the new format. @@ -83,6 +84,7 @@ def convert_archive( delete_old (bool, optional): Whether to delete the old file after conversion. Defaults to False. counter (Counter, optional): A counter to track the progress of the conversion. Defaults to None. force_repack (bool, optional): Force repacking the archive that is already in the new format. Defaults to False. + size_limit (int, optional): The size limit in GB for the archive. Defaults to 4. """ prefix: str = counter.increment() if counter else '' @@ -111,6 +113,13 @@ def convert_archive( flush(f'{prefix} [ERROR] File already exists: {new_path}') return + original_size = os.path.getsize(original_path) + if original_size > size_limit * 1024**3: + flush( + f'{prefix} [WARNING] File size exceeds limit {size_limit} GB: {original_path}' + ) + return + def safe_remove(path: str): if not path: return @@ -165,6 +174,7 @@ def convert_folder( overwrite: bool = False, delete_old: bool = False, force_repack: bool = False, + size_limit: int = 4, ): """ Convert archives in the specified folder to the new format using parallel processing. @@ -181,6 +191,7 @@ def convert_folder( overwrite (bool): Whether to overwrite existing files (default is False). delete_old (bool): Whether to delete the old file after conversion (default is False). force_repack (bool): Force repacking the archive (default is False). + size_limit (int): Size limit in GB for the archive (default is 4). """ file_list: list = [] @@ -217,6 +228,7 @@ def convert_folder( delete_old=delete_old, counter=counter, force_repack=force_repack, + size_limit=size_limit, ) with ProcessPoolExecutor(max_workers=processes) as executor: @@ -242,6 +254,7 @@ def convert_upload( overwrite: bool = False, delete_old: bool = False, force_repack: bool = False, + size_limit: int = 4, ): """ Function to convert an upload with the given upload_id to the new format. @@ -258,6 +271,7 @@ def convert_upload( overwrite (bool, optional): Whether to overwrite existing files. Defaults to False. delete_old (bool, optional): Whether to delete the old file after conversion. Defaults to False. force_repack (bool, optional): Force repacking the existing archive (in new format). Defaults to False. + size_limit (int, optional): Size limit in GB for the archive. Defaults to 4. """ if isinstance(uploads, (str, Upload)): uploads = [uploads] @@ -289,6 +303,7 @@ def convert_upload( overwrite=overwrite, delete_old=delete_old, force_repack=force_repack, + size_limit=size_limit, ) diff --git a/nomad/cli/admin/uploads.py b/nomad/cli/admin/uploads.py index 0d041fd3f6..20d8bc930e 100644 --- a/nomad/cli/admin/uploads.py +++ b/nomad/cli/admin/uploads.py @@ -1429,11 +1429,13 @@ def only_v1(path: str): ) @click.option( '--migrate', + '-m', is_flag=True, help='Only convert v1 archive files to v1.2 archive files.', ) @click.option( '--force-repack', + '-f', is_flag=True, help='Force repacking existing archives that are already in the new format', ) @@ -1444,9 +1446,16 @@ def only_v1(path: str): default=os.cpu_count(), help='Number of processes to use for conversion. Default is os.cpu_count().', ) +@click.option( + '--size-limit', + '-s', + type=int, + default=4, + help='Limit archive size in GB. Default is 4GB.', +) @click.pass_context def convert_archive( - ctx, uploads, overwrite, delete_old, migrate, force_repack, parallel + ctx, uploads, overwrite, delete_old, migrate, force_repack, parallel, size_limit ): _, selected = _query_uploads(uploads, **ctx.obj.uploads_kwargs) @@ -1461,6 +1470,7 @@ def convert_archive( if_include=only_v1, processes=parallel, force_repack=force_repack, + size_limit=size_limit, ) else: convert_upload( @@ -1469,4 +1479,5 @@ def convert_archive( delete_old=delete_old, processes=parallel, force_repack=force_repack, + size_limit=size_limit, ) -- GitLab