diff --git a/nomad/client/migration.py b/nomad/client/migration.py index 8a75b796475ea994e963a351f35c08b58fe6023d..b4349cec2691016895cf24d41521e8011123905e 100644 --- a/nomad/client/migration.py +++ b/nomad/client/migration.py @@ -25,7 +25,8 @@ from nomad.migration import NomadCOEMigration from .main import cli -_migration: NomadCOEMigration = None +def _Migration(**kwargs) -> NomadCOEMigration: + return NomadCOEMigration() def _setup(): @@ -38,7 +39,8 @@ def _setup(): @click.option('-u', '--user', default=config.migration_source_db.user, help='The migration repository source db user, default is %s.' % config.migration_source_db.user) @click.option('-w', '--password', default=config.migration_source_db.password, help='The migration repository source db password.') @click.option('-db', '--dbname', default=config.migration_source_db.dbname, help='The migration repository source db name, default is %s.' % config.migration_source_db.dbname) -def migration(host, port, user, password, dbname): +@click.option('--migration-version', default=0, type=int, help='The version number, only packages with lower or no number will be migrated.') +def migration(host, port, user, password, dbname, migration_version): global _setup def _setup(): @@ -47,8 +49,10 @@ def migration(host, port, user, password, dbname): readony=True, host=host, port=port, user=user, password=password, dbname=dbname) infrastructure.setup_mongo() - global _migration - _migration = NomadCOEMigration() + global _Migration + + def _Migration(**kwargs): + return NomadCOEMigration(migration_version=migration_version, **kwargs) @migration.command(help='Create/update the coe repository db migration index') @@ -60,7 +64,7 @@ def index(drop, with_metadata, per_query): start = time.time() indexed_total = 0 indexed_calcs = 0 - for calc, total in _migration.index(drop=drop, with_metadata=with_metadata, per_query=int(per_query)): + for calc, total in _Migration().index(drop=drop, with_metadata=with_metadata, per_query=int(per_query)): indexed_total += 1 indexed_calcs += 1 if calc is not None else 0 eta = total * ((time.time() - start) / indexed_total) @@ -76,22 +80,29 @@ def package(upload_paths): infrastructure.setup_logging() infrastructure.setup_mongo() - migration = NomadCOEMigration() - migration.package(*upload_paths) + _Migration().package(*upload_paths) + + +@migration.command(help='Get an report over all migrated packages.') +def report(): + infrastructure.setup_logging() + infrastructure.setup_mongo() + + report = _Migration().report() + print(report) @migration.command(help='Copy users from source into empty target db') def copy_users(**kwargs): _setup() - _migration.copy_users() + _Migration().copy_users() @migration.command(help='Set the repo db PID calc counter.') @click.argument('prefix', nargs=1, type=int, default=7000000) def pid_prefix(prefix: int): infrastructure.setup_logging() - migration = NomadCOEMigration() - migration.set_pid_prefix(prefix=prefix) + _Migration().set_pid_prefix(prefix=prefix) @migration.command(help='Upload the given upload locations. Uses the existing index to provide user metadata') @@ -101,11 +112,10 @@ def pid_prefix(prefix: int): @click.option('--local', help='Create local upload files.', is_flag=True) @click.option('--delete-local', help='Delete created local upload files after upload.', is_flag=True) @click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.') -@click.option('--migration-version', default=0, type=int, help='The version number, only packages with lower or no number will be migrated.') @click.option('--delete-failed', default='', type=str, help='String from N, U, P to determine if empty (N), failed (U), or failed to publish (P) uploads should be deleted or kept for debugging.') def upload( paths: list, pattern: str, create_packages: bool, local: bool, delete_local: bool, - parallel: int, migration_version: int, delete_failed: str): + parallel: int, delete_failed: str): infrastructure.setup_logging() infrastructure.setup_mongo() @@ -119,7 +129,6 @@ def upload( if re.fullmatch(compiled_pattern, sub_directory): paths.append(os.path.join(path, sub_directory)) - migration = NomadCOEMigration(migration_version=migration_version, threads=parallel) - migration.migrate( + _Migration(threads=parallel).migrate( *paths, local=local, delete_local=delete_local, create_packages=create_packages, delete_failed=delete_failed) diff --git a/nomad/migration.py b/nomad/migration.py index 4b18b21d005ee628901209e0ad514625b31c2c17..fe32a52535f505fc2f85745743dacf4e0ad9cab3 100644 --- a/nomad/migration.py +++ b/nomad/migration.py @@ -115,6 +115,23 @@ class Package(Document): meta = dict(indexes=['upload_id', 'migration_version']) + @classmethod + def aggregate_reports(cls, migration_version: str = None) -> 'Report': + """ + Returns an aggregated report over all package reports of the given migration version, + or all packages. + """ + if migration_version is not None: + query = cls.objects(migration_version__gte=migration_version, report__exists=True) + else: + query = cls.objects(report__exists=True) + + report = Report() + for package in query: + report.add(Report(package.report)) + + return report + def open_package_upload_file(self) -> IO: """ Creates a streaming zip file from the files of this package. """ zip_file = zipstream.ZipFile(compression=zipstream.ZIP_STORED, allowZip64=True) @@ -388,6 +405,10 @@ class NomadCOEMigration: return self._client + def report(self): + """ returns an aggregated report over all prior migrated packages """ + return Package.aggregate_reports(migration_version=self.migration_version) + def copy_users(self): """ Copy all users. """ for source_user in self.source.query(User).all(): @@ -593,13 +614,7 @@ class NomadCOEMigration: def print_report(): if not self._quiet: - print( - 'packages: {:,}, skipped: {:,}, source calcs: {:,}, migrated: {:,}, ' - 'failed: {:,}, missing: {:,}, new: {:,}'.format( - overall_report.total_packages, overall_report.skipped_packages, - overall_report.total_source_calcs, overall_report.migrated_calcs, - overall_report.failed_calcs, overall_report.missing_calcs, - overall_report.new_calcs)) + print(overall_report) def migrate_package(package: Package, of_packages: int): logger = self.logger.bind( @@ -983,3 +998,12 @@ class Report(utils.POPO): def add(self, other: 'Report') -> None: for key, value in other.items(): self[key] = self.get(key, 0) + value + + def __str__(self): + return ( + 'packages: {:,}, skipped: {:,}, source calcs: {:,}, migrated: {:,}, ' + 'failed: {:,}, missing: {:,}, new: {:,}'.format( + self.total_packages, self.skipped_packages, + self.total_source_calcs, self.migrated_calcs, + self.failed_calcs, self.missing_calcs, + self.new_calcs))