Commit fe385656 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added report script. [skip ci]

parent e50ed7d4
Pipeline #45224 skipped
......@@ -25,7 +25,8 @@ from nomad.migration import NomadCOEMigration
from .main import cli
_migration: NomadCOEMigration = None
def _Migration(**kwargs) -> NomadCOEMigration:
return NomadCOEMigration()
def _setup():
......@@ -38,7 +39,8 @@ def _setup():
@click.option('-u', '--user', default=config.migration_source_db.user, help='The migration repository source db user, default is %s.' % config.migration_source_db.user)
@click.option('-w', '--password', default=config.migration_source_db.password, help='The migration repository source db password.')
@click.option('-db', '--dbname', default=config.migration_source_db.dbname, help='The migration repository source db name, default is %s.' % config.migration_source_db.dbname)
def migration(host, port, user, password, dbname):
@click.option('--migration-version', default=0, type=int, help='The version number, only packages with lower or no number will be migrated.')
def migration(host, port, user, password, dbname, migration_version):
global _setup
def _setup():
......@@ -47,8 +49,10 @@ def migration(host, port, user, password, dbname):
readony=True, host=host, port=port, user=user, password=password, dbname=dbname)
infrastructure.setup_mongo()
global _migration
_migration = NomadCOEMigration()
global _Migration
def _Migration(**kwargs):
return NomadCOEMigration(migration_version=migration_version, **kwargs)
@migration.command(help='Create/update the coe repository db migration index')
......@@ -60,7 +64,7 @@ def index(drop, with_metadata, per_query):
start = time.time()
indexed_total = 0
indexed_calcs = 0
for calc, total in _migration.index(drop=drop, with_metadata=with_metadata, per_query=int(per_query)):
for calc, total in _Migration().index(drop=drop, with_metadata=with_metadata, per_query=int(per_query)):
indexed_total += 1
indexed_calcs += 1 if calc is not None else 0
eta = total * ((time.time() - start) / indexed_total)
......@@ -76,22 +80,29 @@ def package(upload_paths):
infrastructure.setup_logging()
infrastructure.setup_mongo()
migration = NomadCOEMigration()
migration.package(*upload_paths)
_Migration().package(*upload_paths)
@migration.command(help='Get an report over all migrated packages.')
def report():
infrastructure.setup_logging()
infrastructure.setup_mongo()
report = _Migration().report()
print(report)
@migration.command(help='Copy users from source into empty target db')
def copy_users(**kwargs):
_setup()
_migration.copy_users()
_Migration().copy_users()
@migration.command(help='Set the repo db PID calc counter.')
@click.argument('prefix', nargs=1, type=int, default=7000000)
def pid_prefix(prefix: int):
infrastructure.setup_logging()
migration = NomadCOEMigration()
migration.set_pid_prefix(prefix=prefix)
_Migration().set_pid_prefix(prefix=prefix)
@migration.command(help='Upload the given upload locations. Uses the existing index to provide user metadata')
......@@ -101,11 +112,10 @@ def pid_prefix(prefix: int):
@click.option('--local', help='Create local upload files.', is_flag=True)
@click.option('--delete-local', help='Delete created local upload files after upload.', is_flag=True)
@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.')
@click.option('--migration-version', default=0, type=int, help='The version number, only packages with lower or no number will be migrated.')
@click.option('--delete-failed', default='', type=str, help='String from N, U, P to determine if empty (N), failed (U), or failed to publish (P) uploads should be deleted or kept for debugging.')
def upload(
paths: list, pattern: str, create_packages: bool, local: bool, delete_local: bool,
parallel: int, migration_version: int, delete_failed: str):
parallel: int, delete_failed: str):
infrastructure.setup_logging()
infrastructure.setup_mongo()
......@@ -119,7 +129,6 @@ def upload(
if re.fullmatch(compiled_pattern, sub_directory):
paths.append(os.path.join(path, sub_directory))
migration = NomadCOEMigration(migration_version=migration_version, threads=parallel)
migration.migrate(
_Migration(threads=parallel).migrate(
*paths, local=local, delete_local=delete_local, create_packages=create_packages,
delete_failed=delete_failed)
......@@ -115,6 +115,23 @@ class Package(Document):
meta = dict(indexes=['upload_id', 'migration_version'])
@classmethod
def aggregate_reports(cls, migration_version: str = None) -> 'Report':
"""
Returns an aggregated report over all package reports of the given migration version,
or all packages.
"""
if migration_version is not None:
query = cls.objects(migration_version__gte=migration_version, report__exists=True)
else:
query = cls.objects(report__exists=True)
report = Report()
for package in query:
report.add(Report(package.report))
return report
def open_package_upload_file(self) -> IO:
""" Creates a streaming zip file from the files of this package. """
zip_file = zipstream.ZipFile(compression=zipstream.ZIP_STORED, allowZip64=True)
......@@ -388,6 +405,10 @@ class NomadCOEMigration:
return self._client
def report(self):
""" returns an aggregated report over all prior migrated packages """
return Package.aggregate_reports(migration_version=self.migration_version)
def copy_users(self):
""" Copy all users. """
for source_user in self.source.query(User).all():
......@@ -593,13 +614,7 @@ class NomadCOEMigration:
def print_report():
if not self._quiet:
print(
'packages: {:,}, skipped: {:,}, source calcs: {:,}, migrated: {:,}, '
'failed: {:,}, missing: {:,}, new: {:,}'.format(
overall_report.total_packages, overall_report.skipped_packages,
overall_report.total_source_calcs, overall_report.migrated_calcs,
overall_report.failed_calcs, overall_report.missing_calcs,
overall_report.new_calcs))
print(overall_report)
def migrate_package(package: Package, of_packages: int):
logger = self.logger.bind(
......@@ -983,3 +998,12 @@ class Report(utils.POPO):
def add(self, other: 'Report') -> None:
for key, value in other.items():
self[key] = self.get(key, 0) + value
def __str__(self):
return (
'packages: {:,}, skipped: {:,}, source calcs: {:,}, migrated: {:,}, '
'failed: {:,}, missing: {:,}, new: {:,}'.format(
self.total_packages, self.skipped_packages,
self.total_source_calcs, self.migrated_calcs,
self.failed_calcs, self.missing_calcs,
self.new_calcs))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment