Commit 6134efdf authored by Markus Scheidgen
Browse files

Added a start_pid parameter to migration index cli command. [skip ci]

parent 3d17c42a
Pipeline #53200 skipped
......@@ -68,12 +68,14 @@ def migration(
@click.option('--drop', help='Drop the existing index, otherwise it will only add new data.', is_flag=True)
@click.option('--with-metadata', help='Extract metadata for each calc and add it to the index.', is_flag=True)
@click.option('--per-query', default=100, help='We index many objects with one query. Default is 100.')
def index(drop, with_metadata, per_query):
@click.option('--start-pid', type=int, default=-1, help='Only index calculations with PID greater equal the given value')
def index(drop, with_metadata, per_query, start_pid):
_setup()
start = time.time()
indexed_total = 0
indexed_calcs = 0
for calc, total in _Migration().source_calc_index(drop=drop, with_metadata=with_metadata, per_query=int(per_query)):
for calc, total in _Migration().source_calc_index(
drop=drop, with_metadata=with_metadata, per_query=int(per_query), start_pid=start_pid):
indexed_total += 1
indexed_calcs += 1 if calc is not None else 0
eta = total * ((time.time() - start) / indexed_total)
......
......@@ -899,8 +899,9 @@ class SourceCalc(Document):
_dataset_cache: dict = {}
@staticmethod
def index(source, drop: bool = False, with_metadata: bool = True, per_query: int = 100) \
-> Generator[Tuple['SourceCalc', int], None, None]:
def index(
source, drop: bool = False, with_metadata: bool = True, per_query: int = 100,
start_pid: int = -1) -> Generator[Tuple['SourceCalc', int], None, None]:
"""
Creates a collection of :class:`SourceCalc` documents that represent source repo
db entries.
......@@ -914,6 +915,7 @@ class SourceCalc(Document):
query on the CoE snowflake/star shaped schema.
The query cannot ask for the whole db at once: choose how many calculations
should be read at a time to optimize for your application.
start_pid: Only index calculations with a PID greater than or equal to the given value
Returns:
yields tuples (:class:`SourceCalc`, #calcs_total[incl. datasets])
......@@ -923,7 +925,8 @@ class SourceCalc(Document):
SourceCalc.drop_collection()
last_source_calc = SourceCalc.objects().order_by('-pid').first()
start_pid = last_source_calc.pid if last_source_calc is not None else 0
if start_pid is None or start_pid == -1:
start_pid = last_source_calc.pid if last_source_calc is not None else 0
source_query = source.query(Calc)
total = source_query.count() - SourceCalc.objects.count()
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment