diff --git a/nomad/processing/base.py b/nomad/processing/base.py index a3b187da5f226095b03ddf4bf90ab7ec9f257e55..3085c319cb5406dba74fddf9916578073aff5bd5 100644 --- a/nomad/processing/base.py +++ b/nomad/processing/base.py @@ -198,6 +198,7 @@ class Proc(Document, metaclass=ProcMetaclass): assert not self.process_running self.current_task = None + self.process_status = None self.tasks_status = PENDING self.errors = [] self.warnings = [] @@ -207,7 +208,7 @@ class Proc(Document, metaclass=ProcMetaclass): def reset_pymongo_update(cls, worker_hostname: str = None): """ Returns a pymongo update dict part to reset calculations. """ return dict( - current_task=None, tasks_status=PENDING, errors=[], warnings=[], + current_task=None, process_status=None, tasks_status=PENDING, errors=[], warnings=[], worker_hostname=worker_hostname) @classmethod diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 33422d282e473650b36a05e6611e2cbebad195d4..17da0fb5ce6ed8f283cf397c0f14b5280778f574 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -190,6 +190,17 @@ class Calc(Proc): """ logger = self.get_logger() + parser = match_parser(self.mainfile, self.upload_files, strict=False) + if parser is None: + logger.error( + 'no parser matches during re-process, use the old parser', + calc_id=self.calc_id) + elif self.parser != parser.name: + self.parser = parser.name + logger.info( + 'different parser matches during re-process, use new parser', + calc_id=self.calc_id, parser=parser.name) + try: calc_with_metadata = datamodel.CalcWithMetadata(**self.metadata) calc_with_metadata.upload_id = self.upload_id @@ -660,33 +671,21 @@ class Upload(Proc): self._continue_with('parse_all') try: # check if a calc is already/still processing - for calc in Calc.objects( - upload_id=self.upload_id, - **Calc.process_running_mongoengine_query()).exclude('metadata'): - logger.warn('a process is already running on calc', calc_id=calc.calc_id) + processing = Calc.objects( + 
upload_id=self.upload_id, + **Calc.process_running_mongoengine_query()).count() + + if processing > 0: + logger.warn( + 'processes are still/already running on calc, they will be resetted', + count=processing) # reset all calcs Calc._get_collection().update_many( dict(upload_id=self.upload_id), {'$set': Calc.reset_pymongo_update(worker_hostname=self.worker_hostname)}) - # match and call calc processings - # we use a copy of the mongo queryset; reasons are cursor timeouts and - # changing results on modifying the calc entries - calcs = list(Calc.objects(upload_id=self.upload_id).exclude('metadata')) - for calc in calcs: - parser = match_parser(calc.mainfile, staging_upload_files, strict=False) - if parser is None: - logger.error( - 'no parser matches during re-process, use the old parser', - calc_id=calc.calc_id) - elif calc.parser != parser.name: - calc.parser = parser.name - calc.save() - logger.info( - 'different parser matches during re-process, use new parser', - calc_id=calc.calc_id, parser=parser.name) - + # process all calcs Calc.process_all(Calc.re_process_calc, dict(upload_id=self.upload_id), exclude=['metadata']) logger.info('completed to trigger re-process of all calcs')