Commit e0eb4634 authored by Markus Scheidgen's avatar Markus Scheidgen Committed by Lauri Himanen
Browse files

Added option to skip rematching in reprocessing.

parent 9a8ea668
......@@ -309,6 +309,7 @@ raw_file_strip_cutoff = 1000
max_entry_download = 500000
use_empty_parsers = False
reprocess_unmatched = True
reprocess_rematch = True
metadata_file_name = 'nomad'
metadata_file_extensions = ('json', 'yaml', 'yml')
......
......@@ -283,27 +283,34 @@ class Calc(Proc):
instead of creating it initially, we are just updating the existing
records.
'''
parser = match_parser(self.upload_files.raw_file_object(self.mainfile).os_path, strict=False)
logger = self.get_logger()
if parser is None and not config.reprocess_unmatched:
self.errors = ['no parser matches during re-process, will not re-process this calc']
if config.reprocess_rematch:
with utils.timer(logger, 'parser matching executed'):
parser = match_parser(
self.upload_files.raw_file_object(self.mainfile).os_path, strict=False)
try:
upload_files = PublicUploadFiles(self.upload_id, is_authorized=lambda: True)
with upload_files.read_archive(self.calc_id) as archive:
self.upload_files.write_archive(self.calc_id, archive[self.calc_id].to_dict())
if parser is None and not config.reprocess_unmatched:
self.errors = ['no parser matches during re-process, will not re-process this calc']
except Exception as e:
logger.error('could not copy archive for non matching, non reprocessed entry', exc_info=e)
raise e
# mock the steps of actual processing
self._continue_with('parsing')
self._continue_with('normalizing')
self._continue_with('archiving')
self._complete()
return
try:
upload_files = PublicUploadFiles(self.upload_id, is_authorized=lambda: True)
with upload_files.read_archive(self.calc_id) as archive:
self.upload_files.write_archive(self.calc_id, archive[self.calc_id].to_dict())
except Exception as e:
logger.error('could not copy archive for non matching, non reprocessed entry', exc_info=e)
raise e
# mock the steps of actual processing
self._continue_with('parsing')
self._continue_with('normalizing')
self._continue_with('archiving')
self._complete()
return
else:
parser = parser_dict.get(self.parser)
if parser is None:
self.get_logger().warn('no parser matches during re-process, use the old parser')
......
......@@ -16,6 +16,7 @@ data:
source_url: "{{ .Values.meta.source_url }}"
maintainer_email: "{{ .Values.meta.maintainer_email }}"
reprocess_unmatched: {{ .Values.reprocess_unmatched }}
reprocess_rematch: {{ .Values.reprocess_rematch }}
fs:
tmp: "{{ .Values.volumes.tmp }}"
prefix_size: {{ .Values.volumes.prefixSize }}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment