Commit e0eb4634 authored by Markus Scheidgen, committed by Lauri Himanen
Browse files

Added option to skip rematching in reprocessing.

parent 9a8ea668
...@@ -309,6 +309,7 @@ raw_file_strip_cutoff = 1000 ...@@ -309,6 +309,7 @@ raw_file_strip_cutoff = 1000
max_entry_download = 500000 max_entry_download = 500000
use_empty_parsers = False use_empty_parsers = False
reprocess_unmatched = True reprocess_unmatched = True
reprocess_rematch = True
metadata_file_name = 'nomad' metadata_file_name = 'nomad'
metadata_file_extensions = ('json', 'yaml', 'yml') metadata_file_extensions = ('json', 'yaml', 'yml')
......
...@@ -283,27 +283,34 @@ class Calc(Proc): ...@@ -283,27 +283,34 @@ class Calc(Proc):
instead of creating it initially, we are just updating the existing instead of creating it initially, we are just updating the existing
records. records.
''' '''
parser = match_parser(self.upload_files.raw_file_object(self.mainfile).os_path, strict=False)
logger = self.get_logger() logger = self.get_logger()
if parser is None and not config.reprocess_unmatched: if config.reprocess_rematch:
self.errors = ['no parser matches during re-process, will not re-process this calc'] with utils.timer(logger, 'parser matching executed'):
parser = match_parser(
self.upload_files.raw_file_object(self.mainfile).os_path, strict=False)
try: if parser is None and not config.reprocess_unmatched:
upload_files = PublicUploadFiles(self.upload_id, is_authorized=lambda: True) self.errors = ['no parser matches during re-process, will not re-process this calc']
with upload_files.read_archive(self.calc_id) as archive:
self.upload_files.write_archive(self.calc_id, archive[self.calc_id].to_dict())
except Exception as e: try:
logger.error('could not copy archive for non matching, non reprocessed entry', exc_info=e) upload_files = PublicUploadFiles(self.upload_id, is_authorized=lambda: True)
raise e with upload_files.read_archive(self.calc_id) as archive:
self.upload_files.write_archive(self.calc_id, archive[self.calc_id].to_dict())
# mock the steps of actual processing
self._continue_with('parsing') except Exception as e:
self._continue_with('normalizing') logger.error('could not copy archive for non matching, non reprocessed entry', exc_info=e)
self._continue_with('archiving') raise e
self._complete()
return # mock the steps of actual processing
self._continue_with('parsing')
self._continue_with('normalizing')
self._continue_with('archiving')
self._complete()
return
else:
parser = parser_dict.get(self.parser)
if parser is None: if parser is None:
self.get_logger().warn('no parser matches during re-process, use the old parser') self.get_logger().warn('no parser matches during re-process, use the old parser')
......
...@@ -16,6 +16,7 @@ data: ...@@ -16,6 +16,7 @@ data:
source_url: "{{ .Values.meta.source_url }}" source_url: "{{ .Values.meta.source_url }}"
maintainer_email: "{{ .Values.meta.maintainer_email }}" maintainer_email: "{{ .Values.meta.maintainer_email }}"
reprocess_unmatched: {{ .Values.reprocess_unmatched }} reprocess_unmatched: {{ .Values.reprocess_unmatched }}
reprocess_rematch: {{ .Values.reprocess_rematch }}
fs: fs:
tmp: "{{ .Values.volumes.tmp }}" tmp: "{{ .Values.volumes.tmp }}"
prefix_size: {{ .Values.volumes.prefixSize }} prefix_size: {{ .Values.volumes.prefixSize }}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment