Commit 553f50dd authored by Markus Scheidgen's avatar Markus Scheidgen

Improved processing logs, fixed bugs around missing/changed parser names.

parent d754830b
Pipeline #71882 failed with stages in 31 minutes and 11 seconds
......@@ -463,8 +463,7 @@ class EntryMetadata(metainfo.MSection):
if domain_section is None:
domain_section = self.m_create(domain_section_def.section_cls)
if backend is not None:
domain_section.apply_domain_metadata(backend)
domain_section.apply_domain_metadata(backend)
class EntryArchive(metainfo.MSection):
......
......@@ -278,6 +278,14 @@ class DFTMetadata(MSection):
logger = utils.get_logger(__name__).bind(
upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)
if backend is None:
if entry.parser_name is not None:
from nomad.parsing import parser_dict
parser = parser_dict.get(entry.parser_name)
if hasattr(parser, 'code_name'):
self.code_name = parser.code_name
return
# code and code specific ids
self.code_name = backend.get_value('program_name', 0)
try:
......
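The added block lets DFT domain metadata be derived without a parser backend, e.g. for entries whose parsing failed. A minimal sketch of that fallback path, assuming the nomad.datamodel module layout and the parser_name quantity referenced above (the snippet itself is not part of this commit):
from nomad import datamodel
from nomad.datamodel import dft

# An entry that never produced a backend, but whose parser is known.
entry = datamodel.EntryMetadata()
entry.parser_name = 'parsers/fleur'
dft_section = entry.m_create(dft.DFTMetadata)

# With backend=None the new code looks the parser up in parser_dict and,
# if it defines a code_name, copies it; all backend-based metadata is skipped.
dft_section.apply_domain_metadata(None)
# dft_section.code_name is now 'fleur', taken from the parsers/fleur parser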
......@@ -51,6 +51,9 @@ class EMSMetadata(MSection):
group_hash = Quantity(type=str, a_search=Search())
def apply_domain_metadata(self, backend):
if backend is None:
return
entry = self.m_parent
logger = utils.get_logger(__name__).bind(
upload_id=entry.upload_id, calc_id=entry.calc_id, mainfile=entry.mainfile)
......
......@@ -126,8 +126,8 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
except KeyError as e:
self.logger.error(
'Could not read all input data', normalizer=self.__class__.__name__,
section='section_system', g_index=g_index, key_error=str(e))
'could not read a system property', normalizer=self.__class__.__name__,
section='section_system', g_index=g_index, key_error=str(e), exc_info=e)
return False
except Exception as e:
......
......@@ -444,48 +444,57 @@ parsers = [
mainfile_contents_re=r'Materials Studio DMol\^3'
),
LegacyParser(
name='parser/fleur', code_name='fleur', domain='dft',
name='parsers/fleur', code_name='fleur', domain='dft',
parser_class_name='fleurparser.FleurParser',
mainfile_contents_re=r'This output is generated by fleur.'
),
LegacyParser(
name='parser/molcas', code_name='MOLCAS', domain='dft',
name='parsers/molcas', code_name='MOLCAS', domain='dft',
parser_class_name='molcasparser.MolcasParser',
mainfile_contents_re=r'M O L C A S'
),
LegacyParser(
name='parser/onetep', code_name='ONETEP', domain='dft',
name='parsers/onetep', code_name='ONETEP', domain='dft',
parser_class_name='onetepparser.OnetepParser',
mainfile_contents_re=r'####### # # ####### ####### ####### ######'
)
]
empty_parsers = [
EmptyParser(
name='missing/octopus', code_name='Octopus', domain='dft',
mainfile_name_re=r'(inp)|(.*/inp)'
),
EmptyParser(
name='missing/crystal', code_name='Crystal', domain='dft',
mainfile_name_re=r'.*\.cryst\.out'
),
EmptyParser(
name='missing/wien2k', code_name='WIEN2k', domain='dft',
mainfile_name_re=r'.*\.scf'
),
EmptyParser(
name='missing/fhi-aims', code_name='FHI-aims', domain='dft',
mainfile_name_re=r'.*\.fhiaims'
)
]
if config.use_empty_parsers:
# There are some entries with PIDs that have mainfiles which do not match what
# the actual parsers expect. We use the EmptyParser to produce placeholder entries
# to keep the PIDs. These parsers will not match for new, non migrated data.
parsers.extend([
EmptyParser(
name='missing/octopus', code_name='Octopus', domain='dft',
mainfile_name_re=r'(inp)|(.*/inp)'
),
EmptyParser(
name='missing/crystal', code_name='Crystal',
mainfile_name_re=r'.*\.cryst\.out'
),
EmptyParser(
name='missing/wien2k', code_name='WIEN2k',
mainfile_name_re=r'.*\.scf'
),
EmptyParser(
name='missing/fhi-aims', code_name='FHI-aims', domain='dft',
mainfile_name_re=r'.*\.fhiaims'
)
])
parsers.extend(empty_parsers)
parsers.append(BrokenParser())
''' Instantiation and constructor based config of all parsers. '''
parser_dict = {parser.name: parser for parser in parsers} # type: ignore
parser_dict = {parser.name: parser for parser in parsers + empty_parsers} # type: ignore
''' A dict to access parsers by name. Usually 'parsers/<...>', e.g. 'parsers/vasp'. '''
# renamed parsers
parser_dict['parser/broken'] = parser_dict['parsers/broken']
parser_dict['parser/fleur'] = parser_dict['parsers/fleur']
parser_dict['parser/molcas'] = parser_dict['parsers/molcas']
parser_dict['parser/octopus'] = parser_dict['parsers/octopus']
parser_dict['parser/onetep'] = parser_dict['parsers/onetep']
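The alias entries above keep old, persisted parser names resolvable after the rename. A short check of what the mapping implies, using only names defined in this hunk:
from nomad.parsing import parser_dict

# The old 'parser/...' alias and the new canonical 'parsers/...' name point at
# the very same parser object, so entries that stored the old name keep working.
assert parser_dict['parser/fleur'] is parser_dict['parsers/fleur']
assert parser_dict['parsers/fleur'].code_name == 'fleur'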
......@@ -52,7 +52,7 @@ class EmptyParser(MatchingParser):
Implementation that produces an empty code_run
'''
def run(self, mainfile: str, logger=None) -> Backend:
backend = Backend(metainfo='vasp')
backend = Backend(metainfo=self.code_name, domain=self.domain, logger=logger)
backend.openSection('section_run')
backend.addValue('program_name', self.code_name)
backend.closeSection('section_run', 0)
......
......@@ -70,7 +70,7 @@ class BrokenParser(Parser):
'''
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.name = 'parser/broken'
self.name = 'parsers/broken'
self.code_name = 'currupted mainfile'
self._patterns = [
re.compile(r'^pid=[0-9]+'), # some 'mainfile' contain list of log-kinda information with pids
......
......@@ -269,10 +269,15 @@ class Proc(Document, metaclass=ProcMetaclass):
errors_str = "; ".join([str(error) for error in errors])
Proc.log(logger, log_level, 'task failed', errors=errors_str)
self.on_fail()
logger.info('process failed')
self.save()
def on_fail(self):
pass
def warning(self, *warnings, log_level=logging.WARNING, **kwargs):
''' Allows to save warnings. Takes strings or exceptions as args. '''
assert self.process_running or self.tasks_running
......
......@@ -97,6 +97,7 @@ class Calc(Proc):
('upload_id', 'tasks_status'),
('upload_id', 'process_status'),
('upload_id', 'metadata.nomad_version'),
'parser',
'metadata.published',
'metadata.datasets'
'metadata.pid'
......@@ -139,7 +140,8 @@ class Calc(Proc):
the archive.
'''
entry_metadata = datamodel.EntryMetadata()
entry_metadata.domain = parser_dict[self.parser].domain
if self.parser is not None:
entry_metadata.domain = parser_dict[self.parser].domain
entry_metadata.upload_id = self.upload_id
entry_metadata.calc_id = self.calc_id
entry_metadata.mainfile = self.mainfile
......@@ -260,10 +262,15 @@ class Calc(Proc):
self.warnings = ['no matching parser found during re-processing']
elif self.parser != parser.name:
self.parser = parser.name
logger.info(
'different parser matches during re-process, use new parser',
parser=parser.name)
if parser_dict[self.parser].name == parser.name:
# parser was just renamed
self.parser = parser.name
else:
self.parser = parser.name
logger.info(
'different parser matches during re-process, use new parser',
parser=parser.name)
try:
self._entry_metadata = self.user_metadata()
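The new branch distinguishes a mere parser rename from a genuinely different parser matching during re-processing. A sketch of that check with a hypothetical helper is_rename (not in the codebase) that mirrors the condition above:
from nomad.parsing import parser_dict

def is_rename(stored_name, matched_parser):
    # mirrors: parser_dict[self.parser].name == parser.name
    return parser_dict[stored_name].name == matched_parser.name

# 'parser/fleur' is only an alias of 'parsers/fleur', so this counts as a rename
# and no 'different parser matches during re-process' entry is logged.
assert is_rename('parser/fleur', parser_dict['parsers/fleur'])

# a genuinely different parser still takes the 'use new parser' branch
assert not is_rename('parsers/fleur', parser_dict['parsers/molcas'])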
......@@ -318,29 +325,19 @@ class Calc(Proc):
except Exception as e:
logger.error('could not unload processing results', exc_info=e)
def fail(self, *errors, log_level=logging.ERROR, **kwargs):
def on_fail(self):
# in case of failure, index a minimum set of metadata and mark
# processing failure
try:
if self.parser is not None:
try:
parser = parser_dict[self.parser]
if hasattr(parser, 'code_name'):
self._entry_metadata.code_name = parser.code_name
except KeyError:
# This only happens in re-processing. The parser was removed.
# The old parser was probably only used to keep this entry matching
# and in the system (retain its PID). With the current nomad this is
# not parsable anyhow.
self._entry_metadata.code_name = config.services.unavailable_value
self._entry_metadata.processed = False
self.apply_entry_metadata(self._entry_metadata)
if self._parser_backend and self._parser_backend.resource:
backend = self._parser_backend
else:
backend = None
self._entry_metadata.apply_domain_metadata(backend)
self._entry_metadata.a_elastic.index()
except Exception as e:
self.get_logger().error(
......@@ -352,8 +349,6 @@ class Calc(Proc):
self.get_logger().error(
'could not write archive after processing failure', exc_info=e)
super().fail(*errors, log_level=log_level, **kwargs)
def on_process_complete(self, process_name):
# the save might be necessary to correctly read the join condition from the db
self.save()
......@@ -470,6 +465,13 @@ class Calc(Proc):
log_data.update(archive_size=archive_size)
def write_archive(self, backend: Backend):
def filter_processing_logs(logs):
if len(logs) > 100:
return [
log for log in logs
if log.get('level') != 'DEBUG']
return logs
if self._calc_proc_logs is None:
self._calc_proc_logs = []
......@@ -481,7 +483,7 @@ class Calc(Proc):
if entry_archive.section_metadata is None:
entry_archive.m_add_sub_section(datamodel.EntryArchive.section_metadata, self._entry_metadata)
entry_archive.processing_logs = self._calc_proc_logs
entry_archive.processing_logs = filter_processing_logs(self._calc_proc_logs)
try:
return self.upload_files.write_archive(self.calc_id, entry_archive.m_to_dict())
......@@ -492,7 +494,7 @@ class Calc(Proc):
# most likely failed due to domain data, try to write metadata and processing logs
entry_archive = datamodel.EntryArchive()
entry_archive.m_add_sub_section(datamodel.EntryArchive.section_metadata, self._entry_metadata)
entry_archive.processing_logs = self._calc_proc_logs
entry_archive.processing_logs = filter_processing_logs(self._calc_proc_logs)
self.upload_files.write_archive(self.calc_id, entry_archive.m_to_dict())
raise e
......
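The filter_processing_logs helper introduced above keeps archives of noisy entries small: once more than 100 processing log records accumulate, DEBUG-level records are dropped before the logs are written to the archive. A self-contained illustration with invented sample data:
def filter_processing_logs(logs):
    # same logic as in Calc.write_archive above
    if len(logs) > 100:
        return [log for log in logs if log.get('level') != 'DEBUG']
    return logs

logs = [{'level': 'DEBUG', 'event': 'parser matched'}] * 150
logs.append({'level': 'ERROR', 'event': 'could not parse mainfile'})

# only the non-DEBUG record survives once the 100 record threshold is exceeded
assert filter_processing_logs(logs) == [{'level': 'ERROR', 'event': 'could not parse mainfile'}]

# short logs are returned unchanged, DEBUG records included
assert filter_processing_logs(logs[:10]) == logs[:10]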