Commit f53ffb23 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'parsers' into 'master'

Parsers

See merge request !10
parents 4d5381b0 286df611
Pipeline #37403 passed with stages
in 4 minutes and 57 seconds
......@@ -322,7 +322,7 @@ class LocalBackend(LegacyParserBackend):
after parsing via :func:`write_json`.
"""
def __init__(self, *args, **kwargs):
self.logger = logger
self.logger = kwargs.pop('logger', logger)
delegate = LegacyLocalBackend(*args, **kwargs)
super().__init__(delegate)
......
......@@ -95,13 +95,13 @@ class LegacyParser(Parser):
return False
def create_backend(self, meta_info):
return LocalBackend(meta_info, debug=False)
def run(self, mainfile: str, logger=None) -> LocalBackend:
# TODO we need a homogeneous interface to parsers, but we don't have it right now.
# There are some hacks to distinguish between ParserInterface parser and simple_parser
# using hasattr, kwargs, etc.
def create_backend(meta_info, logger=None):
return LocalBackend(meta_info, debug=False, logger=logger)
module_name = self.parser_class_name.split('.')[:-1]
parser_class = self.parser_class_name.split('.')[1]
module = importlib.import_module('.'.join(module_name))
......@@ -109,7 +109,7 @@ class LegacyParser(Parser):
init_signature = inspect.getargspec(Parser.__init__)
kwargs = dict(
backend=lambda meta_info: self.create_backend(meta_info),
backend=lambda meta_info: create_backend(meta_info, logger=logger),
log_level=logging.DEBUG, debug=True)
kwargs = {key: value for key, value in kwargs.items() if key in init_signature.args}
self.parser = Parser(**kwargs)
......
......@@ -178,7 +178,7 @@ class Calc(Proc):
def parsing(self):
logger = self.get_calc_logger(parser=self.parser)
parser = parser_dict[self.parser]
with utils.timer(logger, 'parser executed'):
with utils.timer(logger, 'parser executed', step=self.parser):
self._parser_backend = parser.run(self.mainfile_tmp_path, logger=logger)
if self._parser_backend.status[0] != 'ParseSuccess':
logger.error(self._parser_backend.status[1])
......@@ -190,7 +190,7 @@ class Calc(Proc):
for normalizer in normalizers:
normalizer_name = normalizer.__name__
logger = self.get_calc_logger(normalizer=normalizer_name)
with utils.timer(logger, 'normalizer executed'):
with utils.timer(logger, 'normalizer executed', step=normalizer_name):
normalizer(self._parser_backend).normalize(logger=logger)
if self._parser_backend.status[0] != 'ParseSuccess':
logger.error(self._parser_backend.status[1])
......@@ -202,14 +202,17 @@ class Calc(Proc):
@task
def archiving(self):
with utils.timer(self.get_logger(), 'archived'):
upload_hash, calc_hash = self.archive_id.split('/')
additional = dict(
mainfile=self.mainfile,
upload_time=self._upload.upload_time,
staging=True,
restricted=False,
user_id=self._upload.user_id)
logger = self.get_logger()
upload_hash, calc_hash = self.archive_id.split('/')
additional = dict(
mainfile=self.mainfile,
upload_time=self._upload.upload_time,
staging=True,
restricted=False,
user_id=self._upload.user_id)
with utils.timer(logger, 'indexed', step='index'):
# persist to elastic search
RepoCalc.create_from_backend(
self._parser_backend,
......@@ -218,10 +221,12 @@ class Calc(Proc):
calc_hash=calc_hash,
upload_id=self.upload_id)
with utils.timer(logger, 'archived', step='archive'):
# persist the archive
with ArchiveFile(self.archive_id).write_archive_json() as out:
self._parser_backend.write_json(out, pretty=True)
with utils.timer(logger, 'archived log', step='archive_log'):
# close loghandler
if self._calc_proc_logwriter is not None:
self._calc_proc_logwriter.close()
......@@ -394,7 +399,7 @@ class Upload(Chord):
def extracting(self):
logger = self.get_logger()
try:
with utils.timer(logger, 'upload extracted'):
with utils.timer(logger, 'upload extracted', step='extracting'):
self._upload = UploadFile(self.upload_id, local_path=self.local_path)
self._upload.extract()
except KeyError as e:
......@@ -413,25 +418,28 @@ class Upload(Chord):
@task
def parse_all(self):
logger = self.get_logger()
# TODO: deal with multiple possible parser specs
total_calcs = 0
for filename in self._upload.filelist:
for parser in parsers:
try:
if parser.is_mainfile(filename, lambda fn: self._upload.open_file(fn)):
tmp_mainfile = self._upload.get_path(filename)
calc = Calc.create(
archive_id='%s/%s' % (self.upload_hash, utils.hash(filename)),
mainfile=filename, parser=parser.name,
mainfile_tmp_path=tmp_mainfile,
upload_id=self.upload_id)
calc.process()
total_calcs += 1
except Exception as e:
self.warning(
'exception while matching pot. mainfile',
mainfile=filename, exc_info=e)
with utils.timer(logger, 'upload extracted', step='matching'):
total_calcs = 0
for filename in self._upload.filelist:
for parser in parsers:
try:
if parser.is_mainfile(filename, lambda fn: self._upload.open_file(fn)):
tmp_mainfile = self._upload.get_path(filename)
calc = Calc.create(
archive_id='%s/%s' % (self.upload_hash, utils.hash(filename)),
mainfile=filename, parser=parser.name,
mainfile_tmp_path=tmp_mainfile,
upload_id=self.upload_id)
calc.process()
total_calcs += 1
except Exception as e:
self.warning(
'exception while matching pot. mainfile',
mainfile=filename, exc_info=e)
# have to save the total_calcs information for chord management
self.spwaned_childred(total_calcs)
......@@ -442,7 +450,7 @@ class Upload(Chord):
@task
def cleanup(self):
try:
with utils.timer(self.get_logger(), 'processing cleaned up'):
with utils.timer(self.get_logger(), 'processing cleaned up', step='cleaning'):
upload = UploadFile(self.upload_id, local_path=self.local_path)
except KeyError as e:
self.fail('Upload does not exist', exc_info=e)
......
......@@ -177,7 +177,7 @@ def lnr(logger, event, **kwargs):
@contextmanager
def timer(logger, event, method='info'):
def timer(logger, event, method='info', **kwargs):
start = time.time()
try:
......@@ -187,6 +187,6 @@ def timer(logger, event, method='info'):
logger_method = getattr(logger, 'info', None)
if logger_method is not None:
logger_method(event, exec_time=stop - start)
logger_method(event, exec_time=stop - start, **kwargs)
else:
logger.error('Uknown logger method %s.' % method)
from nomad.parsing import parser_dict
from nomad import utils
def run_parser(parser_name, mainfile):
    """Run the parser registered under *parser_name* on *mainfile*.

    Returns whatever backend object the parser's ``run`` produces.
    """
    selected_parser = parser_dict[parser_name]
    module_logger = utils.get_logger(__name__)
    return selected_parser.run(mainfile, logger=module_logger)
if __name__ == '__main__':
    # Run the same example mainfile five times — presumably for warm-up or
    # rough profiling of repeated parser runs; confirm intent with the author.
    example_mainfile = '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml'
    for _ in range(5):
        run_parser('parsers/vasp', example_mainfile)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment