From 75a663a7e1ba8ff13c49bcdc62bca8bdb2f2d108 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Fri, 20 Mar 2020 17:31:45 +0100 Subject: [PATCH] Parser optimizations. --- common/python/nomadcore/baseclasses.py | 36 +++++++++++++-------- common/python/nomadcore/simple_parser.py | 41 +++++++++++++++--------- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/common/python/nomadcore/baseclasses.py b/common/python/nomadcore/baseclasses.py index 1287513..cc9b29a 100644 --- a/common/python/nomadcore/baseclasses.py +++ b/common/python/nomadcore/baseclasses.py @@ -42,7 +42,6 @@ class ParserInterface(with_metaclass(ABCMeta, object)): This is contructed here and then passed onto the different subparsers. """ - metainfo_env = None def __init__( self, metainfo_to_keep=None, backend=None, default_units=None, @@ -63,6 +62,8 @@ class ParserInterface(with_metaclass(ABCMeta, object)): self.store = store self.debug = debug + self.metainfo_env = None + self.metaInfoEnv = None self.initialize(metainfo_to_keep, backend, default_units, metainfo_units) def setup_logger(self, new_logger): @@ -81,19 +82,21 @@ class ParserInterface(with_metaclass(ABCMeta, object)): self.parser_context.cache_service = CacheService() self.parser_context.parser_info = self.get_parser_info() self.main_parser = None - - # Initialize the backend. - metainfo_package = os.path.basename(self.get_metainfo_filename()) - if backend is not None: - self.parser_context.super_backend = backend(metainfo_package) - else: - from nomad.parsing.legacy import Backend - self.parser_context.super_backend = Backend(metainfo_package) + self.backend = backend + + if self.metainfo_env is None: + metainfo_filename = os.path.basename(self.get_metainfo_filename()) + from nomad.metainfo.legacy import python_package_mapping + import importlib + python_package_name, _ = python_package_mapping(metainfo_filename) + python_package_name = '.'.join(python_package_name.split('.')[:-1]) + python_module = importlib.import_module(python_package_name) + metainfo = getattr(python_module, 'm_env') + self.metainfo_env = metainfo + self.metaInfoEnv = self.metainfo_env.legacy_info_env() # Setup the metainfo environment. - metainfo_env = self.parser_context.super_backend.metaInfoEnv() - self.parser_context.metainfo_env = metainfo_env - type(self).metainfo_env = metainfo_env + self.parser_context.metainfo_env = self.metaInfoEnv # Check the list of default units default_unit_map = {} @@ -113,7 +116,7 @@ class ParserInterface(with_metaclass(ABCMeta, object)): unit_conversion.ureg(unit) # Check that the metaname is OK - meta = ParserInterface.metainfo_env.infoKinds.get(metaname) + meta = self.metaInfoEnv.infoKinds.get(metaname) if meta is None: raise KeyError("The metainfo name '{}' could not be found. Check for typos or try updating the metainfo repository.".format(metaname)) @@ -179,6 +182,13 @@ class ParserInterface(with_metaclass(ABCMeta, object)): """Starts the actual parsing process, and outputs the results to the backend specified in the constructor. """ + # Initialize the backend. + if self.backend is not None: + self.parser_context.super_backend = self.backend(self.metainfo_env) + else: + from nomad.parsing.legacy import Backend + self.parser_context.super_backend = Backend(self.metainfo_env) + # Check that the main file exists if not os.path.isfile(main_file): raise ValueError( diff --git a/common/python/nomadcore/simple_parser.py b/common/python/nomadcore/simple_parser.py index 2e0ae0e..9a4f196 100644 --- a/common/python/nomadcore/simple_parser.py +++ b/common/python/nomadcore/simple_parser.py @@ -1369,24 +1369,33 @@ class SimpleParser(object): self.parsingStats['unmatched'] += 1 +# This cache only works if the simpleParser (root of the SM tree) is not recreated +# on each parse +_compile_parser_cache = {} + + def compileParser(simpleParser, metaInfo, metaInfoToKeep, default_units=None, metainfo_units=None, strValueTransform=None): """compiles the given simple parser""" - parserBuilder = SimpleParserBuilder( - simpleParser, metaInfo, metaInfoToKeep, default_units, metainfo_units, strValueTransform) - if logger.isEnabledFor(logging.DEBUG): - s = io.StringIO() - s.write("matchers:") - parserBuilder.writeMatchers(s, 2) - logger.debug(s.getvalue()) - if not parserBuilder.verifyMetaInfo(sys.stderr): - sys.exit(1) - parserBuilder.compile() - if logger.isEnabledFor(logging.DEBUG): - s = io.StringIO() - s.write("compiledMatchers:") - parserBuilder.writeCompiledMatchers(s, 2) - logger.debug(s.getvalue()) - return parserBuilder + if simpleParser not in _compile_parser_cache: + parserBuilder = SimpleParserBuilder( + simpleParser, metaInfo, metaInfoToKeep, default_units, metainfo_units, strValueTransform) + if logger.isEnabledFor(logging.DEBUG): + s = io.StringIO() + s.write("matchers:") + parserBuilder.writeMatchers(s, 2) + logger.debug(s.getvalue()) + if not parserBuilder.verifyMetaInfo(sys.stderr): + sys.exit(1) + parserBuilder.compile() + if logger.isEnabledFor(logging.DEBUG): + s = io.StringIO() + s.write("compiledMatchers:") + parserBuilder.writeCompiledMatchers(s, 2) + logger.debug(s.getvalue()) + + _compile_parser_cache[simpleParser] = parserBuilder + + return _compile_parser_cache[simpleParser] def runParser(compiledParser, backend, superContext, fIn, uri, path): -- GitLab