From 75a663a7e1ba8ff13c49bcdc62bca8bdb2f2d108 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Fri, 20 Mar 2020 17:31:45 +0100
Subject: [PATCH] Parser optimizations.

---
 common/python/nomadcore/baseclasses.py   | 36 +++++++++++++--------
 common/python/nomadcore/simple_parser.py | 41 +++++++++++++++---------
 2 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/common/python/nomadcore/baseclasses.py b/common/python/nomadcore/baseclasses.py
index 1287513..cc9b29a 100644
--- a/common/python/nomadcore/baseclasses.py
+++ b/common/python/nomadcore/baseclasses.py
@@ -42,7 +42,6 @@ class ParserInterface(with_metaclass(ABCMeta, object)):
             This is contructed here and then passed onto the different
             subparsers.
     """
-    metainfo_env = None
 
     def __init__(
             self, metainfo_to_keep=None, backend=None, default_units=None,
@@ -63,6 +62,8 @@ class ParserInterface(with_metaclass(ABCMeta, object)):
 
         self.store = store
         self.debug = debug
+        self.metainfo_env = None
+        self.metaInfoEnv = None
         self.initialize(metainfo_to_keep, backend, default_units, metainfo_units)
 
     def setup_logger(self, new_logger):
@@ -81,19 +82,21 @@ class ParserInterface(with_metaclass(ABCMeta, object)):
         self.parser_context.cache_service = CacheService()
         self.parser_context.parser_info = self.get_parser_info()
         self.main_parser = None
-
-        # Initialize the backend.
-        metainfo_package = os.path.basename(self.get_metainfo_filename())
-        if backend is not None:
-            self.parser_context.super_backend = backend(metainfo_package)
-        else:
-            from nomad.parsing.legacy import Backend
-            self.parser_context.super_backend = Backend(metainfo_package)
+        self.backend = backend
+
+        if self.metainfo_env is None:
+            metainfo_filename = os.path.basename(self.get_metainfo_filename())
+            from nomad.metainfo.legacy import python_package_mapping
+            import importlib
+            python_package_name, _ = python_package_mapping(metainfo_filename)
+            python_package_name = '.'.join(python_package_name.split('.')[:-1])
+            python_module = importlib.import_module(python_package_name)
+            metainfo = getattr(python_module, 'm_env')
+            self.metainfo_env = metainfo
+            self.metaInfoEnv = self.metainfo_env.legacy_info_env()
 
         # Setup the metainfo environment.
-        metainfo_env = self.parser_context.super_backend.metaInfoEnv()
-        self.parser_context.metainfo_env = metainfo_env
-        type(self).metainfo_env = metainfo_env
+        self.parser_context.metainfo_env = self.metaInfoEnv
 
         # Check the list of default units
         default_unit_map = {}
@@ -113,7 +116,7 @@ class ParserInterface(with_metaclass(ABCMeta, object)):
                 unit_conversion.ureg(unit)
 
                 # Check that the metaname is OK
-                meta = ParserInterface.metainfo_env.infoKinds.get(metaname)
+                meta = self.metaInfoEnv.infoKinds.get(metaname)
                 if meta is None:
                     raise KeyError("The metainfo name '{}' could not be found. Check for typos or try updating the metainfo repository.".format(metaname))
 
@@ -179,6 +182,13 @@ class ParserInterface(with_metaclass(ABCMeta, object)):
         """Starts the actual parsing process, and outputs the results to the
         backend specified in the constructor.
         """
+        # Initialize the backend.
+        if self.backend is not None:
+            self.parser_context.super_backend = self.backend(self.metainfo_env)
+        else:
+            from nomad.parsing.legacy import Backend
+            self.parser_context.super_backend = Backend(self.metainfo_env)
+
         # Check that the main file exists
         if not os.path.isfile(main_file):
             raise ValueError(
diff --git a/common/python/nomadcore/simple_parser.py b/common/python/nomadcore/simple_parser.py
index 2e0ae0e..9a4f196 100644
--- a/common/python/nomadcore/simple_parser.py
+++ b/common/python/nomadcore/simple_parser.py
@@ -1369,24 +1369,33 @@ class SimpleParser(object):
             self.parsingStats['unmatched'] += 1
 
 
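+# Compiled-parser cache keyed only by the simpleParser object (root of the SM tree): it only helps if
+# that object is reused across parses, and on a hit all other compileParser arguments are ignored.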
+_compile_parser_cache = {}
+
+
 def compileParser(simpleParser, metaInfo, metaInfoToKeep, default_units=None, metainfo_units=None, strValueTransform=None):
     """compiles the given simple parser"""
-    parserBuilder = SimpleParserBuilder(
-        simpleParser, metaInfo, metaInfoToKeep, default_units, metainfo_units, strValueTransform)
-    if logger.isEnabledFor(logging.DEBUG):
-        s = io.StringIO()
-        s.write("matchers:")
-        parserBuilder.writeMatchers(s, 2)
-        logger.debug(s.getvalue())
-    if not parserBuilder.verifyMetaInfo(sys.stderr):
-        sys.exit(1)
-    parserBuilder.compile()
-    if logger.isEnabledFor(logging.DEBUG):
-        s = io.StringIO()
-        s.write("compiledMatchers:")
-        parserBuilder.writeCompiledMatchers(s, 2)
-        logger.debug(s.getvalue())
-    return parserBuilder
+    if simpleParser not in _compile_parser_cache:
+        parserBuilder = SimpleParserBuilder(
+            simpleParser, metaInfo, metaInfoToKeep, default_units, metainfo_units, strValueTransform)
+        if logger.isEnabledFor(logging.DEBUG):
+            s = io.StringIO()
+            s.write("matchers:")
+            parserBuilder.writeMatchers(s, 2)
+            logger.debug(s.getvalue())
+        if not parserBuilder.verifyMetaInfo(sys.stderr):
+            sys.exit(1)
+        parserBuilder.compile()
+        if logger.isEnabledFor(logging.DEBUG):
+            s = io.StringIO()
+            s.write("compiledMatchers:")
+            parserBuilder.writeCompiledMatchers(s, 2)
+            logger.debug(s.getvalue())
+
+        _compile_parser_cache[simpleParser] = parserBuilder
+
+    return _compile_parser_cache[simpleParser]
 
 
 def runParser(compiledParser, backend, superContext, fIn, uri, path):
-- 
GitLab