diff --git a/common/python/nomadcore/baseclasses.py b/common/python/nomadcore/baseclasses.py index 572ff23f9b878ce6babb38cdf39d1e3721996d57..127f4e9254bfcd7480f127c767212b28df5c65e1 100644 --- a/common/python/nomadcore/baseclasses.py +++ b/common/python/nomadcore/baseclasses.py @@ -6,6 +6,7 @@ from builtins import str from builtins import object import os +import sys import copy import numpy as np import logging @@ -15,7 +16,8 @@ from nomadcore.simple_parser import mainFunction from nomadcore.local_backend import LocalBackend from nomadcore.local_meta_info import load_metainfo from nomadcore.caching_backend import CachingLevel -from nomadcore.simple_parser import extractOnCloseTriggers, compileParser, runParser, PushbackLineFile +from nomadcore.simple_parser import extractOnCloseTriggers, extractOnOpenTriggers +from nomadcore.caching_backend import ActiveBackend import nomadcore.ActivateLogging from future.utils import with_metaclass logger = logging.getLogger("nomad") @@ -281,36 +283,7 @@ class RegexService(object): #=============================================================================== -class CommonParser(object): - """ - This class is used as a base class for objects that store and instantiate - common parts of the hierarchical SimpleMatcher structure. The object can be - shared for many HierarchicalParsers. - """ - def __init__(self, parser_context): - - # Repeating regex definitions - self.parser_context = parser_context - self.backend = parser_context.caching_backend - self.file_service = parser_context.file_service - self.cache_service = parser_context.cache_service - self.caching_levels = {} - self.regexs = RegexService() - - def getOnCloseTriggers(self): - """ - Returns: - A dictionary containing a section name as a key, and a list of - trigger functions associated with closing that section. - """ - onClose = {} - for attr, callback in extractOnCloseTriggers(self).items(): - onClose[attr] = [callback] - return onClose - - -#=============================================================================== -class BasicParser(with_metaclass(ABCMeta, object)): +class AbstractBaseParser(with_metaclass(ABCMeta, object)): """A base class for all objects that parse contents from files. When initialized with the parser_context, you can find the caching backend @@ -334,6 +307,44 @@ class BasicParser(with_metaclass(ABCMeta, object)): self.super_backend = parser_context.super_backend self.file_service = parser_context.file_service self.cache_service = parser_context.cache_service + self.caching_levels = {} + self.default_data_caching_level = CachingLevel.ForwardAndCache + self.default_section_caching_level = CachingLevel.Forward + self.on_close = {} + self.on_open = {} + + def prepare(self): + """This function will prepare everything for parsing. + + The onClose and onOpen callbacks are gathered and the ActiveBackend is + initialized. You should call this function, or prepare things manually + before trying to use push values to the backend. + """ + # Gather the onClose and onOpen triggers + for attr, callback in extractOnCloseTriggers(self).items(): + oldCallbacks = self.on_close.get(attr, None) + if oldCallbacks: + oldCallbacks.append(callback) + else: + self.on_close[attr] = [callback] + for attr, callback in extractOnOpenTriggers(self).items(): + oldCallbacks = self.on_open.get(attr, None) + if oldCallbacks: + oldCallbacks.append(callback) + else: + self.on_open[attr] = [callback] + + # Initialize the Caching backend + self.backend = ActiveBackend.activeBackend( + metaInfoEnv=self.parser_context.metainfo_env, + cachingLevelForMetaName=self.caching_levels, + defaultDataCachingLevel=self.default_data_caching_level, + defaultSectionCachingLevel=self.default_section_caching_level, + onClose=self.on_close, + onOpen=self.on_open, + superBackend=self.parser_context.super_backend, + default_units=self.parser_context.default_units, + metainfo_units=self.parser_context.metainfo_units) @abstractmethod def parse(self): @@ -342,11 +353,10 @@ class BasicParser(with_metaclass(ABCMeta, object)): superBackend. You will also have to open new sections, but remember that certain sections may already be opened by other parsers. """ - pass #=============================================================================== -class HierarchicalParser(BasicParser): +class MainHierarchicalParser(AbstractBaseParser): """A base class for all parsers that parse a file using a hierarchy of SimpleMatcher objects. @@ -369,35 +379,6 @@ class HierarchicalParser(BasicParser): cm: An optional CommonMatcher object that is used to store common onClose triggers, SimpleMatchers, caching levels, etc. - """ - def __init__(self, file_path, parser_context): - super(HierarchicalParser, self).__init__(file_path, parser_context) - self.root_matcher = None - self.caching_levels = {} - self.default_data_caching_level = CachingLevel.ForwardAndCache - self.default_section_caching_level = CachingLevel.Forward - self.onClose = {} - self.cm = None - self.regexs = RegexService() - - def setup_common_matcher(self, common_matcher): - """ Used to setup the CommonMatcher object. This object will contain - SimpleMatchers, onClose functions, caching levels, etc. that are common - for many different HierarchicalParsers. - - Args: - common_matcher: A CommonMatcher object from which to exctract stuff. - """ - self.cm = common_matcher - self.onClose.update(common_matcher.getOnCloseTriggers()) - self.caching_levels.update(common_matcher.caching_levels) - - -#=============================================================================== -class MainHierarchicalParser(HierarchicalParser): - """Base class for parsers that process the main file by using SimpleMatcher - objects, parse it's contents, and also initialize other parser objects that - are subclassed from BasicParser and parse ancillary files. """ def __init__(self, file_path, parser_context): """ @@ -407,6 +388,9 @@ class MainHierarchicalParser(HierarchicalParser): in-depth information about the parsing environment. """ super(MainHierarchicalParser, self).__init__(file_path, parser_context) + self.root_matcher = None + self.regexs = RegexService() + self.cm = None def parse(self): """Starts the parsing. By default uses the SimpleParser scheme, if you @@ -420,7 +404,7 @@ class MainHierarchicalParser(HierarchicalParser): outF=self.parser_context.super_backend.fileOut, cachingLevelForMetaName=self.caching_levels, superContext=self, - onClose=self.onClose, + onClose=self.on_close, default_units=self.parser_context.default_units, metainfo_units=self.parser_context.metainfo_units, superBackend=self.parser_context.super_backend, @@ -443,43 +427,46 @@ class MainHierarchicalParser(HierarchicalParser): if self.cm is not None: self.cm.backend = parser.backend + def setup_common_matcher(self, common_matcher): + """Used to setup the CommonMatcher object. This object will contain + SimpleMatchers, onClose functions, caching levels, etc. that are common + for many different HierarchicalParsers. -#=============================================================================== -class SubHierarchicalParser(HierarchicalParser): - """Parser that can be used inside a running MainHierarchicalParser to parse - external files. - - The same backend is used as for the main parser, therefore, the output of - the ancillary parser is included in the output of the the main parser. Be - careful when you open sections with SubHierarchicalParsers. Be sure not to - write the opening events for sections that are already opened by the main - parser. You can use cachingLevelForMetaName to set a section to Ignore or - Cache. Thus, you can still use sections and their onClose triggers, but no - opening events are written. - """ - def __init__(self, file_path, parser_context): - """ Args: - file_path: Path to the main file as a string. - parser_context: The ParserContext object that contains various - in-depth information about the parsing environment. + common_matcher: A CommonMatcher object from which to extract stuff. """ - super(HierarchicalParser, self).__init__(file_path, parser_context) + self.cm = common_matcher + self.on_close.update(common_matcher.getOnCloseTriggers()) + self.caching_levels.update(common_matcher.caching_levels) - def parse(self): - """Starts the parsing. By default uses the SimpleParser scheme, if you - want to use something else or customize the process just override this - method in the subclass. - """ - with open(self.file_path, "r") as fin: - compiledParser = compileParser(self.root_matcher, self.parser_context.metainfo_env, self.parser_context.metainfo_to_keep, self.parser_context.default_units, self.parser_context.metainfo_units) - runParser(compiledParser, self.backend, self, PushbackLineFile(fin)) - def startedParsing(self, fInName, parser): - """Function is called when the parsing starts. Currently does nothing, - but is required by the runParser function. +#=============================================================================== +class CommonParser(object): + """ + This class is used as a base class for objects that store and instantiate + common parts of the hierarchical SimpleMatcher structure. The object can be + shared for many MainHierarchicalParsers. + """ + def __init__(self, parser_context): + + # Repeating regex definitions + self.parser_context = parser_context + self.backend = parser_context.caching_backend + self.file_service = parser_context.file_service + self.cache_service = parser_context.cache_service + self.caching_levels = {} + self.regexs = RegexService() + + def getOnCloseTriggers(self): """ - pass + Returns: + A dictionary containing a section name as a key, and a list of + trigger functions associated with closing that section. + """ + onClose = {} + for attr, callback in extractOnCloseTriggers(self).items(): + onClose[attr] = [callback] + return onClose #=============================================================================== @@ -504,7 +491,7 @@ class ParserContext(object): #=============================================================================== class CacheObject(object): - """Wraps an intermediate value stored during parsing. + """Wraps a value stored inside a CacheService. """ def __init__(self, name, default_value=None, single=True, update=True): self.name = name