diff --git a/common/python/nomadcore/baseclasses.py b/common/python/nomadcore/baseclasses.py
index 1a27540e22b53b18dd525d1b6e4f2e7c0c4ec5b7..1287513d541f9691e56a943dc2ca8efd71fd196f 100644
--- a/common/python/nomadcore/baseclasses.py
+++ b/common/python/nomadcore/baseclasses.py
@@ -82,22 +82,18 @@ class ParserInterface(with_metaclass(ABCMeta, object)):
         self.parser_context.parser_info = self.get_parser_info()
         self.main_parser = None
 
-        # Setup the metainfo environment. All parsers that inherit from this
-        # class will have a static class attribute that will store the metainfo
-        # environment. This way every instance of a parser doesn't have to load
-        # the environment separately because it is identical for each instance.
-        if type(self).metainfo_env is None:
-            metainfo_env, warn = load_metainfo(self.get_metainfo_filename())
-            type(self).metainfo_env = metainfo_env
-            self.parser_context.metainfo_env = metainfo_env
-        else:
-            self.parser_context.metainfo_env = type(self).metainfo_env
-
-        # Initialize the backend. Use local backend if none given
+        # Initialize the backend.
+        metainfo_package = os.path.basename(self.get_metainfo_filename())
         if backend is not None:
-            self.parser_context.super_backend = backend(type(self).metainfo_env)
+            self.parser_context.super_backend = backend(metainfo_package)
         else:
-            self.parser_context.super_backend = LocalBackend(type(self).metainfo_env, debug=self.debug, store=self.store)
+            from nomad.parsing.legacy import Backend
+            self.parser_context.super_backend = Backend(metainfo_package)
+
+        # Setup the metainfo environment.
+        metainfo_env = self.parser_context.super_backend.metaInfoEnv()
+        self.parser_context.metainfo_env = metainfo_env
+        type(self).metainfo_env = metainfo_env
 
         # Check the list of default units
         default_unit_map = {}
diff --git a/common/python/nomadcore/caching_backend.py b/common/python/nomadcore/caching_backend.py
index 954997319524d7449c0895918ade6a065e6ff266..868e71c4b601956045edaa6ca9c72aaa9231fa63 100644
--- a/common/python/nomadcore/caching_backend.py
+++ b/common/python/nomadcore/caching_backend.py
@@ -261,14 +261,19 @@ class CachingSectionManager(object):
             except:
                 raise Exception("Cannot add array values for metadata %s to section %d (%d) of %s, as it is not open" % (valueMetaInfo.name, gI, gIndex, self.metaInfo.name))
 
+
 class CachingDataManager(object):
     def __init__(self, metaInfo, superSectionManager, cachingLevel):
         self.metaInfo = metaInfo
         self.superSectionManager = superSectionManager
         self.cachingLevel = cachingLevel
 
+
 class ActiveBackend(object):
-    def __init__(self, metaInfoEnv, sectionManagers, dataManagers, superBackend, propagateStartFinishParsing = True, default_units=None, metainfo_units=None):
+    def __init__(
+            self, metaInfoEnv, sectionManagers, dataManagers, superBackend,
+            propagateStartFinishParsing=True, default_units=None, metainfo_units=None):
+
         self.__metaInfoEnv = metaInfoEnv
         self.sectionManagers = sectionManagers
         self.dataManagers = dataManagers
@@ -278,36 +283,62 @@ class ActiveBackend(object):
         self.metainfo_units = metainfo_units  # A mapping between metaname and an unit definition.
 
     @classmethod
-    def activeBackend(cls, metaInfoEnv, cachingLevelForMetaName = {}, defaultDataCachingLevel = CachingLevel.ForwardAndCache, defaultSectionCachingLevel = CachingLevel.Forward, superBackend = None,
-                      onClose = {}, onOpen = {}, propagateStartFinishParsing = True, default_units=None, metainfo_units=None):
+    def activeBackend(
+            cls,
+            metaInfoEnv,
+            cachingLevelForMetaName={},
+            defaultDataCachingLevel=CachingLevel.ForwardAndCache,
+            defaultSectionCachingLevel=CachingLevel.Forward,
+            superBackend=None,
+            onClose={}, onOpen={},
+            propagateStartFinishParsing=True,
+            default_units=None,
+            metainfo_units=None):
+
         for sectionName in onClose.keys():
-            if not sectionName in metaInfoEnv:
-                raise Exception("Found trigger for non existing section %s" % sectionName)
+            if sectionName not in metaInfoEnv:
+                raise Exception(
+                    "Found trigger for non existing section %s" % sectionName)
+
             elif metaInfoEnv.infoKinds[sectionName].kindStr != "type_section":
-                raise Exception("Found trigger for %s which is not a section but %s" %
-                                (sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
+                raise Exception(
+                    "Found trigger for %s which is not a section but %s" %
+                    (sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
+
         for sectionName in onOpen.keys():
-            if not sectionName in metaInfoEnv:
-                raise Exception("Found trigger for non existing section %s" % sectionName)
+            if sectionName not in metaInfoEnv:
+                raise Exception(
+                    "Found trigger for non existing section %s" % sectionName)
+
             elif metaInfoEnv.infoKinds[sectionName].kindStr != "type_section":
-                raise Exception("Found trigger for %s which is not a section but %s" %
-                                (sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
+                raise Exception(
+                    "Found trigger for %s which is not a section but %s" %
+                    (sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
+
         sectionManagers = {}
-        for ikNames, ik in metaInfoEnv.infoKinds.items():
+        for ik in metaInfoEnv.infoKinds.values():
             if ik.kindStr == "type_section":
-                parentS, parentO = list(metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[],[]]))
+                parentS, parentO = list(
+                    metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[], []]))
                 parentS.sort()
-                cachingLevel = reduce(CachingLevel.restrict, [cachingLevelForMetaName.get(x, defaultSectionCachingLevel) for x in ([ik.name] + parentS + parentO)])
+
+                cachingLevel = reduce(
+                    CachingLevel.restrict,
+                    [
+                        cachingLevelForMetaName.get(x, defaultSectionCachingLevel)
+                        for x in ([ik.name] + parentS + parentO)])
+
                 sectionManagers[ik.name] = CachingSectionManager(
-                    metaInfo = ik,
-                    parentSectionNames = parentS,
-                    storeInSuper = (cachingLevel == CachingLevel.ForwardAndCache or cachingLevel == CachingLevel.Cache or cachingLevel == CachingLevel.PreOpenedCache),
-                    forwardOpenClose = (cachingLevel == CachingLevel.Forward or cachingLevel == CachingLevel.ForwardAndCache),
-                    preOpened = (cachingLevel == CachingLevel.PreOpenedCache or cachingLevel == CachingLevel.PreOpenedIgnore),
-                    onClose = onClose.get(ik.name, []),
-                    onOpen = onOpen.get(ik.name, []))
+                    metaInfo=ik,
+                    parentSectionNames=parentS,
+                    storeInSuper=(cachingLevel == CachingLevel.ForwardAndCache or cachingLevel == CachingLevel.Cache or cachingLevel == CachingLevel.PreOpenedCache),
+                    forwardOpenClose=(cachingLevel == CachingLevel.Forward or cachingLevel == CachingLevel.ForwardAndCache),
+                    preOpened=(cachingLevel == CachingLevel.PreOpenedCache or cachingLevel == CachingLevel.PreOpenedIgnore),
+                    onClose=onClose.get(ik.name, []),
+                    onOpen=onOpen.get(ik.name, []))
+
         dataManagers = {}
-        for ikNames, ik in metaInfoEnv.infoKinds.items():
+        for ik in metaInfoEnv.infoKinds.values():
             if ik.kindStr == "type_document_content" or ik.kindStr == "type_dimension":
                 superSectionNames = metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[]])[0]
                 if not superSectionNames:
@@ -316,9 +347,15 @@ class ActiveBackend(object):
                     raise Exception("MetaInfo of concrete value %s has multiple superSections (%s)" % (ik.name, superSectionNames))
                 sectionManager = sectionManagers[superSectionNames[0]]
-                dataManagers[ik.name] = CachingDataManager(ik, sectionManager,
-                                                           CachingLevel.restrict(cachingLevelForMetaName.get(ik.name, defaultDataCachingLevel), CachingLevel.Forward if sectionManager.forwardOpenClose or sectionManager.preOpened else CachingLevel.Ignore))
-        return ActiveBackend(metaInfoEnv, sectionManagers, dataManagers, superBackend, propagateStartFinishParsing, default_units, metainfo_units)
+                dataManagers[ik.name] = CachingDataManager(
+                    ik, sectionManager,
+                    CachingLevel.restrict(
+                        cachingLevelForMetaName.get(ik.name, defaultDataCachingLevel),
+                        CachingLevel.Forward if sectionManager.forwardOpenClose or sectionManager.preOpened else CachingLevel.Ignore))
+
+        return ActiveBackend(
+            metaInfoEnv, sectionManagers, dataManagers, superBackend,
+            propagateStartFinishParsing, default_units, metainfo_units)
 
     def appendOnClose(self, sectionName, onClose):
         self.sectionManagers.onClose.append(onClose)
 
diff --git a/common/python/nomadcore/local_meta_info.py b/common/python/nomadcore/local_meta_info.py
index 983ddf4c188eda453c2765a1d9f8c494157c10bc..97f7190e1003c02e4de55516c0c34985c3a2a774 100644
--- a/common/python/nomadcore/local_meta_info.py
+++ b/common/python/nomadcore/local_meta_info.py
@@ -10,7 +10,7 @@ import json
 import os, re
 from nomadcore.json_support import jsonCompactS, jsonCompactD, jsonIndentD
 from io import open
-import nomad_meta_info
+import metainfo
 
 """objects to handle a local InfoKinds with unique name (think self written json)"""
 class InfoKindEl(object):
@@ -186,7 +186,7 @@ class RelativeDependencySolver(object):
 
     def __call__(self, infoKindEnv, source, dep):
         if "metainfoPath" in dep:
-            basePath = nomad_meta_info.__file__
+            basePath = metainfo.__file__
             path = dep["metainfoPath"]
         elif "relativePath" in dep:
             basePath = source.get('path')
@@ -677,11 +677,10 @@ def load_metainfo(filename, dependencyLoader=None, extraArgsHandling=InfoKindEl.
         Tuple containing the metainfo environment, and any possible warnings
         that were encountered in the loading.
""" - import nomad_meta_info if os.path.isfile(filename): path = filename else: - path = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(nomad_meta_info.__file__)), "{}".format(filename))) + path = os.path.join(os.path.dirname(metainfo.__file__), filename) return loadJsonFile(path, dependencyLoader, extraArgsHandling, uri) def loadJsonStream(fileStream, name = None, dependencyLoader = None, extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS, filePath = None, uri = None): diff --git a/common/python/nomadcore/parse_streamed_dicts.py b/common/python/nomadcore/parse_streamed_dicts.py deleted file mode 100644 index 032d86fa5e477b872f922f63b3f22261bec74689..0000000000000000000000000000000000000000 --- a/common/python/nomadcore/parse_streamed_dicts.py +++ /dev/null @@ -1,264 +0,0 @@ -from future import standard_library -standard_library.install_aliases() -from builtins import object -import json -import io - -# the 3 possible states while reading input character per character -IN_NORMAL_TEXT = 0 -IN_STRING = 1 -IN_STRING_ESCAPE = 2 - -def readDict(inF, line0): - "reads a dictionary from an indented json" - status = IN_NORMAL_TEXT - # iterator for character position in string - i = 0 - # counts number of open { - n_paren = 0 - # used for StringIO if dictionary is distributed over several blocks - fullDictStr = None - line = line0 - iline = 0 - while True: - # iterate through characters of current block - while i < len(line): - c = line[i] - i += 1 - if status == IN_STRING_ESCAPE: - status = IN_STRING - continue - elif status == IN_NORMAL_TEXT: - # skip characters that do not change state - while (c != '{' and c!= '}' and c != '"' and i < len(line)): - c = line[i] - i += 1 - if c == '{': - # skip the characters before the first opening { - if n_paren == 0: - line = line[i - 1:] - i = 1 - n_paren += 1 - elif c == '}': - n_paren -= 1 - # found closing }, return dictionary - if n_paren == 0: - # write block upto current character to string or StringIO - # only use StringIO if dictionary is distributed over several blocks - if fullDictStr == None: - outS = line[:i] - else: - fullDictStr.write(line[:i]) - outS = fullDictStr.getvalue() - # reset StringIO - fullDictStr.close() - fullDictStr = None - # reset block to remaining characters - line = line[i:] - i = 0 - # dictionary output - try: - outD = json.loads(outS) - except: - raise Exception("Could not convert string " + repr(outS) + " with json.loads to dictionary.") - return outD - elif c == '"': - status = IN_STRING - elif status == IN_STRING: - # skip characters that do not change state - while (c != '"' and c != '\\' and i < len(line)): - c = line[i] - i += 1 - if c == '"': - status = IN_NORMAL_TEXT - elif c == '\\': - status = IN_STRING_ESCAPE - # if we arrive here, then the dictionary is distributed over several blocks - # write block to StringIO but only if an opening { already found - if n_paren > 0: - if fullDictStr == None: - fullDictStr = io.StringIO() - fullDictStr.write(line) - else: - # failed to find dictionary in first line, stopping - return None - # read new block from input - line = inF.readline() - if not line: - # early EOF - return None - iline += 1 - i = 0 - -def readArray(inF, line0): - "reads an array from an indented json" - status = IN_NORMAL_TEXT - # iterator for character position in string - i = 0 - # counts number of open [ - n_paren = 0 - # used for StringIO if dictionary is distributed over several blocks - fullDictStr = None - line = line0 - iline = 0 - while True: - # iterate through characters of current 
-        # iterate through characters of current block
-        while i < len(line):
-            c = line[i]
-            i += 1
-            if status == IN_STRING_ESCAPE:
-                status = IN_STRING
-                continue
-            elif status == IN_NORMAL_TEXT:
-                # skip characters that do not change state
-                while (c != '[' and c!= ']' and c != '"' and i < len(line)):
-                    c = line[i]
-                    i += 1
-                if c == '[':
-                    # skip the characters before the first opening [
-                    if n_paren == 0:
-                        line = line[i - 1:]
-                        i = 1
-                    n_paren += 1
-                elif c == ']':
-                    n_paren -= 1
-                    # found closing ], return dictionary
-                    if n_paren == 0:
-                        # write block upto current character to string or StringIO
-                        # only use StringIO if dictionary is distributed over several blocks
-                        if fullDictStr == None:
-                            outS = line[:i]
-                        else:
-                            fullDictStr.write(line[:i])
-                            outS = fullDictStr.getvalue()
-                            # reset StringIO
-                            fullDictStr.close()
-                            fullDictStr = None
-                        # reset block to remaining characters
-                        line = line[i:]
-                        i = 0
-                        # dictionary output
-                        try:
-                            outD = json.loads(outS)
-                        except:
-                            raise Exception("Could not convert string " + repr(outS) + " with json.loads to dictionary.")
-                        return outD
-                elif c == '"':
-                    status = IN_STRING
-            elif status == IN_STRING:
-                # skip characters that do not change state
-                while (c != '"' and c != '\\' and i < len(line)):
-                    c = line[i]
-                    i += 1
-                if c == '"':
-                    status = IN_NORMAL_TEXT
-                elif c == '\\':
-                    status = IN_STRING_ESCAPE
-        # if we arrive here, then the dictionary is distributed over several blocks
-        # write block to StringIO but only if an opening [ already found
-        if n_paren > 0:
-            if fullDictStr == None:
-                fullDictStr = io.StringIO()
-            fullDictStr.write(line)
-        else:
-            # failed to find dictionary in first line, stopping
-            return None
-        # read new block from input
-        line = inF.readline()
-        if not line:
-            # early EOF
-            return None
-        iline += 1
-        i = 0
-
-class ParseStreamedDicts(object):
-    """allows the extraction of JSON dictionaries out of file objects
-    which are then converted to python dictionaries
-    therefore, strings must be passed as StringIO objects"""
-    def __init__(self, inF, blockSize = 8192):
-        self.inF = inF
-        # read at least 2 unicode characters (2 x 4 bytes)
-        if blockSize < 8:
-            self.blockSize = 8
-        else:
-            self.blockSize = blockSize
-        # read input in blocks
-        self.blockRead = ""
-        # set initial state
-        self.status = IN_NORMAL_TEXT
-        # iterator for character position in string
-        self.i = 0
-        # counts number of open {
-        self.n_paren = 0
-        # used for StringIO if dictionary is distributed over several blocks
-        self.fullDictStr = None
-
-    def readNextDict(self):
-        """reads input until a complete set of {} is found
-        the so obtained string is then converted to a python dictionary with json.loads"""
-        while True:
-            # iterate through characters of current block
-            while self.i < len(self.blockRead):
-                c = self.blockRead[self.i]
-                self.i += 1
-                if self.status == IN_STRING_ESCAPE:
-                    self.status = IN_STRING
-                    continue
-                elif self.status == IN_NORMAL_TEXT:
-                    # skip characters that do not change state
-                    while (c != '{' and c!= '}' and c != '"' and self.i < len(self.blockRead)):
-                        c = self.blockRead[self.i]
-                        self.i += 1
-                    if c == '{':
-                        # skip the characters before the first opening {
-                        if self.n_paren == 0:
-                            self.blockRead = self.blockRead[self.i - 1:]
-                            self.i = 1
-                        self.n_paren += 1
-                    elif c == '}':
-                        self.n_paren -= 1
-                        # found closing }, return dictionary
-                        if self.n_paren == 0:
-                            # write block upto current character to string or StringIO
-                            # only use StringIO if dictionary is distributed over several blocks
-                            if self.fullDictStr == None:
-                                outS = self.blockRead[:self.i]
-                            else:
-                                self.fullDictStr.write(self.blockRead[:self.i])
-                                outS = self.fullDictStr.getvalue()
-                                # reset StringIO
-                                self.fullDictStr.close()
-                                self.fullDictStr = None
-                            # reset block to remaining characters
-                            self.blockRead = self.blockRead[self.i:]
-                            self.i = 0
-                            # dictionary output
-                            try:
-                                outD = json.loads(outS)
-                            except:
-                                raise Exception("Could not convert string " + repr(outS) + " with json.loads to dictionary.")
-                            return outD
-                    elif c == '"':
-                        self.status = IN_STRING
-                elif self.status == IN_STRING:
-                    # skip characters that do not change state
-                    while (c != '"' and c != '\\' and self.i < len(self.blockRead)):
-                        c = self.blockRead[self.i]
-                        self.i += 1
-                    if c == '"':
-                        self.status = IN_NORMAL_TEXT
-                    elif c == '\\':
-                        self.status = IN_STRING_ESCAPE
-            # if we arrive here, then the dictionary is distributed over several blocks
-            # write block to StringIO but only if an opening { already found
-            if self.n_paren > 0:
-                if self.fullDictStr == None:
-                    self.fullDictStr = io.StringIO()
-                self.fullDictStr.write(self.blockRead)
-            # read new block from input
-            self.blockRead = self.inF.read(self.blockSize)
-            self.i = 0
-            # reached end of input
-            if not self.blockRead:
-                return None
-
diff --git a/common/python/nomadcore/parser_backend.py b/common/python/nomadcore/parser_backend.py
index f9e1b38529557f7f08b0211c396bfd6d145971fa..f82135eb8ade0a9897af535a5506af76d82943d1 100644
--- a/common/python/nomadcore/parser_backend.py
+++ b/common/python/nomadcore/parser_backend.py
@@ -48,246 +48,3 @@ def valueForStrValue(strValue, dtypeStr):
             raise Exception("unexpected dtypeStr %s" % (dtypeStr))
     except Exception as e:
         raise Exception("Error when converting %r to dtypeStr %r" % (strValue, dtypeStr), e)
-
-class JsonParseEventsWriterBackend(object):
-    """Simple backend that writes out the parse events in json format"""
-    # json content is written to fileOut
-    def __init__(self, metaInfoEnv, fileOut = sys.stdout, writeMatchTelemetry=False):
-        self.__metaInfoEnv = metaInfoEnv
-        self.fileOut = fileOut
-        self.__gIndex = -1
-        self.__openSections = set()
-        self.__writeComma = False
-        self.__lastIndex = {}
-        self.writeMatchTelemetry = writeMatchTelemetry
-        self.stats = {}
-
-    def addStat(self, name):
-        self.stats[name] = self.stats.get(name, 0) + 1
-
-    def sendStats(self):
-        stats = {"parser":{"name":"fhi-aims", "version": "0.3"},
-                 "data":self.stats}
-        url = 'https://nomad-dev.rz-berlin.mpg.de/parsers/addStat'
-        #url = 'http://127.0.0.1:8081/parsers/addStat'
-        data = json.dumps(stats, sort_keys=True)
-        req = urllib.request.Request(url, data)
-        response = urllib.request.urlopen(req)
-        the_page = response.read()
-        sys.stderr.write("stat sending did answer:" + the_page)
-
-    @staticmethod
-    def __numpyEncoder(self, o):
-        """new default function for json class so that numpy arrays can be encoded"""
-        # check if object is a numpy array
-        if isinstance(o, np.ndarray):
-            # ensure that we have an array with row-major memory order (C like)
-            if not o.flags['C_CONTIGUOUS']:
-                o = np.ascontiguousarray(o)
-            return o.tolist()
-        # see default method in python/json/encoder.py
-        elif isinstance(o, set):
-            return list(sorted(o))
-        else:
-            raise TypeError(repr(o) + " is not JSON serializable")
-
-    def __jsonOutput(self, dic):
-        """method to define format of json output"""
-        if self.__writeComma:
-            self.fileOut.write(", ")
-        else:
-            self.__writeComma = True
-        json.dump(dic, self.fileOut, indent = 2, separators = (',', ':'), sort_keys=True) # default = self.__numpyEncoder)
-
-    def startedParsingSession(self, mainFileUri, parserInfo, parserStatus = None, parserErrors = None):
-        """should be called when the parsing starts, parserInfo should be a valid json dictionary"""
-        self.fileOut.write("{\n \"type\": \"nomad_parse_events_1_0\"")
-        self.sessionMainFileUri = mainFileUri
-        self.sessionParserInfo = parserInfo
-        self.sessionParserStatus = parserStatus
-        self.sessionParserErrors = parserErrors
-        if mainFileUri is not None:
-            self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri, sort_keys=True))
-        if parserInfo is not None:
-            self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'), sort_keys=True))
-        if parserStatus is not None:
-            self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'), sort_keys=True))
-        if parserErrors is not None:
-            self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'), sort_keys=True))
-        self.fileOut.write(""",
- "events": [""")
-
-    def finishedParsingSession(self, parserStatus, parserErrors, mainFileUri = None, parserInfo = None,
-                               parsingStats = None):
-        """should be called when the parsing finishes"""
-        self.fileOut.write("]")
-        if mainFileUri is not None and self.sessionMainFileUri is None:
-            self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri, sort_keys=True))
-        if parserInfo is not None and self.sessionParserInfo is None:
-            self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'), sort_keys=True))
-        if parserStatus is not None and self.sessionParserStatus is None:
-            self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'), sort_keys=True))
-        if parserErrors is not None and self.sessionParserErrors is None:
-            self.fileOut.write(",\n \"parserErrors\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'), sort_keys=True))
-        if parsingStats is not None:
-            self.fileOut.write(",\n \"parsingStats\": " + json.dumps(parsingStats, indent = 4, separators = (',', ':'), sort_keys=True))
-        self.fileOut.write("""
-}""")
-        self.fileOut.flush()
-
-    def openContext(self, contextUri):
-        self.__jsonOutput({"event":"openContext", "nomadUri":contextUri})
-
-    def closeContext(self, contextUri):
-        self.__jsonOutput({"event":"closeContext", "nomadUri":contextUri})
-
-    def metaInfoEnv(self):
-        """the metaInfoEnv this parser was optimized for"""
-        return self.__metaInfoEnv
-
-    def openSections(self):
-        """returns the sections that are still open
-        sections are identified by metaName and their gIndex"""
-        return self.__openSections
-
-    def sectionInfo(self, metaName, gIndex):
-        """returns information on a section (for debugging purposes)"""
-        if (metaName,gIndex) in self.__openSections:
-            return "section {} gIndex: {} is open".format(metaName, gIndex)
-        else:
-            return "section {} gIndex: {} is closed".format(metaName, gIndex)
-
-    def openSection(self, metaName):
-        """opens a new section and returns its new unique gIndex"""
-        newIndex = self.__lastIndex.get(metaName, -1) + 1
-        self.openSectionWithGIndex(metaName, newIndex)
-        return newIndex
-
-    def openNonOverlappingSection(self, metaName):
-        """opens a new non overlapping section"""
-        if any(x[0] == metaName for x in self.__openSections):
-            raise Exception("Section %s is not supposed to overlap" % metaName)
-        return self.openSection(metaName)
-
-    def openSectionWithGIndex(self, metaName, gIndex):
-        """opens a new section where gIndex is generated externally
-        gIndex should be unique (no reopening of a closed section)"""
-        self.__lastIndex[metaName] = gIndex
-        self.__openSections.add((metaName, gIndex))
-        self.__jsonOutput({"event":"openSection", "metaName":metaName, "gIndex":gIndex})
-
-    def setSectionInfo(self, metaName, gIndex, references):
-        """sets info values of an open section
-        references should be a dictionary with the gIndexes of the root sections this section refers to"""
-        self.__jsonOutput({"event":"setSectionInfo", "metaName":metaName, "gIndex":gIndex, "references":references})
-
-    def closeSection(self, metaName, gIndex):
-        """closes a section
-        after this no other value can be added to the section
-        metaName is the name of the meta info, gIndex the index of the section"""
-        if (metaName, gIndex) in self.__openSections:
-            self.__openSections.remove((metaName, gIndex))
-            self.__jsonOutput({"event":"closeSection", "metaName":metaName, "gIndex":gIndex})
-        # raise exeption if section is not open
-        else:
-            raise Exception("There is no open section with metaName %s and gIndex %d" % (metaName, gIndex))
-
-    def closeNonOverlappingSection(self, metaName):
-        """closes a non overlapping section"""
-        openGIndexes = [x for x in self.__openSections if x[0] == metaName]
-        if len(openGIndexes) != 1:
-            if not openGIndexes:
-                raise Exception("Call to closeNonOverlapping(%s) with no open section" % metaName)
-            else:
-                raise Exception("Section %s was not supposed to overlap, found %s open when closing" % (metaName, openGIndexes))
-        self.closeSection(metaName, openGIndexes[0][1])
-
-    def addValue(self, metaName, value, gIndex = -1):
-        """adds a json value corresponding to metaName
-        the value is added to the section the meta info metaName is in
-        a gIndex of -1 means the latest section"""
-        self.__jsonOutput({"event":"addValue", "metaName":metaName, "gIndex":gIndex, "value":value})
-
-    def addRealValue(self, metaName, value, gIndex = -1):
-        """adds a floating point value corresponding to metaName
-        The value is added to the section the meta info metaName is in
-        A gIndex of -1 means the latest section"""
-        self.__jsonOutput({"event":"addRealValue", "metaName":metaName, "gIndex":gIndex, "value":value})
-
-    def addArray(self, metaName, shape, gIndex = -1):
-        """adds a new array value of the given size corresponding to metaName
-        the value is added to the section the meta info metaName is in
-        a gIndex of -1 means the latest section
-        the array is unitialized"""
-        self.__jsonOutput({"event":"addArray", "metaName":metaName, "gIndex":gIndex, "shape":shape})
-
-    def setArrayValues(self, metaName, values, offset = None, gIndex = -1):
-        """adds values to the last array added, array must be a numpy array"""
-        res = {
-            "event":"setArrayValues",
-            "metaName":metaName,
-            "gIndex":gIndex,
-            "valuesShape":values.shape,
-            "flatValues": values.flatten().tolist()
-        }
-        if offset:
-            res["offset"] = offset
-        self.__jsonOutput(res)
-
-    def addArrayValues(self, metaName, values, gIndex = -1):
-        """adds an array value with the given array values.
-        values must be a numpy array"""
-        res = {
-            "event":"addArrayValues",
-            "metaName":metaName,
-            "gIndex":gIndex,
-            "valuesShape":values.shape,
-            "flatValues": values.flatten().tolist()
-        }
-        self.__jsonOutput(res)
-
-    def addMatchTelemetry(self, match_telemetry, gIndex = -1):
-        if not self.writeMatchTelemetry:
-            return
-        res = {
-            'event': "matchTelemetry",
-            'gIndex': gIndex,
-            'fInLine': match_telemetry['fInLine'],
-            'fInLineNr': match_telemetry['fInLineNr'],
-            'matcherName': match_telemetry['matcherName'],
-            'matchFlags': match_telemetry['matchFlags'],
-            'matchSpansFlat': match_telemetry['matchSpansFlat'],
-            'matcherGroup': match_telemetry['matcherGroup'],
-        }
-        self.__jsonOutput(res)
-
-    def convertScalarStringValue(self, metaName, strValue):
-        """converts a scalar string value of the given meta info to a python value"""
-        metaInfo = self.metaInfoEnv().infoKindEl(metaName)
-        dtypeStr = metaInfo.dtypeStr
-        return valueForStrValue(strValue, dtypeStr)
-
-    def arrayForMetaInfo(self, metaName, shape):
-        """Returns an array with the correct type for the given meta info, and the given shape"""
-        metaInfo = self.metaInfoEnv().infoKindEl(metaName)
-        dtypeStr = metaInfo.dtypeStr
-        return numpy.zeros(shape, dtype = numpyDtypeForDtypeStr(dtypeStr))
-
-    def pwarn(self, msg):
-        """Writes a parser warning message"""
-        self.addValue("parsing_message_warning_run", msg)
-
-# testing
-if __name__ == '__main__':
-    parser = JsonParseEventsWriterBackend(None)
-
-    array = np.array(list(range(3**3)), dtype = 'int64')
-    array = np.reshape(array, (3, 3, 3))
-
-    gIndex = parser.openSection('test')
-    print("Open sections: " + str(parser.openSections()))
-    parser.setSectionInfo('single_run', gIndex, ('main_section', 5))
-    parser.addRealValue('energy', 5.0)
-    parser.addArrayValues('array', array)
-    parser.closeSection('test', gIndex)
-    print("Open sections: " + str(parser.openSections()))
diff --git a/common/python/nomadcore/simple_parser.py b/common/python/nomadcore/simple_parser.py
index 18ac69db7b676fefb63165c2c6c4f01698d3850a..2e0ae0e44b62c1876040b3832ab4bd0dcc6ae326 100644
--- a/common/python/nomadcore/simple_parser.py
+++ b/common/python/nomadcore/simple_parser.py
@@ -8,9 +8,7 @@ import types
 import re
 import sys
 import os
-import json
 import logging
-import traceback
 import inspect
 import io
 from urllib.parse import urlparse, urlunparse
@@ -19,8 +17,6 @@ from builtins import range
 from builtins import object
 
 from nomadcore.json_support import jsonIndentF
-from nomadcore.parser_backend import JsonParseEventsWriterBackend
-from nomadcore.parse_streamed_dicts import ParseStreamedDicts
 from nomadcore.unit_conversion import unit_conversion
 from nomadcore.caching_backend import CachingLevel, ActiveBackend
 from nomadcore.annotator import Annotator
@@ -1428,7 +1424,7 @@ def mainFunction(mainFileDescription,
                  metaInfoEnv,
                  parserInfo,
                  parseFile=None,
-                 outF=sys.stdout,
+                 outF=None,
                  cachingLevelForMetaName=None,
                  defaultDataCachingLevel=CachingLevel.ForwardAndCache,
                  defaultSectionCachingLevel=CachingLevel.Forward,
@@ -1458,24 +1454,17 @@ def mainFunction(mainFileDescription,
     usage = """{exeName} [--annotate] [--matchTelemetry] [--meta-info] [--help] [--specialize] [--stream] [--uri uri] [--verbose] [path/toFile]
    --annotate        writes annotated debug info of parsing process to file path/toFile.annotate
-   --matchTelemetry  write information about matches to output JSON stream
-   --meta-info       outputs the meta info supported by this parser
    --help            prints this message
-   --specialize      expects inclusion and exclusion of meta info to parse via a json dictionary on stdin
-   --stream          expects the files to parse via dictionary on stdin
    --verbose         writes metainfo to stderr and detailed debug info of parsing process to file detailed.log
 
    If a path to a file is given this is parsed
 
    """.format(exeName=os.path.basename(sys.argv[0] if len(sys.argv) > 0 else "simple_parser"))
     if parseFile is None:
         parseFile = defaultParseFile(parserInfo)
-    metaInfo = False
-    specialize = False
-    stream = False
+
     uri = None
     verbose = False
     fileToParse = None
-    writeMatchTelemetry = False
 
     if cachingLevelForMetaName is None:
         cachingLevelForMetaName = {}
@@ -1490,22 +1479,14 @@ def mainFunction(mainFileDescription,
     while ii < len(sys.argv):
         arg = sys.argv[ii]
         ii += 1
-        if arg == "--meta-info":
-            metaInfo = True
-        elif arg == "--help":
+        if arg == "--help":
             sys.stderr.write(usage)
             sys.exit(0)
-        elif arg == "--specialize":
-            specialize = True
-        elif arg == "--stream":
-            stream = True
         elif arg == "--verbose":
             verbose = True
         elif arg == "--annotate":
             global annotate
             annotate = True
-        elif arg == "--matchTelemetry":
-            writeMatchTelemetry = True
         elif arg == "--uri":
             if ii >= len(sys.argv):
                 raise Exception("missing uri after --uri")
@@ -1518,6 +1499,9 @@ def mainFunction(mainFileDescription,
             sys.stderr.write(usage)
             sys.exit(1)
 
+    if metaInfoEnv is None:
+        metaInfoEnv = superBackend.metaInfoEnv()
+
     # If a main file has been specified, override the one given as a command
     # line argument
     if mainFile is not None:
@@ -1526,27 +1510,12 @@ def mainFunction(mainFileDescription,
 
     if verbose:
         import nomadcore.ActivateLogging
         nomadcore.ActivateLogging.debugToFile()
+
     if uri is None and fileToParse:
         uri = "file://" + fileToParse
-    outF.write("[")
-    writeComma = False
-    if metaInfo:
-        if writeComma:
-            outF.write(", ")
-        else:
-            writeComma = True
-        metaInfoEnv.embedDeps()
-        metaInfoEnv.serialize(outF.write, subGids=True, selfGid=True)
-        outF.flush()
-    dictReader = ParseStreamedDicts(sys.stdin)
-    # Initialize the backend where the final results are printed after they are
-    # possibly filtered by the caching ActiveBackend.
-    if superBackend is None:
-        jsonBackend = JsonParseEventsWriterBackend(
-            metaInfoEnv, outF, writeMatchTelemetry=writeMatchTelemetry)
-    else:
-        jsonBackend = superBackend
+    assert superBackend is not None
+    assert fileToParse is not None
 
     if superContext:
         strValueTransform = dict(strValueTransform)
@@ -1554,9 +1523,10 @@ def mainFunction(mainFileDescription,
             oldCallbacks = strValueTransform.get(attr, None)
             if attr not in strValueTransform:
                 strValueTransform[attr] = callback
-    # initialize the parser builder
-    parserBuilder = compileParser(mainFileDescription, metaInfoEnv, metaInfoToKeep, default_units, metainfo_units,
-                                  strValueTransform)
+
+    parserBuilder = compileParser(
+        mainFileDescription, metaInfoEnv, metaInfoToKeep, default_units, metainfo_units,
+        strValueTransform)
 
     if superContext:
         onClose = dict(onClose)
@@ -1573,6 +1543,7 @@ def mainFunction(mainFileDescription,
                     oldCallbacks.append(callback)
                 else:
                     onOpen[attr] = [callback]
+
     backend = ActiveBackend.activeBackend(
         metaInfoEnv=metaInfoEnv,
         cachingLevelForMetaName=cachingLevelForMetaName,
@@ -1580,58 +1551,11 @@ def mainFunction(mainFileDescription,
         defaultSectionCachingLevel=defaultSectionCachingLevel,
         onClose=onClose,
         onOpen=onOpen,
-        superBackend=jsonBackend,
+        superBackend=superBackend,
         default_units=default_units,
         metainfo_units=metainfo_units)
 
-    if specialize:
-        specializationInfo = dictReader.readNextDict()
-        if specializationInfo is None or specializationInfo.get("type", "") != "nomad_parser_specialization_1_0":
-            raise Exception(
-                "expected a nomad_parser_specialization_1_0 as first dictionary, got " + json.dumps(specializationInfo))
-        metaInfoToKeep = specializationInfo.get("metaInfoToKeep")
-    if fileToParse:
-        if writeComma:
-            outF.write(", ")
-        else:
-            writeComma = True
-        parseFile(parserBuilder, uri, fileToParse, backend, superContext)
-    if stream:
-        while True:
-            if writeComma:
-                outF.write(", ")
-            else:
-                writeComma = True
-            toRead = dictReader.readNextDict()
-            if toRead is None:
-                break
-            outFileName = toRead.get('outFile', None)
-            outFile = None
-            if outFileName is not None:
-                # create file-specific json backend
-                outFile = open(outFileName, 'w')
-                outFile.write("[")
-                outFilejsonBackend = JsonParseEventsWriterBackend(
-                    metaInfoEnv, outFile, writeMatchTelemetry=writeMatchTelemetry)
-                # override superBackend for this parseFile
-                backend.superBackend = outFilejsonBackend
-            logger.warning("# parsing: %s", toRead['mainFile'])
-            try:
-                parseFile(
-                    parserBuilder, toRead['mainFileUri'], toRead['mainFile'], backend, superContext)
-            except Exception:
-                logger.error(traceback.format_exc())
-            if outFile is not None:
-                # we had the comma already from last round
-                writeComma = False
-                # finish stream json
-                outFile.write("]\n")
-                outFile.close()
-                # reset everything to defaults
-                outFile = None
-                outFileName = None
-                backend.superBackend = jsonBackend
-    outF.write("]\n")
+    parseFile(parserBuilder, uri, fileToParse, backend, superContext)
 
 
 class AncillaryParser(object):