diff --git a/common/python/nomadcore/parser_backend.py b/common/python/nomadcore/parser_backend.py index 9616be14727716918de8cd5f343b053bec1e02c1..f789444595e7fbcbfeead0f92f385e255bc03a0d 100644 --- a/common/python/nomadcore/parser_backend.py +++ b/common/python/nomadcore/parser_backend.py @@ -69,7 +69,7 @@ class JsonParseEventsWriterBackend(object): "data":self.stats} url = 'https://nomad-dev.rz-berlin.mpg.de/parsers/addStat' #url = 'http://127.0.0.1:8081/parsers/addStat' - data = json.dumps(stats) + data = json.dumps(stats, sort_keys=True) req = urllib.request.Request(url, data) response = urllib.request.urlopen(req) the_page = response.read() @@ -94,7 +94,7 @@ class JsonParseEventsWriterBackend(object): self.fileOut.write(", ") else: self.__writeComma = True - json.dump(dic, self.fileOut, indent = 2, separators = (',', ':')) # default = self.__numpyEncoder) + json.dump(dic, self.fileOut, indent = 2, separators = (',', ':'), sort_keys=True) # default = self.__numpyEncoder) def startedParsingSession(self, mainFileUri, parserInfo, parserStatus = None, parserErrors = None): """should be called when the parsing starts, parserInfo should be a valid json dictionary""" @@ -104,13 +104,13 @@ class JsonParseEventsWriterBackend(object): self.sessionParserStatus = parserStatus self.sessionParserErrors = parserErrors if mainFileUri is not None: - self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri)) + self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri, sort_keys=True)) if parserInfo is not None: - self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'))) + self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'), sort_keys=True)) if parserStatus is not None: - self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'))) + self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'), sort_keys=True)) if parserErrors is not None: - self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'))) + self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'), sort_keys=True)) self.fileOut.write(""", "events": [""") @@ -118,13 +118,13 @@ class JsonParseEventsWriterBackend(object): """should be called when the parsing finishes""" self.fileOut.write("]") if mainFileUri is not None and self.sessionMainFileUri is None: - self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri)) + self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri, sort_keys=True)) if parserInfo is not None and self.sessionParserInfo is None: - self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'))) + self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'), sort_keys=True)) if parserStatus is not None and self.sessionParserStatus is None: - self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'))) + self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'), sort_keys=True)) if parserErrors is not None and self.sessionParserErrors is None: - self.fileOut.write(",\n \"parserErrors\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'))) + self.fileOut.write(",\n \"parserErrors\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'), sort_keys=True)) self.fileOut.write(""" }""") self.fileOut.flush() diff --git a/common/python/nomadcore/simple_parser.py b/common/python/nomadcore/simple_parser.py index 221109d797e5794cf5770ec13f4ed7390bbebee6..ffe0e680d0486ca95021623f86f940c4478c0a2b 100644 --- a/common/python/nomadcore/simple_parser.py +++ b/common/python/nomadcore/simple_parser.py @@ -1,6 +1,5 @@ from __future__ import division from future import standard_library -from future.utils import raise_from standard_library.install_aliases() from builtins import str from builtins import range @@ -581,14 +580,14 @@ class CompiledMatcher(object): raise Exception("Expected to match %s on %r" % (self.startRe.pattern, line)) self.annotate(m,line,parser,self.matcher,0) result_dict = {} - for k,v in m.groupdict().items(): + for k,v in sorted(m.groupdict().items()): if v is None: # a group may be optional (subexpression of ? or | in regex) continue k_converted, v_converted = self.addStrValue(parser.backend, k, v) result_dict[k_converted] = v_converted if self.matcher.fixedStartValues: - for k,v in self.matcher.fixedStartValues.items(): + for k,v in sorted(self.matcher.fixedStartValues.items()): k_converted, v_converted = self.addStrValue(parser.backend, k, v) result_dict[k_converted] = v_converted if self.matcher.forwardMatch: @@ -601,14 +600,14 @@ class CompiledMatcher(object): m = self.endRe.match(line) #, re.MULTILINE) self.annotate(m,line,parser,self.matcher,1) result_dict = {} - for k,v in m.groupdict().items(): + for k,v in sorted(m.groupdict().items()): if v is None: # a group may be optional (subexpression of ? or | in regex) continue k_converted, v_converted = self.addStrValue(parser.backend, k, v) result_dict[k_converted] = v_converted if self.matcher.fixedEndValues: - for k,v in self.matcher.fixedEndValues.items(): + for k,v in sorted(self.matcher.fixedEndValues.items()): k_converted, v_converted = self.addStrValue(parser.backend, k, v) result_dict[k_converted] = v_converted return result_dict @@ -1061,37 +1060,33 @@ class SimpleParser(object): def parseStep(self): if not self.context: return False - try: - currentContext = self.context[len(self.context) - 1] - if logger.isEnabledFor(logging.DEBUG): - logger.debug("lineNr: %d context: %s", self.fIn.lineNr,self.contextDesc()) - if currentContext.startEnd == ParsingContext.Start: - nextI = currentContext.compiledMatcher.findNextMatch(self) - else: - nextI = currentContext.compiledMatcher.findNextMatchEnd(self) - logger.debug("lineNr: %d nextI: %d", self.fIn.lineNr, nextI) - - if nextI < 0: - cNames = self.contextDesc() - while self.context: - self.contextPop() - if nextI != -1 and nextI != -3: - raise Exception("finished with error with parsing context %s" % (cNames)) + currentContext = self.context[len(self.context) - 1] + if logger.isEnabledFor(logging.DEBUG): + logger.debug("lineNr: %d context: %s", self.fIn.lineNr,self.contextDesc()) + if currentContext.startEnd == ParsingContext.Start: + nextI = currentContext.compiledMatcher.findNextMatch(self) + else: + nextI = currentContext.compiledMatcher.findNextMatchEnd(self) + logger.debug("lineNr: %d nextI: %d", self.fIn.lineNr, nextI) + + if nextI < 0: + cNames = self.contextDesc() + while self.context: + self.contextPop() + if nextI != -1 and nextI != -3: + raise Exception("finished with error with parsing context %s" % (cNames)) + else: + index = nextI // 2 + startEnd = nextI % 2 + matcherNew = self.parserBuilder.flatIndex[index] + self.goToMatcher(matcherNew, startEnd) + logger.debug("new context: %s\n", self.contextDesc()) + currentCtx = self.context[len(self.context) - 1] + if startEnd == ParsingContext.End: + self.lastMatch = currentCtx.compiledMatcher.handleEndMatch(self) + self.contextClose(currentCtx) else: - index = nextI // 2 - startEnd = nextI % 2 - matcherNew = self.parserBuilder.flatIndex[index] - self.goToMatcher(matcherNew, startEnd) - logger.debug("new context: %s\n", self.contextDesc()) - currentCtx = self.context[len(self.context) - 1] - if startEnd == ParsingContext.End: - self.lastMatch = currentCtx.compiledMatcher.handleEndMatch(self) - self.contextClose(currentCtx) - else: - self.lastMatch = currentCtx.compiledMatcher.handleStartMatch(self) - except Exception as e: - origin = traceback.format_exc() - raise_from(Exception("Failure, context %s, line %d coming from %s"%(self.contextDesc(), self.fIn.lineNr, origin)), e) + self.lastMatch = currentCtx.compiledMatcher.handleStartMatch(self) return len(self.context) > 0 def parse(self): @@ -1300,7 +1295,29 @@ def mainFunction(mainFileDescription, toRead = dictReader.readNextDict() if toRead is None: break - parseFile(parseFile, toRead['mainFileUri'], toRead['mainFile'], superContext) + outFileName = toRead.get('outFile', None) + outFile = None + if outFileName is not None: + # create file-specific json backend + outFile = open(outFileName, 'w') + outFile.write("[") + outFilejsonBackend = JsonParseEventsWriterBackend(metaInfoEnv, outFile) + # override superBackend for this parseFile + backend.superBackend = outFilejsonBackend + try: + parseFile(parserBuilder, toRead['mainFileUri'], toRead['mainFile'], backend, superContext) + except Exception as e: + logger.error(traceback.format_exc()) + if outFile is not None: + # we had the comma already from last round + writeComma = False + # finish stream json + outFile.write("]\n") + outFile.close() + # reset everything to defaults + outFile = None + outFileName = None + backend.superBackend = jsonBackend outF.write("]\n") class AncillaryParser(object):