Commit c161aa0f authored by Markus Scheidgen

Linting, reading capabilities, performance improvements.

parent aee4be74
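The "reading capabilities" named in the commit message are the new get_value/get_sections accessors added to LocalBackend, Section and SectionManager in the first file below. A minimal usage sketch, assuming a LocalBackend instance (here called backend) that has already parsed a file; "section_run" and "program_name" are illustrative metainfo names, not part of this commit:

    # list the gIndex of every section_run the parser produced
    run_indices = backend.get_sections("section_run")
    for g_index in run_indices:
        # read back the value stored for program_name in that section
        print(g_index, backend.get_value("program_name", g_index))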
@@ -67,7 +67,7 @@ class LocalBackend(object):
         # metaInfoEnv.infoKinds.items() gives a dictionary with keys meta_info
         # and the associated value a nomadcore.local_meta_info.InfoKindEl object.
-        for ikNames, ik in metaInfoEnv.infoKinds.items():
+        for _, ik in metaInfoEnv.infoKinds.items():
             if ik.kindStr == "type_section":
                 parentS = list(metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[]])[0])
                 parentS.sort()

@@ -75,7 +75,7 @@ class LocalBackend(object):
                     metaInfo=ik,
                     parentSectionNames=parentS, debug=self.debug)
         # We go through each key, value in the dictionary of meta infos
-        for ikNames, ik in metaInfoEnv.infoKinds.items():
+        for _, ik in metaInfoEnv.infoKinds.items():
             if ik.kindStr == "type_document_content" or ik.kindStr == "type_dimension":
                 # Now we find out what the supersections are of this meta_info
                 superSectionNames = metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[]])[0]

@@ -191,6 +191,23 @@ class LocalBackend(object):
         dataManager.superSectionManager.addArrayValues(dataManager.metaInfo, values, gIndex=gIndex, **kwargs)
+
+    def get_value(self, meta_name, g_index=-1):
+        dataManager = self.results._datamanagers.get(meta_name)
+        if dataManager is None:
+            return None
+        return dataManager.superSectionManager.get_value(dataManager.metaInfo, g_index)
+
+    def get_sections(self, meta_name, g_index=-1):
+        if g_index == -1:
+            sections = self.results._sectionmanagers[meta_name].openSections
+        else:
+            sectionManager = self.results._sectionmanagers[meta_name]
+            parent_meta_name = self.results._sectionmanagers[meta_name].parentSectionNames[0]
+            sections = self.results._sectionmanagers[parent_meta_name].get_subsections(sectionManager.metaInfo, g_index)
+        return [section.gIndex for section in sections]

     def setSectionInfo(self, metaName, gIndex, references):
         """
         Sets info values of an open section references should be a dictionary with the

@@ -575,6 +592,16 @@ class Section(object):
             else:
                 vals.append(section)
+
+    def get_value(self, metaInfo):
+        if metaInfo.name in self.simple_values:
+            return self.simple_values[metaInfo.name]
+        elif metaInfo.name in self.array_values:
+            return self.array_values[metaInfo.name]
+        raise KeyError(metaInfo.name)
+
+    def get_subsections(self, metaInfo):
+        return self.subsections[metaInfo.name]

 class SectionManager(object):
     """Manages the sections for the given metainfo.

@@ -655,6 +682,27 @@ class SectionManager(object):
         except (KeyError, IndexError):
             raise Exception("Cannot add array values for metadata %s to section %d (%d) of %s, as it is not open" % (valueMetaInfo.name, gI, gIndex, self.metaInfo.name))
+
+    def get_value(self, valueMetaInfo, g_index):
+        if (g_index == -1):
+            gI = self.lastSectionGIndex
+        else:
+            gI = g_index
+        try:
+            return self.openSections[gI].get_value(valueMetaInfo)
+        except IndexError:
+            raise IndexError("Cannot get value for metadata %s to section %d (%d) of %s, as it is not open" % (valueMetaInfo.name, gI, g_index, self.metaInfo.name))
+
+    def get_subsections(self, valueMetaInfo, g_index=-1):
+        if (g_index == -1):
+            gI = self.lastSectionGIndex
+        else:
+            gI = g_index
+        try:
+            return self.openSections[gI].get_subsections(valueMetaInfo)
+        except (KeyError, IndexError):
+            return []

 class DataManager(object):
     """Stores the parent (SectionManager) for the given metainfo.
...
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import range
+from builtins import object
 # todo: detect loops generated by forwardMatch
 # debugging parser (side by side)
 # unit conversion
 # caching and section triggers
 from collections import OrderedDict
-import re, sys, os
+import types
+import re
+import sys
+import os
 import json
-import logging, traceback, inspect
+import logging
+import traceback
+import inspect
+import io
+from urllib.parse import urlparse, urlunparse
-from builtins import str
-from builtins import range
-from builtins import object
 from nomadcore.json_support import jsonIndentF
 from nomadcore.parser_backend import JsonParseEventsWriterBackend
 from nomadcore.parse_streamed_dicts import ParseStreamedDicts
 from nomadcore.unit_conversion import unit_conversion
 from nomadcore.caching_backend import CachingLevel, ActiveBackend
 from nomadcore.annotator import Annotator
-import io
-from future.standard_library import install_aliases
-install_aliases()
-from urllib.parse import urlparse, urlunparse

 logger = logging.getLogger("nomadcore.simple_parser")
+logger.debug = types.MethodType(lambda *args, **kwargs: None, logger)

 annotate = False
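The new logger.debug assignment above is the commit's main performance improvement: the module logger's debug method is shadowed on the instance by a no-op bound method, so the many logger.debug(...) calls in the parsing hot path no longer pay for argument formatting or handler dispatch. Blocks guarded by logger.isEnabledFor(logging.DEBUG), used further down, still honour the configured level. A self-contained sketch of the same trick:

    import logging
    import types

    log = logging.getLogger("demo")
    # shadow the bound method on this one logger instance
    log.debug = types.MethodType(lambda *args, **kwargs: None, log)
    log.debug("dropped without formatting: %s", "payload")
    log.warning("other levels are unaffected")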
 class SkipFileException(Exception):
     pass

 class PushbackLineFile(object):
     """a file interface where it is possible to put back read lines"""

@@ -62,9 +68,13 @@ class PushbackLineFile(object):
             self.lines.append(line)
         self.lineNr -= 1

 def uriFromRelPath(baseUri, basePath, newPath):
-    """returns an uri corresponding to newPath assuming that base path has uri baseUri.
-    This will never change the net location (archive)."""
+    """
+    Returns an uri corresponding to newPath assuming that base path has uri baseUri.
+    This will never change the net location (archive).
+    """
     p1 = os.path.normpath(os.path.abspath(basePath))
     p2 = os.path.normpath(os.path.abspath(newPath))
     rPath = os.path.relpath(p2, p1)

@@ -72,9 +82,13 @@
     nUri = bUri._replace(path=os.path.normpath(os.path.join(bUri.path, rPath)))
     return urlunparse(nUri)
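uriFromRelPath rebases a filesystem path onto an existing URI without touching the scheme or net location. The same mechanics on made-up inputs (the nmd:// URI and the paths are purely illustrative):

    import os
    from urllib.parse import urlparse, urlunparse

    bUri = urlparse("nmd://archive/base")
    rPath = os.path.relpath("/data/base/sub/out.log", "/data/base")  # "sub/out.log"
    nUri = bUri._replace(path=os.path.normpath(os.path.join(bUri.path, rPath)))
    print(urlunparse(nUri))  # nmd://archive/base/sub/out.log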
 class SimpleMatcher(object):
-    """A Something that matches either a single line, or multiple lines grouped together.
-    This is the base of a declarative parser."""
+    """
+    A Something that matches either a single line, or multiple lines grouped together.
+    This is the base of a declarative parser.
+    """

     class SubFlags(object):
         Sequenced = 0  # the subMatchers should be executed in sequence
         Unordered = 1  # the subMatchers can be in any order

@@ -87,9 +101,11 @@ class SimpleMatcher(object):
                  weak=False,  # this matcher should not "steal" the position
                  repeats=False,  # this matcher is expected to repeat
                  required=False,  # this value is required to have been matched on section close
-                 floating = False, # this section goes not steal the context, useful for example for low level debugging/error messages.
+                 # this section goes not steal the context, useful for example for low level debugging/error messages.
+                 floating=False,
                  # valid from the point they are read until the exit from the enclosing section
-                 forwardMatch = False, # if start match should not eat input, but be forwarded to adHoc and subMatchers
+                 # if start match should not eat input, but be forwarded to adHoc and subMatchers
+                 forwardMatch=False,
                  name="",
                  adHoc=None,
                  otherMetaInfo=tuple(),  # The metainfos that are later manually added ot the backend

@@ -101,7 +117,8 @@ class SimpleMatcher(object):
                  coverageIgnore=False,  # mark line as ignored in coverage analysis
                  onClose=None,  # A dictionary of onClose callbacks that are specific to this SimpleMatcher
                  onOpen=None,  # A dictionary of onOpen callbacks that are specific to this SimpleMatcher
-                 startReAction = None, # A callback function that is called when the starting regex is matcher. If the regex has any capturing groups, they will be provided as well to this function as parameter called "groups".
+                 # A callback function that is called when the starting regex is matcher. If the regex has any capturing groups, they will be provided as well to this function as parameter called "groups".
+                 startReAction=None,
                  ):
         self.index = -1
         self.startReStr = startReStr

@@ -126,7 +143,8 @@ class SimpleMatcher(object):
         self.onClose = onClose
         self.onOpen = onOpen
         self.startReAction = startReAction
-        self.keep = False  # Boolean flag used by the ParserOptimizer to determine which SimpleMatchers to keep
+        # Boolean flag used by the ParserOptimizer to determine which SimpleMatchers to keep
+        self.keep = False
         # boolean flag to signal that this SimpleMatcher does not have any
         # effect (besides progressing input file):
         # - no data extracted

@@ -134,12 +152,14 @@ class SimpleMatcher(object):
         # - no adHoc
         # - no sections
         # - no startReActions
-        self.does_nothing = (len(subMatchers) == 0 and
-            len(sections) == 0 and
-            fixedStartValues is None and
-            fixedEndValues is None and
-            adHoc is None and
-            startReAction is None)
+        self.does_nothing = \
+            len(subMatchers) == 0 and \
+            len(sections) == 0 and \
+            fixedStartValues is None and \
+            fixedEndValues is None and \
+            adHoc is None and \
+            startReAction is None
         if self.does_nothing:
             if startReStr is not None and len(extractGroupNames(startReStr)) > 0:
                 self.does_nothing = False

@@ -149,8 +169,7 @@ class SimpleMatcher(object):
         self.coverageIgnore = coverageIgnore
         if coverageIgnore and not self.does_nothing:
             logger.debug(
-                "SimpleMatcher '%s' has an effect " +
-                "and is marked as coverageIgnore", name)
+                "SimpleMatcher '%s' has an effect and is marked as coverageIgnore", name)
         self.coverageIgnore = coverageIgnore
         caller = inspect.currentframe()

@@ -195,8 +214,7 @@ class SimpleMatcher(object):
             defFile=self.defFile,
             onClose=self.onClose,
             onOpen=self.onOpen,
-            startReAction = self.startReAction,
-            )
+            startReAction=self.startReAction)
         simpleMatcher.keep = self.keep
         return simpleMatcher
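For orientation: SimpleMatcher objects are nested into a tree that mirrors the layout of the file being parsed, and the keyword arguments reflowed above are its knobs. A hedged sketch of a tiny hierarchy (the regexes and the energy__eV group name are invented; the __eV suffix is the unit-in-group-name convention that extractGroupNames below decodes):

    root = SimpleMatcher(
        name="root",
        startReStr=r"\s*Program output",
        subMatchers=[
            SimpleMatcher(
                startReStr=r"\s*Total energy:\s*(?P<energy__eV>[-+0-9.eEdD]+)",
                repeats=True),  # this line is expected to match many times
        ])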
@@ -237,7 +255,8 @@ class SimpleMatcher(object):
         """calculates the flat index of this and each sub matcher"""
         flatIndex.append(self)
         if self.index != -1:
-            raise Exception("matcher already had index %d, now inserted at %d" % (self.index, len(flatIndex) - 1))
+            raise Exception("matcher already had index %d, now inserted at %d" % (
+                self.index, len(flatIndex) - 1))
         self.index = len(flatIndex) - 1
         for m in self.subMatchers:
             m.calcIndexes(flatIndex)

@@ -312,8 +331,8 @@ class SimpleMatcher(object):
 def closingParen(regex, i, nparen):
     """returns the position after the closing parethesis"""
     j = i
-    l = len(regex)
-    while j < l:
+    length = len(regex)
+    while j < length:
         c = regex[j]
         j += 1
         if c == '\\':

@@ -324,7 +343,7 @@ def closingParen(regex, i, nparen):
             nparen -= 1
             if nparen <= 0:
                 return j
-    return l
+    return length

 def disableGroups(regex):

@@ -333,9 +352,9 @@ def disableGroups(regex):
     j = 0
     res = ""
     flags = ""
-    l = len(regex)
+    length = len(regex)
     try:
-        while j < l:
+        while j < length:
             c = regex[j]
             j += 1
             if c == '\\':

@@ -344,12 +363,12 @@ def disableGroups(regex):
                    raise Exception('backreferneces not supported')
                j += 1
            elif c == '(':
-                if j >= l:
+                if j >= length:
                    break
                c = regex[j]
                j += 1
                if c == '?':
-                    if j >= l:
+                    if j >= length:
                        break
                    c = regex[j]
                    j += 1

@@ -368,12 +387,13 @@ def disableGroups(regex):
                        elif c == '=':
                            raise Exception('backreferneces not supported')
                        else:
-                            raise Exception('unexpected character sequence "(P%s"' % c)
+                            raise Exception(
+                                'unexpected character sequence "(P%s"' % c)
                    elif c in 'im':
                        if c == 'i':
                            flags += 'i'
                        res += regex[i: j - 3]
-                        while j < l:
+                        while j < length:
                            c = regex[j]
                            j += 1
                            if c == ')':

@@ -381,10 +401,12 @@ def disableGroups(regex):
                            if c == 'i':
                                flags += 'i'
                            elif not c == 'm':
-                                raise Exception('Regexp flag %s is not supported by disableGroups only i (ignore case) is supported, and m (multiline) is the default' % c)
+                                raise Exception(
+                                    'Regexp flag %s is not supported by disableGroups only i (ignore case) is supported, and m (multiline) is the default' % c)
                        i = j
                    elif c in 'Lsux':
-                        raise Exception('Regexp flag %s is not supported by disableGroups' % c)
+                        raise Exception(
+                            'Regexp flag %s is not supported by disableGroups' % c)
                    elif c == '#':
                        res += regex[i]
                        j = closingParen(regex, j, 1)

@@ -392,23 +414,27 @@ def disableGroups(regex):
                    elif c in '=!':
                        pass
                    elif c == '<':
-                        if j < l:
+                        if j < length:
                            c = regex[j]
                            j += 1
                            if c in '=!':
-                                raise Exception("Regexp sequence (?<%s...) unknwon" % c)
+                                raise Exception(
+                                    "Regexp sequence (?<%s...) unknwon" % c)
                    elif c == '(':
-                        raise Exception("boolean matching not supported by disableGroups")
+                        raise Exception(
+                            "boolean matching not supported by disableGroups")
                    else:
                        raise Exception("unsupported character " + c + " after (?")
                else:
                    res += regex[i: j - 1] + "?:"
                    i = j - 1
     except Exception as e:
-        raise Exception("disableGroups failure on '%s' at %d" % (regex, j - 1), e)
+        raise Exception("disableGroups failure on '%s' at %d" %
+                        (regex, j - 1), e)
     res += regex[i:]
     return res
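disableGroups exists because matchersToRe (further down) ORs many matcher regexes into one compiled pattern: capturing groups in the individual patterns must be neutralised first, otherwise group numbers shift and identically named groups collide. The collision is easy to reproduce with the standard re module:

    import re

    try:
        re.compile("(?P<val>a)|(?P<val>b)")  # same group name used twice
    except re.error as err:
        print(err)  # redefinition of group name 'val' as group 2 ...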
 def decodeUnits(units):
     """decodes units: _ between letters is a *, _ before a number is a -, and ^ is inserted before numbers
     for example "m_s_1" becomes m*s^-1"""

@@ -420,10 +446,10 @@ def decodeUnits(units):
     underscoreAfterChar = 3
     underscoreAfterNumber = 4
     state = chars
-    l = len(units)
-    while j < l:
+    length = len(units)
+    while j < length:
         if state == chars:
-            while j < l:
+            while j < length:
                 c = units[j]
                 j += 1
                 if c.isalpha():

@@ -440,9 +466,10 @@ def decodeUnits(units):
                     state = underscoreAfterChar
                     break
                 else:
-                    raise Exception("invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
+                    raise Exception(
+                        "invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
         elif state == number:
-            while j < l:
+            while j < length:
                 c = units[j]
                 j += 1
                 if c.isalpha():

@@ -459,9 +486,10 @@ def decodeUnits(units):
                     state = underscoreAfterNumber
                     break
                 else:
-                    raise Exception("invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
+                    raise Exception(
+                        "invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
         elif state == underscoreAfterChar:
-            while j < l:
+            while j < length:
                 c = units[j]
                 j += 1
                 if c.isalpha():

@@ -475,9 +503,10 @@ def decodeUnits(units):
                     state = number
                     break
                 else:
-                    raise Exception("invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
+                    raise Exception(
+                        "invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
         elif state == underscoreAfterNumber:
-            while j < l:
+            while j < length:
                 c = units[j]
                 j += 1
                 if c.isalpha():

@@ -486,25 +515,28 @@ def decodeUnits(units):
                     state = chars
                     break
                 else:
-                    raise Exception("invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
+                    raise Exception(
+                        "invalid char %r at %d while decoding units from %r" % (c, j - 1, units))
         else:
-            raise Exception("invalid state %s at %d while decoding units from %r" % (state, j - 1, units))
+            raise Exception(
+                "invalid state %s at %d while decoding units from %r" % (state, j - 1, units))
     if state == chars:
         res.append(units[i:])
     elif state == number:
         res.append(units[i:])
     else:
-        raise Exception("invalid state %s at end of decodeUnits(%r)" % (state, units))
+        raise Exception(
+            "invalid state %s at end of decodeUnits(%r)" % (state, units))
     return "".join(res)
 def extractGroupNames(regex):
     """extracts all group names from a regex"""
     j = 0
     groupNames = []
-    flags = ""
-    l = len(regex)
+    length = len(regex)
     try:
-        while j < l:
+        while j < length:
             c = regex[j]
             j += 1
             if c == '\\':

@@ -513,12 +545,12 @@ def extractGroupNames(regex):
                    raise Exception('backreferences not supported')
                j += 1
            elif c == '(':
-                if j >= l:
+                if j >= length:
                    break
                c = regex[j]
                j += 1
                if c == '?':
-                    if j >= l:
+                    if j >= length:
                        break
                    c = regex[j]
                    j += 1

@@ -533,7 +565,8 @@ def extractGroupNames(regex):
                            raise Exception("expected (P<xx>)")
                        gs = regex[j: k].split("__")
                        if len(gs) > 2:
-                            raise Exception("only a __ is accepted in a group name (to add units), got %s" % regex[j : k])
+                            raise Exception(
+                                "only a __ is accepted in a group name (to add units), got %s" % regex[j: k])
                        if len(gs) == 2:
                            unit = decodeUnits(gs[1])
                        else:

@@ -542,16 +575,19 @@ def extractGroupNames(regex):
                        groupNames.append(gAndUnit)
                        j = k + 1
     except Exception as e:
-        raise Exception("extractGroupNames failure on '%s' at %d" % (regex, j - 1), e)
+        raise Exception("extractGroupNames failure on '%s' at %d" %
+                        (regex, j - 1), e)
     return groupNames

 class CompiledMatcher(object):
     def matchersToRe(self, possibleNexts):
         "builds a unique regular expression for the given possible matchers"
         res = []
         for startEnd, nextMatcher in possibleNexts:
-            baseRe = disableGroups(nextMatcher.startReStr if startEnd == 0 else nextMatcher.endReStr)
+            baseRe = disableGroups(
+                nextMatcher.startReStr if startEnd == 0 else nextMatcher.endReStr)
             newRe = "(?P<i%d>%s)" % (nextMatcher.index * 2 + startEnd, baseRe)
             res.append(newRe)
         return re.compile("|".join(res))
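matchersToRe wraps every candidate pattern in a synthetic group named i<2*index+startEnd>, so after a match the single non-None group identifies both which matcher fired and whether it was its start or end regex. A self-contained sketch of that scheme:

    import re

    # two candidates: matcher 0 start regex ("i0") and matcher 0 end regex ("i1")
    combined = re.compile("|".join(["(?P<i0>BEGIN)", "(?P<i1>END)"]))
    m = combined.match("END")
    hits = [name for name, val in m.groupdict().items() if val is not None]
    print(hits)  # ['i1'] -> matcher index 0, end regex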
@@ -560,13 +596,15 @@ class CompiledMatcher(object):
         self.matcher = matcher
         self.groupname = [[], []]
         self.startRe = re.compile(matcher.startReStr)
-        self.groupname[0].extend(str(num) for num in range(1, self.startRe.groups+1))
+        self.groupname[0].extend(str(num)
+                                 for num in range(1, self.startRe.groups + 1))
         for name, num in self.startRe.groupindex.items():
             self.groupname[0][num - 1] = name
         self.endRe = None
         if matcher.endReStr:
             self.endRe = re.compile(matcher.endReStr)
-            self.groupname[1].extend(str(num) for num in range(1, self.endRe.groups+1))
+            self.groupname[1].extend(str(num)
+                                     for num in range(1, self.endRe.groups + 1))
             for name, num in self.endRe.groupindex.items():
                 self.groupname[1][num - 1] = name
         self.floating = matcher.floating

@@ -574,7 +612,8 @@ class CompiledMatcher(object):
         while currentM:
             if currentM.floating:
                 if self.floating:
-                    raise Exception("nesting of floating in floating not supported (%s in %s)" % (currentM.desc(), matcher.desc()))
+                    raise Exception("nesting of floating in floating not supported (%s in %s)" % (
+                        currentM.desc(), matcher.desc()))
                 self.floating = True
             currentM = currentM.superMatcher
         self.possibleNexts = possibleNexts

@@ -602,7 +641,8 @@ class CompiledMatcher(object):
                 # If there is a metainfo-specific unit conversion, use it
                 metainfo_unit = None
                 if parserBuilder.metainfo_units is not None:
-                    metainfo_unit = parserBuilder.metainfo_units.get(metaInfo.name)
+                    metainfo_unit = parserBuilder.metainfo_units.get(
+                        metaInfo.name)
                 if metainfo_unit is not None:
                     target_unit = metainfo_unit
                 # If there is a dimension-specific unit conversion, use it

@@ -612,7 +652,8 @@ class CompiledMatcher(object):
                    if map_unit:
                        target_unit = map_unit
-                converters[gName] = unit_conversion.convert_unit_function(units, target_unit)
+                converters[gName] = unit_conversion.convert_unit_function(
+                    units, target_unit)
         self.converters = converters
         self.transformers = transformers

@@ -649,10 +690,12 @@ class CompiledMatcher(object):
     def handleStartMatch(self, parser):
         line = parser.fIn.readline()
-        logger.debug("handleStartMatch of %s on (%s)", self.matcher.desc(),line)
+        logger.debug("handleStartMatch of %s on (%s)",
+                     self.matcher.desc(), line)
         m = self.startRe.match(line)  # , re.MULTILINE)
         if not m:
-            raise Exception("Expected to match %s on %r" % (self.startRe.pattern, line))
+            raise Exception("Expected to match %s on %r" %
+                            (self.startRe.pattern, line))
         self.handleMatchTelemetry(parser, m, line, 0)
         result_dict = {}

@@ -677,7 +720,8 @@ class CompiledMatcher(object):
         # If the match needs to be forwarded, push the read line back into the
         # file stream
         if self.matcher.forwardMatch:
-            logger.debug("handleStartMatch of %s on (%s) pushing back line", self.matcher.desc(),line)
+            logger.debug(
+                "handleStartMatch of %s on (%s) pushing back line", self.matcher.desc(), line)
             parser.fIn.pushbackLine(line)
         return result_dict

@@ -710,8 +754,6 @@ class CompiledMatcher(object):
             if not line:
                 return -1
             m = regex.match(line)  # , re.MULTILINE)
-            matchedFile=''
-            matchedLine=0
             if m:
                 # logger.debug("line %d has a match for %d: %s", parser.fIn.lineNr, self.matcher.index, m.groupdict())
                 groups = m.groups()

@@ -721,7 +763,8 @@ class CompiledMatcher(object):
                 targetStartEnd, targetMatcher = possible[i]
                 nextI = 2 * targetMatcher.index + targetStartEnd
                 if m.group("i%s" % nextI) is None:
-                    raise Exception("inconsistent match (group name vs group pos)")
+                    raise Exception(
+                        "inconsistent match (group name vs group pos)")
                 # The call to self.annotate needs to be in
                 # handleStartMatch/handleEndMatch, where the
                 # original RE is applied, including all capturing groups

@@ -776,13 +819,15 @@ class CompiledMatcher(object):
             # Forward compatibility with 'regex' or 're2', which support
             # multiple captures per group:
             # span: list of lists of tuples: [groups, captures, 2]
-            result['span'] = [[span]] + list([] for i in range(match.re.groups))
+            result['span'] = [[span]] + list([]
+                                             for i in range(match.re.groups))
             # capture groups
             for groupi in range(1, match.re.groups + 1):
                 if match.group(groupi) is not None:
                     result['span'][groupi].append(match.span(groupi))
         else:
-            m_ci = parser.coverageIgnore.match(fInLine)  # check global coverageIgnore
+            m_ci = parser.coverageIgnore.match(
+                fInLine)  # check global coverageIgnore
             if m_ci:
                 result['coverageIgnore'] = 3
                 span = m_ci.span()

@@ -810,26 +855,32 @@ class CompiledMatcher(object):
             for s in g:
                 span_flat.extend(s)
             result['matchSpansFlat'] = span_flat
-        result['matchFlags'] = (
-            result['match'] | (result['coverageIgnore'] << 2) |
-            targetStartEnd << 5 | int(result['matcher_does_nothing'] and result['match']) << 6)
+        result['matchFlags'] = \
+            result['match'] | \
+            (result['coverageIgnore'] << 2) | \
+            targetStartEnd << 5 | \
+            int(result['matcher_does_nothing'] and result['match']) << 6
         return result

     def handleMatchTelemetry(self, parser, match, line, targetStartEnd):
-        match_telemetry = self.getMatchTelemetry(parser, match, line, targetStartEnd)
+        match_telemetry = self.getMatchTelemetry(
+            parser, match, line, targetStartEnd)
         parser.updateParsingStats(match_telemetry)
         parser.backend.addMatchTelemetry(match_telemetry)
         parser.annotator.annotate(match_telemetry)

 class ParsingContext(object):  # use slots?
     Start = 0
     End = 1

     def __init__(self, index, sections, compiledMatcher, startEnd):
         self.index = index
         self.startEnd = startEnd
         self.sections = sections
         self.compiledMatcher = compiledMatcher
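ParsingContext.Start/End are one half of the integer state encoding used throughout the parser: a flat matcher index and a start/end flag are packed as 2 * index + startEnd (see the nextI computation above; doStep later decodes it with // 2 and % 2). A minimal round-trip sketch:

    def pack(index, start_end):  # start_end: 0 = Start, 1 = End
        return 2 * index + start_end

    def unpack(packed):
        return packed // 2, packed % 2

    assert unpack(pack(7, 1)) == (7, 1)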
 def extractOnCloseTriggers(obj):
     """extracts all triggers from the object obj

@@ -843,6 +894,7 @@ def extractOnCloseTriggers(obj):
             triggers[name] = getattr(obj, attr)
     return triggers

+
 def extractOnOpenTriggers(obj):
     """extracts all triggers from the object obj

@@ -856,6 +908,7 @@ def extractOnOpenTriggers(obj):
             triggers[name] = getattr(obj, attr)
     return triggers

+
 def extractStrValueTransforms(obj):
     """extracts all string-to-value transformations from obj

@@ -880,6 +933,7 @@ def extractStrValueTransforms(obj):
             transform[name] = [callback, units]
     return transform

+
 class SimpleParserBuilder(object):
     def __init__(self, rootMatcher, metaInfoEnv, metaInfoToKeep=None, default_units=None, metainfo_units=None, strValueTransform=None):
         """

@@ -909,21 +963,22 @@ class SimpleParserBuilder(object):
         missingSections = {}
         for matcher in self.flatIndex:
             for metaName in matcher.directMetaNames():
-                if not metaName in self.metaInfoEnv:
+                if metaName not in self.metaInfoEnv:
                     ms = missingMeta.get(metaName, None)
                     if ms:
                         ms.append(matcher)
                     else:
                         missingMeta[metaName] = [matcher]
             for sectionName in matcher.sections:
-                if not sectionName in self.metaInfoEnv:
+                if sectionName not in self.metaInfoEnv:
                     ms = missingSections.get(sectionName, None)
                     if ms:
                         ms.append(matcher)
                     else:
                         missingSections[sectionName] = [matcher]
         for metaName, matchers in missingMeta.items():
-            outF.write("Meta info %s is not in the meta info, but is used in the following matchers:\n" % metaName)
+            outF.write(
+                "Meta info %s is not in the meta info, but is used in the following matchers:\n" % metaName)
             for m in matchers:
                 outF.write("  ")
                 outF.write(m.desc())

@@ -937,7 +992,8 @@ class SimpleParserBuilder(object):
 }
 """ % (metaName))
         for sectionName, matchers in missingSections.items():
-            outF.write("A section named %s is not defined in the meta info, but is used in the following matchers:\n" % sectionName)
+            outF.write(
+                "A section named %s is not defined in the meta info, but is used in the following matchers:\n" % sectionName)
             for m in matchers:
                 outF.write("  ")
                 outF.write(m.desc())

@@ -1002,7 +1058,8 @@ class SimpleParserBuilder(object):
                 if m == matcher:
                     continue
                 if m.weak:
-                    raise Exception("Weak not supported in unordered superMatcher for " + m.desc())
+                    raise Exception(
+                        "Weak not supported in unordered superMatcher for " + m.desc())
                 if m.floating:
                     floatingMatchers.append((ParsingContext.Start, m))
                     floatingMatchersEnd.append((ParsingContext.Start, m))

@@ -1011,12 +1068,18 @@ class SimpleParserBuilder(object):
                     firstMatchersEnd.append((ParsingContext.Start, m))
         else:
             raise Exception("unknown subFlags %d" % subFlags)
-        logger.debug("firstMatchers: %s", [(x[0],x[1].index) for x in firstMatchers])
-        logger.debug("firstMatchersEnd %s", [(x[0],x[1].index) for x in firstMatchersEnd])
-        logger.debug("inheritedStealingMatchers\n %s", [(x[0],x[1].index) for x in inheritedStealingMatchers])
-        logger.debug("floatingMatchers %s", [(x[0],x[1].index) for x in floatingMatchers])
-        logger.debug("floatingMatchersEnd %s", [(x[0],x[1].index) for x in floatingMatchersEnd])
-        logger.debug("inheritedFloatingMatchers %s", [(x[0],x[1].index) for x in inheritedFloatingMatchers])
+        logger.debug("firstMatchers: %s", [
+                     (x[0], x[1].index) for x in firstMatchers])
+        logger.debug("firstMatchersEnd %s", [
+                     (x[0], x[1].index) for x in firstMatchersEnd])
+        logger.debug("inheritedStealingMatchers\n %s", [
+                     (x[0], x[1].index) for x in inheritedStealingMatchers])
+        logger.debug("floatingMatchers %s", [
+                     (x[0], x[1].index) for x in floatingMatchers])
+        logger.debug("floatingMatchersEnd %s", [
+                     (x[0], x[1].index) for x in floatingMatchersEnd])
+        logger.debug("inheritedFloatingMatchers %s", [
+                     (x[0], x[1].index) for x in inheritedFloatingMatchers])
         self.compiledMatchers[matcher.index] = CompiledMatcher(
             matcher=matcher,
             possibleNexts=firstMatchers + inheritedStealingMatchers + floatingMatchers + inheritedFloatingMatchers,

@@ -1024,7 +1087,8 @@ class SimpleParserBuilder(object):
             parserBuilder=self)

     def scanMatcher(self, matcher, inheritedStealingMatchers, inheritedFloatingMatchers):
-        self.handleMatcher(matcher, inheritedStealingMatchers, inheritedFloatingMatchers)
+        self.handleMatcher(matcher, inheritedStealingMatchers,
+                           inheritedFloatingMatchers)
         superMatcher = matcher.superMatcher
         firstMatchers = []
         floatingMatchers = []

@@ -1048,7 +1112,8 @@ class SimpleParserBuilder(object):
                 if m == matcher:
                     continue
                 if m.weak:
-                    raise Exception("Weak not supported in unordered superMatcher for " + m.desc())
+                    raise Exception(
+                        "Weak not supported in unordered superMatcher for " + m.desc())
                 if m.floating:
                     floatingMatchers.append((ParsingContext.Start, m))
                 else:

@@ -1075,7 +1140,8 @@ class SimpleParserBuilder(object):
         if self.metaInfoToKeep:
             logger.info("Optimizing parsing tree...")
             optimizer = ParserOptimizer()
-            optimizer.optimizeParsingTree(self.rootMatcher, self.metaInfoToKeep)
+            optimizer.optimizeParsingTree(
+                self.rootMatcher, self.metaInfoToKeep)
             # self.rootMatcher.pruneParsingTree(self.metaInfoToKeep)
         # If logging is activated, print out the pruned tree for debugging

@@ -1092,7 +1158,8 @@ class SimpleParserBuilder(object):
         for i in range(len(self.flatIndex)):
             if i != 0:
                 outF.write(", ")
-            jsonIndentF(self.flatIndex[i].toDict(), outF, extraIndent = extraIndent + 2)
+            jsonIndentF(self.flatIndex[i].toDict(),
+                        outF, extraIndent=extraIndent + 2)
         outF.write("\n")
         outF.write(" " * extraIndent)
         outF.write("]")

@@ -1109,11 +1176,13 @@ class SimpleParserBuilder(object):
             outF.write(", ")
             jsonIndentF(str(i), outF)
             outF.write(":")
-            jsonIndentF(self.compiledMatchers[i].toDict(), outF, extraIndent = extraIndent + 2)
+            jsonIndentF(self.compiledMatchers[i].toDict(
+            ), outF, extraIndent=extraIndent + 2)
         outF.write("\n")
         outF.write(" " * extraIndent)
         outF.write("}")

 class SimpleParser(object):
     def __init__(self, parserBuilder, fIn, backend, superContext=None, baseUri=None, basePath=None):
         self.parserBuilder = parserBuilder

@@ -1181,7 +1250,8 @@ class SimpleParser(object):
             if callback:
                 callback(self.backend, None, None)
-        self.context.append(ParsingContext(stateIndex, sects, compiledMatcher, ParsingContext.Start))
+        self.context.append(ParsingContext(
+            stateIndex, sects, compiledMatcher, ParsingContext.Start))

     def contextClose(self, cNow):

@@ -1218,11 +1288,12 @@ class SimpleParser(object):
         return str(["#%d %s" % (c.index, ["start", "end"][c.startEnd]) for c in self.context])

     def goToMatcher(self, targetMatcher, startEnd):
-        logger.debug("goToMatcher(#%d, %s)", targetMatcher.index, ["start", "end"][startEnd])
+        logger.debug("goToMatcher(#%d, %s)", targetMatcher.index,
+                     ["start", "end"][startEnd])
         states = set(c.index for c in self.context)
         targetStack = []
         mNow = targetMatcher
-        while (mNow and not mNow.index in states):
+        while (mNow and mNow.index not in states):
             targetStack.append(mNow)
             mNow = mNow.superMatcher
         if not mNow:

@@ -1254,7 +1325,8 @@ class SimpleParser(object):
             return False
         currentContext = self.context[len(self.context) - 1]
         if logger.isEnabledFor(logging.DEBUG):
-            logger.debug("lineNr: %d context: %s", self.fIn.lineNr,self.contextDesc())
+            logger.debug("lineNr: %d context: %s",
+                         self.fIn.lineNr, self.contextDesc())
         if currentContext.startEnd == ParsingContext.Start:
             nextI = currentContext.compiledMatcher.findNextMatch(self)
         else:

@@ -1266,7 +1338,8 @@ class SimpleParser(object):
             while self.context:
                 self.contextPop()
             if nextI != -1 and nextI != -3:
-                raise Exception("finished with error with parsing context %s" % (cNames))
+                raise Exception(
+                    "finished with error with parsing context %s" % (cNames))
         else:
             index = nextI // 2
             startEnd = nextI % 2

@@ -1275,10 +1348,12 @@ class SimpleParser(object):
                 logger.debug("new context: %s\n", self.contextDesc())
             currentCtx = self.context[len(self.context) - 1]
             if startEnd == ParsingContext.End:
-                self.lastMatch = currentCtx.compiledMatcher.handleEndMatch(self)
+                self.lastMatch = currentCtx.compiledMatcher.handleEndMatch(
+                    self)
                 self.contextClose(currentCtx)
             else:
-                self.lastMatch = currentCtx.compiledMatcher.handleStartMatch(self)
+                self.lastMatch = currentCtx.compiledMatcher.handleStartMatch(
+                    self)
         return len(self.context) > 0

     def parse(self):

@@ -1300,7 +1375,8 @@
 def compileParser(simpleParser, metaInfo, metaInfoToKeep, default_units=None, metainfo_units=None, strValueTransform=None):
     """compiles the given simple parser"""
-    parserBuilder = SimpleParserBuilder(simpleParser, metaInfo, metaInfoToKeep, default_units, metainfo_units, strValueTransform)
+    parserBuilder = SimpleParserBuilder(
+        simpleParser, metaInfo, metaInfoToKeep, default_units, metainfo_units, strValueTransform)
     if logger.isEnabledFor(logging.DEBUG):
         s = io.StringIO()
         s.write("matchers:")

@@ -1316,9 +1392,11 @@ def compileParser(simpleParser, metaInfo, metaInfoToKeep, default_units=None, me
         logger.debug(s.getvalue())
     return parserBuilder

+
 def runParser(compiledParser, backend, superContext, fIn, uri, path):
     """parses the open file fIn with the given compiledParser into the backend using superContext as parser SuperContext"""
-    parser = compiledParser.buildParser(PushbackLineFile(fIn), backend, superContext = superContext, baseUri = uri, basePath = path)
+    parser = compiledParser.buildParser(PushbackLineFile(
+        fIn), backend, superContext=superContext, baseUri=uri, basePath=path)
     try:
         superContext.startedParsing(fIn.name, parser)
     except AttributeError:

@@ -1327,12 +1405,14 @@ def runParser(compiledParser, backend, superContext, fIn, uri, path):
     parser.parse()
     return parser.parsingStats

+
 def defaultParseFile(parserInfo):
     def parseF(parserBuilder, uri, path, backend, superContext):
         with open(path, "r") as fIn:
             backend.startedParsingSession(uri, parserInfo)
             try:
-                parsingStats = runParser(parserBuilder, backend, superContext, fIn, uri, path)
+                parsingStats = runParser(
+                    parserBuilder, backend, superContext, fIn, uri, path)
                 backend.finishedParsingSession(
                     "ParseSuccess", None,
                     parsingStats=parsingStats)

@@ -1343,6 +1423,7 @@ def defaultParseFile(parserInfo):
                     parsingStats={})
     return parseF

+
 def mainFunction(mainFileDescription,
                  metaInfoEnv,
                  parserInfo,

@@ -1458,12 +1539,12 @@ def mainFunction(mainFileDescription,
     metaInfoEnv.serialize(outF.write, subGids=True, selfGid=True)
     outF.flush()
     dictReader = ParseStreamedDicts(sys.stdin)
-    toOuput = list(metaInfoEnv.infoKinds.keys())
     # Initialize the backend where the final results are printed after they are
     # possibly filtered by the caching ActiveBackend.
     if superBackend is None:
-        jsonBackend = JsonParseEventsWriterBackend(metaInfoEnv, outF, writeMatchTelemetry=writeMatchTelemetry)
+        jsonBackend = JsonParseEventsWriterBackend(
+            metaInfoEnv, outF, writeMatchTelemetry=writeMatchTelemetry)
     else:
         jsonBackend = superBackend

@@ -1505,7 +1586,8 @@ def mainFunction(mainFileDescription,
     if specialize:
         specializationInfo = dictReader.readNextDict()
         if specializationInfo is None or specializationInfo.get("type", "") != "nomad_parser_specialization_1_0":
-            raise Exception("expected a nomad_parser_specialization_1_0 as first dictionary, got " + json.dumps(specializationInfo))
+            raise Exception(
+                "expected a nomad_parser_specialization_1_0 as first dictionary, got " + json.dumps(specializationInfo))
         metaInfoToKeep = specializationInfo.get("metaInfoToKeep")
     if fileToParse:

@@ -1529,13 +1611,15 @@ def mainFunction(mainFileDescription,
                 # create file-specific json backend
                 outFile = open(outFileName, 'w')
                 outFile.write("[")
-                outFilejsonBackend = JsonParseEventsWriterBackend(metaInfoEnv, outFile, writeMatchTelemetry=writeMatchTelemetry)
+                outFilejsonBackend = JsonParseEventsWriterBackend(
+                    metaInfoEnv, outFile, writeMatchTelemetry=writeMatchTelemetry)
                 # override superBackend for this parseFile
                 backend.superBackend = outFilejsonBackend
             logger.warning("# parsing: %s", toRead['mainFile'])
             try:
-                parseFile(parserBuilder, toRead['mainFileUri'], toRead['mainFile'], backend, superContext)
-            except Exception as e:
+                parseFile(
+                    parserBuilder, toRead['mainFileUri'], toRead['mainFile'], backend, superContext)
+            except Exception:
                 logger.error(traceback.format_exc())
             if outFile is not None:
                 # we had the comma already from last round

@@ -1549,6 +1633,7 @@ def mainFunction(mainFileDescription,
         backend.superBackend = jsonBackend
         outF.write("]\n")
 class AncillaryParser(object):
     """This is an ancillary parser that can be used inside a running parser to parse external files.

@@ -1560,6 +1645,7 @@ class AncillaryParser(object):
     that are already opened by the main parser. You can use cachingLevelForMetaName to set a section to Ignore or Cache.
     Thus, you can still use sections and their onClose triggers, but no opening events are written.
     """

+
     def __init__(self, fileDescription, parser, cachingLevelForMetaName, superContext):
         """Compiles parser and set up backend.

@@ -1573,7 +1659,8 @@ class AncillaryParser(object):
         default_units = parser.parserBuilder.default_units
         metainfo_units = parser.parserBuilder.metainfo_units
         # compile parser
-        self.compiledParser = compileParser(fileDescription, parser.parserBuilder.metaInfoEnv, parser.parserBuilder.metaInfoToKeep, default_units, metainfo_units)
+        self.compiledParser = compileParser(fileDescription, parser.parserBuilder.metaInfoEnv,
+                                            parser.parserBuilder.metaInfoToKeep, default_units, metainfo_units)
         # save superContext
         self.superContext = superContext
         # extract onClose functions from superContext

@@ -1590,7 +1677,8 @@ class AncillaryParser(object):
             onClose=onClose,
             onOpen=onOpen,
             superBackend=parser.backend.superBackend,
-            propagateStartFinishParsing = False, # write no parser info upon start and end of parsing
+            # write no parser info upon start and end of parsing
+            propagateStartFinishParsing=False,
             default_units=default_units,
             metainfo_units=metainfo_units)

@@ -1602,14 +1690,17 @@ class AncillaryParser(object):
         """
         currentUri = self.mainParser.uriForPath(fIn.name)
         if currentUri:
-            self.backend.superBackend.addValue("calculation_file_uri", currentUri)
-        runParser(self.compiledParser, self.backend, self.superContext, PushbackLineFile(fIn), self.mainParser.baseUri, self.mainParser.basePath)
+            self.backend.superBackend.addValue(
+                "calculation_file_uri", currentUri)
+        runParser(self.compiledParser, self.backend, self.superContext, PushbackLineFile(
+            fIn), self.mainParser.baseUri, self.mainParser.basePath)

 class ParserOptimizer(object):
     """For optimizing a hierarchy of SimpleMatchers based on a list of metainfo
     names that should be included in the parsing.
     """

     def optimizeParsingTree(self, rootMatcher, metaInfoToKeep=None):
         """This function will remove any parsing unnecessary parsing actions
         from the parsing tree based on the given list of metainfo names to keep

@@ -1634,7 +1725,8 @@ class ParserOptimizer(object):
         parseList = []
         leaves = []
         matchDict = {}
-        self.searchMatchers(rootMatcher, metaInfoToKeep, parseList, leaves, matchDict)
+        self.searchMatchers(rootMatcher, metaInfoToKeep,
+                            parseList, leaves, matchDict)
         # If something was not matched, prompt the user
         for name, found in matchDict.items():

@@ -1675,12 +1767,13 @@ class ParserOptimizer(object):
         # See if this matcher has metainfos of interest and add any possible
         # dependencies to the list of metainfos to keep
         found = False
-        for i_name, name in enumerate(metaNames):
+        for _, name in enumerate(metaNames):
             if name in myMetanames:
                 found = True
                 matchDict[name] = True
                 dependencies = simpleMatcher.dependencies.get(name)
-                logger.debug("Found matcher: {}".format(simpleMatcher.startReStr))
+                logger.debug("Found matcher: {}".format(
+                    simpleMatcher.startReStr))
                 simpleMatcher.keep = True
                 parseList.append(simpleMatcher)

@@ -1695,7 +1788,8 @@ class ParserOptimizer(object):
         # Check the children
         for submatcher in simpleMatcher.subMatchers:
-            self.searchMatchers(submatcher, metaNames, parseList, unParsedleaves, matchDict)
+            self.searchMatchers(submatcher, metaNames,
+                                parseList, unParsedleaves, matchDict)
         # Save non-parsed leaves
         if not simpleMatcher.subMatchers and not found:

@@ -1740,8 +1834,8 @@ class ParserOptimizer(object):
             simpleMatcher: The simplematcher that is the target of the operation
         """
         if simpleMatcher.keep:
-            raise Exception("Trying to remove a matcher that should not be removed from tree.")
-            return
+            raise Exception(
+                "Trying to remove a matcher that should not be removed from tree.")
         parent = simpleMatcher.superMatcher
         if not parent:

@@ -1754,7 +1848,8 @@ class ParserOptimizer(object):
         children = parent.subMatchers
         for i_child, child in enumerate(children):
             if child == simpleMatcher:
-                logger.debug("REMOVED: {}".format(simpleMatcher.startReStr))
+                logger.debug("REMOVED: {}".format(
+                    simpleMatcher.startReStr))
                 del children[i_child]
                 return
         else:
...