Commit fd3199ae authored by Markus Scheidgen

Removed dependency on old metainfo.

parent dd38c551
@@ -82,22 +82,18 @@ class ParserInterface(with_metaclass(ABCMeta, object)):
self.parser_context.parser_info = self.get_parser_info()
self.main_parser = None
# Setup the metainfo environment. All parsers that inherit from this
# class will have a static class attribute that will store the metainfo
# environment. This way every instance of a parser doesn't have to load
# the environment separately because it is identical for each instance.
if type(self).metainfo_env is None:
metainfo_env, warn = load_metainfo(self.get_metainfo_filename())
type(self).metainfo_env = metainfo_env
self.parser_context.metainfo_env = metainfo_env
else:
self.parser_context.metainfo_env = type(self).metainfo_env
# Initialize the backend. Use local backend if none given
# Initialize the backend.
metainfo_package = os.path.basename(self.get_metainfo_filename())
if backend is not None:
self.parser_context.super_backend = backend(type(self).metainfo_env)
self.parser_context.super_backend = backend(metainfo_package)
else:
self.parser_context.super_backend = LocalBackend(type(self).metainfo_env, debug=self.debug, store=self.store)
from nomad.parsing.legacy import Backend
self.parser_context.super_backend = Backend(metainfo_package)
# Setup the metainfo environment.
metainfo_env = self.parser_context.super_backend.metaInfoEnv()
self.parser_context.metainfo_env = metainfo_env
type(self).metainfo_env = metainfo_env
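# A minimal sketch of the shared-environment pattern used above: the first
# instance populates a class attribute, later instances reuse it (the dict
# below is a hypothetical stand-in for the real metainfo environment):
class CachedEnvParser(object):
    metainfo_env = None  # shared by all instances of the subclass

    def __init__(self):
        if type(self).metainfo_env is None:
            type(self).metainfo_env = {"loaded": True}  # expensive load happens once
        self.env = type(self).metainfo_env

assert CachedEnvParser().env is CachedEnvParser().env  # same shared object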
# Check the list of default units
default_unit_map = {}
......
@@ -261,14 +261,19 @@ class CachingSectionManager(object):
except Exception:
raise Exception("Cannot add array values for metadata %s to section %d (%d) of %s, as it is not open" % (valueMetaInfo.name, gI, gIndex, self.metaInfo.name))
class CachingDataManager(object):
def __init__(self, metaInfo, superSectionManager, cachingLevel):
self.metaInfo = metaInfo
self.superSectionManager = superSectionManager
self.cachingLevel = cachingLevel
class ActiveBackend(object):
def __init__(self, metaInfoEnv, sectionManagers, dataManagers, superBackend, propagateStartFinishParsing = True, default_units=None, metainfo_units=None):
def __init__(
self, metaInfoEnv, sectionManagers, dataManagers, superBackend,
propagateStartFinishParsing=True, default_units=None, metainfo_units=None):
self.__metaInfoEnv = metaInfoEnv
self.sectionManagers = sectionManagers
self.dataManagers = dataManagers
@@ -278,36 +283,62 @@ class ActiveBackend(object):
self.metainfo_units = metainfo_units # A mapping between metaname and a unit definition.
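# A hypothetical example of such a mapping (metanames invented for illustration):
# metainfo_units = {"x_mycode_energy_total": "eV", "x_mycode_atom_forces": "eV/angstrom"}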
@classmethod
def activeBackend(cls, metaInfoEnv, cachingLevelForMetaName = {}, defaultDataCachingLevel = CachingLevel.ForwardAndCache, defaultSectionCachingLevel = CachingLevel.Forward, superBackend = None,
onClose = {}, onOpen = {}, propagateStartFinishParsing = True, default_units=None, metainfo_units=None):
def activeBackend(
cls,
metaInfoEnv,
cachingLevelForMetaName={},
defaultDataCachingLevel=CachingLevel.ForwardAndCache,
defaultSectionCachingLevel=CachingLevel.Forward,
superBackend=None,
onClose={}, onOpen={},
propagateStartFinishParsing=True,
default_units=None,
metainfo_units=None):
for sectionName in onClose.keys():
if not sectionName in metaInfoEnv:
raise Exception("Found trigger for non existing section %s" % sectionName)
if sectionName not in metaInfoEnv:
raise Exception(
"Found trigger for non existing section %s" % sectionName)
elif metaInfoEnv.infoKinds[sectionName].kindStr != "type_section":
raise Exception("Found trigger for %s which is not a section but %s" %
(sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
raise Exception(
"Found trigger for %s which is not a section but %s" %
(sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
for sectionName in onOpen.keys():
if not sectionName in metaInfoEnv:
raise Exception("Found trigger for non existing section %s" % sectionName)
if sectionName not in metaInfoEnv:
raise Exception(
"Found trigger for non existing section %s" % sectionName)
elif metaInfoEnv.infoKinds[sectionName].kindStr != "type_section":
raise Exception("Found trigger for %s which is not a section but %s" %
(sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
raise Exception(
"Found trigger for %s which is not a section but %s" %
(sectionName, json.dumps(metaInfoEnv.infoKinds[sectionName].toDict(), indent=2)))
sectionManagers = {}
for ikNames, ik in metaInfoEnv.infoKinds.items():
for ik in metaInfoEnv.infoKinds.values():
if ik.kindStr == "type_section":
parentS, parentO = list(metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[],[]]))
parentS, parentO = list(
metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[], []]))
parentS.sort()
cachingLevel = reduce(CachingLevel.restrict, [cachingLevelForMetaName.get(x, defaultSectionCachingLevel) for x in ([ik.name] + parentS + parentO)])
cachingLevel = reduce(
CachingLevel.restrict,
[
cachingLevelForMetaName.get(x, defaultSectionCachingLevel)
for x in ([ik.name] + parentS + parentO)])
sectionManagers[ik.name] = CachingSectionManager(
metaInfo = ik,
parentSectionNames = parentS,
storeInSuper = (cachingLevel == CachingLevel.ForwardAndCache or cachingLevel == CachingLevel.Cache or cachingLevel == CachingLevel.PreOpenedCache),
forwardOpenClose = (cachingLevel == CachingLevel.Forward or cachingLevel == CachingLevel.ForwardAndCache),
preOpened = (cachingLevel == CachingLevel.PreOpenedCache or cachingLevel == CachingLevel.PreOpenedIgnore),
onClose = onClose.get(ik.name, []),
onOpen = onOpen.get(ik.name, []))
metaInfo=ik,
parentSectionNames=parentS,
storeInSuper=(cachingLevel == CachingLevel.ForwardAndCache or cachingLevel == CachingLevel.Cache or cachingLevel == CachingLevel.PreOpenedCache),
forwardOpenClose=(cachingLevel == CachingLevel.Forward or cachingLevel == CachingLevel.ForwardAndCache),
preOpened=(cachingLevel == CachingLevel.PreOpenedCache or cachingLevel == CachingLevel.PreOpenedIgnore),
onClose=onClose.get(ik.name, []),
onOpen=onOpen.get(ik.name, []))
dataManagers = {}
for ikNames, ik in metaInfoEnv.infoKinds.items():
for ik in metaInfoEnv.infoKinds.values():
if ik.kindStr == "type_document_content" or ik.kindStr == "type_dimension":
superSectionNames = metaInfoEnv.firstAncestorsByType(ik.name).get("type_section", [[]])[0]
if not superSectionNames:
@@ -316,9 +347,15 @@ class ActiveBackend(object):
raise Exception("MetaInfo of concrete value %s has multiple superSections (%s)" %
(ik.name, superSectionNames))
sectionManager = sectionManagers[superSectionNames[0]]
dataManagers[ik.name] = CachingDataManager(ik, sectionManager,
CachingLevel.restrict(cachingLevelForMetaName.get(ik.name, defaultDataCachingLevel), CachingLevel.Forward if sectionManager.forwardOpenClose or sectionManager.preOpened else CachingLevel.Ignore))
return ActiveBackend(metaInfoEnv, sectionManagers, dataManagers, superBackend, propagateStartFinishParsing, default_units, metainfo_units)
dataManagers[ik.name] = CachingDataManager(
ik, sectionManager,
CachingLevel.restrict(
cachingLevelForMetaName.get(ik.name, defaultDataCachingLevel),
CachingLevel.Forward if sectionManager.forwardOpenClose or sectionManager.preOpened else CachingLevel.Ignore))
return ActiveBackend(
metaInfoEnv, sectionManagers, dataManagers, superBackend,
propagateStartFinishParsing, default_units, metainfo_units)
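# A minimal sketch of the reduce(...) idiom used in activeBackend above to
# combine the caching levels of a section and its ancestors. The ranking and
# restrict() below are stand-ins, not the real CachingLevel.restrict:
from functools import reduce

RANK = {"Forward": 0, "ForwardAndCache": 1, "Cache": 2, "Ignore": 3}  # assumed order

def restrict(a, b):
    # keep the more restrictive of the two levels under the assumed ranking
    return a if RANK[a] >= RANK[b] else b

assert reduce(restrict, ["Forward", "Cache", "ForwardAndCache"]) == "Cache"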
def appendOnClose(self, sectionName, onClose):
self.sectionManagers[sectionName].onClose.append(onClose)
......
@@ -10,7 +10,7 @@ import json
import os, re
from nomadcore.json_support import jsonCompactS, jsonCompactD, jsonIndentD
from io import open
import nomad_meta_info
import metainfo
"""objects to handle a local InfoKinds with unique name (think self written json)"""
class InfoKindEl(object):
@@ -186,7 +186,7 @@ class RelativeDependencySolver(object):
def __call__(self, infoKindEnv, source, dep):
if "metainfoPath" in dep:
basePath = nomad_meta_info.__file__
basePath = metainfo.__file__
path = dep["metainfoPath"]
elif "relativePath" in dep:
basePath = source.get('path')
@@ -677,11 +677,10 @@ def load_metainfo(filename, dependencyLoader=None, extraArgsHandling=InfoKindEl.
Tuple containing the metainfo environment and any warnings encountered
during loading.
"""
import nomad_meta_info
if os.path.isfile(filename):
path = filename
else:
path = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(nomad_meta_info.__file__)), "{}".format(filename)))
path = os.path.join(os.path.dirname(metainfo.__file__), filename)
return loadJsonFile(path, dependencyLoader, extraArgsHandling, uri)
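# How the fallback above resolves a file name relative to the installed
# metainfo package (a sketch; "common.json" is a hypothetical file name):
import os
import metainfo

filename = "common.json"  # hypothetical
path = filename if os.path.isfile(filename) else os.path.join(
    os.path.dirname(metainfo.__file__), filename)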
def loadJsonStream(fileStream, name = None, dependencyLoader = None, extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS, filePath = None, uri = None):
......
from future import standard_library
standard_library.install_aliases()
from builtins import object
import json
import io
# the 3 possible states while reading input character per character
IN_NORMAL_TEXT = 0
IN_STRING = 1
IN_STRING_ESCAPE = 2
def readDict(inF, line0):
"reads a dictionary from an indented json"
status = IN_NORMAL_TEXT
# iterator for character position in string
i = 0
# counts number of open {
n_paren = 0
# used for StringIO if dictionary is distributed over several blocks
fullDictStr = None
line = line0
iline = 0
while True:
# iterate through characters of current block
while i < len(line):
c = line[i]
i += 1
if status == IN_STRING_ESCAPE:
status = IN_STRING
continue
elif status == IN_NORMAL_TEXT:
# skip characters that do not change state
while (c != '{' and c != '}' and c != '"' and i < len(line)):
c = line[i]
i += 1
if c == '{':
# skip the characters before the first opening {
if n_paren == 0:
line = line[i - 1:]
i = 1
n_paren += 1
elif c == '}':
n_paren -= 1
# found closing }, return dictionary
if n_paren == 0:
# write block up to current character to string or StringIO
# only use StringIO if dictionary is distributed over several blocks
if fullDictStr is None:
outS = line[:i]
else:
fullDictStr.write(line[:i])
outS = fullDictStr.getvalue()
# reset StringIO
fullDictStr.close()
fullDictStr = None
# reset block to remaining characters
line = line[i:]
i = 0
# dictionary output
try:
outD = json.loads(outS)
except Exception:
raise Exception("Could not convert string " + repr(outS) + " with json.loads to dictionary.")
return outD
elif c == '"':
status = IN_STRING
elif status == IN_STRING:
# skip characters that do not change state
while (c != '"' and c != '\\' and i < len(line)):
c = line[i]
i += 1
if c == '"':
status = IN_NORMAL_TEXT
elif c == '\\':
status = IN_STRING_ESCAPE
# if we arrive here, then the dictionary is distributed over several blocks
# write block to StringIO but only if an opening { already found
if n_paren > 0:
if fullDictStr is None:
fullDictStr = io.StringIO()
fullDictStr.write(line)
else:
# failed to find dictionary in first line, stopping
return None
# read new block from input
line = inF.readline()
if not line:
# early EOF
return None
iline += 1
i = 0
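# Usage sketch for readDict: it extracts the first balanced {...} from the
# stream, even when preceded by noise or split across lines:
import io

stream = io.StringIO('header text {"a": 1,\n "b": [2, 3]} trailing')
print(readDict(stream, stream.readline()))
# -> {'a': 1, 'b': [2, 3]}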
def readArray(inF, line0):
"reads an array from an indented json"
status = IN_NORMAL_TEXT
# iterator for character position in string
i = 0
# counts number of open [
n_paren = 0
# used for StringIO if array is distributed over several blocks
fullDictStr = None
line = line0
iline = 0
while True:
# iterate through characters of current block
while i < len(line):
c = line[i]
i += 1
if status == IN_STRING_ESCAPE:
status = IN_STRING
continue
elif status == IN_NORMAL_TEXT:
# skip characters that do not change state
while (c != '[' and c != ']' and c != '"' and i < len(line)):
c = line[i]
i += 1
if c == '[':
# skip the characters before the first opening [
if n_paren == 0:
line = line[i - 1:]
i = 1
n_paren += 1
elif c == ']':
n_paren -= 1
# found closing ], return array
if n_paren == 0:
# write block up to current character to string or StringIO
# only use StringIO if array is distributed over several blocks
if fullDictStr is None:
outS = line[:i]
else:
fullDictStr.write(line[:i])
outS = fullDictStr.getvalue()
# reset StringIO
fullDictStr.close()
fullDictStr = None
# reset block to remaining characters
line = line[i:]
i = 0
# array output
try:
outD = json.loads(outS)
except Exception:
raise Exception("Could not convert string " + repr(outS) + " with json.loads to array.")
return outD
elif c == '"':
status = IN_STRING
elif status == IN_STRING:
# skip characters that do not change state
while (c != '"' and c != '\\' and i < len(line)):
c = line[i]
i += 1
if c == '"':
status = IN_NORMAL_TEXT
elif c == '\\':
status = IN_STRING_ESCAPE
# if we arrive here, then the array is distributed over several blocks
# write block to StringIO but only if an opening [ already found
if n_paren > 0:
if fullDictStr is None:
fullDictStr = io.StringIO()
fullDictStr.write(line)
else:
# failed to find dictionary in first line, stopping
return None
# read new block from input
line = inF.readline()
if not line:
# early EOF
return None
iline += 1
i = 0
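# readArray works the same way, tracking [ and ] instead of braces:
import io

stream = io.StringIO('values: [1, "two", 3] trailing')
print(readArray(stream, stream.readline()))
# -> [1, 'two', 3]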
class ParseStreamedDicts(object):
"""allows the extraction of JSON dictionaries out of file objects
which are then converted to python dictionaries
therefore, strings must be passed as StringIO objects"""
def __init__(self, inF, blockSize = 8192):
self.inF = inF
# read at least 2 unicode characters (2 x 4 bytes)
if blockSize < 8:
self.blockSize = 8
else:
self.blockSize = blockSize
# read input in blocks
self.blockRead = ""
# set initial state
self.status = IN_NORMAL_TEXT
# iterator for character position in string
self.i = 0
# counts number of open {
self.n_paren = 0
# used for StringIO if dictionary is distributed over several blocks
self.fullDictStr = None
def readNextDict(self):
"""reads input until a complete set of {} is found
the so obtained string is then converted to a python dictionary with json.loads"""
while True:
# iterate through characters of current block
while self.i < len(self.blockRead):
c = self.blockRead[self.i]
self.i += 1
if self.status == IN_STRING_ESCAPE:
self.status = IN_STRING
continue
elif self.status == IN_NORMAL_TEXT:
# skip characters that do not change state
while (c != '{' and c != '}' and c != '"' and self.i < len(self.blockRead)):
c = self.blockRead[self.i]
self.i += 1
if c == '{':
# skip the characters before the first opening {
if self.n_paren == 0:
self.blockRead = self.blockRead[self.i - 1:]
self.i = 1
self.n_paren += 1
elif c == '}':
self.n_paren -= 1
# found closing }, return dictionary
if self.n_paren == 0:
# write block up to current character to string or StringIO
# only use StringIO if dictionary is distributed over several blocks
if self.fullDictStr is None:
outS = self.blockRead[:self.i]
else:
self.fullDictStr.write(self.blockRead[:self.i])
outS = self.fullDictStr.getvalue()
# reset StringIO
self.fullDictStr.close()
self.fullDictStr = None
# reset block to remaining characters
self.blockRead = self.blockRead[self.i:]
self.i = 0
# dictionary output
try:
outD = json.loads(outS)
except Exception:
raise Exception("Could not convert string " + repr(outS) + " with json.loads to dictionary.")
return outD
elif c == '"':
self.status = IN_STRING
elif self.status == IN_STRING:
# skip characters that do not change state
while (c != '"' and c != '\\' and self.i < len(self.blockRead)):
c = self.blockRead[self.i]
self.i += 1
if c == '"':
self.status = IN_NORMAL_TEXT
elif c == '\\':
self.status = IN_STRING_ESCAPE
# if we arrive here, then the dictionary is distributed over several blocks
# write block to StringIO but only if an opening { already found
if self.n_paren > 0:
if self.fullDictStr is None:
self.fullDictStr = io.StringIO()
self.fullDictStr.write(self.blockRead)
# read new block from input
self.blockRead = self.inF.read(self.blockSize)
self.i = 0
# reached end of input
if not self.blockRead:
return None
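# Usage sketch for ParseStreamedDicts: it yields one balanced {...} object at
# a time, even when objects straddle the fixed-size read blocks:
import io

parser = ParseStreamedDicts(io.StringIO('{"a": 1} noise {"b": 2}'), blockSize=8)
print(parser.readNextDict())  # -> {'a': 1}
print(parser.readNextDict())  # -> {'b': 2}
print(parser.readNextDict())  # -> None (end of input)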
@@ -48,246 +48,3 @@ def valueForStrValue(strValue, dtypeStr):
raise Exception("unexpected dtypeStr %s" % (dtypeStr))
except Exception as e:
raise Exception("Error when converting %r to dtypeStr %r" % (strValue, dtypeStr), e)
class JsonParseEventsWriterBackend(object):
"""Simple backend that writes out the parse events in json format"""
# json content is written to fileOut
def __init__(self, metaInfoEnv, fileOut = sys.stdout, writeMatchTelemetry=False):
self.__metaInfoEnv = metaInfoEnv
self.fileOut = fileOut
self.__gIndex = -1
self.__openSections = set()
self.__writeComma = False
self.__lastIndex = {}
self.writeMatchTelemetry = writeMatchTelemetry
self.stats = {}
def addStat(self, name):
self.stats[name] = self.stats.get(name, 0) + 1
def sendStats(self):
stats = {"parser":{"name":"fhi-aims", "version": "0.3"},
"data":self.stats}
url = 'https://nomad-dev.rz-berlin.mpg.de/parsers/addStat'
#url = 'http://127.0.0.1:8081/parsers/addStat'
data = json.dumps(stats, sort_keys=True)
req = urllib.request.Request(url, data.encode('utf-8'))
response = urllib.request.urlopen(req)
the_page = response.read()
sys.stderr.write("stat sending did answer: " + the_page.decode('utf-8'))
@staticmethod
def __numpyEncoder(o):
"""new default function for json class so that numpy arrays can be encoded"""
# check if object is a numpy array
if isinstance(o, np.ndarray):
# ensure that we have an array with row-major memory order (C like)
if not o.flags['C_CONTIGUOUS']:
o = np.ascontiguousarray(o)
return o.tolist()
# see default method in python/json/encoder.py
elif isinstance(o, set):
return list(sorted(o))
else:
raise TypeError(repr(o) + " is not JSON serializable")
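# The hook above is meant for json.dump's `default=` parameter, which is
# consulted for objects json cannot serialize natively. A standalone sketch:
import json
import numpy as np

def numpy_default(o):
    if isinstance(o, np.ndarray):
        return np.ascontiguousarray(o).tolist()  # row-major list form
    raise TypeError(repr(o) + " is not JSON serializable")

print(json.dumps({"coords": np.arange(3)}, default=numpy_default))
# -> {"coords": [0, 1, 2]}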
def __jsonOutput(self, dic):
"""method to define format of json output"""
if self.__writeComma:
self.fileOut.write(", ")
else:
self.__writeComma = True
json.dump(dic, self.fileOut, indent = 2, separators = (',', ':'), sort_keys=True) # default=self.__numpyEncoder
def startedParsingSession(self, mainFileUri, parserInfo, parserStatus = None, parserErrors = None):
"""should be called when the parsing starts, parserInfo should be a valid json dictionary"""
self.fileOut.write("{\n \"type\": \"nomad_parse_events_1_0\"")
self.sessionMainFileUri = mainFileUri
self.sessionParserInfo = parserInfo
self.sessionParserStatus = parserStatus
self.sessionParserErrors = parserErrors
if mainFileUri is not None:
self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri, sort_keys=True))
if parserInfo is not None:
self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'), sort_keys=True))
if parserStatus is not None:
self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'), sort_keys=True))
if parserErrors is not None:
self.fileOut.write(",\n \"parserErrors\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'), sort_keys=True))
self.fileOut.write(""",
"events": [""")
def finishedParsingSession(self, parserStatus, parserErrors, mainFileUri = None, parserInfo = None,
parsingStats = None):
"""should be called when the parsing finishes"""
self.fileOut.write("]")
if mainFileUri is not None and self.sessionMainFileUri is None:
self.fileOut.write(",\n \"mainFileUri\": " + json.dumps(mainFileUri, sort_keys=True))
if parserInfo is not None and self.sessionParserInfo is None:
self.fileOut.write(",\n \"parserInfo\": " + json.dumps(parserInfo, indent = 2, separators = (',', ':'), sort_keys=True))
if parserStatus is not None and self.sessionParserStatus is None:
self.fileOut.write(",\n \"parserStatus\": " + json.dumps(parserStatus, indent = 2, separators = (',', ':'), sort_keys=True))
if parserErrors is not None and self.sessionParserErrors is None:
self.fileOut.write(",\n \"parserErrors\": " + json.dumps(parserErrors, indent = 2, separators = (',', ':'), sort_keys=True))
if parsingStats is not None:
self.fileOut.write(",\n \"parsingStats\": " + json.dumps(parsingStats, indent = 4, separators = (',', ':'), sort_keys=True))
self.fileOut.write("""
}""")
self.fileOut.flush()
def openContext(self, contextUri):
self.__jsonOutput({"event":"openContext", "nomadUri":contextUri})
def closeContext(self, contextUri):
self.__jsonOutput({"event":"closeContext", "nomadUri":contextUri})
def metaInfoEnv(self):
"""the metaInfoEnv this parser was optimized for"""
return self.__metaInfoEnv
def openSections(self):
"""returns the sections that are still open
sections are identified by metaName and their gIndex"""
return self.__openSections
def sectionInfo(self, metaName, gIndex):
"""returns information on a section (for debugging purposes)"""
if (metaName,gIndex) in self.__openSections:
return "section {} gIndex: {} is open".format(metaName, gIndex)
else:
return "section {} gIndex: {} is closed".format(metaName, gIndex)
def openSection(self, metaName):
"""opens a new section and returns its new unique gIndex"""
newIndex = self.__lastIndex.get(metaName, -1) + 1
self.openSectionWithGIndex(metaName, newIndex)
return newIndex
def openNonOverlappingSection(self, metaName):
"""opens a new non overlapping section"""
if any(x[0] == metaName for x in self.__openSections):
raise Exception("Section %s is not supposed to overlap" % metaName)
return self.openSection(metaName)
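# A minimal usage sketch for this backend, assuming the class as shown above;
# None works for metaInfoEnv here because these calls only store it:
import io

out = io.StringIO()
backend = JsonParseEventsWriterBackend(None, fileOut=out)
backend.startedParsingSession("file:///tmp/run.out", {"name": "demo"})  # hypothetical URI/info
gIndex = backend.openSection("section_run")  # hypothetical section name, returns 0
backend.finishedParsingSession("ParseSuccess", None)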
def openSectionWithGIndex(self, metaName, gIndex):
"""opens a new section where gIndex is generated externally
gIndex should be unique (no reopening of a closed section)"""
self.__lastIndex[metaName] = gIndex
self.__openSections.add((metaName, gIndex))
self.__jsonOutput({"event":"openSection",