From f3ca80af2073c3087fc14e6626eed7019d06ffd8 Mon Sep 17 00:00:00 2001
From: Berk Onat <b.onat@warwick.ac.uk>
Date: Fri, 8 Dec 2017 10:48:18 +0000
Subject: [PATCH] Fixed numpy array update at MetaInfoStorage and string to list conversion at SmartParser

---
Review notes (ignored by git am, not part of the commit message):
 - convertToNumber: replaced eval() on concatenated text with a converter
   table; the str + number concatenation always raised TypeError, and the
   result name could be left unbound.
 - is_number / literal_eval / unitConverter: handlers now also catch the
   exceptions float(), ast.literal_eval() and eval() actually raise.
 - SmartParserCommon: subFunc was tested before it was assigned.
 - Hunk line counts are recomputed; index hashes are those of the
   original commit.

 .../metainfo_storage/MetaInfoStorage.py       | 145 +++++++++++++-----
 .../smart_parser/SmartParserCommon.py         |  61 ++++++++-
 2 files changed, 169 insertions(+), 37 deletions(-)

diff --git a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py
index d32489a..8fab71d 100644
--- a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py
+++ b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py
@@ -5,6 +5,7 @@ import logging
 import json
 import os
 import re
+import ast
 from collections import namedtuple
 
 COMMON_META_INFO_PATH = os.path.normpath(os.path.join(
@@ -15,7 +16,70 @@ PUBLIC_META_INFO_PATH = os.path.normpath(os.path.join(
     os.path.dirname(os.path.abspath(__file__)),
     "../../../../../nomad-meta-info/meta_info/nomad_meta_info/public.nomadmetainfo.json"))
 
-NOTEXCEPT = re.compile(r'[^a-cf-zA-CF-Z!\.?,]')
+NOTEXCEPT = re.compile(r'[a-cf-zA-CF-Z!\?,{}\[\]]')
+
+def is_number(val):
+    """Return True if float(val) succeeds, else False."""
+    try:
+        float(val)
+        return True
+    except (TypeError,ValueError):
+        # float(None) or float([1]) raises TypeError, not ValueError
+        return False
+
+def strcleaner(val):
+    """Return val with each keyword in unwantedkeys removed."""
+    unwantedkeys = [
+        "system", "eval", "return",
+        "ctypes", "setup", "import",
+        "git", "swig", "cython"]
+    for keyword in unwantedkeys:
+        val = val.replace(keyword, '')
+    return val
+
+def strisinstance(val, typ):
+    """Return the name of the requested type the string val parses as.
+
+    typ is one type name or a list/tuple of names; None is returned
+    when val evaluates to none of them.
+    """
+    typlist=[]
+    if isinstance(typ, str):
+        typlist.append(typ)
+    elif isinstance(typ, (tuple,list)):
+        for ty in typ:
+            typlist.append(ty)
+    anytype=None
+    for t in typlist:
+        try:
+            if("list" in t and
+               "[" in val and
+               "]" in val and
+               "," in val):
+                if isinstance(literal_eval(strcleaner(val)), list):
+                    anytype="list"
+                    break
+                elif isinstance(literal_eval(strcleaner(val)), np.ndarray):
+                    anytype="np.ndarray"
+                    break
+            elif("tuple" in t and
+                 "(" in val and
+                 ")" in val and
+                 "," in val):
+                if isinstance(literal_eval(strcleaner(val)), tuple):
+                    anytype="tuple"
+                    break
+        except (TypeError,ValueError,AttributeError):
+            pass
+    return anytype
+
+def literal_eval(val):
+    """Return ast.literal_eval(val), or val itself if that fails."""
+    try:
+        return ast.literal_eval(val)
+    except (TypeError,ValueError,SyntaxError):
+        # unparsable text such as "[1,, 2]" raises SyntaxError
+        return val
 
 class Container(object):
     """The container class for nested data storage
@@ -299,19 +363,36 @@ class Container(object):
         return storeValue, updateValue, localdict
 
     def convertToNumber(self, updateValue, valtype):
-        if(isinstance(updateValue, list) or isinstance(updateValue, tuple)):
-            newUpdateValue = [eval(
-                valtype+"("+str(ival)+")"
-                ) for ival in updateValue]
-        elif isinstance(updateValue, np.ndarray):
-            newUpdateValue = np.asarray([eval(
-                valtype+"("+str(ival)+")"
-                ) for ival in updateValue])
-        elif self.is_number(updateValue):
-            newUpdateValue = eval(valtype+"("+str(updateValue)+")")
-        else:
-            # I hope you know what you are doing
-            newUpdateValue = updateValue
+        """Convert updateValue (scalar, list, tuple or ndarray) to valtype.
+
+        valtype is a type name such as "float" or "int"; for any other
+        name a plain float() conversion is attempted. The value is
+        returned unchanged whenever the conversion fails.
+        """
+        # Explicit callables replace eval() on text built from the input.
+        converters = {"float": float, "int": int, "list": list,
+                      "tuple": tuple, "np.asarray": np.asarray,
+                      "np.array": np.array, "set": set,
+                      "boolean": bool, "bytes": bytes}
+        # Default result, so no branch can leave the name unbound.
+        newUpdateValue = updateValue
+        try:
+            if valtype not in converters:
+                # I hope you know what you are doing
+                newUpdateValue = float(updateValue)
+            elif isinstance(updateValue, (list, tuple)):
+                newUpdateValue = [converters[valtype](
+                    literal_eval(str(ival))
+                    ) for ival in updateValue]
+            elif isinstance(updateValue, np.ndarray):
+                newUpdateValue = np.asarray([converters[valtype](
+                    literal_eval(str(ival))
+                    ) for ival in updateValue])
+            elif is_number(updateValue):
+                newUpdateValue = converters[valtype](
+                    literal_eval(str(updateValue)))
+        except (TypeError,ValueError):
+            newUpdateValue = updateValue
         return newUpdateValue
 
     def convertUnits(self, updateValue, unit, unitdict):
@@ -322,17 +403,20 @@ class Container(object):
         elif isinstance(updateValue, np.ndarray):
             updateValue = updateValue * self.unitConverter(
                 unit, unitdict)
-            #updateValue = np.asarray([self.unitConverter(
-            #    ival, unit, unitdict
-            #    ) for ival in updateValue])
-        elif self.is_number(updateValue):
+        elif is_number(updateValue):
             updateValue = self.convertToNumber(updateValue, "float")
-            updateValue = updateValue * self.unitConverter(
-                unit, unitdict)
-        else:
+            if updateValue:
+                updateValue = updateValue * self.unitConverter(
+                    unit, unitdict)
+        elif isinstance(updateValue, str):
             # I hope you know what you are doing
-            updateValue = float(updateValue) * self.unitConverter(
-                unit, unitdict)
+            try:
+                newUpdateVal = strcleaner(updateValue)
+                newUpdateVal = NOTEXCEPT.sub('', newUpdateVal)
+                updateValue = float(newUpdateVal) * self.unitConverter(
+                    unit, unitdict)
+            except (TypeError,ValueError):
+                pass
         return updateValue
 
     def unitConverter(self, unit, unitdict):
@@ -352,8 +436,12 @@ class Container(object):
         newunit = newunit.replace('-','*').replace(' ', '*').replace('^', "**")
         for key,value in unitdict.items():
             newunit = newunit.replace(str(key), str(value))
-        NOTEXCEPT.sub('', newunit)
-        return float(eval(newunit))
+        newunit = NOTEXCEPT.sub('', newunit)
+        try:
+            return float(eval(newunit))
+        except (ValueError,TypeError,SyntaxError,NameError):
+            # malformed unit text makes eval raise SyntaxError/NameError
+            return None
 
     def checkTestsDicts(self, item, localdict):
         for depdict in item["depends"]:
@@ -519,13 +607,6 @@ class Container(object):
                 newValue = itemv["unitconverter"](self, itemv)
             self.Storage.__dict__[itemk["val"]] = newvalue
 
-    def is_number(self, val):
-        try:
-            float(val)
-            return True
-        except ValueError:
-            return False
-
     def __str__(self, caller=None, decorate='', color=None, printactive=None, onlynames=None):
         string = ''
         if onlynames is None:
diff --git a/common/python/nomadcore/smart_parser/SmartParserCommon.py b/common/python/nomadcore/smart_parser/SmartParserCommon.py
index 318f743..38b191a 100644
--- a/common/python/nomadcore/smart_parser/SmartParserCommon.py
+++ b/common/python/nomadcore/smart_parser/SmartParserCommon.py
@@ -6,6 +6,7 @@ from nomadcore.simple_parser import mainFunction
 from nomadcore.simple_parser import SimpleMatcher as SM
 from nomadcore.metainfo_storage.MetaInfoStorage import COMMON_META_INFO_PATH, PUBLIC_META_INFO_PATH
 import nomadcore.metainfo_storage.MetaInfoStorage as mStore
+from nomadcore.metainfo_storage.MetaInfoStorage import strcleaner, strisinstance, literal_eval
 from nomadcore.smart_parser.SmartParserDictionary import getDict_MetaStrInDict, getList_MetaStrInDict, get_unitDict
 from contextlib import contextmanager
 import numpy as np
@@ -150,6 +151,9 @@ class ParserBase(object):
             #r"^\s*%\s*%\s*$",
             ]
         self.coverageIgnore = None
+        self.strcleaner = strcleaner
+        self.strisinstance = strisinstance
+        self.literal_eval = literal_eval
 
     def parse(self):
         self.coverageIgnore = re.compile(r"^(?:" + r"|".join(self.coverageIgnoreList) + r")$")
@@ -983,31 +987,78 @@ class ParserBase(object):
                             matchWith = mNameDict[key].matchWith
                         else:
                             matchWith = ''
+                        if mNameDict[key].replaceDict:
+                            replaceDict = mNameDict[key].replaceDict
+                        else:
+                            replaceDict = None
+                        if mNameDict[key].subFunc:
+                            if isinstance(mNameDict[key].subFunc, str):
+                                subFunc = getattr(self,mNameDict[key].subFunc)
+                            else:
+                                subFunc = mNameDict[key].subFunc
+                        else:
+                            subFunc = None
+                        if mNameDict[key].addAsList:
+                            addAsList = mNameDict[key].addAsList
+                        else:
+                            addAsList = None
+                        if mNameDict[key].appendToList:
+                            appendToList = getattr(self,mNameDict[key].appendToList)
+                        else:
+                            appendToList = None
                         if 'EOL' in matchWith:
                             matchThisParsy = re.compile(r"(?:%s)\s*(?:=|:)\s*(?:'|\")?"
                                                         "(?P<%s>.*)(?:'|\")?\s*,?"
                                                         % (cText, key))
                         elif 'UD' in matchWith:
                             delimeter = matchWith.replace('UD', '')
-                            matchThisParsy = re.compile(r"(?:%s)\s*(?:=|:)\s*(?:'|\")?"
+                            matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\")?"
                                                         "(?P<%s>[\-+0-9.a-zA-Z:]+)"
                                                         "(?:'|\")?\s*[%s]"
                                                         % (cText, key, delimeter))
-                        else: # Default matchWith 'NW'
-                            matchThisParsy = re.compile(r"(?:%s)\s*(?:=|:)\s*(?:'|\"|{|\[|\()?"
+                        elif 'BOL' in matchWith:
+                            matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>.*)(?:'|\")?"
+                                                        "\s*(?:%s)\s*"
+                                                        % (key, cText))
+                        elif 'PW' in matchWith:
+                            matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>[\-+0-9.a-zA-Z:]+)"
+                                                        "(?:'|\")?\s*(?:%s)\s*"
+                                                        % (key, cText))
+                        elif 'FD' in matchWith:
+                            delimeter = matchWith.replace('FD', '')
+                            matchThisParsy = re.compile(r"[%s]\s*(?:'|\")?"
                                                         "(?P<%s>[\-+0-9.a-zA-Z:]+)"
+                                                        "(?:'|\")?\s*(?:%s)\s*"
+                                                        % (delimeter, key, cText))
+                        else: # Default matchWith 'NW'
+                            matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\"|{|\[|\()?"
+                                                        "(?P<%s>[\S]+)"
                                                         "(?:'|\"|}|\]|\))?\s*"
                                                         % (cText, key))
+                                                        #"(?P<%s>[\-+0-9.a-zA-Z:_/;]+)"
                         reDict={key:value for value in matchThisParsy.findall(lastLine)}
                         if reDict:
                             for k,v in reDict.items():
                                 if k == key:
                                     if isinstance(v, str):
-                                        v=v.replace('{', '[').replace('}', ']')
+                                        if replaceDict is not None:
+                                            for repK, repV in replaceDict.items():
+                                                v=v.replace(repK, repV)
+                                        if subFunc is not None:
+                                            v=subFunc(v)
                                     if k in list(parser.lastMatch.keys()):
                                         parser.lastMatch[k]=v
                                     else:
-                                        mNameDict[k].value=v
+                                        if addAsList is not None:
+                                            if mNameDict[k].value is None:
+                                                mNameDict[k].value=[v]
+                                            else:
+                                                if isinstance(mNameDict[k].value, list):
+                                                    mNameDict[k].value.append(v)
+                                                else:
+                                                    mNameDict[k].value=v
+                                        else:
+                                            mNameDict[k].value=v
                                     mNameDict[k].activeInfo=True
                                     parserDict.update({"parserSuccess" : True})
 
-- 
GitLab