From f3ca80af2073c3087fc14e6626eed7019d06ffd8 Mon Sep 17 00:00:00 2001
From: Berk Onat <b.onat@warwick.ac.uk>
Date: Fri, 8 Dec 2017 10:48:18 +0000
Subject: [PATCH] Fixed numpy array update at MetaInfoStorage and string to
 list conversion at SmartParser

---
 .../metainfo_storage/MetaInfoStorage.py       | 126 +++++++++++++-----
 .../smart_parser/SmartParserCommon.py         |  61 ++++++++-
 2 files changed, 152 insertions(+), 35 deletions(-)

diff --git a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py
index d32489a..8fab71d 100644
--- a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py
+++ b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py
@@ -5,6 +5,7 @@ import logging
 import json
 import os
 import re
+import ast
 from collections import namedtuple
 
 COMMON_META_INFO_PATH = os.path.normpath(os.path.join(
@@ -15,7 +16,60 @@ PUBLIC_META_INFO_PATH = os.path.normpath(os.path.join(
     os.path.dirname(os.path.abspath(__file__)), 
     "../../../../../nomad-meta-info/meta_info/nomad_meta_info/public.nomadmetainfo.json"))
 
-NOTEXCEPT = re.compile(r'[^a-cf-zA-CF-Z!\.?,]')
+NOTEXCEPT = re.compile(r'[a-cf-zA-CF-Z!\?,{}\[\]]')  # characters deleted via NOTEXCEPT.sub('', ...) before float()/eval(); d, e, D, E are not matched
+    
+def is_number(val):  # True if float(val) succeeds, otherwise False
+        try:
+            float(val)
+            return True
+        except (TypeError, ValueError):  # TypeError: None, lists, dicts, ...
+            return False
+
+def strcleaner(val):
+        """Return val after deleting each blocked keyword with str.replace, in order."""
+        blocked = ("system", "eval", "return",
+                   "ctypes", "setup", "import",
+                   "git", "swig", "cython")
+        for word in blocked:
+            val = val.replace(word, '')
+        return val
+
+def strisinstance(val, typ):
+        """Name the container literal ("list" or "tuple") held in string val.
+
+        typ is one type name or a list/tuple of names; only names containing
+        "list" or "tuple" are examined, in order. val is passed through
+        strcleaner() and literal_eval(); the first requested type the parsed
+        value is an instance of is returned as a string, otherwise None.
+        """
+        if isinstance(typ, str):
+            typlist = [typ]
+        elif isinstance(typ, (tuple, list)):
+            typlist = list(typ)
+        else:
+            typlist = []
+        for t in typlist:
+            try:
+                if "list" in t and all(c in val for c in "[],"):
+                    wanted, label = list, "list"
+                elif "tuple" in t and all(c in val for c in "(),"):
+                    wanted, label = tuple, "tuple"
+                else:
+                    continue
+                # Parse once. ast.literal_eval never yields a numpy array, so
+                # only the built-in list and tuple types are checked here.
+                if isinstance(literal_eval(strcleaner(val)), wanted):
+                    return label
+            except (TypeError, ValueError, AttributeError, SyntaxError):
+                # SyntaxError: bracketed text that is not a valid literal.
+                pass
+        return None
+
+def literal_eval(val):  # parse val as a Python literal; return val unchanged if it is not one
+        try:
+            return ast.literal_eval(val)
+        except (TypeError, ValueError, SyntaxError):  # SyntaxError: malformed text
+            return val
 
 class Container(object):
     """The container class for nested data storage
@@ -299,19 +353,32 @@ class Container(object):
         return storeValue, updateValue, localdict
 
     def convertToNumber(self, updateValue, valtype):
-        if(isinstance(updateValue, list) or isinstance(updateValue, tuple)):
-            newUpdateValue = [eval(
-                valtype+"("+str(ival)+")"
-                ) for ival in updateValue]
-        elif isinstance(updateValue, np.ndarray):
-            newUpdateValue = np.asarray([eval(
-                valtype+"("+str(ival)+")"
-                ) for ival in updateValue])
-        elif self.is_number(updateValue):
-            newUpdateValue = eval(valtype+"("+str(updateValue)+")")
+        """Best-effort cast of a scalar, list, tuple or ndarray to valtype."""
+        # NOTE(review): eval() builds the cast from text; inputs must be trusted.
+        newUpdateValue = updateValue  # fallback, keeps the name always bound
+        acceptvals = ["float", "int", "list", "tuple", "np.asarray",
+                      "np.array", "set", "boolean", "bytes"]
+        if valtype in acceptvals:
+            # Any failed conversion leaves the input unchanged.
+            try:
+                if isinstance(updateValue, (list, tuple)):
+                    newUpdateValue = [eval(
+                        valtype+"("+str(literal_eval(str(ival)))+")"
+                        ) for ival in updateValue]
+                elif isinstance(updateValue, np.ndarray):
+                    newUpdateValue = np.asarray([eval(
+                        valtype+"("+str(literal_eval(str(ival)))+")"
+                        ) for ival in updateValue])
+                elif is_number(updateValue):
+                    newUpdateValue = eval(valtype+"("+str(updateValue)+")")
+            except (TypeError, ValueError, NameError, SyntaxError):
+                newUpdateValue = updateValue
         else:
             # I hope you know what you are doing
-            newUpdateValue = updateValue
+            try:
+                newUpdateValue = float(updateValue)
+            except (TypeError,ValueError):
+                newUpdateValue = updateValue
         return newUpdateValue
     
     def convertUnits(self, updateValue, unit, unitdict):
@@ -322,17 +389,20 @@ class Container(object):
         elif isinstance(updateValue, np.ndarray):
             updateValue = updateValue * self.unitConverter(
                 unit, unitdict)
-            #updateValue = np.asarray([self.unitConverter(
-            #    ival, unit, unitdict
-            #    ) for ival in updateValue])
-        elif self.is_number(updateValue):
+        elif is_number(updateValue):
             updateValue = self.convertToNumber(updateValue, "float")
-            updateValue = updateValue * self.unitConverter(
-                unit, unitdict)
-        else:
+            if updateValue:
+                updateValue = updateValue * self.unitConverter(
+                        unit, unitdict)
+        elif isinstance(updateValue, str):
             # I hope you know what you are doing
-            updateValue = float(updateValue) * self.unitConverter(
-                unit, unitdict)
+            try:
+                newUpdateVal = strcleaner(updateValue)
+                newUpdateVal = NOTEXCEPT.sub('', newUpdateVal)
+                updateValue = float(newUpdateVal) * self.unitConverter(
+                        unit, unitdict)
+            except (TypeError,ValueError):
+                pass
         return updateValue
 
     def unitConverter(self, unit, unitdict):
@@ -352,8 +422,11 @@ class Container(object):
         newunit = newunit.replace('-','*').replace(' ', '*').replace('^', "**")
         for key,value in unitdict.items():
             newunit = newunit.replace(str(key), str(value))
-        NOTEXCEPT.sub('', newunit)
-        return float(eval(newunit))
+        newunit = NOTEXCEPT.sub('', newunit)
+        try:
+            return float(eval(newunit))
+        except (ValueError,TypeError):
+            return None
 
     def checkTestsDicts(self, item, localdict):
         for depdict in item["depends"]:
@@ -519,13 +592,6 @@ class Container(object):
                     newValue = itemv["unitconverter"](self, itemv)
                     self.Storage.__dict__[itemk["val"]] = newvalue
 
-    def is_number(self, val):
-        try:
-            float(val)
-            return True
-        except ValueError:
-            return False
-
     def __str__(self, caller=None, decorate='', color=None, printactive=None, onlynames=None):
         string = ''
         if onlynames is None:
diff --git a/common/python/nomadcore/smart_parser/SmartParserCommon.py b/common/python/nomadcore/smart_parser/SmartParserCommon.py
index 318f743..38b191a 100644
--- a/common/python/nomadcore/smart_parser/SmartParserCommon.py
+++ b/common/python/nomadcore/smart_parser/SmartParserCommon.py
@@ -6,6 +6,7 @@ from nomadcore.simple_parser import mainFunction
 from nomadcore.simple_parser import SimpleMatcher as SM
 from nomadcore.metainfo_storage.MetaInfoStorage import COMMON_META_INFO_PATH, PUBLIC_META_INFO_PATH
 import nomadcore.metainfo_storage.MetaInfoStorage as mStore
+from nomadcore.metainfo_storage.MetaInfoStorage import strcleaner, strisinstance, literal_eval 
 from nomadcore.smart_parser.SmartParserDictionary import getDict_MetaStrInDict, getList_MetaStrInDict, get_unitDict
 from contextlib import contextmanager
 import numpy as np
@@ -150,6 +151,9 @@ class ParserBase(object):
             #r"^\s*%\s*%\s*$",
         ]
         self.coverageIgnore = None
+        self.strcleaner = strcleaner
+        self.strisinstance = strisinstance
+        self.literal_eval = literal_eval
 
     def parse(self):
         self.coverageIgnore = re.compile(r"^(?:" + r"|".join(self.coverageIgnoreList) + r")$")
@@ -983,31 +987,78 @@ class ParserBase(object):
                     matchWith = mNameDict[key].matchWith
                 else:
                     matchWith = ''
+                if mNameDict[key].replaceDict:
+                    replaceDict = mNameDict[key].replaceDict
+                else:
+                    replaceDict = None 
+                if mNameDict[key].subFunc:
+                    if isinstance(subFunc, str):
+                        subFunc = getattr(self,mNameDict[key].subFunc)
+                    else:
+                        subFunc = mNameDict[key].subFunc
+                else:
+                    subFunc = None
+                if mNameDict[key].addAsList:
+                    addAsList = mNameDict[key].addAsList
+                else:
+                    addAsList = None 
+                if mNameDict[key].appendToList:
+                    appendToList = getattr(self,mNameDict[key].appendToList)
+                else:
+                    appendToList = None
                 if 'EOL' in matchWith:
                     matchThisParsy = re.compile(r"(?:%s)\s*(?:=|:)\s*(?:'|\")?"
                                                  "(?P<%s>.*)(?:'|\")?\s*,?"
                                                  % (cText, key))
                 elif 'UD' in matchWith:
                     delimeter = matchWith.replace('UD', '')
-                    matchThisParsy = re.compile(r"(?:%s)\s*(?:=|:)\s*(?:'|\")?"
+                    matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\")?"
                                                  "(?P<%s>[\-+0-9.a-zA-Z:]+)"
                                                  "(?:'|\")?\s*[%s]" 
                                                  % (cText, key, delimeter))
-                else: # Default matchWith 'NW'
-                    matchThisParsy = re.compile(r"(?:%s)\s*(?:=|:)\s*(?:'|\"|{|\[|\()?"
+                elif 'BOL' in matchWith:
+                    matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>.*)(?:'|\")?"
+                                                 "\s*(?:%s)\s*"
+                                                 % (key, cText))
+                elif 'PW' in matchWith:
+                    matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>[\-+0-9.a-zA-Z:]+)"
+                                                 "(?:'|\")?\s*(?:%s)\s*"
+                                                 % (key, cText))
+                elif 'FD' in matchWith:
+                    delimeter = matchWith.replace('FD', '')
+                    matchThisParsy = re.compile(r"[%s]\s*(?:'|\")?"
                                                  "(?P<%s>[\-+0-9.a-zA-Z:]+)"
+                                                 "(?:'|\")?\s*(?:%s)\s*"
+                                                 % (delimeter, key, cText))
+                else: # Default matchWith 'NW'
+                    matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\"|{|\[|\()?"
+                                                 "(?P<%s>[\S]+)"
                                                  "(?:'|\"|}|\]|\))?\s*" 
                                                  % (cText, key))
+                                                 #"(?P<%s>[\-+0-9.a-zA-Z:_/;]+)"
                 reDict={key:value for value in matchThisParsy.findall(lastLine)}
                 if reDict:
                     for k,v in reDict.items():
                         if k == key: 
                             if isinstance(v, str):
-                                v=v.replace('{', '[').replace('}', ']')
+                                if replaceDict is not None:
+                                    for repK, repV in replaceDict.items():
+                                        v=v.replace(repK, repV)
+                            if subFunc is not None:
+                                v=subFunc(v)
                             if k in list(parser.lastMatch.keys()):
                                 parser.lastMatch[k]=v
                             else:
-                                mNameDict[k].value=v
+                                if addAsList is not None:
+                                    if mNameDict[k].value is None:
+                                        mNameDict[k].value=[v]
+                                    else:
+                                        if isinstance(mNameDict[k].value, list):
+                                            mNameDict[k].value.append(v)
+                                        else:
+                                            mNameDict[k].value=v
+                                else:
+                                    mNameDict[k].value=v
                                 mNameDict[k].activeInfo=True
                             parserDict.update({"parserSuccess"  : True})
 
-- 
GitLab