diff --git a/common/python/nomadcore/md_data_access/MDDataAccess.py b/common/python/nomadcore/md_data_access/MDDataAccess.py index 09fddf2f11979aaf5b8d592142f057fdfd0d73ee..806a692cb326a64dbb18b0010360138f7ae649aa 100644 --- a/common/python/nomadcore/md_data_access/MDDataAccess.py +++ b/common/python/nomadcore/md_data_access/MDDataAccess.py @@ -25,7 +25,11 @@ with warnings.catch_warnings(record=True) as w: import MDAnalysis.core.universe as mda_u import MDAnalysis.coordinates as mda_c import panedr +import parmed as pmd import pymolfile as pym +import io +import struct +import copy mda_coordinates_modules = tuple(x[1] for x in inspect.getmembers(mda_c,inspect.ismodule)) logger = logging.getLogger(__name__) @@ -66,6 +70,87 @@ ELEMENTS_MASS_TABLE = { "Zn" : 65.37000, "Zr" : 91.224 } +TEXTCHARS = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f}) + +def is_file_binary(fName, checkBytes=None): + testin=None + if checkBytes is None: + checkBytes = 1024 + try: + with open(fName, 'rb') as fin: + testin = fin.read(checkBytes) + except(FileNotFoundError, IOError): + pass + if testin: + if is_binary_string(testin): + return True + else: + return False + else: + return None + +def is_binary_string(inbytes): + return bool(inbytes.translate(None, TEXTCHARS)) + +def bytes_per_record(record_type): + + if record_type == 'f': return 4 + if record_type == 'd': return 8 + if record_type == 'i': return 4 + if record_type == 's': return 1 + if record_type == 's4': return 4 + + # Default we read 1 byte + return 1 + +def read_start_marker(f): + # Read Start Marker + try: + struct.unpack('i', f.read(4)) + except(struct.error): + pass + +def read_end_marker(f): + # Read End Marker + try: + struct.unpack('i', f.read(4)) + except(struct.error): + pass + +def read_record(f, records, marker=False): + + if marker: + # Read Start Marker + read_start_marker(f) + + res=[] + for record_type, length, smarker, emarker in records: + if smarker: + # Read Start Marker + read_start_marker(f) + x=[] + if length<0: + length= res[-length-1][0] + if 's' in record_type: + record_type = 's' + for i in range(length): + try: + x.append(struct.unpack(record_type, f.read(bytes_per_record(record_type)))[0]) + except(struct.error): + return None + if 's' in record_type: + x = ''.join([ s.decode('utf-8') for s in x]) + res.append(x) + if emarker: + # Read End Marker + read_end_marker(f) + + if marker: + # Read End Marker + read_end_marker(f) + + return res + def get_any_element_name(atomname): elementlist = ELEMENTS_MASS_TABLE.keys() # check if the element is in list @@ -113,9 +198,123 @@ def get_element_name(atomname): if len(atomname)<3: return atomname[0:1] else: - return get_any_element_name() + return atomname.upper() return None +def charmm_stream_reader(slines, topo=None): + if topo is None: + topo = pmd.charmm.CharmmParameterSet() + + lines = [] + title = [] + command = [] + + return topo + +def charmm_coor_reader(fname, ftextmem=None): + coorDict = None + if ftextmem is not None: + title = [] + natoms = 0 + countatoms = 0 + atomlist = [] + for line in ftextmem: + # If the line is a title line + # add it to title list + if(line.startswith('*') or 'TITLE>' in line.upper()): + title.append(line) + # else start readinf COOR format + else: + if natoms<1: + natoms = int(line.split()[0]) + else: + if countatoms<natoms: + atomlist.append(line.split()) + countatoms+=1 + if coorDict is None: + coorDict = {} + coorDict.update({'binary':False}) + coorDict.update({'title':title}) + coorDict.update({'numatoms':natoms}) + coorDict.update({'atomlist':atomlist}) + coords=np.asarray(atomlist) + if len(coords[0,:])>6: + coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,4:7]]])}) + else: + coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,3:6]]])}) + else: + if is_file_binary(fname): + try: + with open(fname, 'rb') as fin: + hdr, icntrl = read_record(f=fin, records=[ + ('s', 4, True, False), ('i', 20, False, True)]) + if hdr == 'COOR': + ntitl = read_record(f=fin, records=[('i', 1, True, False)]) + ntitl=ntitl[0][0] + for t in range(int(ntitl)): + title = read_record(f=fin, records=[('s', 80, False, False)]) + endtitle = read_record(f=fin, records=[('i', 1, True, False)]) + natoms = read_record(f=fin, records=[('i', 1, False, False)]) + natoms = natoms[0][0] + xt = read_record(f=fin, records=[('i', 1, False, False)]) + wx = read_record(f=fin, records=[('f', natoms, True, True)]) + wy = read_record(f=fin, records=[('f', natoms, True, True)]) + wz = read_record(f=fin, records=[('f', natoms, True, True)]) + ww = read_record(f=fin, records=[('f', natoms, True, True)]) + if ww is None: + ww = [0.0 for atm in range(natoms)] + if coorDict is None: + coorDict = {} + coords=[] + for r in range(len(wx)): + coords.append(np.column_stack([wx[r],wy[r],wz[r]])) + coorDict.update({'binary':True}) + coorDict.update({'title':title}) + coorDict.update({'numatoms':natoms}) + coorDict.update({'xt':xt}) + coorDict.update({'coords':np.asarray(coords)}) + coorDict.update({'ww':ww}) + except(FileNotFoundError, IOError): + pass + else: + try: + with open(fname, 'r') as fin: + title = [] + natoms = 0 + countatoms = 0 + atomlist = [] + for line in fin: + # If the line is a title line + # add it to title list + if(line.startswith('*') or + line.startswith('TITLE')): + title.append(line) + # else start readinf COOR format + else: + if natoms<1: + try: + natoms = int(line.split()[0]) + except(ValueError): + return coorDict + else: + if countatoms<natoms: + atomlist.append(line.split()) + countatoms+=1 + if coorDict is None: + coorDict = {} + coorDict.update({'binary':False}) + coorDict.update({'title':title}) + coorDict.update({'numatoms':natoms}) + coorDict.update({'atomlist':atomlist}) + coords=np.asarray(atomlist) + if len(coords[0,:])>6: + coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,4:7]]])}) + else: + coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,3:6]]])}) + except(FileNotFoundError, IOError): + pass + return coorDict + def get_dir_base_extension(file_name): """ Splits directory, file base and file extensions @@ -127,6 +326,569 @@ def get_dir_base_extension(file_name): file_dir = os.path.dirname(file_name) return file_dir, file_base, file_extension +def pmdConvertTopoDict(topoStor, topoPMD): + """ Function to convert ParmEd topology info + from CharmmPsfFile and CharmmParameterSet + to MDDataAccess + + """ + topo=None + if(isinstance(topoPMD, pmd.charmm.CharmmParameterSet) or + isinstance(topoPMD, pmd.charmm.CharmmPsfFile) or + (topoPMD is None and topoStor.charmmcoor is not None) or + topoStor.charmmcoortopo is not None): + topo = topoPMD + + def getatomall(atom): + atmid=0 + atmname = atom[0] + atmtyp = atom[1] + if atom[10]: + atmmass = float(atom[10]) + else: + atmmass = None + if atom[3]: + atmresid = atom[3] + atmres = atom[2] + else: + atmresid = None + atmres = None + if atom[4]: + atmsegid = atom[4] + else: + atmsegid = None + if atom[11]: + atmchrg = float(atom[11]) + else: + atmchrg = None + if atom[12]: + atmrad = float(atom[12]) + else: + atmrad = None + if atom[9]: + atmbfac = float(atom[9]) + else: + atmbfac = None + atm_unique = '' + if atmname: + atm_unique = atm_unique + atmname + if atmtyp: + atm_unique = atm_unique + "-" + atmtyp + if atom[16]: + aeps = float(atom[16]) + else: + aeps = None + if atom[17]: + arm = float(atom[17]) + else: + arm = None + if atom[18]: + aeps14 = float(atom[18]) + else: + aeps14 = None + if atom[19]: + arm14 = float(atom[19]) + else: + arm14 = None + if atom[20]: + anb = [[x,float(y)] for x,y in atom[20]] + else: + anb = None + #if atmres: + # atm_unique = atm_unique + "-" + atmres + #if atmsegid: + # atm_unique = atm_unique + "-" + atmsegid + return [atmid,atm_unique,atmname, + atmtyp,atmres,atmresid,atmsegid, + atmmass,atmchrg,atmrad,atmbfac, + aeps,arm,aeps14,arm14,anb,None] + + def checkatomsdiff(atom1,atom2,checklist): + index=0 + target=len(checklist) + for t in checklist: + if atom1[t] is not atom2[t]: + index += 1 + if index==target: + return True + else: + return False + + def atom_respairs(atom1,atom2): + return [get_atomres(atom1),get_atomres(atom2)] + + def atompairs(atom1,atom2): + return [getatom(atom1),getatom(atom2)] + + def atompairids(atom1,atom2): + return [atom1[0],atom2[0]] + + structure=None + segmentList = None + residueList = None + atom_element_list = [] + nbepsList = None + nbrList = None + nbeps14List = None + nbr14List = None + nbfixList = None + # If topo is a PsfFile, the parameter files were not supplied. + # Hence, we can only extract topo info from Psf class in topo. + if isinstance(topoPMD, pmd.charmm.CharmmPsfFile): + structure = [[x.name, x.type, x.residue.name, + x.residue._idx, x.residue.segid, + x.residue.chain, x.altloc, + x.irotat, x.occupancy, x.bfactor, x.mass, + x._charge, x.solvent_radius, x.atomic_number, + x.tree, x.atom_type.name, x.atom_type.epsilon, + x.atom_type.rmin, x.atom_type.epsilon_14, + x.atom_type.rmin_14, x.atom_type.nbfix] for x in topoPMD.atoms] + chainList = [a[5] for a in structure] + segmentList = [a[4] for a in structure] + residList = [a[3] for a in structure] + residueList = [a[2] for a in structure] + nbepsList = [a[16] for a in structure] + nbrList = [a[17] for a in structure] + nbeps14List = [a[18] for a in structure] + nbr14List = [a[19] for a in structure] + nbfixList = [a[20] for a in structure] + atomList = structure + atomAllList = [getatomall(a) for a in structure] + atom_name_list = [a[0] for a in structure] + #for ai, atom in enumerate(structure): + # if atom[2] in ELEMENTS_MASS_TABLE.keys(): + # element = atom[2] + # else: + # element = get_element_name(atom[2]) + # if element is None: + # element = atom[1] + # atom_element_list.append(element) + # If topo is ParSet, the parameter file or files might be supplied. + # If Psf file is also supplied with the parameters, we can extract + # full information of the topology with parameter values. + # However, if there is no Psf file and user only supplied COOR CARD + # or CRD file, than we can only extract atom definitions through + # these two files and we try to fill in the blanks of the topology + # puzzle in CHARMM with parameter set class in ParmEd. + elif(isinstance(topoPMD, pmd.charmm.CharmmParameterSet) and + (topoStor.charmmcoor is not None or + topoStor.charmmcoortopo is not None)): + coortopo=False + #ATOMNO RESNO RES TYPE X Y Z SEGID RESID Weighting + if topoStor.charmmcoortopo is not None: + if len(topoStor.charmmcoortopo[0])>8: + structure = [[at[3], at[2], at[2], + at[1], at[7], at[8], + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topoStor.charmmcoortopo] + else: + structure = [[at[3], at[2], at[2], + at[1], None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topoStor.charmmcoortopo] + coortopo=True + elif topoStor.charmmcoor is not None: + if 'atomlist' in topoStor.charmmcoor: + topostruct = topoStor.charmmcoor['atomlist'] + if len(topostruct[0])>8: + structure = [[at[3], at[2], at[2], + at[1], at[7], at[8], + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topostruct] + else: + structure = [[at[3], at[2], at[2], + at[1], None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topostruct] + coortopo=True + if coortopo is True: + param_types = [[topoPMD.atom_types[at].name, topoPMD.atom_types[at].epsilon, + topoPMD.atom_types[at].rmin, topoPMD.atom_types[at].epsilon_14, + topoPMD.atom_types[at].rmin_14, topoPMD.atom_types[at].nbfix] for at in topoPMD.atom_types] + nbond_params = {} + for par in param_types: + nbond_params.update({par[0]:par[1:6]}) + structstor = structure[:] + structure = [] + for ai, at in enumerate(structstor): + structure.append(at) + structure[ai].extend([None]) + if at[2] in nbond_params: + structure[ai].extend(nbond_params[at[2]][0:4]) + if nbond_params[at[2]][4]: + structure[ai].extend(nbond_params[at[2]][4]) + else: + structure[ai].extend([None]) + chainList = [a[5] for a in structure] + segmentList = [a[4] for a in structure] + residList = [a[3] for a in structure] + residueList = [a[2] for a in structure] + nbepsList = [a[16] for a in structure] + nbrList = [a[17] for a in structure] + nbeps14List = [a[18] for a in structure] + nbr14List = [a[19] for a in structure] + nbfixList = [a[20] for a in structure] + atomList = structure + atomAllList = [getatomall(a) for a in structure] + atom_name_list = [a[0] for a in structure] + #for ai, atom in enumerate(structure): + # if atom[2] in ELEMENTS_MASS_TABLE.keys(): + # element = atom[2] + # else: + # element = get_element_name(atom[2]) + # if element is None: + # element = atom[1] + # atom_element_list.append(element) + + # There is also one possibility that we do not have + # Psf or parameter set but only COOR info. If this is + # the case and the info is not from a binary CRD file, + # we can only extract names of residues, atom kinds + # and types from the coordinate card info and + elif(topoStor.charmmcoor is not None or + topoStor.charmmcoortopo is not None): + coortopo=False + #ATOMNO RESNO RES TYPE X Y Z SEGID RESID Weighting + if topoStor.charmmcoortopo is not None: + if len(topoStor.charmmcoortopo[0])>8: + structure = [[at[3], at[2], at[2], + at[1], at[7], at[8], + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topoStor.charmmcoortopo] + else: + structure = [[at[3], at[2], at[2], + at[1], None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topoStor.charmmcoortopo] + coortopo=True + elif topoStor.charmmcoor is not None: + if 'atomlist' in topoStor.charmmcoor: + topostruct = topoStor.charmmcoor['atomlist'] + if len(topostruct[0])>8: + structure = [[at[3], at[2], at[2], + at[1], at[7], at[8], + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topostruct] + else: + structure = [[at[3], at[2], at[2], + at[1], None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None, + None, None, None] for at in topostruct] + coortopo=True + if coortopo is True: + chainList = [a[5] for a in structure] + segmentList = [a[4] for a in structure] + residList = [a[3] for a in structure] + residueList = [a[2] for a in structure] + atomList = topo.structure + atomAllList = [getatomall(a) for a in structure] + atom_name_list = [a[0] for a in structure] + #for ai, atom in enumerate(structure): + # if atom[2] in ELEMENTS_MASS_TABLE.keys(): + # element = atom[2] + # else: + # element = get_element_name(atom[2]) + # if element is None: + # element = atom[1] + # atom_element_list.append(element) + + if structure is None: + return topoStor + + count=0 + for i in range(len(atomAllList)): + atomAllList[i][0]=count + count+=1 + + types_list = list(set([a[1] for a in atomAllList])) + #atom_type_list = [a[3] for a in atomList] + + atom_all_list = np.asarray(atomAllList) + atom_names = atom_all_list[:,2] + atom_types = atom_all_list[:,3] + atom_masses = atom_all_list[:,7] + atom_charges = atom_all_list[:,8] + atom_radiuses = atom_all_list[:,9] + atom_bfactors = atom_all_list[:,10] + #print("Atom Bonds:",topo.bonds) + #print("Atom Angles:",topo.angles) + #print("Atom Dihedrals:",topo.dihedrals) + #print("Atom Impropers:",topo.impropers) + + + atom_type_dict = {} + atomtypesDict = {} + atomnameDict = {} + massDict = {} + elementDict = {} + radiusDict = {} + chargeDict = {} + bfactorDict = {} + epsilonDict = {} + rminDict = {} + epsilon14Dict = {} + rmin14Dict = {} + nbfixDict = {} + atomlabelList = [] + atomlabelDict = [] + for ielm in range(len(types_list)): + elm = types_list[ielm] + atom_type_dict.update({elm : ielm+1}) + typelabelList = [] + for ai, atom in enumerate(atomAllList): + if atom[2] in ELEMENTS_MASS_TABLE.keys(): + element = atom[2] + else: + element = get_element_name(atom[2]) + if element is None: + element = atom[1] + if elm == atom[1]: + elementDict.update({elm : element}) + atomnameDict.update({elm : atom[2]}) + atomtypesDict.update({elm : atom[3]}) + massDict.update({elm : atom[7]}) + chargeDict.update({elm : atom[8]}) + radiusDict.update({elm : atom[9]}) + bfactorDict.update({elm : atom[10]}) + epsilonDict.update({elm : atom[11]}) + rminDict.update({elm : atom[12]}) + epsilon14Dict.update({elm : atom[13]}) + rmin14Dict.update({elm : atom[14]}) + nbfixDict.update({elm : atom[15]}) + typelabelList.append(atom[0]) + atomAllList[ai][16]=element + atomlabelDict.append(typelabelList) + + for ai, atom in enumerate(atomAllList): + atom_element_list.append(atom[16]) + + for atom in atomAllList: + atomlabelList.append([atom[0],atom_type_dict[atom[1]]]) + + system_name = '' + if atom_element_list: + system_name = system_name + ''.join([ + el + str(atom_element_list.count(el)) for el in set( + atom_element_list)]) + if segmentList is not None: + segsysname = '-'.join([str(seg) for seg in list( + set(segmentList)) if seg is not None]) + if segsysname: + system_name = system_name + '-' + segsysname + if residueList is not None: + ressysname = '-'.join([str(res) for res in list( + set(residueList)) if res is not None]) + if ressysname: + system_name = system_name + '-' + ressysname + + massList = list(massDict.values()) + atom_type_list = list(atomtypesDict.values()) + name_list = list(atomnameDict.values()) + elementList = list(elementDict.values()) + radiusList = list(radiusDict.values()) + chargesList = list(chargeDict.values()) + bfactorList = list(bfactorDict.values()) + + if isinstance(topoPMD, pmd.charmm.CharmmPsfFile): + if topoPMD.bonds[0].type is not None: + for bi, bondtype in enumerate(topoPMD.bonds[0].type.list): + topoPMD.bonds[0].type.list[bi]._idx=bi + topo_bond_list = np.asarray([ + [x.atom1._idx, x.atom2._idx, x.type._idx, + x.type.k, x.type.req] for x in topoPMD.bonds + ]) + else: + topo_bond_list = np.asarray([ + [x.atom1._idx, x.atom2._idx] for x in topoPMD.bonds + ]) + topbList = topo_bond_list[:,0:2] + elif(isinstance(topoPMD, pmd.charmm.CharmmParameterSet) and + (topoStor.charmmcoor is not None or + topoStor.charmmcoortopo is not None)): + if topoPMD.bond_types is not None: + bi=0 + search_types = list(set(atom_type_list)) + search_names = search_types[:] + search_names.extend(list(set(atom_name_list))) + search_names.extend(list(set(elementList))) + search_names = list(set(search_names)) + search_bonds = [] + for bondtype in topoPMD.bond_types: + for typ in search_names: + if typ in bondtype[0] and bondtype[1] in search_types: + topoPMD.bond_types[bondtype]._idx=bi + search_bonds.append(bondtype) + bi+=1 + elif typ in bondtype[1] and bondtype[0] in search_types: + topoPMD.bond_types[bondtype]._idx=bi + search_bonds.append(bondtype) + bi+=1 + bond_found_list = [] + bond_found_names = [] + # Full bond search, the worst case here. + for btyp in search_bonds: + for ati, at1 in enumerate(atomAllList[0:len(atomAllList)]): + for at2 in atomAllList[ati:len(atomAllList)]: + if at1[5] != at2[5]: # We assume bonds are in same residue since + continue # COOR file only includes residue ids for each atom + if at1[0] == at2[0]: + continue + if((at1[2] in btyp[0] or at1[3] in btyp[0] or at1[16] in btyp[0]) and + (at2[2] in btyp[1] or at2[3] in btyp[1] or at2[16] in btyp[1])): + if(str(at1[0])+'-'+str(at2[0]) in bond_found_names or + str(at2[0])+'-'+str(at1[0]) in bond_found_names): + continue + bond_found_list.append([at1[0], at2[0], topoPMD.bond_types[btyp]._idx, + topoPMD.bond_types[btyp].k,topoPMD.bond_types[btyp].req]) + bond_found_names.append(str(at1[0])+'-'+str(at2[0])) + bond_found_names.append(str(at2[0])+'-'+str(at1[0])) + if((at1[2] in btyp[1] or at1[3] in btyp[1] or at1[16] in btyp[1]) and + (at2[2] in btyp[0] or at2[3] in btyp[0] or at2[16] in btyp[0])): + if(str(at1[0])+'-'+str(at2[0]) in bond_found_names or + str(at2[0])+'-'+str(at1[0]) in bond_found_names): + continue + bond_found_list.append([at1[0], at2[0], topoPMD.bond_types[btyp]._idx, + topoPMD.bond_types[btyp].k,topoPMD.bond_types[btyp].req]) + bond_found_names.append(str(at1[0])+'-'+str(at2[0])) + bond_found_names.append(str(at2[0])+'-'+str(at1[0])) + + topo_bond_list = np.asarray(bond_found_list) + if len(topo_bond_list)>0: + topbList = topo_bond_list[:,0:2] + else: + topbList = [] + else: + topbList = [] + #if topoPMD.angle_types is not None: + # for angletype in topoPMD.angle_types: + # for typ in set(atom_type_list): + # if typ in angletype: + # print("PMDTOPO: angles:",angletype, topoPMD.angle_types[angletype].__dict__) + #if topoPMD.dihedral_types is not None: + # for dihtype in topoPMD.dihedral_types: + # for typ in set(atom_type_list): + # if typ in dihtype: + # print("PMDTOPO: dihedrals:",dihtype, topoPMD.dihedral_types[dihtype].__dict__) + #if topoPMD.improper_types is not None: + # for imptype in topoPMD.improper_types: + # for typ in set(atom_type_list): + # if typ in imptype: + # print("PMDTOPO: impropers:",imptype, topoPMD.improper_types[imptype].__dict__) + else: + topbList = [] + + topbNames = [] + interDict = {} + interTypeDict = {} + topbCont=False + if len(topbList)>0: + topbCont=True + + if topbCont: + for pair in topbList: + topbNames.append(atomAllList[int(pair[0])][1] + '-' + atomAllList[int(pair[1])][1]) + + topb=list(set(topbNames)) + interNum = 0 + for tb in topb: + bondList = [] + typeList = [] + noninter = True + bc = 0 + for bb in topbList: + b=[int(x) for x in bb] + topt=atomAllList[b[0]][1] + '-' + atomAllList[b[1]][1] + if topt == tb: + reslist=[atomAllList[b[0]][4],atomAllList[b[1]][4]] + atmlist=[atomAllList[b[0]],atomAllList[b[1]]] + atmidlist=[b[0],b[1]] + bondList.append(atmidlist) + interDict.update({interNum : bondList}) + if noninter: + noninter = False + typeList.extend(list([ + atom_type_dict[atmlist[0][1]], + atom_type_dict[atmlist[1][1]] + ])) + interTypeDict.update({interNum : typeList}) + interNum += 1 + +# for ielm in range(len(atom_type_list)-1): +# for jelm in range(ielm+1, len(atom_type_list)): +# aelm = atom_type_list[ielm] +# belm = atom_type_list[jelm] +# bondList = [] +# typeList = [] +# bondid = 0 +# noninter = True +# for key in topdk: +# topt = topd[key] +# for b in topt: +# reslist=atom_respairs(b.atoms[0],b.atoms[1]) +# atmlist=atompairs(b.atoms[0],b.atoms[1]) +# atmidlist=atompairids(b.atoms[0],b.atoms[1]) +# if((aelm == str(atmlist[0][2]) and belm == str(atmlist[1][2])) or +# (aelm == str(atmlist[1][2]) and belm == str(atmlist[0][2]))): +# bondList.append(atmidlist) +# interDict.update({interNum : bondList}) +# if noninter: +# noninter = False +# typeList.extend(list([ +# atom_type_dict[aelm], +# atom_type_dict[belm] +# ])) +# interTypeDict.update({interNum : typeList}) +# interNum += 1 +# bondid += 1 + + #atomIndex = np.arange(len(residueList)) + #atom_to_residue = np.zeros((len(residueList), 2), dtype=int) + #atom_to_residue[:,0] = atomIndex+1 + #atom_to_residue[:,1] = np.array(residueList)+1 + + + topoStor.topoDict.update({"system_name" : system_name}) + topoStor.topoDict.update({"atom_name_list" : atom_name_list}) + topoStor.topoDict.update({"atom_label_list" : atomlabelList}) + topoStor.topoDict.update({"name_list" : name_list}) + topoStor.topoDict.update({"types_list" : types_list}) + topoStor.topoDict.update({"atom_type_list" : atom_type_list}) + topoStor.topoDict.update({"atom_mass_list" : massList}) + topoStor.topoDict.update({"atom_element_list" : atom_element_list}) + topoStor.topoDict.update({"element_list" : elementList}) + topoStor.topoDict.update({"atom_radius_list" : radiusList}) + topoStor.topoDict.update({"atom_charge_list" : chargesList}) + topoStor.topoDict.update({"interactions_dict" : interDict}) + topoStor.topoDict.update({"interactions_type_dict" : interTypeDict}) + topoStor.topoDict.update({"mol_list" : residueList}) + #topoStor.topoDict.update({"atom_to_mol_list" : atom_to_residue}) + topoStor.topoDict.update({"residue_list" : residueList}) + return topoStor + def pymConvertTopoDict(topoStor, topoPYM): """ Function to convert Pymolfile topology info to MDDataAccess @@ -233,10 +995,18 @@ def pymConvertTopoDict(topoStor, topoPYM): atom_element_list.append(element) system_name = '' - if segmentList: - system_name = system_name + '-'.join(list(set(segmentList))) - if residueList: - system_name = system_name + '-'.join(list(set(residueList))) + if atom_element_list: + system_name = system_name + ''.join([ + el + str(atom_element_list.count(el)) for el in set( + atom_element_list)]) + if segmentList is not None: + system_name = system_name + '-' + '-'.join([ + str(seg) for seg in list( + set(segmentList)) if seg is not None]) + if residueList is not None: + system_name = system_name + '-' + '-'.join([ + str(res) for res in list( + set(residueList)) if res is not None]) atom_all_list = np.asarray(atomAllList) atom_names = atom_all_list[:,2] @@ -439,7 +1209,15 @@ def mdtConvertTopoDict(topoStor, topoMDT): atom_to_residue[:,0] = atomIndex+1 atom_to_residue[:,1] = np.array(residueList)+1 - system_name = '-'.join(list(set(topologyDict["resName"]))) + system_name = '' + if atom_element_list: + system_name = system_name + ''.join([ + el + str(atom_element_list.count(el)) for el in set( + atom_element_list)]) + if residueList is not None: + system_name = system_name + '-' + '-'.join([ + str(res) for res in list( + set(topologyDict["resName"])) if res is not None]) topoStor.topoDict.update({"system_name" : system_name}) topoStor.topoDict.update({"types_list" : types_list}) @@ -575,10 +1353,18 @@ def mdaConvertTopoDict(topoStor, topoMDA): atom_element_list.append(element) system_name = '' - if segmentList: - system_name = system_name + '-'.join(list(set(segmentList))) - elif residueList: - system_name = system_name + '-'.join(list(set(residueList))) + if atom_element_list: + system_name = system_name + ''.join([ + el + str(atom_element_list.count(el)) for el in set( + atom_element_list)]) + if segmentList is not None: + system_name = system_name + '-' + '-'.join([ + str(seg) for seg in list( + set(segmentList)) if seg is not None]) + if residueList is not None: + system_name = system_name + '-' + '-'.join([ + str(res) for res in list( + set(residueList)) if res is not None]) attrlist = dir(topo.atoms) atom_names = [] @@ -839,12 +1625,15 @@ class MDDataTopology(MDDataCommon): """ def __init__(self, system_name = "", + _topology = {}, atoms = None, bonds = None, - dihedrals = None, - impropers = None, - residues = None, + dihedrals = [], + impropers = [], + residues = [], n_atoms = None, + _numAtoms = None, + numAtoms = None, n_bonds = None, n_dihedrals = None, n_impropers = None, @@ -860,6 +1649,9 @@ class MDDataTopology(MDDataCommon): residue_types = None, residue_labels = None, atom_in_mol = None, + charmmcoor = None, + charmmpsf = None, + charmmcoortopo = None, topoDict = {} ): pass @@ -869,9 +1661,11 @@ class MDDataTrajectory(MDDataCommon): velocities and forces, if they exist. """ def __init__(self, + title = None, natoms = None, positions = None, forces = None, + time = None, velocities = None, unitcell_vectors = None, unitcell_lengths = None, @@ -989,6 +1783,8 @@ class MDDataAccess(object): self.topocode = None # To explicitly define the parsing library (pymolfile, mdtraj, ASE ...) for topology files self.topohandler = None # The object parsing the topology file self.topo_n_atoms = None # Number of atoms at topology file + self.topostream = None # List of lines in a text stream of topology + self.topocharmmpsf = None # List of lines in a text stream of topology self.topology = None def init_traj(self): @@ -998,6 +1794,7 @@ class MDDataAccess(object): self.trajiter = None # The object parsing the trajectory file self.trajcode = None # To explicitly define the parsing library (pymolfile, mdtraj, ASE ...) for trajectory files self.trajfile = None # File name of the trajectory file with path if needed + self.trajstream = None # List of lines in a text stream of input trajectory self.trajectory = None def init_incoord(self): @@ -1008,6 +1805,7 @@ class MDDataAccess(object): self.incoorditer = None # The object parsing the trajectory file self.incoordcode = None # To explicitly define the parsing library (pymolfile, mdtraj, ASE ...) for trajectory files self.incoordfile = None # File name of the trajectory file with path if needed + self.incoordstream = None # List of lines in a text stream of input trajectory self.inputpositions = None def init_outcoord(self): @@ -1018,6 +1816,7 @@ class MDDataAccess(object): self.outcoorditer = None # The iteration at the output file self.outcoordcode = None # To explicitly define the parsing library (pymolfile, mdtraj, ASE ...) for the files self.outcoordfile = None # File name of the file with path if needed + self.outcoordstream = None # List of lines in a text stream of input trajectory self.outputpositions = None def init_thermo(self): @@ -1060,9 +1859,9 @@ class MDDataAccess(object): """Loads the file handles for trajectory and/or topology """ - if self.topohandler is None: - if self.topofile: - self.check_topology_format_support() + #if self.topohandler is None: + # if self.topofile: + self.check_topology_format_support() if self.trajhandler is None: self.check_trajectory_format_support("traj") @@ -1073,9 +1872,13 @@ class MDDataAccess(object): """Loads the file handles for topology only """ - if self.topohandler is None: - if self.topofile: - self.check_topology_format_support() + #if self.topohandler is None: + # if self.topofile: + # self.check_topology_format_support() + #else: + if self.topofile is None: + self.topofile = '' + self.check_topology_format_support() return self.topohandler @@ -1084,8 +1887,8 @@ class MDDataAccess(object): """ if self.incoordhandler is None: - if self.incoordfile: - self.check_trajectory_format_support("input") + # if self.incoordfile: + self.check_trajectory_format_support("input") return self.incoordhandler @@ -1166,8 +1969,10 @@ class MDDataAccess(object): def check_topology_format_support(self): """Check if the given format is supported. """ - topofilename = os.path.basename(self.topofile) - if self.topoformat is None: + topofilename=None + if self.topofile: + topofilename = os.path.basename(self.topofile) + if self.topoformat is None and topofilename is not None: file_format = self.get_file_format(topofilename, self.topoformat) self.topoformat = file_format else: @@ -1177,26 +1982,75 @@ class MDDataAccess(object): # Use the given order to check topology if self.interfaceorder: for interface in self.interfaceorder: - if "pymolfile" in interface: - self.topohandler = self.load_pymolfile_topology(topofilename, file_format, plugin_name) + if "charmmcoor" in interface: + topohandler_check = None + charmmcoor_dict = None + filetopoformat = re.sub('[.]', '', file_format) + if('CHARMMCOR' == filetopoformat.upper() or + 'CHARMMCOOR' == filetopoformat.upper() or + #'COR' == file_format.upper() or + #'CRD' == file_format.upper() or + 'COOR' == filetopoformat.upper()): + if self.topofile: + charmmcoor_dict = charmm_coor_reader(self.topofile) + if('CHARMMSTRCOR' == filetopoformat.upper() or + 'CHARMMSTRCRD' == filetopoformat.upper() or + 'CHARMMSTREAM' == filetopoformat.upper()): + if self.topostream is not None: + if 'cor' in self.topostream: + charmmcoor_dict = charmm_coor_reader(self.topofile, ftextmem=self.topostream['cor']) + else: + charmmcoor_dict = charmm_coor_reader(chkfile, ftextmem=None) + if charmmcoor_dict: + if 'binary' in charmmcoor_dict: + if charmmcoor_dict['binary'] is False: + topohandler_check = charmmcoor_dict + if topohandler_check: + if self.topohandler is None: + if self.topology is None: + self.topology = MDDataTopology() + self.topology.charmmcoortopo = self.charmm_coor_toporead(topohandler_check) + else: + self.topo_n_atoms = topohandler_check['numatoms'] + if self.topology is None: + self.topology = MDDataTopology() + self.topology.charmmcoor = topohandler_check + if self.topohandler: + usedefault=False + self.topocode = "charmmcoor" + break + elif "parmed" in interface: + filetopoformat = re.sub('[.]', '', file_format) + self.topohandler = self.load_parmed_topology(filetopoformat, base_topo=self.topohandler) + if self.topohandler: + usedefault=False + self.topocode = "parmed" + break + elif "pymolfile" in interface: + if self.topohandler is None: + self.topohandler = self.load_pymolfile_topology(file_format) if self.topohandler: usedefault=False self.topocode = "pymolfile" break elif "mdtraj" in interface: - self.topohandler = self.load_mdtraj_topology(file_format) + if self.topohandler is None: + self.topohandler = self.load_mdtraj_topology(file_format) if self.topohandler: usedefault=False self.topocode = "mdtraj" break elif "mdanalysis" in interface: - self.topohandler = self.load_mdanalysis_topology(file_format) + if self.topohandler is None: + self.topohandler = self.load_mdanalysis_topology(file_format) if self.topohandler: usedefault=False self.topocode = "mdanalysis" break elif "ase" in interface: - self.topohandler = self.load_ase_support(self.topofile, file_format=file_format) + if self.topohandler is None: + if self.topofile: + self.topohandler = self.load_ase_support(self.topofile, file_format=file_format) if self.topohandler: usedefault=False self.topocode = "ase" @@ -1204,7 +2058,8 @@ class MDDataAccess(object): elif self.UserSuppliedInterface: if isinstance(self.UserSuppliedInterface, MDDataAccess.UserSuppliedInterface): if self.UserSuppliedInterface.name in interface: - self.topohandler = self.UserSuppliedInterface.topology_support(self.topofile, file_format=file_format) + if self.topohandler is None: + self.topohandler = self.UserSuppliedInterface.topology_support(self.topofile, file_format=file_format) if self.topohandler: usedefault=False self.topocode = self.UserSuppliedInterface.name @@ -1213,9 +2068,10 @@ class MDDataAccess(object): # If no given order or the items in list does not match with the supported pacakges # use the default order in heroistic mode. if usedefault: - # Nothing to lose to be heroistic here. - self.topohandler = self.load_pymolfile_topology(file_format) - if self.topohandler: + if self.topohandler is None: + # Nothing to lose to be heroistic here. + self.topohandler = self.load_pymolfile_topology(file_format) + if self.topohandler is None: self.topocode = "pymolfile" if self.topohandler is None: @@ -1237,12 +2093,14 @@ class MDDataAccess(object): # May still have chance that ASE can recognize the # format with its filetype checking function if ase_support is None: - ase_support = ase_io.formats.filetype(self.topofile) - self.topohandler = self.load_ase_support(self.topofile, file_format=ase_support) - self.topocode = "ase" + if self.topofile: + ase_support = ase_io.formats.filetype(self.topofile) + self.topohandler = self.load_ase_support(self.topofile, file_format=ase_support) + self.topocode = "ase" else: - self.topohandler = self.load_ase_support(self.topofile, file_format=file_format) - self.topocode = "ase" + if self.topofile: + self.topohandler = self.load_ase_support(self.topofile, file_format=file_format) + self.topocode = "ase" # If no success after all attempts return False if self.topohandler is None: @@ -1267,7 +2125,9 @@ class MDDataAccess(object): self.trajtype = "traj" numatoms=None - chkfilename = os.path.basename(chkfile) + chkfilename = None + if chkfile: + chkfilename = os.path.basename(chkfile) if chkformat is None: file_format = self.get_file_format(chkfile, chkformat) chkformat = file_format @@ -1284,18 +2144,97 @@ class MDDataAccess(object): # Use the given order to check topology if self.interfaceorder: for interface in self.interfaceorder: - if "pymolfile" in interface: + if "charmmcoor" in interface: + trajhandler_check = None + charmmcoor_dict = None + filetrajformat = re.sub('[.]', '', file_format) + if('CHARMMCOOR' == filetrajformat.upper() or + 'CHARMMCOR' == filetrajformat.upper() or + #'COR' == file_format.upper() or + #'CRD' == file_format.upper() or + #'COORBIN' == file_format.upper() or + 'COOR' == filetrajformat.upper()): + charmmcoor_dict = charmm_coor_reader(chkfile) + if('CHARMMSTRCOR' == filetrajformat.upper() or + 'CHARMMSTRCRD' == filetrajformat.upper() or + 'CHARMMSTREAM' == filetrajformat.upper()): + if('CHARMMSTR' in filetrajformat.upper() and + self.topostream is not None and + (self.trajstream is None and + self.incoordstream is None and + self.outcoordstream is None )): + if 'cor' in self.topostream: + charmmcoor_dict = charmm_coor_reader(chkfile, ftextmem=self.topostream['cor']) + elif('CHARMMSTR' in filetrajformat.upper() and self.trajstream is not None): + if 'cor' in self.trajstream: + charmmcoor_dict = charmm_coor_reader(chkfile, ftextmem=self.trajstream['cor']) + elif('CHARMMSTR' in filetrajformat.upper() and self.incoordstream is not None): + if 'cor' in self.incoordstream: + charmmcoor_dict = charmm_coor_reader(chkfile, ftextmem=self.incoordstream['cor']) + elif('CHARMMSTR' in filetrajformat.upper() and self.outcoordstream is not None): + if 'cor' in self.outcoordstream: + charmmcoor_dict = charmm_coor_reader(chkfile, ftextmem=self.outcoordstream['cor']) + else: + charmmcoor_dict = charmm_coor_reader(chkfile, ftextmem=None) + if charmmcoor_dict: + trajhandler_check = charmmcoor_dict + if trajhandler_check: + trajhandler = None + trajhandler = self.charmmcoor_iread(coorDict=trajhandler_check) + if trajhandler: + if self.interfacematch: + if interface in self.topocode: + usedefault=False + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "charmmcoor" + self.incoordplugin = charmmcoor_dict + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "charmmcoor" + self.outcoordplugin = charmmcoor_dict + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "charmmcoor" + self.trajplugin = charmmcoor_dict + self.natoms = numatoms + break + else: + trajhandler = None + else: + usedefault=False + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "charmmcoor" + self.incoordplugin = charmmcoor_dict + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "charmmcoor" + self.outcoordplugin = charmmcoor_dict + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "charmmcoor" + self.trajplugin = charmmcoor_dict + self.natoms = numatoms + break + elif "pymolfile" in interface: filetrajformat = re.sub('[.]', '', chkformat) trajhandler_check = None - if self.topohandler is not None: - numatoms = self.get_natoms_from_topo(topocode=self.topocode) + molfile_traj = None + if self.topohandler is not None and chkfile is not None: + numatoms = self.get_natoms_from_topo(self.topocode) if isinstance(self.topohandler, pym.OpenMolfile): molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, topology=self.topohandler, silent=False) elif numatoms is not None: if numatoms > 0: molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, natoms=numatoms, silent=False) - if molfile_traj.trajectory is not None: - trajhandler_check = molfile_traj + if molfile_traj is not None: + if molfile_traj.trajectory is not None: + trajhandler_check = molfile_traj if trajhandler_check: trajhandler = None trajhandler = self.pymolfile_iread(trajhandler_check) @@ -1340,6 +2279,9 @@ class MDDataAccess(object): self.natoms = numatoms break if "mdtraj" in interface: + numatoms = None + if self.topohandler is not None: + numatoms = self.get_natoms_from_topo(self.topocode) trajhandler_check = None try: trajhandler_check = mdt_FormatRegistry.fileobjects[file_format] @@ -1356,15 +2298,18 @@ class MDDataAccess(object): if "input" in filetype: self.incoordhandler = trajhandler self.incoordcode = "mdtraj" - self.incoord_natoms = numatoms + if self.incoord_natoms is None: + self.incoord_natoms = numatoms elif "output" in filetype: self.outcoordhandler = trajhandler self.outcoordcode = "mdtraj" - self.outcoord_natoms = numatoms + if self.outcoord_natoms is None: + self.outcoord_natoms = numatoms else: self.trajhandler = trajhandler self.trajcode = "mdtraj" - self.natoms = numatoms + if self.natoms is None: + self.natoms = numatoms break else: trajhandler = None @@ -1373,20 +2318,23 @@ class MDDataAccess(object): if "input" in filetype: self.incoordhandler = trajhandler self.incoordcode = "mdtraj" - self.incoord_natoms = numatoms + if self.incoord_natoms is None: + self.incoord_natoms = numatoms elif "output" in filetype: self.outcoordhandler = trajhandler self.outcoordcode = "mdtraj" - self.outcoord_natoms = numatoms + if self.outcoord_natoms is None: + self.outcoord_natoms = numatoms else: self.trajhandler = trajhandler self.trajcode = "mdtraj" - self.natoms = numatoms + if self.natoms is None: + self.natoms = numatoms break if "mdanalysis" in interface: mdanalysis_format = re.sub('[.]', '', file_format) mdanalysis_format = mdanalysis_format.upper() - if self.topohandler is not None: + if self.topohandler is not None and chkfile is not None: # if the topology handler is a MDAnalysis universe, # we may try replacing the trajectory data in it. if isinstance(self.topohandler, mda_u.Universe): @@ -1408,7 +2356,7 @@ class MDDataAccess(object): self.natoms = numatoms usedefault=False break - except (AttributeError, IOError, OSError, ValueError, TypeError, KeyError): + except (AttributeError, IOError, OSError, ValueError, TypeError): try: universe = mda_u.Universe(self.topohandler, self.trajfile, format=mdanalysis_format) trajhandler = None @@ -1446,8 +2394,8 @@ class MDDataAccess(object): self.natoms = numatoms usedefault=False break - except (IOError, ValueError, TypeError, KeyError): - #except IOError: + #except (AttributeError, IOError, OSError, ValueError, TypeError): + except IOError: pass # if topology handler is not a MDAnalysis Universe # or is not initialized, we can try accessing only @@ -1662,13 +2610,16 @@ class MDDataAccess(object): topoIsPymTopo = False else: topoIsPymTopo = False - if(isinstance(self.topohandler, pym.OpenMolfile) or topoIsPymTopo): - molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, topology=self.topohandler, silent=False) - elif numatoms is not None: - if numatoms > 0: - molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, natoms=numatoms, silent=False) - if molfile_traj.trajectory is not None: - trajhandler_check = molfile_traj + molfile_traj = None + if chkfile is not None: + if(isinstance(self.topohandler, pym.OpenMolfile) or topoIsPymTopo): + molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, topology=self.topohandler, silent=False) + elif numatoms is not None: + if numatoms > 0: + molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, natoms=numatoms, silent=False) + if molfile_traj is not None: + if molfile_traj.trajectory is not None: + trajhandler_check = molfile_traj if trajhandler_check: trajhandler = None trajhandler = self.pymolfile_iread(trajhandler_check) @@ -1710,7 +2661,7 @@ class MDDataAccess(object): self.natoms = numatoms else: molfile_traj = None - if numatoms is not None: + if numatoms is not None and chkfile is not None: if numatoms > 0: molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, natoms=numatoms, silent=False) if molfile_traj is not None: @@ -1811,7 +2762,7 @@ class MDDataAccess(object): trajhandler = self.trajhandler if trajhandler is None: mdanalysis_format = re.sub('[.]', '', file_format) - if self.topohandler is not None: + if self.topohandler is not None and chkfile is not None: if isinstance(self.topohandler, mda_u.Universe): try: self.topohandler.load_new(chkfile, format=mdanalysis_format) @@ -1937,12 +2888,18 @@ class MDDataAccess(object): if trajhandler is None: ase_support = None ase_support = self.get_ase_format_support(file_format) + trajcode=None # May still have chance that ASE can recognize the # format with its filetype checking function if ase_support is None: - ase_support = ase_io.formats.filetype(chkfile) + if chkfile is not None: + try: + ase_support = ase_io.formats.filetype(chkfile) + except (FileNotFoundError,IOError): + pass trajhandler = None - trajhandler = self.ase_iread(chkfile, fileformat=ase_support) + if chkfile is not None: + trajhandler = self.ase_iread(chkfile, fileformat=ase_support) if trajhandler: trajcode=None if self.interfacematch: @@ -1954,7 +2911,8 @@ class MDDataAccess(object): trajcode = "ase" else: trajhandler = None - trajhandler = self.ase_iread(chkfile, fileformat=file_format) + if chkfile is not None: + trajhandler = self.ase_iread(chkfile, fileformat=file_format) trajcode=None if trajhandler: if self.interfacematch: @@ -2086,16 +3044,158 @@ class MDDataAccess(object): topology = None universe = None fileformat = re.sub('[.]', '', file_format) - try: - molfile_topo = pym.OpenMolfile(self.topofile, file_format=fileformat) - if molfile_topo.topology is not None: - topology = molfile_topo - else: - if molfile_topo.trajectory is None: - molfile_topo = None - del molfile_topo - except (AttributeError, IOError, OSError, ValueError): - pass + if self.topofile: + try: + molfile_topo = pym.OpenMolfile(self.topofile, file_format=fileformat) + if molfile_topo.topology is not None: + topology = molfile_topo + else: + if molfile_topo.trajectory is None: + molfile_topo = None + del molfile_topo + except (AttributeError, IOError, OSError, ValueError): + pass + + return topology + + def load_parmed_topology(self, file_format, base_topo=None): + """ + Get the topology and parameters from file loaders in parmed + + Returns + ------- + topology : parmed.topologyobjects + """ + + if base_topo is not None: + topology = base_topo + else: + topology = None + + ext = file_format + top = self.topofile + + if ext in ['mol2', 'mol2.gz', 'mol2.bz2']: + if self.topofile: + try: + topology = pmd.load_file(top, structure=True) + except(ValueError, AttributeError, IOError): + pass + elif ext in ['psf', 'psf.gz', 'psf.bz2', + 'pdb', 'pdb.gz', 'pdb.bz2', + 'cif', 'cif.gz', 'cif.bz2', + 'sdf', 'sdf.gz', 'sdf.bz2']: + if self.topofile: + parmset_topo = None + if base_topo is not None: + if isinstance(base_topo, pmd.charmm.CharmmParameterSet): + parmset_topo = base_topo + try: + topology = pmd.load_file(top) + if parmset_topo is not None: + topology.load_parameters(parmset_topo, copy_parameters=True) + except(ValueError, AttributeError, IOError): + pass + elif ext in ['mdl', 'mdl.gz', 'mdl.bz2']: + if self.topofile: + try: + topology = pmd.amber.AmberFormat(top) + except(ValueError, AttributeError, IOError): + pass + elif ext in ['prmtop', 'prmtop.gz', 'prmtop.bz2', + 'parm7', 'parm7.gz', 'parm7.bz2']: + if self.topofile: + try: + topology = pmd.amber.LoadParm(top) + except(ValueError, AttributeError, IOError): + pass + elif ext in ['top', 'gromacstop']: + if self.topofile: + try: + topology = pmd.gromacs.GromacsTopologyFile(top) + except(ValueError, AttributeError, IOError): + pass + elif ext in ['rtf', 'charmmtop', 'charmmstrrtf']: # .top is used by Gromacs. + if base_topo is None: + base_topo = pmd.charmm.CharmmParameterSet() + try: + # top can be str or list of lines + if ext in ['charmmstrrtf'] and self.topostream is not None: + if 'rtf' in self.topostream: + if isinstance(base_topo, pmd.charmm.CharmmParameterSet): + base_topo.read_topology_file(iter(self.topostream['rtf'])) + elif isinstance(base_topo, pmd.charmm.CharmmPsfFile): + self.topocharmmpsf = True + parmset_topo = pmd.charmm.CharmmParameterSet() + parmset_topo.read_topology_file(iter(self.topostream['rtf'])) + base_topo.load_parameters(parmset_topo, copy_parameters=True) + else: + if self.topofile: + if isinstance(base_topo, pmd.charmm.CharmmParameterSet): + base_topo.read_topology_file(top) + elif isinstance(base_topo, pmd.charmm.CharmmPsfFile): + self.topocharmmpsf = True + parmset_topo = pmd.charmm.CharmmParameterSet() + parmset_topo.read_topology_file(top) + base_topo.load_parameters(parmset_topo, copy_parameters=True) + topology = base_topo + except(ValueError, AttributeError, IOError): + pass + elif ext in ['stream', 'str', 'charmmstream']: + if base_topo is None: + base_topo = pmd.charmm.CharmmParameterSet() + try: + # top can be str or list of lines + if ext in ['charmmstream'] and self.topostream is not None: + if isinstance(self.topostream,dict): + if 'rtf' in self.topostream: + if isinstance(base_topo, pmd.charmm.CharmmParameterSet): + base_topo.read_topology_file(iter(self.topostream['rtf'])) + elif isinstance(base_topo, pmd.charmm.CharmmPsfFile): + self.topocharmmpsf = True + parmset_topo = pmd.charmm.CharmmParameterSet() + parmset_topo.read_topology_file(iter(self.topostream['rtf'])) + base_topo.load_parameters(parmset_topo, copy_parameters=True) + if 'par' in self.topostream: + if isinstance(base_topo, pmd.charmm.CharmmParameterSet): + base_topo.read_parameter_file(iter(self.topostream['par'])) + elif isinstance(base_topo, pmd.charmm.CharmmPsfFile): + self.topocharmmpsf = True + parmset_topo = pmd.charmm.CharmmParameterSet() + parmset_topo.read_parameter_file(iter(self.topostream['par'])) + base_topo.load_parameters(parmset_topo, copy_parameters=True) + else: + if self.topofile: + base_topo.read_stream_file(top) + topology = base_topo + except(ValueError, AttributeError, IOError): + pass + elif ext in ['par', 'prm', 'charmmpar', 'charmmstrpar']: + if base_topo is None: + base_topo = pmd.charmm.CharmmParameterSet() + try: + # top can be str or list of lines + if ext in ['charmmstrpar'] and self.topostream is not None: + if 'par' in self.topostream: + if isinstance(base_topo, pmd.charmm.CharmmParameterSet): + base_topo.read_parameter_file(iter(self.topostream['par'])) + elif isinstance(base_topo, pmd.charmm.CharmmPsfFile): + self.topocharmmpsf = True + parmset_topo = pmd.charmm.CharmmParameterSet() + parmset_topo.read_parameter_file(iter(self.topostream['par'])) + base_topo.load_parameters(parmset_topo, copy_parameters=True) + else: + if self.topofile: + if isinstance(base_topo, pmd.charmm.CharmmParameterSet): + base_topo.read_parameter_file(top) + elif isinstance(base_topo, pmd.charmm.CharmmPsfFile): + self.topocharmmpsf = True + parmset_topo = pmd.charmm.CharmmParameterSet() + parmset_topo.read_parameter_file(top) + base_topo.load_parameters(parmset_topo, copy_parameters=True) + topology = base_topo + except(ValueError, AttributeError, IOError): + pass return topology @@ -2121,25 +3221,26 @@ class MDDataAccess(object): ext=file_format top=self.topofile - if ext in ['.pdb', '.pdb.gz', '.h5','.lh5']: - _traj = mdt.load_frame(top, 0, **wrapkwargs) - topology = _traj.topology - elif ext in ['.prmtop', '.parm7']: - topology = mdt_load.prmtop(top, **wrapkwargs) - elif ext in ['.psf']: - topology = mdt_load.psf(top, **wrapkwargs) - elif ext in ['.mol2']: - topology = mdt_load.mol2(top, **wrapkwargs).topology - elif ext in ['.gro']: - topology = mdt.core.trajectory.load_gro(top, **wrapkwargs).topology - elif ext in ['.arc']: - topology = mdt_load.arc(top, **wrapkwargs).topology - elif ext in ['.hoomdxml']: - topology = mdt_load.hoomdxml(top, **wrapkwargs).topology - elif isinstance(top, mdt_Trajectory): - topology = top.topology - elif isinstance(top, mdt_Topology): - topology = top + if top: + if ext in ['.pdb', '.pdb.gz', '.h5','.lh5']: + _traj = mdt.load_frame(top, 0, **wrapkwargs) + topology = _traj.topology + elif ext in ['.prmtop', '.parm7']: + topology = mdt_load.prmtop(top, **wrapkwargs) + elif ext in ['.psf']: + topology = mdt_load.psf(top, **wrapkwargs) + elif ext in ['.mol2']: + topology = mdt_load.mol2(top, **wrapkwargs).topology + elif ext in ['.gro']: + topology = mdt.core.trajectory.load_gro(top, **wrapkwargs).topology + elif ext in ['.arc']: + topology = mdt_load.arc(top, **wrapkwargs).topology + elif ext in ['.hoomdxml']: + topology = mdt_load.hoomdxml(top, **wrapkwargs).topology + elif isinstance(top, mdt_Trajectory): + topology = top.topology + elif isinstance(top, mdt_Topology): + topology = top return topology @@ -2158,16 +3259,17 @@ class MDDataAccess(object): universe = None fileformat = re.sub('[.]', '', file_format) top=self.topofile - try: - universe = mda_u.Universe(top, topology_format=fileformat) - if isinstance(universe, mda_u.Universe): - topology = universe - elif isinstance(universe, mda_u.Topology): - topology = universe - elif isinstance(universe._topology, mda_u.Topology): - topology = universe._topology - except (AttributeError, IOError, OSError, ValueError): - pass + if top: + try: + universe = mda_u.Universe(top, topology_format=fileformat) + if isinstance(universe, mda_u.Universe): + topology = universe + elif isinstance(universe, mda_u.Topology): + topology = universe + elif isinstance(universe._topology, mda_u.Topology): + topology = universe._topology + except (AttributeError, IOError, OSError, ValueError): + pass return topology @@ -2181,7 +3283,7 @@ class MDDataAccess(object): topology : generator """ ioformat = None - ioformat = get_ase_format_support(self, file_format) + ioformat = self.get_ase_format_support(file_format) if ioformat is not None: handler = None try: @@ -2248,11 +3350,55 @@ class MDDataAccess(object): return if handler is not None: - for value in handler: - yield value.get_positions() + try: + for value in handler: + yield value.get_positions() + except (AttributeError, IOError, OSError, + ValueError, ImportError, ModuleNotFoundError): + return else: return + def charmm_coor_toporead(self, coorDict=None): + """Returns the atom list for CHARMM COOR input + """ + if coorDict is not None: + if 'atomlist' in coorDict: + return coorDict['atomlist'] + else: + return None + else: + return None + + def charmmcoor_iread(self, coorDict=None): + """Returns the iterator for CHARMM COOR trajectory + """ + if "input" in self.trajtype: + traj = self.inputcoords + elif "output" in self.trajtype: + traj = self.outputcoords + else: + traj = self.trajectory + + if coorDict is not None: + if traj is None: + traj = MDDataTrajectory() + if 'ww' in coorDict: + traj.charmm = {'ww':coorDict['ww']} + if "input" in self.trajtype: + self.inputcoords = traj + elif "output" in self.trajtype: + self.outputcoords = traj + else: + self.trajectory = traj + if 'coords' in coorDict: + for positions in coorDict['coords']: + if len(positions)>0: + yield np.asarray(positions) + else: + return None + return None + def pymolfile_iread(self, traj_handler=None): """Returns the iterator for pymolfile trajectory """ @@ -2412,7 +3558,7 @@ class MDDataAccess(object): loader = mdt_FormatRegistry.loaders[trajformat] except KeyError: return - t = loader(trajfile) + t = loader(filename) for i in range(0, len(t), chunk): positions = t[i:i+chunk] for pos in positions: @@ -2423,18 +3569,18 @@ class MDDataAccess(object): if trajformat in ('.crd', '.mdcrd'): return else: - #if "pymolfile" in self.topocode: - # n_atoms_set=self.topohandler.n_atoms - #elif "mdtraj" in self.topocode: - # n_atoms_set=self.topohandler.n_atoms - #elif "mdanalysis" in self.topocode: - # n_atoms_set=len(self.topohandler.atoms) - #elif "ase" in self.topocode: - # if isinstance(self.topohandler, ase.Atoms): - # n_atoms_set=len(self.topohandler.get_positions()) - # else: n_atoms_set = None n_atoms_set = self.get_natoms_from_topo(self.topocode) + self.mddata_topology = None + if self.topology is not None: + self.mddata_topology = self.topology + if isinstance(self.topohandler, mdt_Topology): + self.topology = self.topohandler + else: + self.topology = mdt_Topology() + else: + self.topology = mdt_Topology() + self.topology._numAtoms = n_atoms_set try: with (lambda x: mdtraj_handler(x, n_atoms=n_atoms_set) if trajformat in ('.crd', '.mdcrd') @@ -2442,19 +3588,21 @@ class MDDataAccess(object): empty = False while not empty: if trajformat not in mdt_TOPOLOGY_EXTS: - data = f.read_as_traj(self.topohandler, n_frames=chunk) + data = f.read_as_traj(self.topology, n_frames=chunk) else: data = f.read_as_traj(n_frames=chunk) - if isinstance(data, tuple): - positions = data[0] - else: - positions = data - if len(positions) == 0: - empty = True - else: - for pos in positions: - yield pos + for pos in data.xyz: + if self.trajectory is not None: + self.trajectory.unitcell_vectors = data.unitcell_vectors + self.trajectory.unitcell_lengths = data.unitcell_lengths + self.trajectory.unitcell_angles = data.unitcell_angles + self.trajectory.time = data.time + yield pos + empty = True + if self.mddata_topology is not None: + self.topology = self.mddata_topology except IOError: + self.topology = self.mddata_topology return def custom_iread(self, file_handle, format): @@ -2558,6 +3706,8 @@ class MDDataAccess(object): interfacelist = parser_ui.fileDict[fileItem].fileInterface if 'topology' in parser_ui.fileDict[fileItem].infoPurpose: topoformat = self.topologyFileHandler(filename, fileformatlist, interfacelist) + #if 'forcefield' in parser_ui.fileDict[fileItem].infoPurpose: + # forcefieldformat = self.forcefieldFileHandler(filename, fileformatlist, interfacelist) if 'inputcoordinates' in parser_ui.fileDict[fileItem].infoPurpose: incoordformat = self.incoordFileHandler(filename, fileformatlist, interfacelist) if 'outputcoordinates' in parser_ui.fileDict[fileItem].infoPurpose: @@ -2566,8 +3716,6 @@ class MDDataAccess(object): trajformat = self.trajectoryFileHandler(filename, fileformatlist, interfacelist) if 'thermostats' in parser_ui.fileDict[fileItem].infoPurpose: thermoformat = self.thermoFileHandler(filename, fileformatlist, interfacelist) - if 'forcefield' in parser_ui.fileDict[fileItem].infoPurpose: - forcefieldformat = self.forcefieldFileHandler(filename, fileformatlist, interfacelist) if self.topohandler is not None: parser_ui.topology = self.set_TopologyData() if self.inputpositions is not None: @@ -2579,14 +3727,150 @@ class MDDataAccess(object): if self.thermohandler is not None: parser_ui.thermostats = self.set_ThermoData() parser_ui.thermoDict = parser_ui.thermostats.thermoDict - if self.forcefieldhandler is not None: - #self.set_ForceFieldData() - pass + #if self.forcefieldhandler is not None: + # #self.set_ForceFieldData() + # pass + + def initializeTopologyFileHandlers(self, parser_ui): + # Files will be loaded using their extensions initially. + # If this fails, the fileFormat lists will be used in loading process. + self.init_topo() + topoformat = None + self.topohandler = None + for fileItem in parser_ui.fileDict: + fileformatlist = None + interfacelist = None + if (parser_ui.fileDict[fileItem].fileSupplied and + parser_ui.fileDict[fileItem].activeInfo): + filename = parser_ui.fileDict[fileItem].fileName + fileformatlist = parser_ui.fileDict[fileItem].fileFormat + interfacelist = parser_ui.fileDict[fileItem].fileInterface + if 'topology' in parser_ui.fileDict[fileItem].infoPurpose: + if interfacelist is not None: + self.interfaceorder = interfacelist + for fileformat in fileformatlist: + self.set_topo(filename, fileformat) + if 'strDict' in parser_ui.fileDict[fileItem]: + if parser_ui.fileDict[fileItem].strDict: + if self.topostream is None: + self.topostream=parser_ui.fileDict[fileItem].strDict + else: + self.topostream.update(parser_ui.fileDict[fileItem].strDict) + self.load_topology() + if self.topohandler is not None: + parser_ui.topology = self.set_TopologyData() + + def initializeTrajectoryFileHandlers(self, parser_ui): + # Files will be loaded using their extensions initially. + # If this fails, the fileFormat lists will be used in loading process. + self.init_traj() + self.trajectory = None + trajformat = None + self.atompositions = None + for fileItem in parser_ui.fileDict: + fileformatlist = None + interfacelist = None + if (parser_ui.fileDict[fileItem].fileSupplied and + parser_ui.fileDict[fileItem].activeInfo): + filename = parser_ui.fileDict[fileItem].fileName + fileformatlist = parser_ui.fileDict[fileItem].fileFormat + interfacelist = parser_ui.fileDict[fileItem].fileInterface + if 'trajectory' in parser_ui.fileDict[fileItem].infoPurpose: + if interfacelist is not None: + self.interfaceorder = interfacelist + for fileformat in fileformatlist: + if self.atompositions is not None: + continue + self.set_traj(filename, fileformat) + if 'strDict' in parser_ui.fileDict[fileItem]: + if parser_ui.fileDict[fileItem].strDict: + self.trajstream.update(parser_ui.fileDict[fileItem].strDict) + traj_loaded = self.load() + self.atompositions = self.iread() + if self.atompositions is not None: + parser_ui.trajectory = self.set_TrajectoryData() + + def initializeOutputCoordinateFileHandlers(self, parser_ui): + # Files will be loaded using their extensions initially. + # If this fails, the fileFormat lists will be used in loading process. + self.init_outcoord() + trajformat = None + self.outputpositions = None + for fileItem in parser_ui.fileDict: + fileformatlist = None + interfacelist = None + if (parser_ui.fileDict[fileItem].fileSupplied and + parser_ui.fileDict[fileItem].activeInfo): + filename = parser_ui.fileDict[fileItem].fileName + fileformatlist = parser_ui.fileDict[fileItem].fileFormat + interfacelist = parser_ui.fileDict[fileItem].fileInterface + if 'outputcoordinates' in parser_ui.fileDict[fileItem].infoPurpose: + if interfacelist is not None: + self.interfaceorder = interfacelist + for fileformat in fileformatlist: + if self.outputpositions is not None: + continue + self.set_outcoord(filename, fileformat) + if 'strDict' in parser_ui.fileDict[fileItem]: + if parser_ui.fileDict[fileItem].strDict: + self.incoordstream.update(parser_ui.fileDict[fileItem].strDict) + outcoord_loaded = self.load_outcoord() + self.outputpositions = self.outcoord_iread() + if self.outputpositions is not None: + parser_ui.outputcoords = self.set_OutputData() + + def initializeInputCoordinateFileHandlers(self, parser_ui): + # Files will be loaded using their extensions initially. + # If this fails, the fileFormat lists will be used in loading process. + self.init_incoord() + trajformat = None + self.inputpositions = None + for fileItem in parser_ui.fileDict: + fileformatlist = None + interfacelist = None + if (parser_ui.fileDict[fileItem].fileSupplied and + parser_ui.fileDict[fileItem].activeInfo): + filename = parser_ui.fileDict[fileItem].fileName + fileformatlist = parser_ui.fileDict[fileItem].fileFormat + interfacelist = parser_ui.fileDict[fileItem].fileInterface + if 'inputcoordinates' in parser_ui.fileDict[fileItem].infoPurpose: + if interfacelist is not None: + self.interfaceorder = interfacelist + for fileformat in fileformatlist: + if self.inputpositions is not None: + continue + self.set_incoord(filename, fileformat) + if 'strDict' in parser_ui.fileDict[fileItem]: + if parser_ui.fileDict[fileItem].strDict: + if self.incoordstream is None: + self.incoordstream=parser_ui.fileDict[fileItem].strDict + else: + self.incoordstream.update(parser_ui.fileDict[fileItem].strDict) + incoord_loaded = self.load_incoord() + self.inputpositions = self.incoord_iread() + if self.inputpositions is not None: + parser_ui.inputcoords = self.set_InputData() def set_TopologyData(self): - self.topology = MDDataTopology() + if self.topology is None: + self.topology = MDDataTopology() + if self.topocharmmpsf: + self.topology.charmmpsf = True self.topology.filename = self.topofile self.topology.topoDict = {} + if("parmed" in self.topocode or + "charmmcoor" in self.topocode): + self.topology = pmdConvertTopoDict(self.topology, self.topohandler) + if((self.trajcode is not None or + self.incoordcode is not None or + self.outcoordcode is not None) and + self.topology is None): + if "charmmcoor" in self.trajcode: + self.topology = pmdConvertTopoDict(self.topology, self.trajhandler) + if "charmmcoor" in self.incoordcode: + self.topology = pmdConvertTopoDict(self.topology, self.incoordhandler) + if "charmmcoor" in self.outcoordcode: + self.topology = pmdConvertTopoDict(self.topology, self.outcoordhandler) if "pymolfile" in self.topocode: self.topology = pymConvertTopoDict(self.topology, self.topohandler) if(self.trajcode and not self.topology): @@ -2601,6 +3885,9 @@ class MDDataAccess(object): self.topology = mdtConvertTopoDict(self.topology, self.trajhandler.get_topology()) if "mdanalysis" in self.topocode: self.topology = mdaConvertTopoDict(self.topology, self.topohandler) + if(self.topology.n_atoms is None and + self.topology.topo_n_atoms is not None): + self.topology.n_atoms = self.topo_n_atoms return self.topology def trajgen(self): @@ -2641,7 +3928,7 @@ class MDDataAccess(object): if self.trajectory is None: self.trajectory = MDDataTrajectory() self.trajectory.filename = self.trajfile - #self.trajectory.nsteps = self.set_nsteps() + self.trajectory.nsteps = self.set_nsteps() self.trajectory.frame_no = -1 self.trajectory.trajDict = None self.trajectory.positions = self.trajgen @@ -2761,6 +4048,16 @@ class MDDataAccess(object): n_atoms = self.topohandler.topology.natoms elif "mdtraj" in topocode: n_atoms = self.topohandler.topology.natoms + elif "parmed" in topocode: + if isinstance(self.topohandler, pmd.charmm.CharmmParameterSet): + n_atoms = len([i for i in self.topohandler.atom_types]) + elif isinstance(self.topohandler, pmd.charmm.CharmmPsfFile): + self.topocharmmpsf = True + n_atoms = len(self.topohandler.atoms) + elif self.topology.n_atoms is not None: + n_atoms = self.topology.n_atoms + elif "charmmcoor" in topocode: + n_atoms = int(self.topology.charmmcoor['numatoms']) elif "mdanalysis" in topocode: n_atoms = len(self.topohandler.atoms) elif "ase" in topocode: @@ -2787,23 +4084,26 @@ class MDDataAccess(object): return n_atoms def set_nsteps(self): - if "pymolfile" in self.trajcode: - return len(self.trajhandler.nsteps) - if "mdtraj" in self.trajcode: - return len(self.trajhandler.trajectory) + #if "pymolfile" in self.trajcode: + # return len(self.trajhandler.nsteps) + #if "mdtraj" in self.trajcode: + # return len(self.trajhandler.trajectory) if "mdanalysis" in self.trajcode: - return len(self.trajhandler.n_steps) + return len(self.trajhandler.trajectory) if "ase" in self.trajcode: return len(self.trajhandler.trajectory) + return None if __name__ == "__main__": topo_file = sys.argv[1] - topo_format = sys.argv[2] + topo_form = sys.argv[2] traj_file = sys.argv[3] - traj_format = sys.argv[4] + traj_form = sys.argv[4] MDdata = MDDataAccess() MDdata.trajfile = traj_file + MDdata.trajformat = traj_form MDdata.topofile = topo_file + MDdata.topoformat = topo_form if 'None' in topo_format: pass else: @@ -2814,6 +4114,7 @@ if __name__ == "__main__": MDdata.trajformat = traj_format MDdata.interfaceorder = ["mdanalysis", "pymolfile", "mdtraj", "ase"] #MDdata.interfaceorder = ["pymolfile", "mdanalysis", "mdtraj", "ase"] + #MDdata.interfaceorder = ["charmmcoor","parmed","pymolfile", "mdanalysis", "mdtraj"] MDdata.interfacematch = False traj_iterator = MDdata.load() MDdata.trajchunk=300 diff --git a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py index a91655c330de0b837b52295ce85c1dd245da7b73..b6c641f697cc1dadaa5177627c065412a970ea15 100644 --- a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py +++ b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py @@ -159,6 +159,19 @@ class Container(object): else: self.accumulateValues(self, arg) + def reset(self, *args): + for arg in args: + if isinstance(arg, dict): + if arg["startSection"]: + if self.Name in arg["startSection"]: + self.resetValues(self, arg) + else: + if self.Containers: + for module in self.Containers: + module.reset(arg) + else: + self.resetValues(self, arg) + def updateBackend(self, backend, startsection=None, autoopenclose=False): if startsection: if self.Name == startsection: @@ -247,6 +260,16 @@ class Container(object): if "muteSections" in arg: if self.Name in arg["muteSections"]: self.Active = False + + def resetValues(self, *args): + for arg in args: + if isinstance(arg, dict): + if self.Storage: + self.resetAllValues() + if self.Containers: + for module in self.Containers: + module.resetValues(arg) + self.Active = False def checkUpdateValue(self, item, localdict): """ Updating value with the rules given in depends @@ -605,6 +628,13 @@ class Container(object): newValue = itemv["unitconverter"](self, itemv) self.Storage.__dict__[itemk["val"]] = newvalue + def resetAllValues(self): + for itemk in self.Storage.__dict__: + self.Storage.__dict__[itemk]["val"] = None + self.Storage.__dict__[itemk]["stor"] = False + self.Storage.__dict__[itemk]["act"] = False + self.Active = False + def __str__(self, caller=None, decorate='', color=None, printactive=None, onlynames=None): string = '' if onlynames is None: diff --git a/common/python/nomadcore/smart_parser/SmartParserCommon.py b/common/python/nomadcore/smart_parser/SmartParserCommon.py index 491801bd1eedead4a209c7ddf37e991ca452a6a9..584fa85eb935f77681465d97ae2515e40a06ce29 100644 --- a/common/python/nomadcore/smart_parser/SmartParserCommon.py +++ b/common/python/nomadcore/smart_parser/SmartParserCommon.py @@ -13,6 +13,7 @@ import numpy as np import json import os import re +import io @contextmanager def open_section(parser, name): @@ -117,11 +118,47 @@ def set_metaInfoEnv(parsertag, infoEnv, metaNameTag, newList, listTypStr, supraN return infoEnv +def namelist_matcher(cText, key, matchWith): + matchThisParsy = None + if 'EOL' in matchWith: + matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\")?" + "(?P<%s>.*)(?:'|\")?\s*,?" + % (cText, key)) + elif 'UD' in matchWith: + delimeter = matchWith.replace('UD', '') + matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\")?" + "(?P<%s>[\-+0-9.a-zA-Z:]+)" + "(?:'|\")?\s*[%s]" + % (cText, key, delimeter)) + elif 'BOL' in matchWith: + matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>.*)(?:'|\")?" + "\s*(?:%s)\s*" + % (key, cText)) + elif 'PW' in matchWith: + matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>[\-+0-9.a-zA-Z:]+)" + "(?:'|\")?\s*(?:%s)\s*" + % (key, cText)) + elif 'MW' in matchWith: + matchThisParsy = re.compile(r"(?P<%s>[\S]*(?=%s)[\S]*)" + % (key, cText)) + elif 'FD' in matchWith: + delimeter = matchWith.replace('FD', '') + matchThisParsy = re.compile(r"[%s]\s*(?:'|\")?" + "(?P<%s>[\-+0-9.a-zA-Z:]+)" + "(?:'|\")?\s*(?:%s)\s*" + % (delimeter, key, cText)) + else: # Default matchWith 'NW' + matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\"|{|\[|\()?" + "(?P<%s>[\S]+)" + "(?:'|\"|}|\]|\))?\s*" + % (cText, key)) + return matchThisParsy + class ParserBase(object): """Base class for SmartParsers""" def __init__(self,cachingLevelForMetaName=None, coverageIgnoreList=None, re_program_name=None, parsertag=None, metainfopath=None, - parserinfodef=None): + parserinfodef=None, recorderOn=False): self.PARSERTAG = parsertag self.metaStorage = mStore.Container('section_run') exclude_dict = { @@ -154,6 +191,9 @@ class ParserBase(object): self.strcleaner = strcleaner self.strisinstance = strisinstance self.literal_eval = literal_eval + self.recordList = None + if recorderOn: + self.recordList = io.StringIO() def parse(self): self.coverageIgnore = re.compile(r"^(?:" + r"|".join(self.coverageIgnoreList) + r")$") @@ -162,6 +202,31 @@ class ParserBase(object): parserInfo=self.parserInfo, cachingLevelForMetaName=self.cachingLevelForMetaName, superContext=self) + if self.recordList: + self.recordList.close() + + def peekline(self, parser): + pos = parser.fIn.fIn.tell() + line = parser.fIn.fIn.readline() + parser.fIn.fIn.seek(pos) + return line + + def peekrecord(self): + pos = self.recordList.tell() + line = self.recordList.readline() + self.recordList.seek(pos) + return line + + def topology_system_name(self, backend, gIndex): + """ Function to generate data for system_name + """ + system_name=None + supB = backend.superBackend + if self.topology: + topoDict = self.topology.topoDict + system_name = topoDict["system_name"] + if system_name is not None: + supB.addValue('system_name', str(system_name)) def topology_atom_type_and_interactions(self, backend, gIndex): """ Function to generate data for atom_to_molecule @@ -206,7 +271,8 @@ class ParserBase(object): # Atomic element/symbol supB.addValue(self.PARSERTAG + '_atom_element', elementList[elm]) # Atomic mass - supB.addValue('atom_type_mass', massesList[elm]) + if massesList[elm] is not None: + supB.addValue('atom_type_mass', massesList[elm]) if radiusList[elm]: # Atomic van der Waals radius supB.addValue(self.PARSERTAG + '_atom_radius', radiusList[elm]) @@ -282,17 +348,32 @@ class ParserBase(object): def dictionary_parser(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, parserOptions, - entryline=None, parserID=None, parsername=None): + entryline=None, parserID=None, parsername=None, globalDict=None, localDict=None, parent=None): + record = None + replay = None + replayCount = None + if parent is None: + parent = 0 + rank = parent + 1 + if globalDict is not None: + record = globalDict["record"] + replay = globalDict["replay"] + replayCount = globalDict["replayCount"] + if record is None: + record = False + if replayCount is None: + replayCount = 0 if entryline is None: lastLine = parser.fIn.fInLine else: lastLine = entryline - #print("PRINTING: dictionary_parser:",lastLine) parserDict = { "firstLine" : 0, "storedLines" : '', "numStoredLines" : 0, "parserID" : parserID, + "parent" : parent, + "rank" : rank } parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) parserSuccess = False @@ -310,7 +391,11 @@ class ParserBase(object): parsercntrlin = None lookupdict = None lookupvals = None + rnewline = False checkdict = {} + if "readline" in parserOptions.keys(): + if parserOptions["readline"] == True: + rnewline = True if "dictionary" in parserOptions.keys(): if isinstance(parserOptions["dictionary"], dict): dictionary = parserOptions["dictionary"] @@ -529,11 +614,34 @@ class ParserBase(object): if(mNameDict is not None and updateMatchDict): setattr(self, matchNameDict, mNameDict) #print("PRINTING: dict Md step:",self.MDcurrentstep) - return lastLine, parserSuccess + if rnewline is True: + # Playing from record? + if record and replayCount>0: + lastLine = self.recordList.readline() + else: + lastLine = parser.fIn.readline() + # If not replaying, are we recording? + if record: + self.recordList.write(lastLine) + return lastLine, parserSuccess, globalDict, localDict def readline_control_parser(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, parserOptions, entryline=None, - parserID=None, parsername=None): + parserID=None, parsername=None, globalDict=None, localDict=None, parent=None): + record = None + replay = None + replayCount = None + if parent is None: + parent = 0 + rank = parent + 1 + if globalDict is not None: + record = globalDict["record"] + replay = globalDict["replay"] + replayCount = globalDict["replayCount"] + if record is None: + record = False + if replayCount is None: + replayCount = 0 if entryline is None: lastLine = parser.fIn.fInLine else: @@ -543,8 +651,11 @@ class ParserBase(object): "storedLines" : '', "numStoredLines" : 0, "parserID" : parserID, + "parent" : parent, + "rank" : rank } parserSuccess = False + peeklineFirst = False waitatlineRe = None controlattr = None controlnextattr = None @@ -555,6 +666,12 @@ class ParserBase(object): controlcounter = 0 controldict = None lookupdict = None + stopOnMatchRe = None + quitOnMatchRe = None + if stopOnMatchStr is not None: + stopOnMatchRe = re.compile(stopOnMatchStr) + if quitOnMatchStr is not None: + quitOnMatchRe = re.compile(quitOnMatchStr) if "waitatlineStr" in parserOptions: if parserOptions["waitatlineStr"] is not None: waitatlineRe = re.compile(parserOptions["waitatlineStr"]) @@ -568,6 +685,8 @@ class ParserBase(object): controlnextattr = getattr(self,parserOptions["controlnextattr"]) if "controllast" in parserOptions: controllast = parserOptions["controllast"] + if "peeklineFirst" in parserOptions: + peeklineFirst = parserOptions["peeklineFirst"] if "controlin" in parserOptions: if lookupdict: if parserOptions["controlin"] in lookupdict: @@ -646,50 +765,66 @@ class ParserBase(object): if continuenewline: #print("PRINTING: readline_control lastLine:",lastLine) parserSuccess=True - lastLine = parser.fIn.readline() + peekedline=None + movenewline=True + if peeklineFirst is True: + if "peekedline" in localDict: + peekedline=localDict["peekedline"] + # Playing from record? + if record and replayCount>0: + lastLine = self.peekrecord() + else: + lastLine = self.peekline(parser) + if peekedline is None: + movenewline=False + localDict.update({"peekedline" : lastLine}) + #print("PRINTING: 1 peeklineFirst: ",globalDict["peekedline"],movenewline) + else: + if peekedline == lastLine: + movenewline=True + #print("PRINTING: 2 peeklineFirst: ",peekedline,movenewline) + if movenewline is True: + if stopOnMatchStr is not None: + if stopOnMatchRe.findall(lastLine): + #print("PRINTING: movenewline matchRe:",rank,stopOnMatchRe.findall(lastLine),lastLine) + movenewline = False + if quitOnMatchRe is not None: + if quitOnMatchRe.findall(lastLine): + #print("PRINTING: check_subparser Rank QuitMatched:",quitOnMatchRe.findall(lastLine),lastLine) + movenewline = False + if movenewline is True: + # Playing from record? + if record and replayCount>0: + lastLine = self.recordList.readline() + else: + lastLine = parser.fIn.readline() + # If not replaying, are we recording? + if record: + self.recordList.write(lastLine) + localDict.update({"peekedline" : lastLine}) else: - lastLine = self.peekline(parser) + # Playing from record? + if record and replayCount>0: + lastLine = self.peekrecord() + else: + lastLine = self.peekline(parser) + # No need to record peeklines + localDict.update({"peekedline" : lastLine}) parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) if controldict: setattr(self,parserOptions["controldict"],controldict) - return lastLine, parserSuccess - - -# stopOnMatch = False -# if(stopOnMatchStr is None or "AlwaysStop" in stopOnMatchStr): -# stopOnMatch = True -# else: -# stopOnMatchRe = re.compile(stopOnMatchStr) -# if stopOnMatchRe.findall(lastLine): -# stopOnMatch = True -# if self.firstLine==0: -# if stopOnFirstLine: -# stopOnMatch = True -# else: -# stopOnMatch = False -# if(quitOnMatchStr is None or "AlwaysStop" in quitOnMatchStr): -# stopOnMatch = True -# else: -# quitOnMatchRe = re.compile(quitOnMatchStr) -# if quitOnMatchRe.findall(lastLine): -# stopOnMatch = True -# if stopOnMatch is False: -# while True: -# lastLine = self.peekline(parser) -# parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) -# if not lastLine: -# break -# else: -# if stopOnMatchRe.findall(lastLine): -# stopOnMatch = True -# break -# else: -# lastLine = parser.fIn.readline() + return lastLine, parserSuccess, globalDict, localDict def section_control_parser(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, parserOptions, entryline=None, - parserID=None, parsername=None): + parserID=None, parsername=None, globalDict=None, localDict=None, parent=None): + if parent is None: + parent = 0 + rank = parent + 1 + if globalDict is not None: + record = globalDict["record"] + replayCount = globalDict["replayCount"] if entryline is None: lastLine = parser.fIn.fInLine else: @@ -699,12 +834,13 @@ class ParserBase(object): "storedLines" : '', "numStoredLines" : 0, "parserID" : parserID, + "parent" : parent, + "rank" : rank } if parsername is None: parsername = "section_control_parser_"+str(parserID) sectionopenname = parsername + "_open" sectionclosename = parsername + "_close" - #print("PRINTING: section_control "+parsername) parserSuccess = False backend = parser.backend sectionname = None @@ -777,18 +913,13 @@ class ParserBase(object): if sectionname: activate = False - #print("PRINTING: sectionname activatesection:",sectionname,activatesection) if activatesection is not None: if sectionname in activatesection: activate = activatesection[sectionname] if activate: - #print("PRINTING: activate:",activate) - #print("PRINTING: open record dict:",sectionopenrecord) - #print("PRINTING: close record dict:",sectioncloserecord) if(sectionopen is not None and sectionopenattr is not None and sectionopenrecord is not None): - #print("PRINTING: section open:",str(sectionopenattr)) if str(sectionopenattr) in sectionopenrecord: if sectionopenrecord[str(sectionopenattr)] is False: parserSuccess = True @@ -820,13 +951,13 @@ class ParserBase(object): activatesection[sectionname] = False lookupdict.update({parserOptions["activatesection"] : activatesection}) setattr(self,parserOptions["lookupdict"],lookupdict) - #print("PRINTING: Success sectionname activatesection:",sectionname,activatesection) - return lastLine, parserSuccess + return lastLine, parserSuccess, globalDict, localDict def table_store(self, parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, matchNameList, matchNameDict, updateMatchDict, - onlyCaseSensitive, stopOnFirstLine, parserDict, parserOptions): + onlyCaseSensitive, stopOnFirstLine, parserDict, parserOptions, + globalDict=None, localDict=None): """ header = False if True, the header titles will be read wrap = False if True, table titles and values are assumed to be wrapped @@ -836,6 +967,19 @@ class ParserBase(object): headerList = None if header is not True, values will be assigned to titles in the headerList in order as seen in line """ + record = None + replay = None + replayCount = None + if globalDict is not None: + record = globalDict["record"] + replay = globalDict["replay"] + replayCount = globalDict["replayCount"] + if record is None: + record = False + if replayCount is None: + replayCount = 0 + if onlyCaseSensitive is None: + onlyCaseSensitive = True #print("PRINTING: table_store lastLine:",lastLine) tableStartRe = None if "tablestartsat" in parserOptions: @@ -858,7 +1002,7 @@ class ParserBase(object): if tableEndRe.findall(lastLine): stopOnMatch = True if stopOnMatch: - return True, parserDict + return True, parserDict, globalDict, localDict else: # Check parser options to determine # the table properties @@ -875,7 +1019,11 @@ class ParserBase(object): parserOptions["header"] is not False) else False if "headersave" in parserOptions.keys(): if parserOptions["headersave"]: - headersave = getattr(self, parserOptions["headersave"]) + try: + headersave = getattr(self, parserOptions["headersave"]) + except(AttributeError): + headersave = {} + setattr(self, parserOptions["headersave"], headersave) else: headersave = None if "headerList" in parserOptions.keys(): @@ -964,6 +1112,9 @@ class ParserBase(object): # Search for dictionary keys if vlist: for cName, key in getDict_MetaStrInDict(mNameDict).items(): + if onlyCaseSensitive is not True: + cName=cName.upper() + hlist=[h.upper() for h in hlist] if cName in hlist: v=vlist[hlist.index(cName)] #print("PRINTING table_parser save:",cName,v) @@ -1037,11 +1188,25 @@ class ParserBase(object): setattr(self, matchNameDict, mNameDict) if(headersave is not None and anythingmatched is True): setattr(self, parserOptions["headersave"], headersave) - return False, parserDict + return False, parserDict, globalDict, localDict def table_parser(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, parserOptions, - entryline=None, parserID=None, parsername=None): + entryline=None, parserID=None, parsername=None, globalDict=None, localDict=None, parent=None): + record = None + replay = None + replayCount = None + if parent is None: + parent = 0 + rank = parent + 1 + if globalDict is not None: + record = globalDict["record"] + replay = globalDict["replay"] + replayCount = globalDict["replayCount"] + if record is None: + record = False + if replayCount is None: + replayCount = 0 if entryline is None: lastLine = parser.fIn.fInLine else: @@ -1053,8 +1218,9 @@ class ParserBase(object): "numStoredLines" : 0, "parserID" : parserID, "parserSuccess" : False, + "parent" : parent, + "rank" : rank } - #print("PRINTING: table_parser:",parsername,lastLine) updateLastLine = False parsercontinue = True parsercntrlattr = None @@ -1092,39 +1258,69 @@ class ParserBase(object): quitOnMatchRe = None if quitOnMatchStr is not None: quitOnMatchRe = re.compile(quitOnMatchStr) - rtn, parserDict = self.table_store(parser, lastLine, + rtn, parserDict, globalDict, localDict = self.table_store(parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, - parserDict, parserOptions) + parserDict, parserOptions, globalDict, localDict) if rtn is not True: while True: - lastLine = self.peekline(parser) + if record and replayCount>0: + lastLine = self.peekrecord() + else: + lastLine = self.peekline(parser) parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) if not lastLine: break else: - # Matched with stopOnMatch. Discarding the line and return SimpleMatcher context. - # Can this line be discarded since it is the end of line for input control - # variables or end of namelist ? - rtn, parserDict = self.table_store(parser, lastLine, + # Matched with stopOnMatch. Discarding the line and + # return SimpleMatcher context. Can this line be + # discarded since it is the end of line for input + # control variables or end of namelist ? + rtn, parserDict, globalDict, localDict = self.table_store(parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, - parserDict, parserOptions) + parserDict, parserOptions, globalDict, localDict) if rtn: if updateLastLine: - lastLine = parser.fIn.readline() + # Playing from record? + if record and replayCount>0: + lastLine = self.recordList.readline() + else: + lastLine = parser.fIn.readline() + # If not replaying, are we recording? + if record: + self.recordList.write(lastLine) break else: - lastLine = parser.fIn.readline() - return lastLine, parserDict["parserSuccess"] + # Playing from record? + if record and replayCount>0: + lastLine = self.recordList.readline() + else: + lastLine = parser.fIn.readline() + # If not replaying, are we recording? + if record: + self.recordList.write(lastLine) + return lastLine, parserDict["parserSuccess"], globalDict, localDict def namelist_store(self, parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, matchNameList, matchNameDict, updateMatchDict, - onlyCaseSensitive, stopOnFirstLine, parserDict, parserOptions): + onlyCaseSensitive, stopOnFirstLine, parserDict, parserOptions, + globalDict=None, localDict=None): + record = None + replay = None + replayCount = None + if globalDict is not None: + record = globalDict["record"] + replay = globalDict["replay"] + replayCount = globalDict["replayCount"] + if record is None: + record = False + if replayCount is None: + replayCount = 0 stopOnMatch = False if stopOnMatchRe.findall(lastLine): stopOnMatch = True @@ -1137,7 +1333,7 @@ class ParserBase(object): if quitOnMatchRe.findall(lastLine): stopOnMatch = True if stopOnMatch: - return True, parserDict + return True, parserDict, globalDict, localDict else: # If there is at least one namelist in the line, # search for all others in the dictionary. @@ -1148,11 +1344,38 @@ class ParserBase(object): mNameDict = getattr(self, matchNameDict) for cName, key in getDict_MetaStrInDict(mNameDict).items(): if onlyCaseSensitive is not True: + c2Name = cName.upper() + c3Name = cName.lower() cText = "\s*%s|^%s|,%s" % (cName, cName, cName) - cText = cText + "|\s*%s|^%s|,%s" % (cName.upper(), cName.upper(), cName.upper()) - cText = cText + "|\s*%s|^%s|,%s" % (cName.lower(), cName.lower(), cName.lower()) + cText = cText + "|\s*%s|^%s|,%s" % (c2Name, c2Name, c2Name) + cText = cText + "|\s*%s|^%s|,%s" % (c3Name, c3Name, c3Name) else: cText = "\s*%s|^%s|,%s" % (cName, cName, cName) + if mNameDict[key].matchFirst: + matchFirst = int(mNameDict[key].matchFirst) + for numletters in range(matchFirst,len(cName)): + n1Name = cName[0:numletters] + n2Name = n1Name.upper() + n3Name = n1Name.lower() + cText = cText + "|\s*%s|^%s|,%s" % (n1Name, n1Name, n1Name) + cText = cText + "|\s*%s|^%s|,%s" % (n2Name, n2Name, n2Name) + cText = cText + "|\s*%s|^%s|,%s" % (n3Name, n3Name, n3Name) + if mNameDict[key].appendMatchesList: + appendMatchesList = mNameDict[key].appendMatchesList + else: + appendMatchesList = None + if mNameDict[key].appendMatchesUntil: + appendMatchesUntil = mNameDict[key].appendMatchesUntil + else: + appendMatchesUntil = None + if mNameDict[key].appendMaxLines: + appendMaxLines = mNameDict[key].appendMaxLines + else: + appendMaxLines = None + if mNameDict[key].matchAlso: + matchAlso = mNameDict[key].matchAlso + else: + matchAlso = None if mNameDict[key].matchWith: matchWith = mNameDict[key].matchWith else: @@ -1176,37 +1399,11 @@ class ParserBase(object): appendToList = getattr(self,mNameDict[key].appendToList) else: appendToList = None - if 'EOL' in matchWith: - matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\")?" - "(?P<%s>.*)(?:'|\")?\s*,?" - % (cText, key)) - elif 'UD' in matchWith: - delimeter = matchWith.replace('UD', '') - matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\")?" - "(?P<%s>[\-+0-9.a-zA-Z:]+)" - "(?:'|\")?\s*[%s]" - % (cText, key, delimeter)) - elif 'BOL' in matchWith: - matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>.*)(?:'|\")?" - "\s*(?:%s)\s*" - % (key, cText)) - elif 'PW' in matchWith: - matchThisParsy = re.compile(r"(?:'|\")?(?P<%s>[\-+0-9.a-zA-Z:]+)" - "(?:'|\")?\s*(?:%s)\s*" - % (key, cText)) - elif 'FD' in matchWith: - delimeter = matchWith.replace('FD', '') - matchThisParsy = re.compile(r"[%s]\s*(?:'|\")?" - "(?P<%s>[\-+0-9.a-zA-Z:]+)" - "(?:'|\")?\s*(?:%s)\s*" - % (delimeter, key, cText)) - else: # Default matchWith 'NW' - matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\"|{|\[|\()?" - "(?P<%s>[\S]+)" - "(?:'|\"|}|\]|\))?\s*" - % (cText, key)) - #"(?P<%s>[\-+0-9.a-zA-Z:_/;]+)" + matchThisParsy = namelist_matcher(cText, key, matchWith) reDict={key:value for value in matchThisParsy.findall(lastLine)} + if matchAlso: + if matchAlso not in lastLine: + reDict=None if reDict: for k,v in reDict.items(): if k == key: @@ -1255,7 +1452,7 @@ class ParserBase(object): for sec in cntrlsec: secDict.update({sec:True}) else: - addname = "table_store" + addname = "namelist_store" if parserDict["parserID"] is not None: addname = addname + str(parserDict["parserID"]) secDict.update({addname:True}) @@ -1269,19 +1466,35 @@ class ParserBase(object): for sec in cntrlsec: secDict.update({sec:True}) else: - addname = "table_store" + addname = "namelist_store" if parserDict["parserID"] is not None: addname = addname + str(parserDict["parserID"]) secDict.update({addname:True}) setattr(self,parserOptions["controlsave"],secDict) + if "controldict" in parserOptions: + setattr(self,parserOptions["controldict"],cntrlDict) if(mNameDict is not None and updateMatchDict): setattr(self, matchNameDict, mNameDict) - return False, parserDict + return False, parserDict, globalDict, localDict def namelist_parser(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, parserOptions, - entryline=None, parserID=None, parsername=None): + entryline=None, parserID=None, parsername=None, globalDict=None, localDict=None, parent=None): + record = None + replay = None + replayCount = None + if parent is None: + parent = 0 + rank = parent + 1 + if globalDict is not None: + record = globalDict["record"] + replay = globalDict["replay"] + replayCount = globalDict["replayCount"] + if record is None: + record = False + if replayCount is None: + replayCount = 0 if entryline is None: lastLine = parser.fIn.fInLine else: @@ -1292,26 +1505,41 @@ class ParserBase(object): "storedLines" : lastLine, "parserID" : parserID, "parserSuccess" : False, + "parent" : parent, + "rank" : rank } self.firstLine = 0 updateLastLine = False + resetMatchNameDict = False # Check the captured line has Fortran namelist variables and store them. # Continue search and store until the line matches with stopOnMatch. if "movetostopline" in parserOptions: updateLastLine = parserOptions["movetostopline"] + if "resetNameDict" in parserOptions: + resetMatchNameDict = parserOptions["resetNameDict"] + if resetMatchNameDict is True: + mNameDict = getattr(self, matchNameDict) + if mNameDict is not None: + for key, value in mNameDict.items(): + mNameDict[key].value=None + mNameDict[key].activeInfo=False + setattr(self, matchNameDict, mNameDict) stopOnMatchRe = re.compile(stopOnMatchStr) quitOnMatchRe = None if quitOnMatchStr is not None: quitOnMatchRe = re.compile(quitOnMatchStr) - rtn, parserDict = self.namelist_store(parser, lastLine, + rtn, parserDict, globalDict, localDict = self.namelist_store(parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, - parserDict, parserOptions) + parserDict, parserOptions, globalDict, localDict) if rtn is not True: while True: - lastLine = self.peekline(parser) + if record and replayCount>0: + lastLine = self.peekrecord() + else: + lastLine = self.peekline(parser) self.firstLine += 1 if not lastLine: break @@ -1319,19 +1547,33 @@ class ParserBase(object): # Matched with stopOnMatch. Discarding the line and return SimpleMatcher context. # Can this line be discarded since it is the end of line for input control # variables or end of namelist ? - rtn, parserDict = self.namelist_store(parser, lastLine, + rtn, parserDict, globalDict, localDict = self.namelist_store(parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, - parserDict, parserOptions) + parserDict, parserOptions, globalDict, localDict) if rtn: if updateLastLine: - lastLine = parser.fIn.readline() + # Playing from record? + if record and replayCount>0: + lastLine = self.recordList.readline() + else: + lastLine = parser.fIn.readline() + # If not replaying, are we recording? + if record: + self.recordList.write(lastLine) break else: - lastLine = parser.fIn.readline() - return lastLine, parserDict["parserSuccess"] + # Playing from record? + if record and replayCount>0: + lastLine = self.recordList.readline() + else: + lastLine = parser.fIn.readline() + # If not replaying, are we recording? + if record: + self.recordList.write(lastLine) + return lastLine, parserDict["parserSuccess"], globalDict, localDict def check_namelist_store(self, parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, matchNameList, matchNameDict, onlyCaseSensitive, stopOnFirstLine, parserDict): @@ -1356,12 +1598,15 @@ class ParserBase(object): lastLine = ' = '.join([ "%s" % str(line) for line in zip(lastLine, newLine)]) for cName, key in getDict_MetaStrInDict(matchNameDict).items(): reDict={key:value for value in - re.compile(r"(?:\s%s|^%s|,%s)\s*=\s*(?:'|\")?(?P<%s>[\-+0-9.a-zA-Z:]+)(?:'|\")?\s*,?" + re.compile(r"(?:\s%s|^%s|,%s)\s*=\s*(?:'|\")?" + "(?P<%s>[\-+0-9.a-zA-Z:]+)(?:'|\")?\s*,?" % (cName, cName, cName, key)).findall(lastLine)} if onlyCaseSensitive is not True: reDict.update({key:value for value in - re.compile(r"(?:\s%s|^%s|,%s)\s*=\s*(?:'|\")?(?P<%s>[\-+0-9.a-zA-Z:]+)(?:'|\")?\s*,?" - % (cName.upper(), cName.upper(), cName.upper(), key)).findall(lastLine)}) + re.compile(r"(?:\s%s|^%s|,%s)\s*=\s*(?:'|\")?" + "(?P<%s>[\-+0-9.a-zA-Z:]+)(?:'|\")?\s*,?" + % (cName.upper(), cName.upper(), + cName.upper(), key)).findall(lastLine)}) if reDict: for k,v in reDict.items(): if k == key: @@ -1485,6 +1730,46 @@ class ParserBase(object): else: lastLine = parser.fIn.readline() + def adHoc_find_textfile_store(self, parser, + commandLineMatchStr, commandLineFunction): + lastLine = parser.fIn.fInLine + parserDict = { + "firstLine" : 0, + "storedLines" : lastLine, + } + parserDict["firstLine"] = 0 + # Check the captured line has Fortran namelist variables and store them. + # Continue search and store until the line matches with stopOnMatch. + stopOnMatchRe = re.compile(stopOnMatchStr) + commandLineMatchRe = re.compile(commandLineMatchStr) + quitOnMatchRe = None + if quitOnMatchStr is not None: + quitOnMatchRe = re.compile(quitOnMatchStr) + if self.check_commandline_args(parser, lastLine, + stopOnMatchRe, quitOnMatchRe, + metaNameStart, matchNameList, + matchNameDict, onlyCaseSensitive, + stopOnFirstLine, parserDict, + commandLineMatchRe, commandLineFunction) is not True: + while True: + lastLine = self.peekline(parser) + parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) + if not lastLine: + break + else: + # Matched with stopOnMatch. Discarding the line and return SimpleMatcher context. + # Can this line be discarded since it is the end of line for input control + # variables or end of namelist ? + if self.check_commandline_args(parser, lastLine, + stopOnMatchRe, quitOnMatchRe, + metaNameStart, matchNameList, + matchNameDict, onlyCaseSensitive, + stopOnFirstLine, parserDict, + commandLineMatchRe, commandLineFunction): + break + else: + lastLine = parser.fIn.readline() + def check_text_store(self, parser, lastLine, stopOnMatchRe, quitOnMatchRe, metaNameStart, metaNameStore, matchNameList, matchNameDict, onlyCaseSensitive, stopOnFirstLine, storeFirstLine, storeStopQuitLine, parserDict): @@ -1585,23 +1870,27 @@ class ParserBase(object): onQuitRunFunction(parser) def check_subparsers(self, parser, lastLine, stopOnMatchRe, quitOnMatchRe, subParsers, - ordered, parserDict, onStartRunFunction, onQuitRunFunction, onlySubParsersReadLine, - maxcycle=None): + ordered, parserDict, secDict, onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, maxcycle=None, parent=None): if maxcycle is None: - maxcycle=50000 + maxcycle=1000000 cyclenum = 0 + if parent is None: + parent = 0 + rank = parent + waitFirstCycle = False + if "waitFirstCycle" in parserDict: + waitFirstCycle = parserDict["waitFirstCycle"] + if "peekedline" in secDict: + secDict.update({"peekedline":None}) while True: stopOnMatch = False - if stopOnMatchRe.findall(lastLine): - stopOnMatch = True - if self.firstLine==0: - if stopOnFirstLine: - stopOnMatch = True - else: - stopOnMatch = False - if quitOnMatchRe is not None: - if quitOnMatchRe.findall(lastLine): + if waitFirstCycle is not True: + if stopOnMatchRe.findall(lastLine): stopOnMatch = True + if quitOnMatchRe is not None: + if quitOnMatchRe.findall(lastLine): + stopOnMatch = True if subParsers: if ordered: for parser_id in range(len(subParsers)): @@ -1634,7 +1923,9 @@ class ParserBase(object): if "parsername" in subParsers[parser_id]: parserName=subParsers[parser_id]["parsername"] currentParser = getattr(self, subParsers[parser_id]["parser"]) - lastLine, parserSuccess = currentParser(parser, + parserDict.update({"matchStr":subParsers[parser_id]["startReStr"]}) + parserDict.update({"matchLine":lastLine}) + lastLine, parserSuccess, parserDict, secDict = currentParser(parser, subParsers[parser_id]["stopOnMatchStr"], subParsers[parser_id]["quitOnMatchStr"], subParsers[parser_id]["metaNameStart"], @@ -1645,7 +1936,10 @@ class ParserBase(object): subParsers[parser_id]["stopOnFirstLine"], subParsers[parser_id]["parserOptions"], entryline=lastLine, parserID=parser_id, - parsername=parserName + parsername=parserName, + globalDict=parserDict, + localDict=secDict, + parent=rank ) parserDict["parserCheckList"][parser_id] = True if "parsername" in subParsers[parser_id]: @@ -1685,7 +1979,9 @@ class ParserBase(object): parserName = None if "parsername" in subParser: parserName=subParser["parsername"] - lastLine, parserSuccess = currentParser(parser, + parserDict.update({"matchStr":subParser["startReStr"]}) + parserDict.update({"matchLine":lastLine}) + lastLine, parserSuccess, parserDict, secDict = currentParser(parser, subParser["stopOnMatchStr"], subParser["quitOnMatchStr"], subParser["metaNameStart"], @@ -1696,7 +1992,10 @@ class ParserBase(object): subParser["stopOnFirstLine"], subParser["parserOptions"], entryline=lastLine, parserID=parser_num, - parsername=parserName + parsername=parserName, + globalDict=parserDict, + localDict=secDict, + parent=rank ) if "parsername" in subParser: parserDict["parserNameList"].update({ @@ -1705,6 +2004,14 @@ class ParserBase(object): parser_num += 1 cyclenum += 1 + if waitFirstCycle is True: + if stopOnMatchRe.findall(lastLine): + #print("PRINTING: check_subparser Rank waitFirstCycle StopMatched:",rank,stopOnMatchRe.findall(lastLine),lastLine) + stopOnMatch = True + if quitOnMatchRe is not None: + if quitOnMatchRe.findall(lastLine): + #print("PRINTING: check_subparser Rank QuitMatched:",quitOnMatchRe.findall(lastLine),lastLine) + stopOnMatch = True if(onlySubParsersReadLine is None or onlySubParsersReadLine is False): break @@ -1718,33 +2025,288 @@ class ParserBase(object): break if stopOnMatch: - return True, lastLine, parserDict + return True, lastLine, parserDict, secDict + else: + return False, lastLine, parserDict, secDict + + def subparser_caller(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, + matchNameList, matchNameDict, updateMatchDict, onlyCaseSensitive, stopOnFirstLine, + parserOptions, entryline=None, parserID=None, parsername=None, globalDict=None, + localDict=None, parent=None): + record = None + replay = None + replayCount = None + if parent is None: + parent = 0 + rank = parent + 1 + if globalDict is not None: + record = globalDict["record"] + replay = globalDict["replay"] + replayCount = globalDict["replayCount"] + if record is None: + record = False + if replayCount is None: + replayCount = 0 + if entryline is None: + lastLine = parser.fIn.fInLine else: - return False, lastLine, parserDict + lastLine = entryline + #print("PRINTING: subparser_caller,rank:",rank,lastLine) + parserDict = { + "firstLine" : 0, + "storedLines" : '', + "numStoredLines" : 0, + "parserID" : parserID, + "parent" : parent, + "rank" : rank + } + #parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) + stopOnMatchRe = None + quitOnMatchRe = None + if stopOnMatchStr is not None: + stopOnMatchRe = re.compile(stopOnMatchStr) + if quitOnMatchStr is not None: + quitOnMatchRe = re.compile(quitOnMatchStr) + parserSuccess = False + ordered=False + onStartRunFunction=None + onQuitRunFunction=None + onlySubParsersReadLine=False + lookupdict = None + lookupvals = None + subparserMapper = None + subparserSection = None + sectionControlAttr = None + matchFirstLetters = None + sectionOnDict = {} + if globalDict is not None: + if "sectionOnDict" in globalDict: + sectionOnDict = globalDict["sectionOnDict"] + if "onStartRunFunction" in parserOptions: + onStartRunFunction = parserOptions["onStartRunFunction"] + if "onQuitRunFunction" in parserOptions: + onQuitRunFunction = parserOptions["onQuitRunFunction"] + if "ordered" in parserOptions: + ordered = parserOptions["ordered"] + if "onlySubParsersReadLine" in parserOptions: + onlySubParsersReadLine = parserOptions["onlySubParsersReadLine"] + if "subparserMapper" in parserOptions: + subparserMapper = parserOptions["subparserMapper"] + if "subparserSection" in parserOptions: + subparserSection = parserOptions["subparserSection"] + if "matchFirstLetters" in parserOptions: + matchFirstLetters = parserOptions["matchFirstLetters"] + if "waitFirstCycle" in parserOptions: + if globalDict is not None: + globalDict.update({"waitFirstCycle":parserOptions["waitFirstCycle"]}) + + if onStartRunFunction is not None: + onStartRunFunction(parser) + + sO = open_section + #supB = parser.backend.superBackend + supB = parser.backend + sectionAct = None + if subparserMapper is not None: + for matchStr, listname in subparserMapper.items(): + cText = matchStr + if onlyCaseSensitive is not True: + c2Name = matchStr.upper() + c3Name = matchStr.lower() + cText = "%s|%s|%s" % (matchStr, c2Name, c3Name) + if matchFirstLetters is not None: + matchFirst = int(matchFirstLetters) + for numletters in range(matchFirst,len(matchStr)): + n1Name = cName[0:numletters] + n2Name = n1Name.upper() + n3Name = n1Name.lower() + cText = cText + "|%s|%s|%s" % (n1Name, n2Name, n3Name) + matchRe = re.compile(cText) + if matchRe.findall(lastLine): + subParsers = getattr(self,listname) + parserSuccess = True + if subparserSection is not None: + if matchStr in subparserSection: + sectionAct = subparserSection[matchStr] + if sectionAct is not None: + sectionOpenClose = sectionAct[1] + if not isinstance(sectionOpenClose, str): + sectionOpenClose = '' + sectionOn = getattr(self,sectionAct[2]) + sectionGIndex = getattr(self,sectionAct[3]) + if sectionAct[0] not in sectionOnDict: + sectionMatchCount = 0 + sectionOnDict.update({sectionAct[0]:[sectionOn,sectionMatchCount]}) + else: + sectionMatchCount = sectionOnDict[sectionAct[0]][1] + 1 + sectionOnDict.update({ + sectionAct[0]:[sectionOnDict[sectionAct[0]][0],sectionMatchCount]}) + #print("PRINTING: subparser_caller, section Count:",sectionMatchCount) + #print("PRINTING: subparser_caller, section On:",sectionOn) + #print("PRINTING: subparser_caller, section First On:",sectionOnDict[sectionAct[0]][0]) + if sectionOn is False: + if "AUTO" in sectionOpenClose.upper(): + if sectionOnDict[sectionAct[0]][0] is False: + with sO(supB, sectionAct[0]): + rtn, lastLine, globalDict, parserDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, globalDict, parserDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + else: + sectionGIndex = supB.openSection(sectionAct[0]) + setattr(self,sectionAct[3], sectionGIndex) + rtn, lastLine, globalDict, parserDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, globalDict, parserDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + elif "OPEN" in sectionOpenClose.upper(): + sectionGIndex = supB.openSection(sectionAct[0]) + setattr(self,sectionAct[3], sectionGIndex) + rtn, lastLine, globalDict, parserDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, globalDict, parserDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + else: + rtn, lastLine, globalDict, parserDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, globalDict, parserDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + else: + if "AUTO" in sectionOpenClose.upper(): + if sectionMatchCount>0: + if sectionOnDict[sectionAct[0]][0] is True: + supB.closeSection(sectionAct[0], sectionGIndex) + sectionGIndex = supB.openSection(sectionAct[0]) + setattr(self,sectionAct[3], sectionGIndex) + rtn, lastLine, globalDict, parserDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, globalDict, parserDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + if "CLOSE" in sectionOpenClose.upper(): + supB.closeSection(sectionAct[0], sectionGIndex) + if "AUTO" in sectionOpenClose.upper(): + if sectionOnDict[sectionAct[0]][0] is False: + supB.closeSection(sectionAct[0], sectionGIndex) + + # Infuse the correct gIndex to SimpleMatcher parsers to prevent + # them fail before leaving the SmartParser. + # Check if GIndex changed so far for the selected section + # and update it if there is an open section left. + for SMid, SMcontext in enumerate(self.parser.context): + if sectionAct[0] in SMcontext.sections: + self.parser.context[SMid].sections[sectionAct[0]]=sectionGIndex + else: + rtn, lastLine, globalDict, parserDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, globalDict, parserDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) + if rtn: + break + else: + if onlySubParsersReadLine is False: + if record and replayCount>0: + lastLine = self.recordList.readline() + else: + lastLine = parser.fIn.readline() + if record: + self.recordList.write(lastLine) + +# stopOnMatch = False +# if stopOnMatchRe.findall(lastLine): +# stopOnMatch = True +# if parserDict["firstLine"] == 0: +# if stopOnFirstLine: +# stopOnMatch = True +# else: +# stopOnMatch = False +# if quitOnMatchRe is not None: +# if quitOnMatchRe.findall(lastLine): +# stopOnMatch = True + + if globalDict is not None: + globalDict.update({"sectionOnDict":sectionOnDict}) + - def adHoc_takingover_parsing(self, parser, stopOnMatchStr, quitOnMatchStr, subParsers=None, - ordered=None, onStartRunFunction=None, onQuitRunFunction=None, onlySubParsersReadLine=False): + if onQuitRunFunction is not None: + onQuitRunFunction(parser) + + return lastLine, parserSuccess, globalDict, localDict + + def adHoc_takingover_parsing(self, parser, stopOnMatchStr, quitOnMatchStr, stopControl=None, + record=False, replay=None, parseOnlyRecorded=False, subParsers=None, ordered=None, + onStartRunFunction=None, onQuitRunFunction=None, onlySubParsersReadLine=False, + onStartReplayRunFunction=None, onQuitReplayRunFunction=None, parent=None): if onStartRunFunction is not None: onStartRunFunction(parser) if ordered is None: ordered = False + if stopControl is None: + stopCntrl = True + else: + try: + stopCntrl = getattr(self,stopControl) + except(TypeError,ValueError,AttributeError): + stopCntrl = True + if record is not None or record is not False: + record = True + if replay is not None: + try: + replay = int(replay) + except(ValueError,TypeError): + replay = None lastLine = parser.fIn.fInLine + if parent is None: + parent = 0 + rank = parent parserDict = { "firstLine" : 0, "parserCheckList" : [False for i in range(len(subParsers))], "parserNameList" : {}, + "stopControl" : stopCntrl, + "record" : record, + "replay" : replay, + "replayCount" : 0, + "parent" : parent } + secDict = {} + secDict.update(parserDict) + # If true, we are recording the output in + # every accurance of parser.fIn.readline + #if record: + # self.recordList.write(lastLine) # parse lines until stopOnMatchStr or quitOnMatchStr is encountered. - stopOnMatchRe = re.compile(stopOnMatchStr) + if onStartRunFunction is not None: + onStartRunFunction(parser) + stopOnMatchRe = None + if stopOnMatchStr is not None: + stopOnMatchRe = re.compile(stopOnMatchStr) quitOnMatchRe = None if quitOnMatchStr is not None: quitOnMatchRe = re.compile(quitOnMatchStr) - # Check first line to store and stop/quit - rtn, lastLine, parserDict = self.check_subparsers(parser, lastLine, - stopOnMatchRe, quitOnMatchRe, - subParsers, ordered, parserDict, - onStartRunFunction, onQuitRunFunction, - onlySubParsersReadLine) + if parseOnlyRecorded is False: + # Check first line to store and stop/quit + rtn, lastLine, parserDict, secDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, parserDict, secDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + else: + rtn = False + if stopOnMatchRe is not None: + if stopOnMatchRe.findall(lastLine): + rtn = True + if quitOnMatchRe is not None: + if quitOnMatchRe.findall(lastLine): + rtn = True + if secDict is not None: + parserDict.update(secDict) if rtn is not True: # Check all other lines to store and stop/quit while True: @@ -1753,18 +2315,122 @@ class ParserBase(object): if not lastLine: break else: - # Matched with stopOnMatch. The line will be procesed and - # parsing will continue by returning SimpleMatcher context. - rtn, lastLine, parserDict = self.check_subparsers(parser, lastLine, - stopOnMatchRe, quitOnMatchRe, - subParsers, ordered, parserDict, - onStartRunFunction, onQuitRunFunction, - onlySubParsersReadLine) + if parseOnlyRecorded is False: + # Matched with stopOnMatch. The line will be procesed and + # parsing will continue by returning SimpleMatcher context. + rtn, lastLine, parserDict, secDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, parserDict, secDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + else: + rtn = False + if stopOnMatchRe is not None: + if stopOnMatchRe.findall(lastLine): + rtn = True + if quitOnMatchRe is not None: + if quitOnMatchRe.findall(lastLine): + rtn = True + if secDict is not None: + parserDict.update(secDict) if rtn: + if record: + self.recordList.write(lastLine) break else: if onlySubParsersReadLine is False: lastLine = parser.fIn.readline() + if record: + self.recordList.write(lastLine) + else: + if parseOnlyRecorded is True: + lastLine = parser.fIn.readline() + if record: + self.recordList.write(lastLine) + if record is True and replay is not None: + if replay < 0: + replayCount = 1 + while stopCntrl is not True: + if onStartReplayRunFunction is not None: + onStartReplayRunFunction[replayCount](parser) + self.recordList.seek(0) + lastLine = self.peekrecord() + parserDict["replayCount"]=replayCount + rtn, lastLine, parserDict, secDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, parserDict, secDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + if secDict is not None: + parserDict.update(secDict) + if rtn is not True: + while True: + lastLine = self.peekrecord() + parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) + if not lastLine: + break + else: + rtn, lastLine, parserDict, secDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, parserDict, secDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + if secDict is not None: + parserDict.update(secDict) + if rtn: + break + else: + if onlySubParsersReadLine is False: + lastLine = self.recordList.readline() + if onQuitReplayRunFunction is not None: + onQuitReplayRunFunction[replayCount](parser) + replayCount+=1 + try: + stopCntrl = getattr(self,stopControl) + except(TypeError,ValueError,AttributeError): + stopCntrl = True + elif replay > 0: + replayCount = 1 + for repeat in range(replay): + # dirty function caller + if onStartReplayRunFunction is not None: + if replayCount in onStartReplayRunFunction: + if hasattr(self,onStartReplayRunFunction[replayCount]): + func = getattr(self,onStartReplayRunFunction[replayCount]) + outfunc = func(parser) + self.recordList.seek(0) + lastLine = self.peekrecord() + parserDict["replayCount"]=repeat+1 + rtn, lastLine, parserDict, secDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, parserDict, secDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + if secDict is not None: + parserDict.update(secDict) + if rtn is not True: + while True: + lastLine = self.peekrecord() + parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) + if not lastLine: + break + else: + rtn, lastLine, parserDict, secDict = self.check_subparsers( + parser, lastLine, stopOnMatchRe, quitOnMatchRe, + subParsers, ordered, parserDict, secDict, + onStartRunFunction, onQuitRunFunction, + onlySubParsersReadLine, parent=rank) + if secDict is not None: + parserDict.update(secDict) + if rtn: + break + else: + if onlySubParsersReadLine is False: + lastLine = self.recordList.readline() + if onQuitReplayRunFunction is not None: + onQuitReplayRunFunction[replayCount](parser) + replayCount+=1 + if onQuitRunFunction is not None: onQuitRunFunction(parser)