diff --git a/common/python/nomadcore/md_data_access/MDDataAccess.py b/common/python/nomadcore/md_data_access/MDDataAccess.py index 905aab1ece8aa4fe2d3d25971d49e001ed9ce132..305ff09521f6dd9fb58a0c3b67f80d675f396616 100644 --- a/common/python/nomadcore/md_data_access/MDDataAccess.py +++ b/common/python/nomadcore/md_data_access/MDDataAccess.py @@ -66,7 +66,7 @@ ELEMENTS_MASS_TABLE = { "Zn" : 65.37000, "Zr" : 91.224 } -def get_element_name(atomname): +def get_any_element_name(atomname): elementlist = ELEMENTS_MASS_TABLE.keys() # check if the element is in list # but name is upper- or lower-case @@ -91,6 +91,31 @@ def get_element_name(atomname): pass return None +def get_element_name(atomname): + elementlist = ELEMENTS_MASS_TABLE.keys() + # check if the element is in list + # but name is upper- or lower-case + for name in elementlist: + if atomname == name: + return name + # check if the first two letters define + # element name + if len(atomname)>1: + for name in elementlist: + if atomname[0:2] == name: + return name + if len(atomname)>0: + # check if the first letter defines + # element name + for name in elementlist: + if atomname[0:1] == name: + return name + if len(atomname)<3: + return atomname[0:1] + else: + return get_any_element_name() + return None + def get_dir_base_extension(file_name): """ Splits directory, file base and file extensions @@ -103,75 +128,58 @@ def get_dir_base_extension(file_name): return file_dir, file_base, file_extension def pymConvertTopoDict(topoStor, topoPYM): - if(isinstance(topoPYM, pym.OpenMolfile) or - isinstance(topoPYM, pym.OpenMolfile.topology)): - topo = topoPYM - - def getseg(seg): - regex = re.compile(r"^seg_[0-9]+_b") - if regex.findall(seg.segid): - segname = bytes(re.sub(r"^seg_[0-9]+_b","",seg.segid).replace("'",""), "utf-8").decode("utf-8") - else: - segname = seg.segid - return [seg.ix,segname] - - def getres(res): - regex = re.compile(r"^b'") - if isinstance(res.resname, bytes): - resname = res.resname.decode("utf-8") - else: - resname= res.resname - if regex.findall(resname): - resname = bytes(re.sub(r"^b","",resname).replace("'",""), "utf-8").decode("utf-8") - return [res.ix,resname] - - def get_atomseg(atom): - return [atom.segment.ix,atom.segment.segname.decode('utf-8')] - - def get_atomres(atom): - return [atom.resname.decode('utf-8'),atom.resid] - - def getatom(atom): - atmid = atom.ix - atmname = atom.name.decode('utf-8') - atmtyp = atom.type.decode('utf-8') - atmres = atom.resname.decode('utf-8') - atmresid = atom.resid - atmsegid = atom.segid - atm_unique = atmname + "-" + atmtyp - return [atmid,atm_unique,atmname,atmtyp,atmres,atmresid,atmsegid] + """ Function to convert Pymolfile topology info + to MDDataAccess + + Pymolfile stores structure data in numpy with following fields + (name, type, resname, resid, segid, chain, altloc, insertion, + occupancy, bfactor, mass, charge, radius, atomicnumber) + Ex.: ['N' 'NH3' 'ASP' '48' 'PRO1' 'P' ' ' '' + '0.0' '0.0' '14.007' '-0.3' '0.0' '0'] + """ + topo=None + if isinstance(topoPYM, pym.OpenMolfile): + topo = topoPYM.topology def getatomall(atom): - atmid = atom.ix - atmname = atom.name.decode('utf-8') - atmtyp = atom.type.decode('utf-8') - if hasattr(atom, "mass"): - atmmass = float(atom.mass) + atmid=0 + atmname = atom[0] + atmtyp = atom[1] + if atom[10]: + atmmass = float(atom[10]) else: atmmass = None - if hasattr(atom, "resid"): - atmresid = atom.resid - atmres = atom.resname.decode('utf-8') + if atom[3]: + atmresid = atom[3] + atmres = atom[2] else: atmresid = None atmres = None - if hasattr(atom, "segid"): - atmsegid = atom.segid + if atom[4]: + atmsegid = atom[4] else: atmsegid = None - if hasattr(atom, "charge"): - atmchrg = float(atom.charge) + if atom[11]: + atmchrg = float(atom[11]) else: atmchrg = None - if hasattr(atom, "radius"): - atmrad = float(atom.radius) + if atom[12]: + atmrad = float(atom[12]) else: atmrad = None - if hasattr(atom, "bfactor"): - atmbfac = float(atom.bfactor) + if atom[9]: + atmbfac = float(atom[9]) else: atmbfac = None - atm_unique = atmname + "-" + atmtyp + atm_unique = '' + if atmname: + atm_unique = atm_unique + atmname + if atmtyp: + atm_unique = atm_unique + "-" + atmtyp + #if atmres: + # atm_unique = atm_unique + "-" + atmres + #if atmsegid: + # atm_unique = atm_unique + "-" + atmsegid return [atmid,atm_unique,atmname,atmtyp,atmres,atmresid,atmsegid,atmmass,atmchrg,atmrad,atmbfac] def checkatomsdiff(atom1,atom2,checklist): @@ -192,63 +200,55 @@ def pymConvertTopoDict(topoStor, topoPYM): return [getatom(atom1),getatom(atom2)] def atompairids(atom1,atom2): - return [getatom(atom1)[0],getatom(atom2)[0]] - - segmentList = [getseg(a)[1] for a in topo.segments] - residueList = [getres(a)[1] for a in topo.residues] - atomList = [getatomall(a) for a in topo.atoms] - #atomAllList = [getatomall(a) for a in topo.atoms] - - types_list = list(set([a[1] for a in atomList])) - atom_name_list = [a[2] for a in atomList] + return [atom1[0],atom2[0]] + + #chainList = [a[5] if a[5] else '' for a in topo.structure] + #segmentList = [a[4] if a[4] else '' for a in topo.structure] + #residList = [a[3] if a[3] else None for a in topo.structure] + #residueList = [a[2] if a[2] else '' for a in topo.structure] + chainList = [a[5] for a in topo.structure] + segmentList = [a[4] for a in topo.structure] + residList = [a[3] for a in topo.structure] + residueList = [a[2] for a in topo.structure] + atomList = topo.structure + atomAllList = [getatomall(a) for a in topo.structure] + count=0 + for i in range(len(atomAllList)): + atomAllList[i][0]=count + count+=1 + + types_list = list(set([a[1] for a in atomAllList])) + atom_name_list = [a[0] for a in topo.structure] #atom_type_list = [a[3] for a in atomList] atom_element_list = [] - for atom in atomList: - try: - guessed_element = mda.topology.guessers.guess_atom_element(atom[2]) - except (TypeError, ValueError, AttributeError): - guessed_element = atom[2] - if guessed_element in ELEMENTS_MASS_TABLE.keys(): - element = guessed_element + for atom in topo.structure: + if atom[1] in ELEMENTS_MASS_TABLE.keys(): + element = atom[1] else: - element = get_element_name(atom[2]) - #if element is None: - # element = get_element_name(atom[3]) + element = get_element_name(atom[1]) if element is None: - element = atom[2] + element = atom[1] atom_element_list.append(element) system_name = '' if segmentList: system_name = system_name + '-'.join(list(set(segmentList))) - elif residueList: + if residueList: system_name = system_name + '-'.join(list(set(residueList))) - attrlist = dir(topo.atoms) - atom_names = [] - atom_types = [] - atom_masses = [] - atom_radiuses = [] - atom_bfactors = [] - atom_charges = [] - if "names" in attrlist: - atom_names = topo.atoms.names - if "types" in attrlist: - atom_types = topo.atoms.types - if "masses" in attrlist: - atom_masses = topo.atoms.masses - if "charges" in attrlist: - atom_charges = topo.atoms.charges - if "radiuses" in attrlist: - atom_radiuses = topo.atoms.radiuses - if "bfactors" in attrlist: - atom_bfactors = topo.atoms.bfactors - #print("Atom Bonds:",MDdata.topohandler.atoms.bonds) - #print("Atom Angles:",MDdata.topohandler.atoms.angles) - #print("Atom Dihedrals:",MDdata.topohandler.atoms.dihedrals) - #print("Atom Impropers:",MDdata.topohandler.atoms.impropers) + atom_all_list = np.asarray(atomAllList) + atom_names = atom_all_list[:,2] + atom_types = atom_all_list[:,3] + atom_masses = atom_all_list[:,7] + atom_charges = atom_all_list[:,8] + atom_radiuses = atom_all_list[:,9] + atom_bfactors = atom_all_list[:,10] + #print("Atom Bonds:",topo.bonds) + #print("Atom Angles:",topo.angles) + #print("Atom Dihedrals:",topo.dihedrals) + #print("Atom Impropers:",topo.impropers) atom_type_dict = {} @@ -265,20 +265,14 @@ def pymConvertTopoDict(topoStor, topoPYM): elm = types_list[ielm] atom_type_dict.update({elm : ielm+1}) typelabelList = [] - for atom in atomList: + for atom in atomAllList: if elm == atom[1]: - try: - guessed_element = mda.topology.guessers.guess_atom_element(atom[2]) - except (TypeError, ValueError, AttributeError): - guessed_element = atom[2] - if guessed_element in ELEMENTS_MASS_TABLE.keys(): - element = guessed_element + if atom[2] in ELEMENTS_MASS_TABLE.keys(): + element = atom[2] else: element = get_element_name(atom[2]) - #if element is None: - # element = get_element_name(atom[3]) if element is None: - element = atom[2] + element = atom[1] elementDict.update({elm : element}) atomnameDict.update({elm : atom[2]}) atomtypesDict.update({elm : atom[3]}) @@ -289,7 +283,7 @@ def pymConvertTopoDict(topoStor, topoPYM): typelabelList.append(atom[0]) atomlabelDict.append(typelabelList) - for atom in atomList: + for atom in atomAllList: atomlabelList.append([atom[0],atom_type_dict[atom[1]]]) massList = list(massDict.values()) @@ -300,30 +294,36 @@ def pymConvertTopoDict(topoStor, topoPYM): chargesList = list(chargeDict.values()) bfactorList = list(bfactorDict.values()) - topd = topo.bonds.topDict - topdk = topd.keys() + topbList = np.column_stack((topo.bonds["from"],topo.bonds["to"])) + topbNames = [] + for pair in topbList: + topbNames.append(atomAllList[pair[0]-1][1] + '-' + atomAllList[pair[1]-1][1]) + + topb=list(set(topbNames)) interNum = 0 interDict = {} interTypeDict = {} - for key in topdk: + for tb in topb: bondList = [] typeList = [] noninter = True - topt = topd[key] - for b in topt: - reslist=atom_respairs(b.atoms[0],b.atoms[1]) - atmlist=atompairs(b.atoms[0],b.atoms[1]) - atmidlist=atompairids(b.atoms[0],b.atoms[1]) - bondList.append(atmidlist) - interDict.update({interNum : bondList}) - if noninter: - noninter = False - typeList.extend(list([ - atom_type_dict[atmlist[0][1]], - atom_type_dict[atmlist[1][1]] - ])) - interTypeDict.update({interNum : typeList}) + bc = 0 + for b in topbList: + topt=atomAllList[b[0]-1][1] + '-' + atomAllList[b[1]-1][1] + if topt == tb: + reslist=[atomAllList[b[0]-1][4],atomAllList[b[1]-1][4]] + atmlist=[atomAllList[b[0]-1],atomAllList[b[1]-1]] + atmidlist=[b[0]-1,b[1]-1] + bondList.append(atmidlist) + interDict.update({interNum : bondList}) + if noninter: + noninter = False + typeList.extend(list([ + atom_type_dict[atmlist[0][1]], + atom_type_dict[atmlist[1][1]] + ])) + interTypeDict.update({interNum : typeList}) interNum += 1 # for ielm in range(len(atom_type_list)-1): @@ -960,6 +960,9 @@ class MDDataAccess(object): self.natoms = None self.topology = None # Main storage for topology data self.trajectory = None # Main storage for trajectory data + self.inputcoords = None + self.outputcoords = None + self.trajtype = None self.forcefield = None # Main storage for force field parameters self.thermostats = None # Main storage for thermodynamical quantities # and properties such as energies, temperatures @@ -971,6 +974,8 @@ class MDDataAccess(object): self.set_defaults() self.init_topo() self.init_traj() + self.init_incoord() + self.init_outcoord() self.init_thermo() def set_defaults(self): @@ -995,6 +1000,26 @@ class MDDataAccess(object): self.trajfile = None # File name of the trajectory file with path if needed self.trajectory = None + def init_incoord(self): + self.incoord_natoms = None + self.incoordformat = None # Format type of trajectory file + self.incoordhandler = None # The object parsing the trajectory file + self.incoordplugin = None # The object parsing the trajectory file + self.incoorditer = None # The object parsing the trajectory file + self.incoordcode = None # To explicitly define the parsing library (pymolfile, mdtraj, ASE ...) for trajectory files + self.incoordfile = None # File name of the trajectory file with path if needed + self.inputpositions = None + + def init_outcoord(self): + self.outcoord_natoms = None + self.outcoordformat = None # Format type of output file + self.outcoordhandler = None # The object parsing the output file + self.outcoordplugin = None # The plugin parsing the output file + self.outcoorditer = None # The iteration at the output file + self.outcoordcode = None # To explicitly define the parsing library (pymolfile, mdtraj, ASE ...) for the files + self.outcoordfile = None # File name of the file with path if needed + self.outputpositions = None + def init_thermo(self): self.thermofile = None # File name of the thermostat file with path if needed self.thermoformat = None # Format type of topology file @@ -1007,6 +1032,8 @@ class MDDataAccess(object): self.set_defaults() self.init_topo() self.init_traj() + self.init_incoord() + self.init_outcoord() self.init_thermo() def set_topo(self, filename, fileformat=None): @@ -1017,6 +1044,14 @@ class MDDataAccess(object): self.trajfile = filename self.trajformat = fileformat + def set_incoord(self, filename, fileformat=None): + self.incoordfile = filename + self.incoordformat = fileformat + + def set_outcoord(self, filename, fileformat=None): + self.outcoordfile = filename + self.outcoordformat = fileformat + def set_thermo(self, filename, fileformat=None): self.thermofile = filename self.thermoformat = fileformat @@ -1027,10 +1062,10 @@ class MDDataAccess(object): if self.topohandler is None: if self.topofile: - self.check_topology_format_support() + self.check_topology_format_support("traj") if self.trajhandler is None: - self.check_trajectory_format_support() + self.check_trajectory_format_support("traj") return self.trajhandler @@ -1044,6 +1079,26 @@ class MDDataAccess(object): return self.topohandler + def load_incoord(self): + """Loads the file handles for coordinates only + """ + + if self.incoordhandler is None: + if self.incoordfile: + self.check_trajectory_format_support("input") + + return self.incoordhandler + + def load_outcoord(self): + """Loads the file handles for coordinates only + """ + + if self.outcoordhandler is None: + if self.outcoordfile: + self.check_trajectory_format_support("output") + + return self.outcoordhandler + def load_thermo(self): """Loads the file handles for topology only """ @@ -1195,35 +1250,53 @@ class MDDataAccess(object): else: return True - def check_trajectory_format_support(self): + def check_trajectory_format_support(self, filetype): """Check if the given format is supported. """ - trajfilename = os.path.basename(self.trajfile) - if self.trajformat is None: - file_format = self.get_file_format(self.trajfile, self.trajformat) - self.trajformat = file_format + if "input" in filetype: + chkfile = self.incoordfile + chkformat = self.incoordformat + self.trajtype = "input" + elif "output" in filetype: + chkfile = self.outcoordfile + chkformat = self.outcoordformat + self.trajtype = "output" + else: + chkfile = self.trajfile + chkformat = self.trajformat + self.trajtype = "traj" + + numatoms=None + chkfilename = os.path.basename(chkfile) + if chkformat is None: + file_format = self.get_file_format(chkfile, chkformat) + chkformat = file_format + if "input" in filetype: + self.incoordformat = file_format + elif "output" in filetype: + self.outcoordformat = file_format + else: + self.trajformat = file_format else: - file_format = self.trajformat + file_format = chkformat usedefault=True - molfile_traj=None # Use the given order to check topology if self.interfaceorder: for interface in self.interfaceorder: if "pymolfile" in interface: - filetrajformat = re.sub('[.]', '', self.trajformat) + filetrajformat = re.sub('[.]', '', chkformat) trajhandler_check = None if self.topohandler is not None: - self.natoms = self.get_natoms_from_topo(topocode="pymolfile") + numatoms = self.get_natoms_from_topo(topocode="pymolfile") if(isinstance(self.topohandler, pym.OpenMolfile) or isinstance(self.topohandler, pym.OpenMolfile.topology)): - molfile_traj = pym.OpenMolfile(self.trajfile, file_format=filetrajformat, topology=self.topohandler, silent=False) - elif self.natoms is not None: - if self.natoms > 0: - molfile_traj = pym.OpenMolfile(self.trajfile, file_format=filetrajformat, natoms=self.natoms, silent=False) - if molfile_traj is not None: - if molfile_traj.trajectory is not None: - trajhandler_check = molfile_traj + molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, topology=self.topohandler, silent=False) + elif numatoms is not None: + if numatoms > 0: + molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, natoms=numatoms, silent=False) + if molfile_traj.trajectory is not None: + trajhandler_check = molfile_traj if trajhandler_check: trajhandler = None trajhandler = self.pymolfile_iread(trajhandler_check) @@ -1231,17 +1304,41 @@ class MDDataAccess(object): if self.interfacematch: if interface in self.topocode: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "pymolfile" - self.trajplugin = molfile_traj + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "pymolfile" + self.incoordplugin = molfile_traj + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "pymolfile" + self.outcoordplugin = molfile_traj + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "pymolfile" + self.trajplugin = molfile_traj + self.natoms = numatoms break else: trajhandler = None else: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "pymolfile" - self.trajplugin = molfile_traj + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "pymolfile" + self.incoordplugin = molfile_traj + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "pymolfile" + self.outcoordplugin = molfile_traj + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "pymolfile" + self.trajplugin = molfile_traj + self.natoms = numatoms break if "mdtraj" in interface: trajhandler_check = None @@ -1257,43 +1354,97 @@ class MDDataAccess(object): if self.interfacematch: if interface in self.topocode: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "mdtraj" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdtraj" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdtraj" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdtraj" + self.natoms = numatoms break else: trajhandler = None else: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "mdtraj" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdtraj" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdtraj" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdtraj" + self.natoms = numatoms break if "mdanalysis" in interface: mdanalysis_format = re.sub('[.]', '', file_format) mdanalysis_format = mdanalysis_format.upper() - print(mdanalysis_format) if self.topohandler is not None: # if the topology handler is a MDAnalysis universe, # we may try replacing the trajectory data in it. if isinstance(self.topohandler, mda_u.Universe): try: - self.topohandler.load_new(self.trajfile, format=mdanalysis_format) - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=self.topohandler.trajectory) - self.trajcode = "mdanalysis" + trajhandler = None + self.topohandler.load_new(chkfile, format=mdanalysis_format) + trajhandler = self.mdanalysis_iread(mdanalysis_handler=self.topohandler.trajectory) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms usedefault=False break except (AttributeError, IOError, OSError, ValueError, TypeError): try: universe = mda_u.Universe(self.topohandler, self.trajfile, format=mdanalysis_format) + trajhandler = None if isinstance(universe, mda_u.Universe): self.topohandler = universe - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe.trajectory) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe.trajectory) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms usedefault=False break elif self.is_class_of_module(universe, mda_c.Trajectory, mda_coordinates_modules): - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe.trajectory) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe.trajectory) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms usedefault=False break #except (AttributeError, IOError, OSError, ValueError, TypeError): @@ -1312,31 +1463,79 @@ class MDDataAccess(object): if self.interfacematch: if interface in self.topocode: self.topohandler = universe - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = None + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms usedefault=False break else: universe = None else: self.topohandler = universe - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = None + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms usedefault=False break elif self.is_class_of_module(universe, mda_c.Trajectory, mda_coordinates_modules): if self.interfacematch: if interface in self.topocode: - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = None + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms usedefault=False break else: universe = None else: - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = None + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms usedefault=False break except (AttributeError, IOError, OSError, ValueError, TypeError): @@ -1352,15 +1551,35 @@ class MDDataAccess(object): if self.interfacematch: if interface in self.topocode: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "ase" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "ase" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "ase" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "ase" + self.natoms = numatoms break else: trajhandler = None else: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "ase" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "ase" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "ase" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "ase" + self.natoms = numatoms break else: trajhandler = None @@ -1369,43 +1588,70 @@ class MDDataAccess(object): if self.interfacematch: if interface in self.topocode: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "ase" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "ase" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "ase" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "ase" + self.natoms = numatoms break else: trajhandler = None else: usedefault=False - self.trajhandler = trajhandler - self.trajcode = "ase" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "ase" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "ase" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "ase" + self.natoms = numatoms break - elif self.UserSuppliedInterface: - if isinstance(self.UserSuppliedInterface, MDDataAccess.UserSuppliedInterface): - if self.UserSuppliedInterface.name in interface: - trajhandler = None - trajhandler = self.UserSuppliedInterface.trajectory_support(self.trajfile, file_format=file_format) - if trajhandler: - if self.interfacematch: - if interface in self.topocode: + elif hasattr(self, "UserSuppliedInterface"): + if self.UserSuppliedInterface is not None: + if isinstance(self.UserSuppliedInterface, MDDataAccess.UserSuppliedInterface): + if self.UserSuppliedInterface.name in interface: + trajhandler = None + trajhandler = self.UserSuppliedInterface.trajectory_support(self.trajfile, file_format=file_format) + if trajhandler: + if self.interfacematch: + if interface in self.topocode: + usedefault=False + self.trajhandler = trajhandler + self.trajcode = self.UserSuppliedInterface.name + break + else: + trajhandler = None + else: usedefault=False self.trajhandler = trajhandler self.trajcode = self.UserSuppliedInterface.name break - else: - trajhandler = None - else: - usedefault=False - self.trajhandler = trajhandler - self.trajcode = self.UserSuppliedInterface.name - break if usedefault: - if self.trajhandler is None: - filetrajformat = re.sub('[.]', '', self.trajformat) + if "input" in filetype: + trajhandler = self.incoordhandler + elif "output" in filetype: + trajhandler = self.outcoordhandler + else: + trajhandler = self.trajhandler + if trajhandler is None: + filetrajformat = re.sub('[.]', '', chkformat) # First check whether pymolfile has support for the file type trajhandler_check = None if self.topohandler is not None: - self.natoms = self.get_natoms_from_topo(topocode=self.topocode) + numatoms = self.get_natoms_from_topo(topocode=self.topocode) try: pymHasTopo = getattr(pym.OpenMolfile, "topology") except AttributeError: @@ -1418,52 +1664,104 @@ class MDDataAccess(object): else: topoIsPymTopo = False if(isinstance(self.topohandler, pym.OpenMolfile) or topoIsPymTopo): - molfile_traj = pym.OpenMolfile(self.trajfile, file_format=filetrajformat, topology=self.topohandler, silent=False) - elif self.natoms is not None: - if self.natoms > 0: - molfile_traj = pym.OpenMolfile(self.trajfile, file_format=filetrajformat, natoms=self.natoms, silent=False) - if molfile_traj is not None: - if molfile_traj.trajectory is not None: - trajhandler_check = molfile_traj + molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, topology=self.topohandler, silent=False) + elif numatoms is not None: + if numatoms > 0: + molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, natoms=numatoms, silent=False) + if molfile_traj.trajectory is not None: + trajhandler_check = molfile_traj if trajhandler_check: trajhandler = None trajhandler = self.pymolfile_iread(trajhandler_check) if trajhandler: if self.interfacematch: if "pymolfile" in self.topocode: - self.trajhandler = trajhandler - self.trajcode = "pymolfile" - self.trajplugin = molfile_traj + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "pymolfile" + self.incoordplugin = molfile_traj + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "pymolfile" + self.outcoordplugin = molfile_traj + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "pymolfile" + self.trajplugin = molfile_traj + self.natoms = numatoms else: trajhandler = None else: - self.trajhandler = trajhandler - self.trajcode = "pymolfile" - self.trajplugin = molfile_traj + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "pymolfile" + self.incoordplugin = molfile_traj + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "pymolfile" + self.outcoordplugin = molfile_traj + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "pymolfile" + self.trajplugin = molfile_traj + self.natoms = numatoms else: - if self.natoms is not None: - if self.natoms > 0: - molfile_traj = pym.OpenMolfile(self.trajfile, file_format=filetrajformat, natoms=self.natoms, silent=False) - if molfile_traj is not None: - if molfile_traj.trajectory is not None: - trajhandler_check = molfile_traj + if numatoms is not None: + if numatoms > 0: + molfile_traj = pym.OpenMolfile(chkfile, file_format=filetrajformat, natoms=numatoms, silent=False) + if molfile_traj.trajectory is not None: + trajhandler_check = molfile_traj if trajhandler_check: trajhandler = None trajhandler = self.pymolfile_iread(trajhandler_check) if trajhandler: if self.interfacematch: if interface in self.topocode: - self.trajhandler = trajhandler - self.trajcode = "pymolfile" - self.trajplugin = molfile_traj + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "pymolfile" + self.incoordplugin = molfile_traj + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "pymolfile" + self.outcoordplugin = molfile_traj + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "pymolfile" + self.trajplugin = molfile_traj + self.natoms = numatoms else: trajhandler = None else: - self.trajhandler = trajhandler - self.trajcode = "pymolfile" - self.trajplugin = molfile_traj + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "pymolfile" + self.incoordplugin = molfile_traj + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "pymolfile" + self.outcoordplugin = molfile_traj + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "pymolfile" + self.trajplugin = molfile_traj + self.natoms = numatoms - if self.trajhandler is None: + if "input" in filetype: + trajhandler = self.incoordhandler + elif "output" in filetype: + trajhandler = self.outcoordhandler + else: + trajhandler = self.trajhandler + if trajhandler is None: # Second,check whether MDtraj has support for the file type # trajhandler_check = mdt_FormatRegistry.loaders[file_format] try: @@ -1486,97 +1784,205 @@ class MDDataAccess(object): trajhandler = None if self.interfacematch: if "mdtraj" in self.topocode: - self.trajhandler = self.mdtraj_iread(mdtraj_handler=trajhandler_check) - self.trajcode = "mdtraj" + trajhandler = self.mdtraj_iread(mdtraj_handler=trajhandler_check) + else: + trajhandler = self.mdtraj_iread(mdtraj_handler=trajhandler_check) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdtraj" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdtraj" + self.outcoord_natoms = numatoms else: - self.trajhandler = self.mdtraj_iread(mdtraj_handler=trajhandler_check) + self.trajhandler = trajhandler self.trajcode = "mdtraj" + self.natoms = numatoms # If MDTraj does not have support for the format # or can not load the trajectory, use MDAnalysis or ASE. - if self.trajhandler is None: + if "input" in filetype: + trajhandler = self.incoordhandler + elif "output" in filetype: + trajhandler = self.outcoordhandler + else: + trajhandler = self.trajhandler + if trajhandler is None: mdanalysis_format = re.sub('[.]', '', file_format) if self.topohandler is not None: if isinstance(self.topohandler, mda_u.Universe): try: - self.topohandler.load_new(self.trajfile, format=mdanalysis_format) - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=self.topohandler) - self.trajcode = "mdanalysis" + self.topohandler.load_new(chkfile, format=mdanalysis_format) + trajhandler = self.mdanalysis_iread(mdanalysis_handler=self.topohandler) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms except (AttributeError, IOError, OSError, ValueError, TypeError): try: universe = mda_u.Universe(self.topohandler, self.trajfile, format=mdanalysis_format) if isinstance(universe, mda_u.Universe): self.topohandler = universe - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms elif self.is_class_of_module(self.universe, mda_c.Trajectory, mda_coordinates_modules): - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms except (AttributeError, IOError, OSError, ValueError, TypeError): pass try: universe = mda_u.Universe(self.trajfile, format=mdanalysis_format) if isinstance(universe, mda_u.Universe): + trajhandler = None if self.interfacematch: if "mdanalysis" in self.topocode: self.topohandler = universe - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms else: universe = None else: self.topohandler = universe - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = "mdanalysis" + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = "mdanalysis" + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = "mdanalysis" + self.natoms = numatoms elif self.is_class_of_module(self.universe, mda_c.Trajectory, mda_coordinates_modules): + trajhandler = None + trajcode = None if self.interfacematch: if "mdanalysis" in self.topocode: - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + trajcode = "mdanalysis" else: universe = None else: - self.trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) - self.trajcode = "mdanalysis" + trajhandler = self.mdanalysis_iread(mdanalysis_handler=universe) + trajcode = "mdanalysis" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = trajcode + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = trajcode + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = trajcode + self.natoms = numatoms except (AttributeError, IOError, OSError, ValueError, TypeError): pass - if self.trajhandler is None: + if "input" in filetype: + trajhandler = self.incoordhandler + elif "output" in filetype: + trajhandler = self.outcoordhandler + else: + trajhandler = self.trajhandler + if trajhandler is None: ase_support = None ase_support = self.get_ase_format_support(file_format) # May still have chance that ASE can recognize the # format with its filetype checking function if ase_support is None: - ase_support = ase_io.formats.filetype(self.trajfile) + ase_support = ase_io.formats.filetype(chkfile) trajhandler = None - trajhandler = self.ase_iread(self.trajfile, fileformat=ase_support) + trajhandler = self.ase_iread(chkfile, fileformat=ase_support) if trajhandler: + trajcode=None if self.interfacematch: if "ase" in self.topocode: - self.trajhandler = trajhandler - self.trajcode = "ase" + trajcode = "ase" else: trajhandler = None else: - self.trajhandler = trajhandler - self.trajcode = "ase" + trajcode = "ase" else: trajhandler = None - trajhandler = self.ase_iread(self.trajfile, fileformat=file_format) + trajhandler = self.ase_iread(chkfile, fileformat=file_format) + trajcode=None if trajhandler: if self.interfacematch: if "ase" in self.topocode: - self.trajhandler = trajhandler - self.trajcode = "ase" + trajcode = "ase" else: trajhandler = None else: - self.trajhandler = trajhandler - self.trajcode = "ase" - - if self.trajhandler is None: + trajcode = "ase" + if "input" in filetype: + self.incoordhandler = trajhandler + self.incoordcode = trajcode + self.incoord_natoms = numatoms + elif "output" in filetype: + self.outcoordhandler = trajhandler + self.outcoordcode = trajcode + self.outcoord_natoms = numatoms + else: + self.trajhandler = trajhandler + self.trajcode = trajcode + self.natoms = numatoms + + if "input" in filetype: + trajhandler = self.incoordhandler + elif "output" in filetype: + trajhandler = self.outcoordhandler + else: + trajhandler = self.trajhandler + if trajhandler is None: return False else: return True @@ -1596,6 +2002,46 @@ class MDDataAccess(object): finally: del iterator_object + def incoord_iread(self): + """Returns an iterator that goes through the given file one + configuration at a time. + """ + try: + iterator_object = iter(self.incoordhandler) + try: + while True: + try: + self.incoorditer = next(iterator_object) + return self.incoorditer + except ValueError: + pass + except StopIteration: + pass + finally: + del iterator_object + except TypeError: + pass + + def outcoord_iread(self): + """Returns an iterator that goes through the given file one + configuration at a time. + """ + try: + iterator_object = iter(self.outcoordhandler) + try: + while True: + try: + self.outcoorditer = next(iterator_object) + return self.outcoorditer + except ValueError: + pass + except StopIteration: + pass + finally: + del iterator_object + except TypeError: + pass + def topology_iread(self): """Returns an iterator that goes through the given trajectory file one configuration at a time. @@ -1795,7 +2241,7 @@ class MDDataAccess(object): # configurations from it. Should be checked at some point. handler = None try: - handler = ase_io.iread(file_name, index=":", format=file_format) + handler = ase_io.iread(filename, index=":", format=fileformat) except (AttributeError, IOError, OSError, ValueError, ImportError, ModuleNotFoundError): return @@ -1809,24 +2255,31 @@ class MDDataAccess(object): def pymolfile_iread(self, traj_handler=None): """Returns the iterator for pymolfile trajectory """ + if "input" in self.trajtype: + traj = self.inputcoords + elif "output" in self.trajtype: + traj = self.outputcoords + else: + traj = self.trajectory + if traj_handler.trajectory is not None: while True: positions = traj_handler.trajectory.iread() if positions is not None: - if self.trajectory is None: - self.trajectory = MDDataTrajectory() + if traj is None: + traj = MDDataTrajectory() if "has_velocities" in positions: if positions["has_velocities"]>0: - self.trajectory.velocities = positions["velocities"] + traj.velocities = positions["velocities"] if("A" in positions and "B" in positions and "C" in positions): - self.trajectory.unitcell_lengths = np.asarray( + traj.unitcell_lengths = np.asarray( [positions["A"],positions["B"],positions["C"]]) if("alpha" in positions and "beta" in positions and "gamma" in positions): - self.trajectory.unitcell_angles = np.asarray( + traj.unitcell_angles = np.asarray( [positions["alpha"],positions["beta"],positions["gamma"]]) if("A" in positions and "B" in positions and "C" in positions and "alpha" in positions and @@ -1854,13 +2307,19 @@ class MDDataAccess(object): lz = math.sqrt(cxz) else: lz = c if c>0 else 1.0 - self.trajectory.unitcell_vectors = np.zeros((3,3)) - self.trajectory.unitcell_vectors[0][0] = lx if lx != 0 else 1.0 - self.trajectory.unitcell_vectors[0][1] = xy - self.trajectory.unitcell_vectors[0][2] = xz - self.trajectory.unitcell_vectors[1][1] = ly if ly != 0 else 1.0 - self.trajectory.unitcell_vectors[1][2] = yz - self.trajectory.unitcell_vectors[2][2] = lz + traj.unitcell_vectors = np.zeros((3,3)) + traj.unitcell_vectors[0][0] = lx if lx != 0 else 1.0 + traj.unitcell_vectors[0][1] = xy + traj.unitcell_vectors[0][2] = xz + traj.unitcell_vectors[1][1] = ly if ly != 0 else 1.0 + traj.unitcell_vectors[1][2] = yz + traj.unitcell_vectors[2][2] = lz + if "input" in self.trajtype: + self.inputcoords = traj + elif "output" in self.trajtype: + self.outputcoords = traj + else: + self.trajectory = traj yield positions["coords"] else: return None @@ -1911,61 +2370,80 @@ class MDDataAccess(object): # Must use the low level MDTraj API to open files without topology. # mdtraj_supported_format = FormatRegistry.loaders[self.fileformat] + if "input" in self.trajtype: + traj = self.inputcoords + trajformat = self.incoordformat + trajfile = self.incoordfile + chunk = 1 + elif "output" in self.trajtype: + traj = self.outputcoords + trajformat = self.outcoordformat + trajfile = self.outcoordfile + chunk = 1 + else: + traj = self.trajectory + trajformat = self.trajformat + trajfile = self.trajfile + chunk = self.trajchunk + if mdtraj_handler is not None: - if self.trajchunk == 0: + if chunk == 0: try: - loader = mdt_FormatRegistry.loaders[self.trajformat] + loader = mdt_FormatRegistry.loaders[trajformat] except KeyError: return # If chunk was 0 then we want to avoid filetype-specific code # in case of undefined behavior in various file parsers. # TODO: this will first apply stride, then skip! - if self.trajformat not in mdt_TOPOLOGY_EXTS: + if trajformat not in mdt_TOPOLOGY_EXTS: topkwargs = kwargs.copy() topkwargs['top'] = self.topohandler # standard_names is a valid keyword argument only for files containing topologies topkwargs.pop('standard_names', None) - positions = loader(self.trajfile, **topkwargs) + positions = loader(trajfile, **topkwargs) else: - positions = loader(self.trajfile) + positions = loader(trajfile) for pos in positions: yield pos - elif self.trajformat in ('.pdb', '.pdb.gz'): + elif trajformat in ('.pdb', '.pdb.gz'): # the PDBTrajectoryFile class doesn't follow the standard API. Fixing it here try: - loader = mdt_FormatRegistry.loaders[self.trajformat] + loader = mdt_FormatRegistry.loaders[trajformat] except KeyError: return t = loader(filename) - for i in range(0, len(t), self.trajchunk): - positions = t[i:i+self.trajchunk] + for i in range(0, len(t), chunk): + positions = t[i:i+chunk] for pos in positions: yield pos else: if self.topohandler is None: n_atoms_set=None - if self.trajformat in ('.crd', '.mdcrd'): + if trajformat in ('.crd', '.mdcrd'): return else: - if "mdtraj" in self.topocode: - n_atoms_set=self.topohandler.n_atoms - elif "mdanalysis" in self.topocode: - n_atoms_set=len(self.topohandler.atoms) - elif "ase" in self.topocode: - if isinstance(self.topohandler, ase.Atoms): - n_atoms_set=len(self.topohandler.get_positions()) - else: - n_atoms_set=None + #if "pymolfile" in self.topocode: + # n_atoms_set=self.topohandler.n_atoms + #elif "mdtraj" in self.topocode: + # n_atoms_set=self.topohandler.n_atoms + #elif "mdanalysis" in self.topocode: + # n_atoms_set=len(self.topohandler.atoms) + #elif "ase" in self.topocode: + # if isinstance(self.topohandler, ase.Atoms): + # n_atoms_set=len(self.topohandler.get_positions()) + # else: + n_atoms_set = None + n_atoms_set = self.get_natoms_from_topo(self.topocode) try: with (lambda x: mdtraj_handler(x, n_atoms=n_atoms_set) - if self.trajformat in ('.crd', '.mdcrd') - else mdtraj_handler(self.trajfile, mode="r"))(self.trajfile) as f: + if trajformat in ('.crd', '.mdcrd') + else mdtraj_handler(trajfile, mode="r"))(trajfile) as f: empty = False while not empty: - if self.trajformat not in mdt_TOPOLOGY_EXTS: - data = f.read_as_traj(self.topohandler, n_frames=self.trajchunk) + if trajformat not in mdt_TOPOLOGY_EXTS: + data = f.read_as_traj(self.topohandler, n_frames=chunk) else: - data = f.read_as_traj(n_frames=self.trajchunk) + data = f.read_as_traj(n_frames=chunk) if isinstance(data, tuple): positions = data[0] else: @@ -2014,6 +2492,36 @@ class MDDataAccess(object): if self.atompositions is not None: return fileFormat + def incoordFileHandler(self, filename, fileformatlist=None, interfacelist=None): + self.init_incoord() + self.set_incoord(filename) + self.interfaceorder = interfacelist + incoord_loaded = self.load_incoord() + self.inputpositions = self.incoord_iread() + if self.inputpositions is None: + for fileformat in fileformatlist: + self.init_incoord() + self.set_incoord(filename, fileformat) + incoord_loaded = self.load_incoord() + self.inputpositions = self.incoord_iread() + if self.inputpositions is not None: + return fileFormat + + def outcoordFileHandler(self, filename, fileformatlist=None, interfacelist=None): + self.init_outcoord() + self.set_outcoord(filename) + self.interfaceorder = interfacelist + outcoord_loaded = self.load_outcoord() + self.outputpositions = self.outcoord_iread() + if self.outputpositions is None: + for fileformat in fileformatlist: + self.init_outcoord() + self.set_outcoord(filename, fileformat) + outcoord_loaded = self.load_outcoord() + self.outputpositions = self.outcoord_iread() + if self.outputpositions is not None: + return fileFormat + def thermoFileHandler(self, filename, fileformatlist=None, interfacelist=None): self.init_thermo() self.set_thermo(filename) @@ -2033,76 +2541,128 @@ class MDDataAccess(object): def initializeFileHandlers(self, parser_ui): # Files will be loaded using their extensions initially. # If this fails, the fileFormat lists will be used in loading process. - self.access_ui = parser_ui topoformat = None trajformat = None - for fileItem in self.access_ui.fileDict: + self.atompositions = None + self.topohandler = None + self.thermohandler = None + self.forcefieldhandler = None + for fileItem in parser_ui.fileDict: fileformatlist = None interfacelist = None - if (self.access_ui.fileDict[fileItem].fileSupplied and - self.access_ui.fileDict[fileItem].activeInfo): - filename = self.access_ui.fileDict[fileItem].fileName - fileformatlist = self.access_ui.fileDict[fileItem].fileFormat - interfacelist = self.access_ui.fileDict[fileItem].fileInterface - # First, check topology file - if 'topology' in self.access_ui.fileDict[fileItem].infoPurpose: + if (parser_ui.fileDict[fileItem].fileSupplied and + parser_ui.fileDict[fileItem].activeInfo): + filename = parser_ui.fileDict[fileItem].fileName + fileformatlist = parser_ui.fileDict[fileItem].fileFormat + interfacelist = parser_ui.fileDict[fileItem].fileInterface + if 'topology' in parser_ui.fileDict[fileItem].infoPurpose: topoformat = self.topologyFileHandler(filename, fileformatlist, interfacelist) - # Second, check trajectory file - if 'trajectory' in self.access_ui.fileDict[fileItem].infoPurpose: + if 'inputcoordinates' in parser_ui.fileDict[fileItem].infoPurpose: + incoordformat = self.incoordFileHandler(filename, fileformatlist, interfacelist) + if 'outputcoordinates' in parser_ui.fileDict[fileItem].infoPurpose: + outcoordformat = self.outcoordFileHandler(filename, fileformatlist, interfacelist) + if 'trajectory' in parser_ui.fileDict[fileItem].infoPurpose: trajformat = self.trajectoryFileHandler(filename, fileformatlist, interfacelist) - # Third, check thermostat file - if 'thermostats' in self.access_ui.fileDict[fileItem].infoPurpose: + if 'thermostats' in parser_ui.fileDict[fileItem].infoPurpose: thermoformat = self.thermoFileHandler(filename, fileformatlist, interfacelist) - # Last, check force filed file - if 'forcefield' in self.access_ui.fileDict[fileItem].infoPurpose: + if 'forcefield' in parser_ui.fileDict[fileItem].infoPurpose: forcefieldformat = self.forcefieldFileHandler(filename, fileformatlist, interfacelist) - if self.atompositions is not None: - self.set_TrajectoryData() if self.topohandler is not None: - self.set_TopologyData() + parser_ui.topology = self.set_TopologyData() + if self.inputpositions is not None: + parser_ui.inputcoords = self.set_InputData() + if self.outputpositions is not None: + parser_ui.outputcoords = self.set_OutputData() + if self.atompositions is not None: + parser_ui.trajectory = self.set_TrajectoryData() if self.thermohandler is not None: - self.set_ThermoData() + parser_ui.thermostats = self.set_ThermoData() + parser_ui.thermoDict = parser_ui.thermostats.thermoDict if self.forcefieldhandler is not None: - self.set_ForceFieldData() + #self.set_ForceFieldData() + pass def set_TopologyData(self): self.topology = MDDataTopology() self.topology.filename = self.topofile self.topology.topoDict = {} - if ("pymolfile" in self.topocode and - "pymolfile" in self.trajcode): - self.topology = pymConvertTopoDict(self.topology, self.trajhandler.Topology) if "pymolfile" in self.topocode: self.topology = pymConvertTopoDict(self.topology, self.topohandler) - if ("mdtraj" in self.topocode and - "mdtraj" in self.trajcode): - self.topology = mdtConvertTopoDict(self.topology, self.trajhandler.get_topology()) + if(self.trajcode and not self.topology): + if ("pymolfile" in self.topocode and + "pymolfile" in self.trajcode): + self.topology = pymConvertTopoDict(self.topology, self.trajhandler.Topology) if "mdtraj" in self.topocode: self.topology = mdtConvertTopoDict(self.topology, self.topohandler) + if(self.trajcode and not self.topology): + if ("mdtraj" in self.topocode and + "mdtraj" in self.trajcode): + self.topology = mdtConvertTopoDict(self.topology, self.trajhandler.get_topology()) if "mdanalysis" in self.topocode: self.topology = mdaConvertTopoDict(self.topology, self.topohandler) - self.access_ui.topology = self.topology + return self.topology def trajgen(self): - if self.trajectory.frame_no == 0: + if self.trajectory.frame_no == -1: self.trajectory.natoms = self.natoms + self.trajectory.frame_no += 1 return self.atompositions else: self.atompositions = self.iread() self.trajectory.natoms = self.natoms - if self.atompositions: + if self.atompositions is not None: self.trajectory.frame_no += 1 return self.atompositions + def incoordgen(self): + if self.inputcoords.frame_no == -1: + self.inputcoords.natoms = self.incoord_natoms + return self.inputpositions + else: + self.inputpositions = self.incoord_iread() + self.inputcoords.natoms = self.incoord_natoms + if self.inputpositions is not None: + self.inputcoords.frame_no += 1 + return self.inputpositions + + def outcoordgen(self): + if self.outputcoords.frame_no == -1: + self.outputcoords.natoms = self.outcoord_natoms + return self.outputpositions + else: + self.outputpositions = self.outcoord_iread() + self.outputcoords.natoms = self.outcoord_natoms + if self.outputpositions is not None: + self.outputcoords.frame_no += 1 + return self.outputpositions + def set_TrajectoryData(self): if self.trajectory is None: self.trajectory = MDDataTrajectory() self.trajectory.filename = self.trajfile self.trajectory.nsteps = self.set_nsteps() - self.trajectory.frame_no = 0 + self.trajectory.frame_no = -1 self.trajectory.trajDict = None self.trajectory.positions = self.trajgen - self.access_ui.trajectory = self.trajectory + return self.trajectory + + def set_InputData(self): + if self.inputcoords is None: + self.inputcoords = MDDataTrajectory() + self.inputcoords.frame_no = -1 + self.inputcoords.nsteps = 1 + self.inputcoords.filename = self.incoordfile + self.inputcoords.positions = self.incoordgen + return self.inputcoords + + def set_OutputData(self): + if self.outputcoords is None: + self.outputcoords = MDDataTrajectory() + self.outputcoords.frame_no = -1 + self.outputcoords.nsteps = 1 + self.outputcoords.filename = self.outcoordfile + self.outputcoords.positions = self.outcoordgen + return self.outputcoords def set_ThermoData(self): try: @@ -2185,8 +2745,7 @@ class MDDataAccess(object): yield step self.thermostats.iread = thermogen(self) - self.access_ui.thermostats = self.thermostats - self.access_ui.thermoDict = self.thermostats.thermoDict + return self.thermostats def get_natoms_from_topo(self, topocode=None): """Read the first configuration of the coordinate file to extract the @@ -2204,7 +2763,7 @@ class MDDataAccess(object): elif "mdanalysis" in topocode: n_atoms = len(self.topohandler.atoms) elif "ase" in topocode: - n_atoms = len(self.trajiter) + n_atoms = len(self.topohandler.get_positions()) return n_atoms def get_natoms_from_traj(self, trajcode=None): diff --git a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py index 8fab71d6a86eb978555ca62a96a5446760e2ca99..a91655c330de0b837b52295ce85c1dd245da7b73 100644 --- a/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py +++ b/common/python/nomadcore/metainfo_storage/MetaInfoStorage.py @@ -284,7 +284,7 @@ class Container(object): if "lookupdict" in item: needFetchVal = False if "test" in firstdepend: - updateValue, localdict = self.checkTestsDicts(item, localdict) + storeValue, updateValue, localdict = self.checkTestsDicts(item, localdict) elif "assign" in firstdepend: updateValue = firstdepend["assign"] elif "value" in firstdepend: @@ -304,7 +304,7 @@ class Container(object): else: needFetchVal = False if "test" in firstdepend: - updateValue, localdict = self.checkTestsAttr(item, localdict) + storeValue, updateValue, localdict = self.checkTestsAttr(item, localdict) elif "assign" in firstdepend: updateValue = firstdepend["assign"] elif "value" in firstdepend: @@ -373,6 +373,8 @@ class Container(object): newUpdateValue = updateValue elif is_number(updateValue): newUpdateValue = eval(valtype+"("+str(updateValue)+")") + else: + newUpdateValue = updateValue else: # I hope you know what you are doing try: @@ -450,8 +452,9 @@ class Container(object): if eval(str(checkval) + deptest[1]): depmeet += 1 if depmeet == len(deptests): + storeValue = False if 'assign' in depdict: - return depdict['assign'], localdict + return storeValue, depdict['assign'], localdict elif 'value' in depdict: if depdict['value'] in localdict: checkval = localdict[depdict['value']] @@ -459,8 +462,18 @@ class Container(object): accessName, checkval = self.findNameInLookupDict(depdict['value'], item.lookupdict) localdict.update({depdict['value'] : checkval}) - return checkval, localdict - return None, localdict + return storeValue, checkval, localdict + elif "store" in depdict: + itemdepval = depdict["store"] + storeValue = True + if itemdepval in localdict: + checkval = localdict[itemdepval] + else: + accessName, checkval = self.findNameInLookupDict(itemdepval, item.lookupdict) + localdict.update({itemdepval : checkval}) + print("PRINTING: metainfo, store:",storeValue, checkval) + return storeValue, checkval, localdict + return False, None, localdict def checkTestsAttr(self, item, localdict): for depdict in item["depends"]: diff --git a/common/python/nomadcore/smart_parser/SmartParserCommon.py b/common/python/nomadcore/smart_parser/SmartParserCommon.py index 38b191a5bbd6fd450713a22ef39296db95be48e1..ba30130218a3d754cd41a92673e04ecb1a33c47d 100644 --- a/common/python/nomadcore/smart_parser/SmartParserCommon.py +++ b/common/python/nomadcore/smart_parser/SmartParserCommon.py @@ -5,7 +5,7 @@ from nomadcore.caching_backend import CachingLevel from nomadcore.simple_parser import mainFunction from nomadcore.simple_parser import SimpleMatcher as SM from nomadcore.metainfo_storage.MetaInfoStorage import COMMON_META_INFO_PATH, PUBLIC_META_INFO_PATH -import nomadcore.metainfo_storage.MetaInfoStorage as mStore +from nomadcore.metainfo_storage import MetaInfoStorage as mStore from nomadcore.metainfo_storage.MetaInfoStorage import strcleaner, strisinstance, literal_eval from nomadcore.smart_parser.SmartParserDictionary import getDict_MetaStrInDict, getList_MetaStrInDict, get_unitDict from contextlib import contextmanager @@ -287,6 +287,7 @@ class ParserBase(object): lastLine = parser.fIn.fInLine else: lastLine = entryline + #print("PRINTING: dictionary_parser:",lastLine) parserDict = { "firstLine" : 0, "storedLines" : '', @@ -299,10 +300,17 @@ class ParserBase(object): dicttype = True readdict = True keyMapper = None + updatefunc = None + updateattrs = None + updateconvert = None + cntrlattrs = None + preprocess = None + postprocess = None parsercntrlattr = None parsercntrlin = None lookupdict = None lookupvals = None + checkdict = {} if "dictionary" in parserOptions.keys(): if isinstance(parserOptions["dictionary"], dict): dictionary = parserOptions["dictionary"] @@ -317,6 +325,29 @@ class ParserBase(object): readdict = False if "keyMapper" in parserOptions.keys(): keyMapper = parserOptions["keyMapper"] if isinstance(parserOptions["keyMapper"], dict) else None + if "updatefunc" in parserOptions.keys(): + if isinstance(parserOptions["updatefunc"], str): + if 'max' in parserOptions["updatefunc"]: + updatefunc = np.amax + updateconvert = float + if 'min' in parserOptions["updatefunc"]: + updatefunc = np.amin + updateconvert = float + else: + updatefunc = parserOptions["updatefunc"] + if "updateattrs" in parserOptions: + updateattrs = parserOptions["updateattrs"] + if "controlattrs" in parserOptions: + cntrlattrs = parserOptions["controlattrs"] + if cntrlattrs: + for cntrla in cntrlattrs: + if hasattr(self,cntrla): + checkdict.update({cntrla:getattr(self,cntrla)}) + #print("PRINTING 0 checkdict:",checkdict) + if "preprocess" in parserOptions.keys(): + preprocess = parserOptions["preprocess"] + if "postprocess" in parserOptions.keys(): + postprocess = parserOptions["postprocess"] if "lookupdict" in parserOptions: lookupdict = getattr(self,parserOptions["lookupdict"]) if "parsercntrlattr" in parserOptions: @@ -334,7 +365,6 @@ class ParserBase(object): mNameDict = getattr(self, matchNameDict) - if dictionary is not None: if dicttype: dictionaryStr = getDict_MetaStrInDict(dictionary) @@ -368,24 +398,92 @@ class ParserBase(object): v = val[elementid] else: v = val + if preprocess is not None: + v = preprocess(cName,v) + if updatefunc is not None: + if updateconvert is None: + v=updatefunc(v) + else: + vallist=[] + if mNameDict[key].value is not None: + vallist.append(updateconvert(mNameDict[key].value)) + if v is not None: + vallist.append(updateconvert(v)) + if not vallist: + pass + else: + v=updatefunc(np.asarray(vallist)) + if postprocess is not None: + v = postprocess(cName,v) if key in list(parser.lastMatch.keys()): parser.lastMatch[key]=v else: mNameDict[key].value=v mNameDict[key].activeInfo=True + if isinstance(updateattrs,(tuple,list)): + for uattr in updateattrs: + if(uattr == cNewName and hasattr(self,cNewName)): + setattr(self,uattr,v) + #print("PRINTING: self update2:",uattr,v) else: val = mNameDict[key].value + #print("PRINTING: dict key val:",key,val) if isinstance(val, (list,tuple,np.ndarray)): v = val[elementid] else: v = val + if preprocess is not None: + #v = preprocess(v) + v = preprocess(cName,v) + if updatefunc is not None: + vallist=[] + if dicttype: + if dictionary[dictionaryStr[cNewName]].value is not None: + vallist.append(updateconvert( + dictionary[dictionaryStr[cNewName]].value + )) + else: + if dictionary[cNewName] is not None: + vallist.append(updateconvert( + dictionary[cNewName] + )) + if v is not None: + vallist.append(updateconvert(v)) + if not vallist: + pass + else: + #print("PRINTING: vallist:",vallist) + v=updatefunc(np.asarray(vallist)) + if postprocess is not None: + v = postprocess(cName,v) if dicttype: dictionary[dictionaryStr[cNewName]].value=v + #print("PRINTING: update key val:",dictionaryStr[cNewName],v) else: dictionary[cNewName]=v + #print("PRINTING: update key val:",cNewName,v) anythingmatched = True + if readdict: + pass + else: + setattr(self,parserOptions["dictionary"],dictionary) + if isinstance(updateattrs,(tuple,list)): + for dk,dv in dictionary.items(): + for uattr in updateattrs: + if(uattr == dk and hasattr(self,uattr)): + setattr(self,uattr,dv) + #print("PRINTING: self uupdate2:",uattr,dv) + + cntrlcheck = False + if cntrlattrs: + for cntrla in cntrlattrs: + if hasattr(self,cntrla): + #print("PRINTING getattr checkdict:",cntrla,getattr(self,cntrla),checkdict[cntrla]) + if getattr(self,cntrla)==checkdict[cntrla]: + cntrlcheck = True + break - if anythingmatched: + if cntrlcheck: # We have matched keywords to update at sections # if the active sections are defined cntrlDict = None @@ -425,13 +523,12 @@ class ParserBase(object): addname = addname + str(parserDict["parserID"]) secDict.update({addname:True}) setattr(self,parserOptions["controlsave"],secDict) - setattr(self,parserOptions["lookupdict"],cntrlDict) - if readdict is False: - setattr(self,parserOptions["dictionary"],dictionary) + setattr(self,parserOptions["controldict"],cntrlDict) parserSuccess = True if(mNameDict is not None and updateMatchDict): setattr(self, matchNameDict, mNameDict) + #print("PRINTING: dict Md step:",self.MDcurrentstep) return lastLine, parserSuccess def readline_control_parser(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, matchNameList, @@ -452,8 +549,8 @@ class ParserBase(object): controlattr = None controlnextattr = None controllast = None - controlskip = None - controlin = None + controlskip = [] + controlin = [] controlwait = None controlcounter = 0 controldict = None @@ -482,8 +579,10 @@ class ParserBase(object): controlin = parserOptions["controlin"] if("controlskip" in parserOptions and controlin is not None): - controlskip = [controlin[cs] for cs in parserOptions[ - "controlskip"] if cs < len(controlin)] + controlskip = [ + controlin[cs] for cs in parserOptions[ + "controlskip"] if cs < len(controlin) + ] if "controlwait" in parserOptions: if lookupdict: if parserOptions["controlwait"] in lookupdict: @@ -523,16 +622,29 @@ class ParserBase(object): if waitatlineRe.findall(lastLine): continuenewline = False else: - if controlattr in controlwait: + if controlwait is not None: + if controlattr in controlwait: + continuenewline = True + else: continuenewline = True else: - if controlattr in controlwait: - continuenewline = True + if controlwait is not None: + if controlattr in controlwait: + continuenewline = True + #else: + # continuenewline = True if continuenewline is False: - if controlattr in controlwait: + if controlwait is not None: + if controlattr in controlwait: + continuenewline = True + else: continuenewline = True + + #print("PRINTING nextlogsteps, MDnextstep, targetstep:",controlwait,controlnextattr,controlcounter) + #print("PRINTING continuenewline:",continuenewline) if continuenewline: + #print("PRINTING: readline_control lastLine:",lastLine) parserSuccess=True lastLine = parser.fIn.readline() else: @@ -588,18 +700,23 @@ class ParserBase(object): "numStoredLines" : 0, "parserID" : parserID, } + if parsername is None: + parsername = "section_control_parser_"+str(parserID) + sectionopenname = parsername + "_open" + sectionclosename = parsername + "_close" + #print("PRINTING: section_control "+parsername) parserSuccess = False backend = parser.backend sectionname = None sectionopen = False sectionopenattr = None sectionopenin = None + sectionopenrecord = None sectionclose = False sectioncloseattr = None sectionclosein = None + sectioncloserecord = None lookupdict = None - openlookupvals = None - closelookupvals = None activatesection = None if "sectionname" in parserOptions: sectionname = parserOptions["sectionname"] @@ -611,28 +728,48 @@ class ParserBase(object): sectionopenattr = getattr(self,parserOptions["sectionopenattr"]) if "sectionopenin" in parserOptions: if lookupdict: - sectionopenin = parserOptions["sectionopenin"] - if sectionopenin in lookupdict: - openlookupvals = lookupdict[sectionopenin] + if parserOptions["sectionopenin"] in lookupdict: + sectionopenin = lookupdict[parserOptions["sectionopenin"]] else: if isinstance(parserOptions["sectionopenin"], str): sectionopenin = getattr(self,parserOptions["sectionopenin"]) else: sectionopenin = parserOptions["sectionopenin"] + if sectionopenin is not None: + if lookupdict: + if sectionopenname in lookupdict: + sectionopenrecord = lookupdict[sectionopenname] + else: + sectionopenrecord = getattr(self,sectionopenname) + if sectionopenrecord is None: + #print("PRINTING: section open record setup") + sectionopenrecord = dict() + for item in sectionopenin: + sectionopenrecord.update({str(item):False}) if "sectionclose" in parserOptions: sectionclose = parserOptions["sectionclose"] if "sectioncloseattr" in parserOptions: sectioncloseattr = getattr(self,parserOptions["sectioncloseattr"]) if "sectionclosein" in parserOptions: if lookupdict: - sectionclosein = parserOptions["sectionclosein"] - if sectionclosein in lookupdict: - closelookupvals = lookupdict[sectionclosein] + if parserOptions["sectionclosein"] in lookupdict: + sectionclosein = lookupdict[parserOptions["sectionclosein"]] else: if isinstance(parserOptions["sectionclosein"], str): sectionclosein = getattr(self,parserOptions["sectionclosein"]) else: sectionclosein = parserOptions["sectionclosein"] + if sectionclosein is not None: + if lookupdict: + if sectionclosename in lookupdict: + sectioncloserecord = lookupdict[sectionclosename] + else: + sectioncloserecord = getattr(self,sectionclosename) + if sectioncloserecord is None: + #print("PRINTING: section close record setup") + sectioncloserecord = dict() + for item in sectionclosein: + sectioncloserecord.update({str(item):False}) if "activatesection" in parserOptions: if lookupdict: if parserOptions["activatesection"] in lookupdict: @@ -640,25 +777,42 @@ class ParserBase(object): if sectionname: activate = False + #print("PRINTING: sectionname activatesection:",sectionname,activatesection) if activatesection is not None: if sectionname in activatesection: activate = activatesection[sectionname] if activate: + #print("PRINTING: activate:",activate) + #print("PRINTING: open record dict:",sectionopenrecord) + #print("PRINTING: close record dict:",sectioncloserecord) if(sectionopen is not None and sectionopenattr is not None and - openlookupvals is not None): - if sectionopenattr in openlookupvals: - parserSuccess = True - gIndex = backend.openSection(sectionname) - self.secGIndexDict.update({sectionname : [1, gIndex]}) + sectionopenrecord is not None): + #print("PRINTING: section open:",str(sectionopenattr)) + if str(sectionopenattr) in sectionopenrecord: + if sectionopenrecord[str(sectionopenattr)] is False: + parserSuccess = True + gIndex = backend.openSection(sectionname) + self.secGIndexDict.update({sectionname : [1, gIndex]}) + sectionopenrecord[str(sectionopenattr)] = True if(sectionclose is not None and sectioncloseattr is not None and - closelookupvals is not None): - if sectioncloseattr in closelookupvals: - if self.secGIndexDict[sectionname][0]: - parserSuccess = True - backend.closeSection(sectionname, self.secGIndexDict[sectionname][1]) - self.secGIndexDict.update({sectionname : [0, gIndex]}) + sectioncloserecord is not None): + #print("PRINTING: section close:",str(sectioncloseattr)) + if str(sectioncloseattr) in sectioncloserecord: + if sectioncloserecord[str(sectioncloseattr)] is False: + if self.secGIndexDict[sectionname][0]: + parserSuccess = True + backend.closeSection(sectionname, self.secGIndexDict[sectionname][1]) + self.secGIndexDict.update({sectionname : [0, gIndex]}) + sectioncloserecord[str(sectioncloseattr)] = True + + if lookupdict: + lookupdict.update({sectionopenname:sectionopenrecord}) + lookupdict.update({sectionclosename:sectioncloserecord}) + else: + setattr(self,sectionopenname,sectionopenrecord) + setattr(self,sectionclosename,sectioncloserecord) if parserSuccess: if activatesection is not None: @@ -666,38 +820,7 @@ class ParserBase(object): activatesection[sectionname] = False lookupdict.update({parserOptions["activatesection"] : activatesection}) setattr(self,parserOptions["lookupdict"],lookupdict) - - -# stopOnMatch = False -# if(stopOnMatchStr is None or "AlwaysStop" in stopOnMatchStr): -# stopOnMatch = True -# else: -# stopOnMatchRe = re.compile(stopOnMatchStr) -# if stopOnMatchRe.findall(lastLine): -# stopOnMatch = True -# if self.firstLine==0: -# if stopOnFirstLine: -# stopOnMatch = True -# else: -# stopOnMatch = False -# if(quitOnMatchStr is None or "AlwaysStop" in quitOnMatchStr): -# stopOnMatch = True -# else: -# quitOnMatchRe = re.compile(quitOnMatchStr) -# if quitOnMatchRe.findall(lastLine): -# stopOnMatch = True -# if stopOnMatch is False: -# while True: -# lastLine = self.peekline(parser) -# parserDict.update({"firstLine" : parserDict["firstLine"] + 1}) -# if not lastLine: -# break -# else: -# if stopOnMatchRe.findall(lastLine): -# stopOnMatch = True -# break -# else: -# lastLine = parser.fIn.readline() + #print("PRINTING: Success sectionname activatesection:",sectionname,activatesection) return lastLine, parserSuccess @@ -713,6 +836,7 @@ class ParserBase(object): headerList = None if header is not True, values will be assigned to titles in the headerList in order as seen in line """ + #print("PRINTING: table_store lastLine:",lastLine) tableStartRe = None if "tablestartsat" in parserOptions: tableStartRe = re.compile(parserOptions["tablestartsat"]) @@ -740,24 +864,38 @@ class ParserBase(object): # the table properties header = False wrap = False - storeLines = 1 + storeLines = 0 lineFilter = None headerList = None + headerListCheck = None + headersave = None skipLines = 0 if "header" in parserOptions.keys(): header = parserOptions["header"] if (parserOptions["header"] is not None or parserOptions["header"] is not False) else False + if "headersave" in parserOptions.keys(): + if parserOptions["headersave"]: + headersave = getattr(self, parserOptions["headersave"]) + else: + headersave = None if "headerList" in parserOptions.keys(): - headerList = parserOptions["headerList"] if parserOptions["headerList"] else None + if parserOptions["headerList"]: + headerListCheck = getattr(self, parserOptions["headerList"]) + if headerListCheck is not None: + headerList = headerListCheck + else: + headerList = parserOptions["headerList"] + else: + headerList = None if "wrap" in parserOptions.keys(): wrap = parserOptions["wrap"] if parserOptions["wrap"] else False if "tablelines" in parserOptions.keys(): - storeLines = parserOptions["tablelines"] if parserOptions["tablelines"]>0 else 1 + storeLines = parserOptions["tablelines"] if parserOptions["tablelines"]>0 else 0 if "lineFilter" in parserOptions.keys(): lineFilter = parserOptions["lineFilter"] if isinstance(parserOptions["lineFilter"], dict) else None if "skiplines" in parserOptions.keys(): skipLines = parserOptions["skiplines"] - if (skipLines>0 and skipLines>=parserDict["firstLine"]): + if (skipLines>0 and skipLines>parserDict["firstLine"]): linenum = -1 else: linenum = parserDict["numStoredLines"] @@ -765,6 +903,10 @@ class ParserBase(object): parserDict.update({"storedLines" : parserDict["storedLines"] + conv_str(lastLine)}) linenum += 1 parserDict.update({"numStoredLines" : linenum}) + if storeLines == 0: + parserDict.update({"storedLines" : conv_str(lastLine)}) + linenum = 0 + parserDict.update({"numStoredLines" : linenum}) mNameDict = getattr(self, matchNameDict) @@ -795,7 +937,12 @@ class ParserBase(object): vlist.extend(line.split()) else: if headerList is not None: - hlist.extend(headerList) + if isinstance(headerList, (tuple, list)): + hlist.extend(headerList) + elif isinstance(headerList, dict): + hlist.extend(headerList.keys()) + elif isinstance(headerList, str): + hlist.extend(headerList.split()) vlist.extend(line.split()) lcount += 1 # Reconvert to original @@ -806,18 +953,27 @@ class ParserBase(object): hitem = hitem.replace(v, k) tmplist.append(hitem) hlist = tmplist + #print("PRINTING table_parser lcount:hlist:vlist:",lcount,hlist,vlist) anythingmatched = False # Search for dictionary keys - for cName, key in getDict_MetaStrInDict(mNameDict).items(): - if cName in hlist: - v=vlist[hlist.index(cName)] - if key in list(parser.lastMatch.keys()): - parser.lastMatch[key]=v - else: - mNameDict[key].value=v - mNameDict[key].activeInfo=True + if vlist: + for cName, key in getDict_MetaStrInDict(mNameDict).items(): + if cName in hlist: + v=vlist[hlist.index(cName)] + #print("PRINTING table_parser save:",cName,v) + if key in list(parser.lastMatch.keys()): + parser.lastMatch[key]=v + else: + mNameDict[key].value=v + mNameDict[key].activeInfo=True + anythingmatched = True + else: + if(header is not None and headersave is not None): + for cName in hlist: + headersave.update({cName : None}) anythingmatched = True if anythingmatched: + #print("PRINTING table_parser header:",headersave) # We have matched keywords to update at sections # if the active sections are defined cntrlDict = None @@ -857,7 +1013,7 @@ class ParserBase(object): addname = addname + str(parserDict["parserID"]) secDict.update({addname:True}) setattr(self,parserOptions["controlsave"],secDict) - setattr(self,parserOptions["lookupdict"],cntrlDict) + setattr(self,parserOptions["controldict"],cntrlDict) linenum = 0 parserDict.update({"numStoredLines" : linenum}) @@ -870,6 +1026,8 @@ class ParserBase(object): if(mNameDict is not None and updateMatchDict): setattr(self, matchNameDict, mNameDict) + if(headersave is not None and anythingmatched is True): + setattr(self, parserOptions["headersave"], headersave) return False, parserDict def table_parser(self, parser, stopOnMatchStr, quitOnMatchStr, metaNameStart, matchNameList, @@ -879,6 +1037,7 @@ class ParserBase(object): lastLine = parser.fIn.fInLine else: lastLine = entryline + #print("PRINTING table_parser:",lastLine) parserDict = { "firstLine" : 0, "storedLines" : '', @@ -886,6 +1045,7 @@ class ParserBase(object): "parserID" : parserID, "parserSuccess" : False, } + #print("PRINTING: table_parser:",parsername,lastLine) updateLastLine = False parsercontinue = True parsercntrlattr = None @@ -975,6 +1135,7 @@ class ParserBase(object): # if self.MD is not True: # newLine = parser.fIn.readline() # lastLine = ' = '.join([ "%s" % str(line) for line in zip(lastLine, newLine)]) + anythingmatched = False mNameDict = getattr(self, matchNameDict) for cName, key in getDict_MetaStrInDict(mNameDict).items(): if onlyCaseSensitive is not True: @@ -990,9 +1151,9 @@ class ParserBase(object): if mNameDict[key].replaceDict: replaceDict = mNameDict[key].replaceDict else: - replaceDict = None + replaceDict = None if mNameDict[key].subFunc: - if isinstance(subFunc, str): + if isinstance(mNameDict[key].subFunc, str): subFunc = getattr(self,mNameDict[key].subFunc) else: subFunc = mNameDict[key].subFunc @@ -1001,13 +1162,13 @@ class ParserBase(object): if mNameDict[key].addAsList: addAsList = mNameDict[key].addAsList else: - addAsList = None + addAsList = None if mNameDict[key].appendToList: appendToList = getattr(self,mNameDict[key].appendToList) else: appendToList = None if 'EOL' in matchWith: - matchThisParsy = re.compile(r"(?:%s)\s*(?:=|:)\s*(?:'|\")?" + matchThisParsy = re.compile(r"(?:%s)\s*(?:\s|=|:)\s*(?:'|\")?" "(?P<%s>.*)(?:'|\")?\s*,?" % (cText, key)) elif 'UD' in matchWith: @@ -1061,6 +1222,49 @@ class ParserBase(object): mNameDict[k].value=v mNameDict[k].activeInfo=True parserDict.update({"parserSuccess" : True}) + anythingmatched = True + if appendToList is not None: + setattr(self, mNameDict[key].appendToList, appendToList) + if anythingmatched: + # We have matched keywords to update at sections + # if the active sections are defined + cntrlDict = None + cntrlsec = None + cntrlsave = None + if "controldict" in parserOptions: + cntrlDict = getattr(self,parserOptions["controldict"]) + if "controlsections" in parserOptions: + cntrlsec = parserOptions["controlsections"] + if "controlsave" in parserOptions: + if cntrlDict: + if parserOptions["controlsave"] in cntrlDict: + secDict = {} + cntrlsave = cntrlDict[parserOptions["controlsave"]] + if isinstance(cntrlsave,dict): + secDict = cntrlsave + if cntrlsec is not None: + for sec in cntrlsec: + secDict.update({sec:True}) + else: + addname = "table_store" + if parserDict["parserID"] is not None: + addname = addname + str(parserDict["parserID"]) + secDict.update({addname:True}) + cntrlDict.update({parserOptions["controlsave"]:secDict}) + else: + secDict = {} + cntrlsave = getattr(self,parserOptions["controlsave"]) + if isinstance(cntrlsave,dict): + secDict = cntrlsave + if cntrlsec is not None: + for sec in cntrlsec: + secDict.update({sec:True}) + else: + addname = "table_store" + if parserDict["parserID"] is not None: + addname = addname + str(parserDict["parserID"]) + secDict.update({addname:True}) + setattr(self,parserOptions["controlsave"],secDict) if(mNameDict is not None and updateMatchDict): setattr(self, matchNameDict, mNameDict) @@ -1073,6 +1277,7 @@ class ParserBase(object): lastLine = parser.fIn.fInLine else: lastLine = entryline + #print("PRINTING: name_list:",lastLine) parserDict = { "firstLine" : 0, "storedLines" : lastLine, @@ -1080,8 +1285,11 @@ class ParserBase(object): "parserSuccess" : False, } self.firstLine = 0 + updateLastLine = False # Check the captured line has Fortran namelist variables and store them. # Continue search and store until the line matches with stopOnMatch. + if "movetostopline" in parserOptions: + updateLastLine = parserOptions["movetostopline"] stopOnMatchRe = re.compile(stopOnMatchStr) quitOnMatchRe = None if quitOnMatchStr is not None: @@ -1109,6 +1317,8 @@ class ParserBase(object): onlyCaseSensitive, stopOnFirstLine, parserDict, parserOptions) if rtn: + if updateLastLine: + lastLine = parser.fIn.readline() break else: lastLine = parser.fIn.readline() diff --git a/common/python/nomadcore/smart_parser/SmartParserDictionary.py b/common/python/nomadcore/smart_parser/SmartParserDictionary.py index 6434b71f045d4c039090379fd948061a6189a155..de28fbd70fbbe725a251d348b9ff7236fe630267 100644 --- a/common/python/nomadcore/smart_parser/SmartParserDictionary.py +++ b/common/python/nomadcore/smart_parser/SmartParserDictionary.py @@ -293,7 +293,13 @@ def isMetaStrInDict(nameStr, sourceDict): Meta name strings are not actual meta names but used as the keywords in the parsing. """ - return [k for k,v in sourceDict.items() if nameStr in v.matchStr][0] + val = None + for k,v in sourceDict.items(): + if nameStr in v.matchStr: + val = k + break + return val + #return [k for k,v in sourceDict.items() if nameStr in v.matchStr][0] def getDict_MetaStrInDict(sourceDict, nameList=None): """Returns a dict that includes all meta name