Commit 418eed43 authored by Berk Onat's avatar Berk Onat

Flexiable recorder/parser functions added to SmartParser, Stream RTF, PAR,...

Flexiable recorder/parser functions added to SmartParser, Stream RTF, PAR, CRD, COOR readers and force field parameters with ParmEd support added to MDDataAccess, resetting support added to MetaInfoStorage.
parent 44a3f97e
......@@ -25,7 +25,11 @@ with warnings.catch_warnings(record=True) as w:
import MDAnalysis.core.universe as mda_u
import MDAnalysis.coordinates as mda_c
import panedr
import parmed as pmd
import pymolfile as pym
import io
import struct
import copy
mda_coordinates_modules = tuple(x[1] for x in inspect.getmembers(mda_c,inspect.ismodule))
logger = logging.getLogger(__name__)
......@@ -66,6 +70,87 @@ ELEMENTS_MASS_TABLE = {
"Zn" : 65.37000, "Zr" : 91.224
}
TEXTCHARS = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f})
def is_file_binary(fName, checkBytes=None):
testin=None
if checkBytes is None:
checkBytes = 1024
try:
with open(fName, 'rb') as fin:
testin = fin.read(checkBytes)
except(FileNotFoundError, IOError):
pass
if testin:
if is_binary_string(testin):
return True
else:
return False
else:
return None
def is_binary_string(inbytes):
return bool(inbytes.translate(None, TEXTCHARS))
def bytes_per_record(record_type):
if record_type == 'f': return 4
if record_type == 'd': return 8
if record_type == 'i': return 4
if record_type == 's': return 1
if record_type == 's4': return 4
# Default we read 1 byte
return 1
def read_start_marker(f):
# Read Start Marker
try:
struct.unpack('i', f.read(4))
except(struct.error):
pass
def read_end_marker(f):
# Read End Marker
try:
struct.unpack('i', f.read(4))
except(struct.error):
pass
def read_record(f, records, marker=False):
if marker:
# Read Start Marker
read_start_marker(f)
res=[]
for record_type, length, smarker, emarker in records:
if smarker:
# Read Start Marker
read_start_marker(f)
x=[]
if length<0:
length= res[-length-1][0]
if 's' in record_type:
record_type = 's'
for i in range(length):
try:
x.append(struct.unpack(record_type, f.read(bytes_per_record(record_type)))[0])
except(struct.error):
return None
if 's' in record_type:
x = ''.join([ s.decode('utf-8') for s in x])
res.append(x)
if emarker:
# Read End Marker
read_end_marker(f)
if marker:
# Read End Marker
read_end_marker(f)
return res
def get_any_element_name(atomname):
elementlist = ELEMENTS_MASS_TABLE.keys()
# check if the element is in list
......@@ -113,9 +198,123 @@ def get_element_name(atomname):
if len(atomname)<3:
return atomname[0:1]
else:
return get_any_element_name()
return atomname.upper()
return None
def charmm_stream_reader(slines, topo=None):
if topo is None:
topo = pmd.charmm.CharmmParameterSet()
lines = []
title = []
command = []
return topo
def charmm_coor_reader(fname, ftextmem=None):
coorDict = None
if ftextmem is not None:
title = []
natoms = 0
countatoms = 0
atomlist = []
for line in ftextmem:
# If the line is a title line
# add it to title list
if(line.startswith('*') or 'TITLE>' in line.upper()):
title.append(line)
# else start readinf COOR format
else:
if natoms<1:
natoms = int(line.split()[0])
else:
if countatoms<natoms:
atomlist.append(line.split())
countatoms+=1
if coorDict is None:
coorDict = {}
coorDict.update({'binary':False})
coorDict.update({'title':title})
coorDict.update({'numatoms':natoms})
coorDict.update({'atomlist':atomlist})
coords=np.asarray(atomlist)
if len(coords[0,:])>6:
coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,4:7]]])})
else:
coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,3:6]]])})
else:
if is_file_binary(fname):
try:
with open(fname, 'rb') as fin:
hdr, icntrl = read_record(f=fin, records=[
('s', 4, True, False), ('i', 20, False, True)])
if hdr == 'COOR':
ntitl = read_record(f=fin, records=[('i', 1, True, False)])
ntitl=ntitl[0][0]
for t in range(int(ntitl)):
title = read_record(f=fin, records=[('s', 80, False, False)])
endtitle = read_record(f=fin, records=[('i', 1, True, False)])
natoms = read_record(f=fin, records=[('i', 1, False, False)])
natoms = natoms[0][0]
xt = read_record(f=fin, records=[('i', 1, False, False)])
wx = read_record(f=fin, records=[('f', natoms, True, True)])
wy = read_record(f=fin, records=[('f', natoms, True, True)])
wz = read_record(f=fin, records=[('f', natoms, True, True)])
ww = read_record(f=fin, records=[('f', natoms, True, True)])
if ww is None:
ww = [0.0 for atm in range(natoms)]
if coorDict is None:
coorDict = {}
coords=[]
for r in range(len(wx)):
coords.append(np.column_stack([wx[r],wy[r],wz[r]]))
coorDict.update({'binary':True})
coorDict.update({'title':title})
coorDict.update({'numatoms':natoms})
coorDict.update({'xt':xt})
coorDict.update({'coords':np.asarray(coords)})
coorDict.update({'ww':ww})
except(FileNotFoundError, IOError):
pass
else:
try:
with open(fname, 'r') as fin:
title = []
natoms = 0
countatoms = 0
atomlist = []
for line in fin:
# If the line is a title line
# add it to title list
if(line.startswith('*') or
line.startswith('TITLE')):
title.append(line)
# else start readinf COOR format
else:
if natoms<1:
try:
natoms = int(line.split()[0])
except(ValueError):
return coorDict
else:
if countatoms<natoms:
atomlist.append(line.split())
countatoms+=1
if coorDict is None:
coorDict = {}
coorDict.update({'binary':False})
coorDict.update({'title':title})
coorDict.update({'numatoms':natoms})
coorDict.update({'atomlist':atomlist})
coords=np.asarray(atomlist)
if len(coords[0,:])>6:
coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,4:7]]])})
else:
coorDict.update({'coords':np.asarray([[[float(x) for x in v] for v in coords[:,3:6]]])})
except(FileNotFoundError, IOError):
pass
return coorDict
def get_dir_base_extension(file_name):
""" Splits directory, file base and file extensions
......@@ -127,6 +326,569 @@ def get_dir_base_extension(file_name):
file_dir = os.path.dirname(file_name)
return file_dir, file_base, file_extension
def pmdConvertTopoDict(topoStor, topoPMD):
""" Function to convert ParmEd topology info
from CharmmPsfFile and CharmmParameterSet
to MDDataAccess
"""
topo=None
if(isinstance(topoPMD, pmd.charmm.CharmmParameterSet) or
isinstance(topoPMD, pmd.charmm.CharmmPsfFile) or
(topoPMD is None and topoStor.charmmcoor is not None) or
topoStor.charmmcoortopo is not None):
topo = topoPMD
def getatomall(atom):
atmid=0
atmname = atom[0]
atmtyp = atom[1]
if atom[10]:
atmmass = float(atom[10])
else:
atmmass = None
if atom[3]:
atmresid = atom[3]
atmres = atom[2]
else:
atmresid = None
atmres = None
if atom[4]:
atmsegid = atom[4]
else:
atmsegid = None
if atom[11]:
atmchrg = float(atom[11])
else:
atmchrg = None
if atom[12]:
atmrad = float(atom[12])
else:
atmrad = None
if atom[9]:
atmbfac = float(atom[9])
else:
atmbfac = None
atm_unique = ''
if atmname:
atm_unique = atm_unique + atmname
if atmtyp:
atm_unique = atm_unique + "-" + atmtyp
if atom[16]:
aeps = float(atom[16])
else:
aeps = None
if atom[17]:
arm = float(atom[17])
else:
arm = None
if atom[18]:
aeps14 = float(atom[18])
else:
aeps14 = None
if atom[19]:
arm14 = float(atom[19])
else:
arm14 = None
if atom[20]:
anb = [[x,float(y)] for x,y in atom[20]]
else:
anb = None
#if atmres:
# atm_unique = atm_unique + "-" + atmres
#if atmsegid:
# atm_unique = atm_unique + "-" + atmsegid
return [atmid,atm_unique,atmname,
atmtyp,atmres,atmresid,atmsegid,
atmmass,atmchrg,atmrad,atmbfac,
aeps,arm,aeps14,arm14,anb,None]
def checkatomsdiff(atom1,atom2,checklist):
index=0
target=len(checklist)
for t in checklist:
if atom1[t] is not atom2[t]:
index += 1
if index==target:
return True
else:
return False
def atom_respairs(atom1,atom2):
return [get_atomres(atom1),get_atomres(atom2)]
def atompairs(atom1,atom2):
return [getatom(atom1),getatom(atom2)]
def atompairids(atom1,atom2):
return [atom1[0],atom2[0]]
structure=None
segmentList = None
residueList = None
atom_element_list = []
nbepsList = None
nbrList = None
nbeps14List = None
nbr14List = None
nbfixList = None
# If topo is a PsfFile, the parameter files were not supplied.
# Hence, we can only extract topo info from Psf class in topo.
if isinstance(topoPMD, pmd.charmm.CharmmPsfFile):
structure = [[x.name, x.type, x.residue.name,
x.residue._idx, x.residue.segid,
x.residue.chain, x.altloc,
x.irotat, x.occupancy, x.bfactor, x.mass,
x._charge, x.solvent_radius, x.atomic_number,
x.tree, x.atom_type.name, x.atom_type.epsilon,
x.atom_type.rmin, x.atom_type.epsilon_14,
x.atom_type.rmin_14, x.atom_type.nbfix] for x in topoPMD.atoms]
chainList = [a[5] for a in structure]
segmentList = [a[4] for a in structure]
residList = [a[3] for a in structure]
residueList = [a[2] for a in structure]
nbepsList = [a[16] for a in structure]
nbrList = [a[17] for a in structure]
nbeps14List = [a[18] for a in structure]
nbr14List = [a[19] for a in structure]
nbfixList = [a[20] for a in structure]
atomList = structure
atomAllList = [getatomall(a) for a in structure]
atom_name_list = [a[0] for a in structure]
#for ai, atom in enumerate(structure):
# if atom[2] in ELEMENTS_MASS_TABLE.keys():
# element = atom[2]
# else:
# element = get_element_name(atom[2])
# if element is None:
# element = atom[1]
# atom_element_list.append(element)
# If topo is ParSet, the parameter file or files might be supplied.
# If Psf file is also supplied with the parameters, we can extract
# full information of the topology with parameter values.
# However, if there is no Psf file and user only supplied COOR CARD
# or CRD file, than we can only extract atom definitions through
# these two files and we try to fill in the blanks of the topology
# puzzle in CHARMM with parameter set class in ParmEd.
elif(isinstance(topoPMD, pmd.charmm.CharmmParameterSet) and
(topoStor.charmmcoor is not None or
topoStor.charmmcoortopo is not None)):
coortopo=False
#ATOMNO RESNO RES TYPE X Y Z SEGID RESID Weighting
if topoStor.charmmcoortopo is not None:
if len(topoStor.charmmcoortopo[0])>8:
structure = [[at[3], at[2], at[2],
at[1], at[7], at[8],
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topoStor.charmmcoortopo]
else:
structure = [[at[3], at[2], at[2],
at[1], None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topoStor.charmmcoortopo]
coortopo=True
elif topoStor.charmmcoor is not None:
if 'atomlist' in topoStor.charmmcoor:
topostruct = topoStor.charmmcoor['atomlist']
if len(topostruct[0])>8:
structure = [[at[3], at[2], at[2],
at[1], at[7], at[8],
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topostruct]
else:
structure = [[at[3], at[2], at[2],
at[1], None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topostruct]
coortopo=True
if coortopo is True:
param_types = [[topoPMD.atom_types[at].name, topoPMD.atom_types[at].epsilon,
topoPMD.atom_types[at].rmin, topoPMD.atom_types[at].epsilon_14,
topoPMD.atom_types[at].rmin_14, topoPMD.atom_types[at].nbfix] for at in topoPMD.atom_types]
nbond_params = {}
for par in param_types:
nbond_params.update({par[0]:par[1:6]})
structstor = structure[:]
structure = []
for ai, at in enumerate(structstor):
structure.append(at)
structure[ai].extend([None])
if at[2] in nbond_params:
structure[ai].extend(nbond_params[at[2]][0:4])
if nbond_params[at[2]][4]:
structure[ai].extend(nbond_params[at[2]][4])
else:
structure[ai].extend([None])
chainList = [a[5] for a in structure]
segmentList = [a[4] for a in structure]
residList = [a[3] for a in structure]
residueList = [a[2] for a in structure]
nbepsList = [a[16] for a in structure]
nbrList = [a[17] for a in structure]
nbeps14List = [a[18] for a in structure]
nbr14List = [a[19] for a in structure]
nbfixList = [a[20] for a in structure]
atomList = structure
atomAllList = [getatomall(a) for a in structure]
atom_name_list = [a[0] for a in structure]
#for ai, atom in enumerate(structure):
# if atom[2] in ELEMENTS_MASS_TABLE.keys():
# element = atom[2]
# else:
# element = get_element_name(atom[2])
# if element is None:
# element = atom[1]
# atom_element_list.append(element)
# There is also one possibility that we do not have
# Psf or parameter set but only COOR info. If this is
# the case and the info is not from a binary CRD file,
# we can only extract names of residues, atom kinds
# and types from the coordinate card info and
elif(topoStor.charmmcoor is not None or
topoStor.charmmcoortopo is not None):
coortopo=False
#ATOMNO RESNO RES TYPE X Y Z SEGID RESID Weighting
if topoStor.charmmcoortopo is not None:
if len(topoStor.charmmcoortopo[0])>8:
structure = [[at[3], at[2], at[2],
at[1], at[7], at[8],
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topoStor.charmmcoortopo]
else:
structure = [[at[3], at[2], at[2],
at[1], None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topoStor.charmmcoortopo]
coortopo=True
elif topoStor.charmmcoor is not None:
if 'atomlist' in topoStor.charmmcoor:
topostruct = topoStor.charmmcoor['atomlist']
if len(topostruct[0])>8:
structure = [[at[3], at[2], at[2],
at[1], at[7], at[8],
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topostruct]
else:
structure = [[at[3], at[2], at[2],
at[1], None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None,
None, None, None] for at in topostruct]
coortopo=True
if coortopo is True:
chainList = [a[5] for a in structure]
segmentList = [a[4] for a in structure]
residList = [a[3] for a in structure]
residueList = [a[2] for a in structure]
atomList = topo.structure
atomAllList = [getatomall(a) for a in structure]
atom_name_list = [a[0] for a in structure]
#for ai, atom in enumerate(structure):
# if atom[2] in ELEMENTS_MASS_TABLE.keys():
# element = atom[2]
# else:
# element = get_element_name(atom[2])
# if element is None:
# element = atom[1]
# atom_element_list.append(element)
if structure is None:
return topoStor
count=0
for i in range(len(atomAllList)):
atomAllList[i][0]=count
count+=1
types_list = list(set([a[1] for a in atomAllList]))
#atom_type_list = [a[3] for a in atomList]
atom_all_list = np.asarray(atomAllList)
atom_names = atom_all_list[:,2]
atom_types = atom_all_list[:,3]
atom_masses = atom_all_list[:,7]
atom_charges = atom_all_list[:,8]
atom_radiuses = atom_all_list[:,9]
atom_bfactors = atom_all_list[:,10]
#print("Atom Bonds:",topo.bonds)
#print("Atom Angles:",topo.angles)
#print("Atom Dihedrals:",topo.dihedrals)
#print("Atom Impropers:",topo.impropers)
atom_type_dict = {}
atomtypesDict = {}
atomnameDict = {}
massDict = {}
elementDict = {}
radiusDict = {}
chargeDict = {}
bfactorDict = {}
epsilonDict = {}
rminDict = {}
epsilon14Dict = {}
rmin14Dict = {}
nbfixDict = {}
atomlabelList = []
atomlabelDict = []
for ielm in range(len(types_list)):
elm = types_list[ielm]
atom_type_dict.update({elm : ielm+1})
typelabelList = []
for ai, atom in enumerate(atomAllList):
if atom[2] in ELEMENTS_MASS_TABLE.keys():
element = atom[2]
else:
element = get_element_name(atom[2])
if element is None:
element = atom[1]
if elm == atom[1]:
elementDict.update({elm : element})
atomnameDict.update({elm : atom[2]})
atomtypesDict.update({elm : atom[3]})
massDict.update({elm : atom[7]})
chargeDict.update({elm : atom[8]})
radiusDict.update({elm : atom[9]})
bfactorDict.update({elm : atom[10]})
epsilonDict.update({elm : atom[11]})
rminDict.update({elm : atom[12]})
epsilon14Dict.update({elm : atom[13]})
rmin14Dict.update({elm : atom[14]})
nbfixDict.update({elm : atom[15]})
typelabelList.append(atom[0])
atomAllList[ai][16]=element
atomlabelDict.append(typelabelList)
for ai, atom in enumerate(atomAllList):
atom_element_list.append(atom[16])
for atom in atomAllList:
atomlabelList.append([atom[0],atom_type_dict[atom[1]]])
system_name = ''
if atom_element_list:
system_name = system_name + ''.join([
el + str(atom_element_list.count(el)) for el in set(
atom_element_list)])
if segmentList is not None:
segsysname = '-'.join([str(seg) for seg in list(
set(segmentList)) if seg is not None])
if segsysname:
system_name = system_name + '-' + segsysname
if residueList is not None:
ressysname = '-'.join([str(res) for res in list(
set(residueList)) if res is not None])
if ressysname:
system_name = system_name + '-' + ressysname
massList = list(massDict.values())
atom_type_list = list(atomtypesDict.values())
name_list = list(atomnameDict.values())
elementList = list(elementDict.values())
radiusList = list(radiusDict.values())
chargesList = list(chargeDict.values())
bfactorList = list(bfactorDict.values())
if isinstance(topoPMD, pmd.charmm.CharmmPsfFile):
if topoPMD.bonds[0].type is not None:
for bi, bondtype in enumerate(topoPMD.bonds[0].type.list):
topoPMD.bonds[0].type.list[bi]._idx=bi
topo_bond_list = np.asarray([
[x.atom1._idx, x.atom2._idx, x.type._idx,
x.type.k, x.type.req] for x in topoPMD.bonds
])
else:
topo_bond_list = np.asarray([
[x.atom1._idx, x.atom2._idx] for x in topoPMD.bonds
])
topbList = topo_bond_list[:,0:2]
elif(isinstance(topoPMD, pmd.charmm.CharmmParameterSet) and
(topoStor.charmmcoor is not None or
topoStor.charmmcoortopo is not None)):
if topoPMD.bond_types is not None:
bi=0
search_types = list(set(atom_type_list))