from rdkit import Chem
from rdkit.Chem import Draw, AllChem
from rdkit.Chem.Draw import IPythonConsole
import gzip, numpy
from rdkit.Chem import PyMol
v = PyMol.MolViewer()
# zinc 0903464
smi_zinc = 'Cc1cc2c(c(c1)OCC(=O)N3Cc4c(c5ccccc5[nH]4)C[C@H]3C(=O)OC)c6c(c(=O)o2)CCC6'
ref1 = Chem.MolFromSmiles(smi_zinc)
ref1
# load pdb
ref1 = Chem.MolFromSmiles('Cc1cc2c(c(c1)OCC(=O)N3Cc4c(c5ccccc5[nH]4)C[C@H]3C(=O)OC)c6c(c(=O)o2)CCC6')
ref1 = AllChem.AddHs(ref1, addCoords=True)
mol1 = Chem.MolFromPDBFile('ligand_zinc_start.pdb', removeHs=False)
mol1 = AllChem.AssignBondOrdersFromTemplate(ref1, mol1)
mol1.RemoveAllConformers()
mol1
from rdkit.Chem import rdConformerParser
cids1 = rdConformerParser.AddConformersFromGromosTrajectory(mol1, 'ligand_zinc_gath.trc')
print len(cids1)
100
from rdkit.Chem import rdMolAlign
for i in range(1, mol1.GetNumConformers()):
pyO3A = rdMolAlign.GetO3A(mol1, mol1, prbCid=i, refCid=0)
pyO3A.Align()
v.DeleteAll()
for cid in cids1: #[:10]:
v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
rmsd1 = []
for cid in cids1[1:]:
rmsd1.append(AllChem.GetConformerRMS(mol1, 0, cid, prealigned=True))
plt.xlabel('RMSD')
plt.ylabel('count')
hist(rmsd1)
plt.show()
cids12 = [0]
cids12 += [cid for cid,r in zip(cids1[1:], rmsd1) if r >= 3.0]
print len(cids12)
33
v.DeleteAll()
for cid in cids12:
v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
rmsdmat1 = AllChem.GetConformerRMSMatrix(mol1, prealigned=True)
from rdkit import SimDivFilters
mmp = SimDivFilters.MaxMinPicker()
dids1 = mmp.Pick(numpy.array(rmsdmat1),mol1.GetNumConformers(),10)
print [d for d in dids1]
[37, 75, 61, 81, 10, 44, 84, 59, 18, 3]
v.DeleteAll()
for cid in dids1:
v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
from rdkit.ML.Cluster import Butina
clusters1 = Butina.ClusterData(rmsdmat1, mol1.GetNumConformers(), 1.0, isDistData=True, reordering=True)
print len(clusters1)
28
plt.xlabel('cluster index')
plt.ylabel('number of members')
bar(range(1,len(clusters1)+1), [len(c) for c in clusters1])
plt.show()
clids1 = []
for c in clusters1:
if len(c) > 1:
clids1.append(c[0])
print clids1
[70, 21, 93, 2, 40, 64, 59, 39, 36]
v.DeleteAll()
for cid in clids1:
v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
tani1 = []
for cid in cids1[1:]:
tani1.append(AllChem.ShapeTanimotoDist(mol1, mol1, confId1=0, confId2=cid))
plt.xlabel('Shape Tanimoto Distance')
plt.ylabel('count')
hist(tani1)
plt.show()
cids13 = [0]
cids13 += [cid for cid,t in zip(cids1[1:], tani1) if t > 0.65]
print len(cids13)
15
v.DeleteAll()
for cid in cids13:
v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
distmat1 = []
for i in range(1, mol1.GetNumConformers()):
tmp = []
for j in range(i):
tmp.append(AllChem.ShapeTanimotoDist(mol1, mol1, confId1=i, confId2=j))
distmat1.extend(tmp)
mmp = SimDivFilters.MaxMinPicker()
tdids1 = mmp.Pick(numpy.array(distmat1),mol1.GetNumConformers(),10)
print [d for d in tdids1]
[37, 81, 75, 61, 10, 73, 59, 7, 11, 44]
tclusters1 = Butina.ClusterData(distmat1, mol1.GetNumConformers(), 0.3, isDistData=True, reordering=True)
print len(tclusters1)
30
plt.xlabel('cluster index')
plt.ylabel('number of members')
bar(range(1,len(tclusters1)+1), [len(c) for c in tclusters1])
plt.show()
tclids1 = []
for c in tclusters1:
if len(c) > 1:
tclids1.append(c[0])
print tclids1
[30, 99, 93, 15, 87, 40, 3, 57, 74, 20, 98, 80, 38, 8]
v.DeleteAll()
for cid in tclids1:
v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate
factory = Gobbi_Pharm2D.factory
pharm3D = []
for cid in cids1:
dm = Chem.Get3DDistanceMatrix(mol1, confId=cid)
pharm3D.append(Generate.Gen2DFingerprint(mol1, factory, dMat=dm))
from rdkit import DataStructs
# similarity distribution
sims1 = []
p1 = pharm3D[0] # first conformer as reference
for p in pharm3D[1:]:
sims1.append(DataStructs.TanimotoSimilarity(p1, p))
plt.xlabel('Tanimoto similarity')
plt.ylabel('number of conformers')
hist(sims1)
plt.show()
sids1 = [0]
sids1 += [cid for cid,s in zip(cids1[1:], sims1) if s < 0.4]
print len(sids1)
print sids1
11 [0, 64, 73, 76, 79, 83, 84, 87, 88, 95, 98]
tanimat1 = []
for i in range(1, mol1.GetNumConformers()):
tanimat1.extend(DataStructs.BulkTanimotoSimilarity(pharm3D[i], pharm3D[:i], returnDistance=True))
tclusters12 = Butina.ClusterData(tanimat1, mol1.GetNumConformers(), 0.3, isDistData=True, reordering=True)
print len(tclusters12)
22
plt.xlabel('cluster index')
plt.ylabel('number of members')
bar(range(1,len(tclusters12)+1), [len(c) for c in tclusters12])
plt.show()
tclids12 = []
for c in tclusters12:
if len(c) > 1:
tclids12.append(c[0])
print tclids12
[13, 90, 51, 87, 82, 98, 76, 56, 99, 77, 61, 11, 10]
v.DeleteAll()
for cid in tclids12:
v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
smi_pdb = 'C[NH+]1CCN(CCOc2cc3ncnc(Nc4c5c(ccc4Cl)OCO5)c3c(OC3CCOCC3)c2)CC1'
mol2 = Chem.MolFromSmiles(smi_pdb)
mol2
# PDB 2H8H
mol2 = Chem.MolFromMolFile('lig_2H8H.sdf', removeHs=False)
mol2.RemoveAllConformers()
mol2
print mol2.GetNumAtoms()
71
cids2 = rdConformerParser.AddConformersFromAmberTrajectory(mol2, 'ligand.trx')
print len(cids2)
100
for i in range(1, mol2.GetNumConformers()):
pyO3A = rdMolAlign.GetO3A(mol2, mol2, prbCid=i, refCid=0)
pyO3A.Align()
v.DeleteAll()
for cid in cids2:
v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
rmsd2 = []
for cid in cids2[1:]:
rmsd2.append(AllChem.GetConformerRMS(mol2, 0, cid, prealigned=True))
plt.xlabel('RMSD')
plt.ylabel('count')
hist(rmsd2)
plt.show()
cids22 = [0]
cids22 += [cid for cid,r in zip(cids2[1:], rmsd2) if r >= 1.0]
print len(cids22)
10
v.DeleteAll()
for cid in cids22:
v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
rmsdmat2 = AllChem.GetConformerRMSMatrix(mol2, prealigned=True)
mmp = SimDivFilters.MaxMinPicker()
dids2 = mmp.Pick(numpy.array(rmsdmat2), mol2.GetNumConformers(), 10)
print [d for d in dids2]
[37, 23, 11, 62, 28, 77, 61, 49, 44, 33]
v.DeleteAll()
for cid in dids2:
v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)
clusters2 = Butina.ClusterData(rmsdmat2, mol2.GetNumConformers(), 0.5, isDistData=True, reordering=True)
print len(clusters2)
24
plt.xlabel('cluster index')
plt.ylabel('number of members')
bar(range(1,len(clusters2)+1), [len(c) for c in clusters2])
plt.show()
clids2 = []
for c in clusters2:
if len(c) > 1:
clids2.append(c[0])
print clids2
[37, 47, 14, 83, 80, 18, 69, 45, 84, 59]
v.DeleteAll()
for cid in clids2:
v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
v.GetPNG(preDelay=2)