from rdkit import Chem from rdkit.Chem import Draw, AllChem from rdkit.Chem.Draw import IPythonConsole import gzip, numpy from rdkit.Chem import PyMol v = PyMol.MolViewer() # zinc 0903464 smi_zinc = 'Cc1cc2c(c(c1)OCC(=O)N3Cc4c(c5ccccc5[nH]4)C[C@H]3C(=O)OC)c6c(c(=O)o2)CCC6' ref1 = Chem.MolFromSmiles(smi_zinc) ref1 # load pdb ref1 = Chem.MolFromSmiles('Cc1cc2c(c(c1)OCC(=O)N3Cc4c(c5ccccc5[nH]4)C[C@H]3C(=O)OC)c6c(c(=O)o2)CCC6') ref1 = AllChem.AddHs(ref1, addCoords=True) mol1 = Chem.MolFromPDBFile('ligand_zinc_start.pdb', removeHs=False) mol1 = AllChem.AssignBondOrdersFromTemplate(ref1, mol1) mol1.RemoveAllConformers() mol1 from rdkit.Chem import rdConformerParser cids1 = rdConformerParser.AddConformersFromGromosTrajectory(mol1, 'ligand_zinc_gath.trc') print len(cids1) from rdkit.Chem import rdMolAlign for i in range(1, mol1.GetNumConformers()): pyO3A = rdMolAlign.GetO3A(mol1, mol1, prbCid=i, refCid=0) pyO3A.Align() v.DeleteAll() for cid in cids1: #[:10]: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) rmsd1 = [] for cid in cids1[1:]: rmsd1.append(AllChem.GetConformerRMS(mol1, 0, cid, prealigned=True)) plt.xlabel('RMSD') plt.ylabel('count') hist(rmsd1) plt.show() cids12 = [0] cids12 += [cid for cid,r in zip(cids1[1:], rmsd1) if r >= 3.0] print len(cids12) v.DeleteAll() for cid in cids12: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) rmsdmat1 = AllChem.GetConformerRMSMatrix(mol1, prealigned=True) from rdkit import SimDivFilters mmp = SimDivFilters.MaxMinPicker() dids1 = mmp.Pick(numpy.array(rmsdmat1),mol1.GetNumConformers(),10) print [d for d in dids1] v.DeleteAll() for cid in dids1: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) from rdkit.ML.Cluster import Butina clusters1 = Butina.ClusterData(rmsdmat1, mol1.GetNumConformers(), 1.0, isDistData=True, reordering=True) print len(clusters1) plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(clusters1)+1), [len(c) for c in clusters1]) plt.show() clids1 = [] for c in clusters1: if len(c) > 1: clids1.append(c[0]) print clids1 v.DeleteAll() for cid in clids1: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) tani1 = [] for cid in cids1[1:]: tani1.append(AllChem.ShapeTanimotoDist(mol1, mol1, confId1=0, confId2=cid)) plt.xlabel('Shape Tanimoto Distance') plt.ylabel('count') hist(tani1) plt.show() cids13 = [0] cids13 += [cid for cid,t in zip(cids1[1:], tani1) if t > 0.65] print len(cids13) v.DeleteAll() for cid in cids13: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) distmat1 = [] for i in range(1, mol1.GetNumConformers()): tmp = [] for j in range(i): tmp.append(AllChem.ShapeTanimotoDist(mol1, mol1, confId1=i, confId2=j)) distmat1.extend(tmp) mmp = SimDivFilters.MaxMinPicker() tdids1 = mmp.Pick(numpy.array(distmat1),mol1.GetNumConformers(),10) print [d for d in tdids1] tclusters1 = Butina.ClusterData(distmat1, mol1.GetNumConformers(), 0.3, isDistData=True, reordering=True) print len(tclusters1) plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(tclusters1)+1), [len(c) for c in tclusters1]) plt.show() tclids1 = [] for c in tclusters1: if len(c) > 1: tclids1.append(c[0]) print tclids1 v.DeleteAll() for cid in tclids1: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate factory = Gobbi_Pharm2D.factory pharm3D = [] for cid in cids1: dm = Chem.Get3DDistanceMatrix(mol1, confId=cid) pharm3D.append(Generate.Gen2DFingerprint(mol1, factory, dMat=dm)) from rdkit import DataStructs # similarity distribution sims1 = [] p1 = pharm3D[0] # first conformer as reference for p in pharm3D[1:]: sims1.append(DataStructs.TanimotoSimilarity(p1, p)) plt.xlabel('Tanimoto similarity') plt.ylabel('number of conformers') hist(sims1) plt.show() sids1 = [0] sids1 += [cid for cid,s in zip(cids1[1:], sims1) if s < 0.4] print len(sids1) print sids1 tanimat1 = [] for i in range(1, mol1.GetNumConformers()): tanimat1.extend(DataStructs.BulkTanimotoSimilarity(pharm3D[i], pharm3D[:i], returnDistance=True)) tclusters12 = Butina.ClusterData(tanimat1, mol1.GetNumConformers(), 0.3, isDistData=True, reordering=True) print len(tclusters12) plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(tclusters12)+1), [len(c) for c in tclusters12]) plt.show() tclids12 = [] for c in tclusters12: if len(c) > 1: tclids12.append(c[0]) print tclids12 v.DeleteAll() for cid in tclids12: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) smi_pdb = 'C[NH+]1CCN(CCOc2cc3ncnc(Nc4c5c(ccc4Cl)OCO5)c3c(OC3CCOCC3)c2)CC1' mol2 = Chem.MolFromSmiles(smi_pdb) mol2 # PDB 2H8H mol2 = Chem.MolFromMolFile('lig_2H8H.sdf', removeHs=False) mol2.RemoveAllConformers() mol2 print mol2.GetNumAtoms() cids2 = rdConformerParser.AddConformersFromAmberTrajectory(mol2, 'ligand.trx') print len(cids2) for i in range(1, mol2.GetNumConformers()): pyO3A = rdMolAlign.GetO3A(mol2, mol2, prbCid=i, refCid=0) pyO3A.Align() v.DeleteAll() for cid in cids2: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) rmsd2 = [] for cid in cids2[1:]: rmsd2.append(AllChem.GetConformerRMS(mol2, 0, cid, prealigned=True)) plt.xlabel('RMSD') plt.ylabel('count') hist(rmsd2) plt.show() cids22 = [0] cids22 += [cid for cid,r in zip(cids2[1:], rmsd2) if r >= 1.0] print len(cids22) v.DeleteAll() for cid in cids22: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) rmsdmat2 = AllChem.GetConformerRMSMatrix(mol2, prealigned=True) mmp = SimDivFilters.MaxMinPicker() dids2 = mmp.Pick(numpy.array(rmsdmat2), mol2.GetNumConformers(), 10) print [d for d in dids2] v.DeleteAll() for cid in dids2: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2) clusters2 = Butina.ClusterData(rmsdmat2, mol2.GetNumConformers(), 0.5, isDistData=True, reordering=True) print len(clusters2) plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(clusters2)+1), [len(c) for c in clusters2]) plt.show() clids2 = [] for c in clusters2: if len(c) > 1: clids2.append(c[0]) print clids2 v.DeleteAll() for cid in clids2: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False) v.GetPNG(preDelay=2)