"""RDKit walkthrough script.

Sections, in order:

1. Counting SP2 atoms with a Python loop versus an atom query (and timing both).
2. Loading an SD file into a pandas DataFrame with PandasTools, adding
   descriptors, and substructure filtering.
3. Maximum common substructure (MCS) searches on two sets of SMILES.
4. Loading a reaction from an RXN block and checking candidate reactants.
5. 3D embedding, bond-order assignment from a template, and Open3DAlign.

The script targets an IPython/Jupyter session: it imports ``IPythonConsole``
and contains bare expressions (for example ``len(zim_mols)`` or ``m1``) whose
value is only displayed when the statement is the last one in a notebook cell.
Run as a plain script, those expressions are evaluated and discarded.
"""
import copy
import gzip
import timeit

import pandas as pd  # noqa: F401  (imported by the original session; unused here)
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, Draw, PandasTools, rdFMCS, rdqueries
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.SimpleEnum import Enumerator

ZIM_SDF = './data/zim.sdf.gz'
SP2 = Chem.HybridizationType.SP2


def _sp2_counts_loop(mols):
    """Return the number of SP2 atoms per molecule using a Python-level loop."""
    return [
        len([atom for atom in mol.GetAtoms() if atom.GetHybridization() == SP2])
        for mol in mols
    ]


def _sp2_counts_query(mols, query):
    """Return the number of atoms matching *query* per molecule."""
    return [len(mol.GetAtomsMatchingQuery(query)) for mol in mols]


def _best_seconds(func, repeat=3):
    """Return the fastest wall-clock time (seconds) of *repeat* single calls."""
    return min(timeit.repeat(func, number=1, repeat=repeat))


def _read_smiles_file(path):
    """Parse the last whitespace-separated field of each line as SMILES.

    Blank lines are skipped (the original indexing raised IndexError on them),
    and lines whose SMILES RDKit cannot parse are dropped.
    """
    mols = []
    with open(path) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            mol = Chem.MolFromSmiles(fields[-1])
            if mol is not None:
                mols.append(mol)
    return mols


def _report_reactant_checks(reaction, template, smiles_list=('CCBr', 'c1ccccc1Br')):
    """Print, per SMILES, whether it matches *template* and is a reactant."""
    for smi in smiles_list:
        tm = Chem.MolFromSmiles(smi)
        print(smi, tm.HasSubstructMatch(template), reaction.IsMoleculeReactant(tm))


# ---------------------------------------------------------------------------
# 1. Atom queries: Python loop vs. rdqueries
# ---------------------------------------------------------------------------
# The supplier reads lazily from the handle, so the list must be built while
# the gzip file is still open.
with gzip.open(ZIM_SDF, 'rb') as inf:
    zim_mols = [x for x in Chem.ForwardSDMolSupplier(inf) if x is not None]
len(zim_mols)

# Stand-in for the IPython `%timeit` magic, which is not valid Python syntax.
print('SP2 count, Python loop: {:.4f} s'.format(
    _best_seconds(lambda: _sp2_counts_loop(zim_mols))))
v1 = _sp2_counts_loop(zim_mols)

qa = rdqueries.HybridizationEqualsQueryAtom(SP2)
v2 = _sp2_counts_query(zim_mols, qa)
v1 == v2

print('SP2 count, query atom: {:.4f} s'.format(
    _best_seconds(lambda: _sp2_counts_query(zim_mols, qa))))

dir(rdqueries)

# ---------------------------------------------------------------------------
# 2. PandasTools
# ---------------------------------------------------------------------------
with gzip.open(ZIM_SDF, 'rb') as inf:
    cpds = PandasTools.LoadSDF(inf, includeFingerprints=True)
cpds.columns

cpds['logp'] = cpds['ROMol'].map(Descriptors.MolLogP)
cpds['mw'] = cpds['ROMol'].map(Descriptors.MolWt)
PandasTools.FrameToGridImage(cpds.head(8), legendsCol="ID", molsPerRow=4)

# Pull out everything matching a carbazole scaffold.
carbazole_qry = Chem.MolFromSmiles('N1C2=CC=CC=C2C2=C1C=CC=C2')
filtered = cpds[cpds['ROMol'] >= carbazole_qry]
len(filtered)

azine_qry = Chem.MolFromSmarts('c1ncc[n,c]c1')
filtered2 = cpds[cpds['ROMol'] >= azine_qry]
len(filtered2)

PandasTools.FrameToGridImage(filtered.head(8), legendsCol="ID", molsPerRow=4)

# Redraw the carbazole hits with a common scaffold orientation.
# NOTE(review): the original reused one name (`qry`) for both queries, so this
# step ran with the SMARTS pattern above against the carbazole hits; the two
# queries are now kept apart and the carbazole scaffold is used as the
# reference. Confirm this matches the intended depiction.
AllChem.Compute2DCoords(carbazole_qry)
for mol in filtered['ROMol']:
    AllChem.GenerateDepictionMatching2DStructure(mol, carbazole_qry)
PandasTools.FrameToGridImage(filtered.head(8), legendsCol="ID", molsPerRow=4)

# ---------------------------------------------------------------------------
# 3. Maximum common substructure
# ---------------------------------------------------------------------------
ms1 = _read_smiles_file('data/Target_no_130_60894.txt')
ms2 = _read_smiles_file('data/Target_no_121_20096.txt')

mcs = rdFMCS.FindMCS(ms1, completeRingsOnly=True, timeout=60)
print(mcs.smartsString)

mcs = rdFMCS.FindMCS(
    ms1,
    atomCompare=rdFMCS.AtomCompare.CompareAny,
    bondCompare=rdFMCS.BondCompare.CompareAny,
    completeRingsOnly=True,
    timeout=60,
)
print(mcs.smartsString)

# NOTE(review): `rdkit.Chem.MCS` is the older MCS interface; it may be
# deprecated or missing in recent RDKit releases - confirm before relying on
# it. The import is kept here so a failure does not affect earlier sections.
from rdkit.Chem import MCS  # noqa: E402

mcs = MCS.FindMCS(ms1, atomCompare='any', bondCompare='any', completeRingsOnly=True)
print(mcs.smarts)

mcs = rdFMCS.FindMCS(ms2, completeRingsOnly=True, timeout=60)
print(mcs.smartsString)

mcs = rdFMCS.FindMCS(
    ms2,
    atomCompare=rdFMCS.AtomCompare.CompareAny,
    bondCompare=rdFMCS.BondCompare.CompareAny,
    completeRingsOnly=True,
    timeout=60,
)
print(mcs.smartsString)

# Depict the second set aligned on its (any-atom/any-bond) MCS core. Work on
# copies so the coordinates of `ms2` itself are left untouched.
core = Chem.MolFromSmarts(mcs.smartsString)
AllChem.Compute2DCoords(core)
mscp = copy.deepcopy(ms2)
for x in mscp:
    AllChem.GenerateDepictionMatching2DStructure(x, core)
Draw.MolsToGridImage(mscp, molsPerRow=4)

# ---------------------------------------------------------------------------
# 4. Reactions
# ---------------------------------------------------------------------------
# NOTE(review): MDL RXN/V2000 blocks are line- and column-sensitive. The line
# breaks and fixed-width columns below were reconstructed from the format
# specification; every value is unchanged, but verify against the original
# .rxn file.
rxnB = """$RXN

      ISIS     052820091627

  2  1
$MOL

  -ISIS-  05280916272D

  2  1  0  0  0  0  0  0  0  0999 V2000
   -3.2730   -7.0542    0.0000 Br  0  0  0  0  0  0  0  0  0  0  0  0
   -3.9875   -7.4667    0.0000 R#  0  0  0  0  0  0  0  0  0  1  0  0
  1  2  1  0  0  0  0
V    1 halogen.bromine.aromatic
M  RGP  1   2   1
M  END
$MOL

  -ISIS-  05280916272D

  4  3  0  0  0  0  0  0  0  0999 V2000
    3.4375   -7.7917    0.0000 R#  0  0  0  0  0  0  0  0  0  2  0  0
    4.1520   -7.3792    0.0000 B   0  0  0  0  0  0  0  0  0  0  0  0
    4.1520   -6.5542    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    4.8664   -7.7917    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
  2  3  1  0  0  0  0
  1  2  1  0  0  0  0
  2  4  1  0  0  0  0
V    2 boronicacid
M  RGP  1   1   2
M  END
$MOL

  -ISIS-  05280916272D

  2  1  0  0  0  0  0  0  0  0999 V2000
   11.2667   -7.3417    0.0000 R#  0  0  0  0  0  0  0  0  0  1  0  0
   11.9811   -6.9292    0.0000 R#  0  0  0  0  0  0  0  0  0  2  0  0
  1  2  1  0  0  0  0
M  RGP  2   1   1   2   2
M  END
"""
rxn = AllChem.ReactionFromRxnBlock(rxnB)
rxn.Initialize()
r1 = rxn.GetReactantTemplate(0)

# Check the two bromides against the raw first-reactant template ...
_report_reactant_checks(rxn, r1)

nWarn, nError, nReacts, nProds, reactantLabels = Enumerator.PreprocessReaction(rxn)
print(reactantLabels)

# ... and again after preprocessing, using the same `r1` reference.
_report_reactant_checks(rxn, r1)

# ---------------------------------------------------------------------------
# 5. 3D: embedding, bond orders from a template, Open3DAlign
# ---------------------------------------------------------------------------
cpds['SMILES'][2]
m1 = Chem.MolFromSmiles('OC(c1ccncc1)c1ccc(OCC[NH+]2CCCC2)cc1')
m1

m1h = Chem.AddHs(m1)
AllChem.EmbedMolecule(m1h)
AllChem.MMFFOptimizeMolecule(m1h)
m1h

IPythonConsole.ipython_3d = True
m1h

# Structures read from PDB files get their bond orders from a SMILES template.
m1 = Chem.MolFromPDBFile('./data/3E4.pdb')
m1 = AllChem.AssignBondOrdersFromTemplate(
    Chem.MolFromSmiles(
        'Cc1cc(c2ccccc2c1Oc3c(cccn3)c4ccnc(n4)NC5CCC(CC5)N)NS(=O)(=O)c6ccccc6Cl'),
    m1,
)
m1

m2 = Chem.MolFromPDBFile('./data/3EL.pdb')
m2 = AllChem.AssignBondOrdersFromTemplate(
    Chem.MolFromSmiles(
        'c1ccc(cc1)S(=O)(=O)Nc2ccc(c3c2cccc3)Oc4c(cccn4)c5ccnc(n5)NC6CCC(CC6)N'),
    m2,
)
m2

# Open3DAlign with m2 as the first argument and m1 as the second.
o3 = AllChem.GetO3A(m2, m1)
print(o3.Align())
print(o3.Score())
Chem.CombineMols(m1, m2)