# RDKit / ChEMBL walkthrough, part 1: molecule basics, fingerprints,
# similarity, and two queries against a PostgreSQL database.
#
# Bare expression statements (e.g. ``m`` or ``fp.GetNumOnBits()``) are kept
# as-is: they only show a value when run in an interactive/notebook session
# and do nothing when the file is executed as a plain script.

try:
    from urllib.parse import quote_plus  # Python 3 location
except ImportError:
    from urllib import quote_plus  # Python 2 location

import psycopg2
from IPython.display import Image
from rdkit import DataStructs
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import Descriptors
# Not referenced by name below; presumably imported for its notebook
# rendering side effects -- confirm before removing.
from rdkit.Chem.Draw import IPythonConsole  # noqa: F401
from rdkit.Chem.Draw import SimilarityMaps

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Hello World!')
1 + 4

# --- Build a molecule from SMILES and inspect it -------------------------
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'  # sildenafil
m = Chem.MolFromSmiles(smi)
m

Descriptors.MolWt(m)
Descriptors.TPSA(m)
Descriptors.RingCount(m)

Chem.MolToSmiles(m, True)
Chem.MolToInchi(m)

# Mol block before and after computing 2D coordinates.
print(Chem.MolToMolBlock(m))
Chem.Compute2DCoords(m)
print(Chem.MolToMolBlock(m))

# --- Morgan fingerprint (2048 bits) --------------------------------------
fp = Chem.GetMorganFingerprintAsBitVect(m, 2, nBits=2048)
fp.ToBitString()
fp.GetNumOnBits()
fp.GetNumBits()

# --- Second molecule and Tanimoto similarity between the two -------------
smi2 = 'CCCc1nc(C)c2C(=O)N=C(Nn12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(CC)CC4'  # vardenafil
m2 = Chem.MolFromSmiles(smi2)
fp2 = Chem.GetMorganFingerprintAsBitVect(m2, 2, nBits=2048)
m2
m
DataStructs.TanimotoSimilarity(fp, fp2)

SimilarityMaps.GetSimilarityMapForFingerprint(
    m2, m, SimilarityMaps.GetMorganFingerprint)

# --- SMARTS pattern rendered through the smartsview web service ----------
sma = 'C1C[!#1!#6]1'  # oxirane or aziridine
# The pattern is URL-quoted before being placed in the image URL.
Image(url='http://www.smartsview.de/smartsview/auto/png/1/dynamic/{0}'.format(
    quote_plus(sma)))

# --- Database connection --------------------------------------------------
# `conn` and `cur` stay open here; later code in this file reuses them and
# closes the connection.
conn = psycopg2.connect(port=5432, user='chembl', dbname='chembl_17')
cur = conn.cursor()

# Substructure query: rows of mols_rdkit whose `m` column matches the SMARTS
# via `@>`, joined to molecule_dictionary and compound_properties on
# molregno, limited to 100 rows.
# The text is assembled from adjacent literals only to keep lines short; the
# resulting string is identical to the original one-line query.
sql1 = (
    " SELECT mr.*, md.chembl_id, cp.full_mwt, cp.alogp"
    " from mols_rdkit mr, molecule_dictionary md, compound_properties cp"
    " where mr.m @> 'C1C[!#1!#6]1'::qmol"
    " and mr.molregno = md.molregno"
    " and md.molregno = cp.molregno"
    " limit 100 "
)

cur.execute(sql1)
for c in cur:
    print(c)

# Same SMILES as above; the similarity query below embeds it literally.
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'  # sildenafil

# Similarity query: selects molregno, the molecule (as `smiles`) and
# tanimoto_sml(...) as `similarity`, filtered with the `%` operator and
# ordered with `<%>` against the mfp2 column.
# Kept as a plain, unparametrised string because later code in this file
# passes `sql2` on without parameters.
sql2 = (
    " select molregno,m as smiles,"
    "tanimoto_sml(morganbv_fp('CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'::mol),mfp2)"
    " as similarity"
    " from fps_rdkit join mols_rdkit using (molregno)"
    " where morganbv_fp('CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'::mol)%mfp2"
    " order by morganbv_fp('CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'::mol)<%>mfp2; "
)
# RDKit / ChEMBL walkthrough, part 2: run the similarity query, load the
# result into pandas, add RDKit molecules and descriptors, then filter, plot
# and group the rows.
#
# Relies on names defined earlier in this file: `cur`, `conn`, `sql2`,
# `Chem` and `Descriptors`. Bare expression statements only show a value in
# an interactive/notebook session.

from rdkit.Chem import PandasTools
import pandas as pd

# Run the similarity query through the raw cursor and print every row.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
cur.execute(sql2)
for c in cur:
    print(c)

# Run the same query again, this time straight into a DataFrame.
data = pd.read_sql(sql2, conn)
data.shape
data.head()

# Rows whose similarity is at least 0.8.
nn = data[data['similarity'] >= 0.8]
nn.shape
nn.head()

# Add a 'mol' column built from the 'smiles' column.
PandasTools.AddMoleculeColumnToFrame(
    data, smilesCol='smiles', molCol='mol', includeFingerprints=True)
data.head()

# Per-molecule descriptors computed from the 'mol' column.
data['logp'] = data['mol'].map(Descriptors.MolLogP)
data['mw'] = data['mol'].map(Descriptors.MolWt)
data.head()

# DataFrame.sort(columns=...) no longer exists in current pandas;
# sort_values(by=...) is the replacement and sorts ascending by default.
data.sort_values(by=['similarity']).head()
data[['mw', 'logp']].describe()

# NOTE(review): `rcParams` and `scatter` are not defined anywhere in the
# visible code -- presumably they come from a pylab-style interactive
# session (e.g. `%pylab inline`). Confirm, or import them explicitly from
# matplotlib, before running this as a standalone script.
rcParams['figure.figsize'] = 12, 12
data['logp'].hist()
scatter(data['mw'], data['logp'])

# Rows with molecular weight above 800.
data[data['mw'] > 800][['molregno', 'mol', 'mw', 'logp']]

# Query molecule used for the comparison below.
qsmi = 'c1nn(C)c2c1nc[nH]c2=O'
qmol = Chem.MolFromSmiles(qsmi)
qmol

# NOTE(review): `>=` between the 'mol' column and an RDKit molecule is
# presumably the substructure-match comparison that PandasTools installs --
# confirm against the RDKit PandasTools documentation.
# The mask is computed once and reused for filtering, grouping and the new
# column instead of being re-evaluated three times.
contains_q = data['mol'] >= qmol

subset = data[contains_q]
subset.shape
subset.head()

data.groupby(contains_q).describe().unstack()
data['containsQ'] = contains_q
data.head(2)
data.boxplot('similarity', by='containsQ')

# Release the database connection opened earlier in the file.
conn.close()