# Call form of print: a single parenthesised argument prints identically on
# Python 2 and is valid syntax on Python 3 (the bare statement form is not).
print('Hello World!')
Hello World!
# Bare arithmetic expression; the interactive session echoes its value.
1+4
5
from rdkit.Chem import AllChem as Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Descriptors
from rdkit import DataStructs
# SMILES string for the example molecule (sildenafil).
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4' #sildenafil
# Parse the SMILES into an RDKit molecule object.
m = Chem.MolFromSmiles(smi)
# Bare reference to the molecule; the interactive session displays it
# (presumably as a drawing, given the IPythonConsole import — confirm).
m
# Molecular weight descriptor of the parsed molecule.
Descriptors.MolWt(m)
474.5870000000004
# Topological polar surface area (TPSA) descriptor of the molecule.
Descriptors.TPSA(m)
113.41999999999999
# Number of rings in the molecule.
Descriptors.RingCount(m)
4
# Write the molecule back out as SMILES; the flag is named explicitly so the
# reader does not have to look up what the second positional argument means.
Chem.MolToSmiles(m, isomericSmiles=True)
'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O'
# InChI string for the molecule.
Chem.MolToInchi(m)
'InChI=1S/C22H30N6O4S/c1-5-7-17-19-20(27(4)25-17)22(29)24-21(23-19)16-14-15(8-9-18(16)32-6-2)33(30,31)28-12-10-26(3)11-13-28/h8-9,14H,5-7,10-13H2,1-4H3,(H,23,24,29)'
# Print the molecule as a mol block. No coordinates have been computed yet at
# this point, so every atom is written at 0.0000 0.0000 0.0000.
# Call form of print works on both Python 2 and Python 3.
print(Chem.MolToMolBlock(m))
RDKit 33 36 0 0 0 0 0 0 0 0999 V2000 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 2 3 1 0 3 4 1 0 4 5 2 0 5 6 1 0 6 7 1 0 6 8 1 0 8 9 1 0 9 10 2 0 9 11 1 0 11 12 1 0 12 13 2 0 13 14 1 0 12 15 1 0 15 16 2 0 16 17 1 0 17 18 2 0 18 19 1 0 19 20 2 0 20 21 1 0 21 22 1 0 22 23 1 0 17 24 1 0 24 25 2 0 24 26 2 0 24 27 1 0 27 28 1 0 28 29 1 0 29 30 1 0 30 31 1 0 30 32 1 0 32 33 1 0 14 4 1 0 14 8 2 0 20 15 1 0 33 27 1 0 M END
# Generate 2D coordinates and store them on the molecule itself; the mol block
# printed after this call carries a "2D" header and non-zero atom positions.
Chem.Compute2DCoords(m)
0
# Print the mol block again, now that 2D coordinates have been computed.
# Call form of print works on both Python 2 and Python 3.
print(Chem.MolToMolBlock(m))
RDKit 2D 33 36 0 0 0 0 0 0 0 0999 V2000 -8.2094 2.2189 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -7.5208 0.8863 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -6.0224 0.8163 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -5.3338 -0.5163 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -6.0072 -1.8566 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 -4.9405 -2.9112 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 -5.1666 -4.3941 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -3.6079 -2.2226 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -2.2044 -2.7522 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.9613 -4.2323 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 -1.0441 -1.8015 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 -1.2872 -0.3214 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -2.6907 0.2082 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 -3.8510 -0.7424 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.1269 0.6292 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.2765 0.0997 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 2.4368 1.0503 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 2.1937 2.5305 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.7903 3.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.3700 2.1094 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.7734 2.6390 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 -2.0166 4.1191 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -3.4200 4.6487 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 3.8402 0.5208 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 3.3107 -0.8826 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 4.3698 1.9242 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 5.2436 -0.0088 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 5.4867 -1.4889 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6.8902 -2.0185 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8.0505 -1.0679 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 9.4539 -1.5974 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7.8074 0.4123 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6.4039 0.9418 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 2 3 1 0 3 4 1 0 4 5 2 0 5 6 1 0 6 7 1 0 6 8 1 0 8 9 1 0 9 10 2 0 9 11 1 0 11 12 1 0 12 13 2 0 13 14 1 0 12 15 1 0 15 16 2 0 16 17 1 0 17 18 2 0 18 19 1 0 19 20 2 0 20 21 1 0 21 22 1 0 22 23 1 0 17 24 1 0 24 25 2 0 24 26 2 0 24 27 1 0 27 28 1 0 28 29 1 0 29 30 1 0 30 31 1 0 30 32 1 0 32 33 1 0 14 4 1 0 14 8 2 0 20 15 1 0 33 27 1 0 M END
# Morgan fingerprint of the molecule as a 2048-bit vector; the positional 2 is
# the radius according to the RDKit documentation.
# NOTE(review): recent RDKit releases deprecate this function in favour of
# rdFingerprintGenerator — confirm against the installed version.
fp = Chem.GetMorganFingerprintAsBitVect(m,2,nBits=2048)
# Render the fingerprint as a string of '0'/'1' characters, one per bit.
fp.ToBitString()
'0000000000000010000000000000000000000000000000000000000000000000000000000100000010000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000001000000000000000000000000000000100000000000000000000000001000000000000000000010000100000000000000000000000000000010100001000000001000000000001000000000000000000100000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000001000000000000000000001000000000000000000000000000000000000000000000000000000000000000010000000000000000000001000000000000000000000010000000000000000000100000000000000000000000000000000001000000100000000000000000000001000000000000010000010000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000010000000010000000000000000000000000000000000000000010000000000000000000000000001000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000100000000100000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000001000000000001000000000000000000000001000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100010000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000010100100000000000000010000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000100010000000000000000000100000000000000000000000000000000000000000000000000001000001000000000000010000000000000000000000000000
0000000000000000000000000000000000000000000000000'
# Count of bits that are set in the fingerprint.
fp.GetNumOnBits()
61
# Total length of the bit vector (matches the nBits requested when it was built).
fp.GetNumBits()
2048
# Second example molecule (vardenafil), parsed the same way as the first.
smi2 = 'CCCc1nc(C)c2C(=O)N=C(Nn12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(CC)CC4' #vardenafil
m2 = Chem.MolFromSmiles(smi2)
# Same fingerprint settings as for `fp` (radius 2, 2048 bits) so the two
# bit vectors can be compared directly.
fp2 = Chem.GetMorganFingerprintAsBitVect(m2, 2, nBits=2048)
# Bare references to both molecules, shown by the interactive session when
# evaluated as a cell result.
m2
m
# Tanimoto similarity between the two fingerprints.
DataStructs.TanimotoSimilarity(fp,fp2)
0.5
from rdkit.Chem.Draw import SimilarityMaps
# Similarity map for the two molecules based on Morgan fingerprints.
# NOTE(review): RDKit documents the argument order as
# (refMol, probeMol, fpFunction), which would make m2 the reference and m the
# probe — confirm. The session output shows a (matplotlib Figure, float) tuple.
SimilarityMaps.GetSimilarityMapForFingerprint(m2, m, SimilarityMaps.GetMorganFingerprint)
(<matplotlib.figure.Figure at 0x2b60210>, 0.14414414414414412)
# SMARTS for a three-membered ring made of two carbons and one atom that is
# neither hydrogen nor carbon: oxirane, aziridine, and also thiirane etc.
sma = 'C1C[!#1!#6]1'
from IPython.display import Image
# quote_plus lives in `urllib` on Python 2 and in `urllib.parse` on Python 3.
try:
    from urllib import quote_plus
except ImportError:
    from urllib.parse import quote_plus
# Ask the SMARTSviewer web service for a PNG depiction of the pattern; the
# SMARTS is URL-encoded because it contains characters such as '#' and '['.
Image(url='http://www.smartsview.de/smartsview/auto/png/1/dynamic/{0}'.format(quote_plus(sma)))
import psycopg2
# Open a connection to the chembl_17 database as user 'chembl' on port 5432.
# No host or password is passed here, so the driver's defaults apply.
conn = psycopg2.connect(port=5432, user='chembl', dbname='chembl_17')
# Cursor used to run the queries that follow.
cur = conn.cursor()
# Substructure search: up to 100 molecules whose structure contains the SMARTS
# pattern held in `sma`, together with their ChEMBL id, molecular weight and
# AlogP. There is no ORDER BY, so which 100 rows come back is up to the
# database.
# The pattern is supplied as a bound parameter (%s) rather than repeated as a
# literal inside the SQL, so the query always uses the same SMARTS as `sma`
# and psycopg2 takes care of quoting.
sql1 = """
SELECT mr.*, md.chembl_id, cp.full_mwt, cp.alogp
from mols_rdkit mr, molecule_dictionary md, compound_properties cp
where
mr.m @> %s::qmol
and
mr.molregno = md.molregno
and
md.molregno = cp.molregno
limit 100
"""
cur.execute(sql1, (sma,))
# Iterate the cursor to fetch and print each result row; the call form of
# print works on both Python 2 and Python 3.
for c in cur:
    print(c)
(1296551, 'CC(=O)OC1C[C@@H](C)C2(CC(c3ccoc3)OC2=O)C2CCC(O)C3(CO3)C12CO', 'CHEMBL1975260', Decimal('420.45'), Decimal('0.57')) (1240102, 'O=C(CCN1CC1)OCCOC(=O)CCN1CC1', 'CHEMBL1899531', Decimal('256.30'), Decimal('0.01')) (1296481, 'CCN(CC)C(=O)CCN1CC1', 'CHEMBL1975190', Decimal('170.25'), Decimal('0.35')) (1235869, 'Clc1ccc(N(CC2CO2)CC2CO2)cc1', 'CHEMBL1895298', Decimal('239.70'), Decimal('2.09')) (1295075, 'O=C(C1OC1c1ccc([N+](=O)[O-])cc1)C12CC3CC(CC(C3)C1)C2', 'CHEMBL1973784', Decimal('327.37'), Decimal('3.65')) (1246669, 'C[C@H]1OP(=O)(Oc2ccccc2)C[C@@H]2O[C@@H]21', 'CHEMBL1906098', Decimal('240.19'), Decimal('1.20')) (1231326, 'N#C[C@H]1C2OC2c2ccccc2N1C(=O)c1ccccc1', 'CHEMBL1890755', Decimal('276.29'), Decimal('2.19')) (1218973, 'CC12OC1C(O)C(Br)=C(CO)C2O', 'CHEMBL1878402', Decimal('251.07'), Decimal('-0.80')) (1295513, 'OC1c2ccccc2C(O)C2OC21', 'CHEMBL1974222', Decimal('178.18'), Decimal('0.24')) (1296770, 'CC(=O)OC1(C#N)CC2OC1C1C2N1C(=O)c1ccccc1', 'CHEMBL1975479', Decimal('298.29'), Decimal('0.41')) (1295729, 'O=C(c1ccc(Cl)cc1)C1OC12C(=O)Nc1ccccc12', 'CHEMBL1974438', Decimal('299.71'), Decimal('2.41')) (1294930, 'CO/C(=N/N1CC1C(F)(F)F)c1ccncc1', 'CHEMBL1973639', Decimal('245.20'), Decimal('1.36')) (1296542, 'COc1ccc(C[C@H]2NC(=O)C=CC[C@@H]([C@H](C)[C@H]3O[C@@H]3c3ccccc3)OC(=O)[C@H](CC(C)C)OC(=O)[C@H](C)CNC2=O)cc1', 'CHEMBL1975251', Decimal('620.73'), Decimal('4.85')) (1295673, 'C=C1C(=O)OC2CCCCC3OC3C12', 'CHEMBL1974382', Decimal('194.23'), Decimal('1.54')) (1202551, 'Cc1ccc(/C(=N/O)N2CC2C)c(Oc2ccc3oc4ccccc4c3c2)n1', 'CHEMBL1861933', Decimal('373.40'), Decimal('4.24')) (1290807, 'COC(C/C=C/N(C)C=O)C(C)C(=O)CCC(C)C(OC)C(C)C1OC(=O)C=CC2OC2(C)CC(OC)C(OC)C2=CC(=O)O[C@H]([C@H]2O)C(C)C(OC)CC(OC)C=CC(C)C(O)CC(OC)C=CC1C', 'CHEMBL1969516', Decimal('1006.27'), None) (1231532, 'O=C1C=C(N2CC2)c2ccccc2C1=O', 'CHEMBL1890961', Decimal('199.21'), Decimal('1.34')) (1243825, 'COC(C[C@@H]1O[C@H]1C(=O)COCc1ccccc1)OC', 'CHEMBL1903254', Decimal('280.32'), Decimal('1.13')) (1209176, 
'N#CC1(C#N)OC12CCS(=O)(=O)c1ccccc12', 'CHEMBL1868605', Decimal('260.27'), Decimal('0.52')) (1237949, 'c1cc(OCCN2CC2)ccn1', 'CHEMBL1897378', Decimal('164.20'), Decimal('0.62')) (1291819, 'O=c1n(Cc2ccccc2)c2ccccc2n1CC1CS1', 'CHEMBL1970528', Decimal('296.39'), Decimal('3.15')) (1287453, 'C=C1C(=O)O[C@@H]2C=C(C)C=C[C@@H]3O[C@@]3(C)C[C@@H](O)C12', 'CHEMBL1966162', Decimal('262.30'), Decimal('1.12')) (1296095, 'COC(=O)C1(COCc2ccccc2)CCCC2OC21', 'CHEMBL1974804', Decimal('276.33'), Decimal('2.16')) (1296635, 'COc1cc(O)c2c(c1)C(O)C(O)CC1OC1C(=O)C=CCC(C)OC2=O', 'CHEMBL1975344', Decimal('378.37'), Decimal('0.90')) (1302623, 'COc1cc2cc(c1Cl)N(C)C(=O)C[C@H](OC(=O)[C@H](C)N(C)C(C)=O)[C@]1(C)O[C@H]1[C@H](C)[C@@H]1C[C@@](O)(NC(=O)O1)[C@H](OC)C=CC=C(C)C2', 'CHEMBL1981332', Decimal('692.20'), Decimal('3.07')) (1290845, 'C=C1CC2(C)CCC(OC(=O)C3(C)OC3C)C(C)(OC(C)=O)C2CC1=C(C)C', 'CHEMBL1969554', Decimal('390.51'), Decimal('4.15')) (1297691, 'Cc1cn([C@@H]2O[C@H](CO[Si](C)(C)C(C)(C)C)[C@]3(O[C@H]3C(=O)NO)[C@H]2O[Si](C)(C)C(C)(C)C)c(=O)[nH]c1=O', 'CHEMBL1976400', Decimal('557.78'), None) (1243276, 'CC12CCC3C(CCC4CC5OC5CC43C)C1CCC21OCCO1', 'CHEMBL1902705', Decimal('332.48'), Decimal('3.27')) (1297762, 'CC1CCC2C(C(=O)O[C@@H]3[C@@H]4O[C@]4(CO)C4C3C=CO[C@H]4OC3OC(CO)C(O)C(O)C3O)=CO[C@@H](OC3OC(CO)C(O)C(O)C3O)C12', 'CHEMBL1976471', Decimal('704.67'), Decimal('-3.95')) (1297121, 'CC1(C)C2CCC3(OCCO3)C1C1OC12', 'CHEMBL1975830', Decimal('210.27'), Decimal('0.88')) (1297766, 'CC(=O)O[C@@H]1[C@H]2O[C@H]2[C@H]2O[C@@]2(COC(=O)c2ccccc2)[C@H]1OC(C)=O', 'CHEMBL1976475', Decimal('362.33'), Decimal('0.59')) (1297698, 'CC(=O)OC1OC(c2ccoc2)C[C@@]12C1CCC3O[C@]3(C)[C@]1(C)C[C@H](OC(C)=O)[C@H]2C', 'CHEMBL1976407', Decimal('432.51'), Decimal('2.47')) (1297541, 'C=CC1(CCC(Br)C(C)(C)Cl)CO1', 'CHEMBL1976250', Decimal('267.59'), Decimal('3.10')) (1297560, '[O-][P-](Oc1ccc2ccccc2c1)(N1CC1)N1CC1', 'CHEMBL1976269', Decimal('274.25'), Decimal('0.92')) (1297481, 
'CC(=O)c1ccc2c3c1[C@@H]1O[C@@H]1c1cccc(c1-3)[C@@H]1O[C@H]21', 'CHEMBL1976190', Decimal('276.29'), Decimal('1.96')) (1299670, 'CC1=CC2O[C@@H]3C[C@H]4OC(=O)C=CC=CC56OCCC7(OC7C(=O)OCC2(CC1)[C@]4(C)[C@]31CO1)C5OC(O)C6O', 'CHEMBL1978379', Decimal('558.57'), Decimal('-0.18')) (1299584, 'C=C1[C@H](OC(C)=O)[C@H]2[C@@H](OC(C)=O)[C@](C)(OC(C)=O)C[C@]2(OC(C)=O)C(=O)[C@H](C)[C@@H]2O[C@H]2C(C)(C)[C@H](OC(C)=O)[C@H](OC(C)=O)[C@H]1OC(=O)C(C)C', 'CHEMBL1978293', Decimal('738.77'), Decimal('1.75')) (1298308, 'C=C(C(=O)OC)C1C[C@@H](OC(C)=O)C2=C[C@H](C[C@@]3(C)O[C@@H]3c3cc(C)c(o3)[C@H]1OC(C)=O)OC2=O', 'CHEMBL1977017', Decimal('488.48'), Decimal('2.20')) (1287993, 'C=C1C(=O)OC2C3OC3(C)CCC=C(C)CC(=O)C12', 'CHEMBL1966702', Decimal('262.30'), Decimal('1.78')) (1271919, 'CC(=O)OCC1=C(C)C[C@H]([C@@H](C)[C@H]2CC[C@H]3[C@@H]4C[C@H]5O[C@]56[C@@H](OC(C)=O)C=CC(=O)[C@]6(COC(C)=O)[C@H]4CC[C@]23C)OC1=O', 'CHEMBL1934450', Decimal('612.71'), Decimal('3.43')) (1271928, 'CC1=C(CO)C(=O)O[C@@H]([C@@H](C)[C@H]2CC[C@H]3[C@@H]4C[C@H]5O[C@]56[C@@H](O)[C@@H](OS(=O)(=O)O)CC(=O)[C@]6(CO)[C@H]4CC[C@]23C)C1', 'CHEMBL1934459', Decimal('584.68'), Decimal('1.14')) (1271936, 'CC1=C(CO[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)C(=O)O[C@@H]([C@@H](C)[C@H]2CC[C@H]3[C@@H]4C[C@H]5O[C@]56[C@@H](O)C=CC(=O)[C@]6(C)[C@H]4CC[C@]23C)C1', 'CHEMBL1934467', Decimal('632.74'), Decimal('1.64')) (1288183, 'O=C1c2c3c4ccccc4[nH]c3c3c(c4ccccc4n3CC3CO3)c2C(=O)N1Cc1ccccc1', 'CHEMBL1966892', Decimal('471.51'), Decimal('5.35')) (1285594, 'Cc1ccc(OCC2CO2)c(Br)c1', 'CHEMBL1964303', Decimal('243.10'), Decimal('2.76')) (1284906, 'O=C(CCc1ccc(F)cc1)c1cc(F)ccc1OCC1CO1', 'CHEMBL1963195', Decimal('318.31'), Decimal('3.84')) (1284905, 'O=C(CCc1ccc(F)cc1)c1ccccc1OCC1CO1', 'CHEMBL1963194', Decimal('300.32'), Decimal('3.63')) (1284904, 'O=C(CCc1ccccc1)c1ccccc1OCC1CO1', 'CHEMBL1963193', Decimal('282.33'), Decimal('3.43')) (1299546, 'CCOC(=O)[C@H]1O[C@@H]1C(=O)NC(CC(C)C)C(=O)NCCC(C)C', 'CHEMBL1978255', Decimal('342.43'), Decimal('1.61')) (1216668, 
'O=C(O)C1OC1(c1ccccc1Cl)c1ccccc1Cl', 'CHEMBL1876097', Decimal('309.14'), Decimal('3.87')) (1228380, 'COC(C[C@@H]1O[C@@H]1[C@@H](O)[C@@H](C)OCc1ccccc1)OC', 'CHEMBL1887809', Decimal('296.36'), Decimal('1.44')) (1211913, 'CCOC(=O)c1ccc(N(CC2CO2)S(=O)(=O)c2ccc(C)cc2)cc1', 'CHEMBL1871342', Decimal('375.44'), Decimal('2.93')) (1300104, 'COc1cc2c(c3oc4c(O)cccc4c(=O)c13)C(C1(C)CO1)CO2', 'CHEMBL1978813', Decimal('340.33'), Decimal('2.32')) (1228204, 'CC[C@@H](c1ccccc1)n1c(=O)n2n(c1=O)[C@H]1[C@H](O)[C@@H]3O[C@@H]3/C(=N\\OCc3ccccc3)[C@H]1CC2', 'CHEMBL1887633', Decimal('476.52'), Decimal('2.67')) (1299787, 'O=C1c2ccccc2OCC12OC21C=CC(Cl)=CC1', 'CHEMBL1978496', Decimal('274.70'), Decimal('2.35')) (1299302, 'O=C(O)CCCCCCC[C@H]1S[C@H]1CCCCCCO', 'CHEMBL1978011', Decimal('302.47'), Decimal('4.47')) (1300017, 'C=C(C)[C@@H]1C[C@H]2O[C@](O)(C(=C)CC(=O)C=C(C)C[C@@H]3OC(=O)[C@]24O[C@@H]34)[C@H]1O', 'CHEMBL1978726', Decimal('376.40'), Decimal('1.35')) (1268906, 'Cc1coc2c1[C@H]1C=C(CC[C@@H]3O[C@@]3(C)C2)C(=O)O1', 'CHEMBL1927944', Decimal('260.29'), Decimal('2.29')) (1251649, 'C=C1C(=O)O[C@@H]2C[C@@]3(C)O[C@@H]3CC[C@@]3(C)O[C@H]3C[C@@H]12', 'CHEMBL1912039', Decimal('264.32'), Decimal('1.30')) (1251654, 'C=C1C(=O)O[C@@H]2C[C@H](C)[C@]3(CCC(C)O)O[C@@H]3C[C@H]12', 'CHEMBL1912044', Decimal('266.33'), Decimal('1.73')) (1251661, 'C=C1C(=O)O[C@H]2C[C@H](C)[C@@H]3CC[C@@]4(C)O[C@@]34C[C@H]12', 'CHEMBL1912051', Decimal('248.32'), Decimal('2.21')) (1288152, 'COC(=O)[C@@H]1O[C@]12[C@@H](CO[Si](C)(C)C(C)(C)C)O[C@@H](n1cc(C)c(=O)[nH]c1=O)[C@@H]2O[Si](C)(C)C(C)(C)C', 'CHEMBL1966861', Decimal('556.80'), None) (1299033, 'COC(=O)/C=C/[C@@H]1[C@H](C)N1S(=O)(=O)c1ccc(C)cc1', 'CHEMBL1977742', Decimal('295.35'), Decimal('2.05')) (1286713, 'CN1c2ccc(Cl)cc2C2(c3ccccc3)N(CC1=O)C2(Cl)Cl', 'CHEMBL1965422', Decimal('367.66'), Decimal('4.24')) (1301826, 'CC(=O)O[C@H]1CC(C)(C)C(=C=C/C(C)=C/C=C/C=C\\C=C(C)\\C=C2\\C=C(/C=C/[C@@]34O[C@]3(C)C[C@@H](O)CC4(C)C)C(=O)O2)[C@](C)(O)C1', 'CHEMBL1980535', Decimal('630.81'), 
Decimal('5.36')) (1303258, 'CC(CO)[C@H]1OC(=O)C=C2[C@@]13O[C@@H]3[C@H]1OC(=O)[C@@]3(C)[C@H]4O[C@H]4C[C@@]2(C)[C@@H]13', 'CHEMBL1981967', Decimal('362.37'), Decimal('0.17')) (1295439, 'COC1(OC)C[C@H](C)[C@@]23O[C@]24c2cc(O)c5c(c2N[C@H]3C#CC=CC#C[C@@]41O)C(=O)c1ccccc1C5=O', 'CHEMBL1974148', Decimal('509.51'), Decimal('2.87')) (1295527, 'CC(=O)OCC1OC(OC2CC3C(C)(C)C(O)CC[C@]3(C)C3CCC45CC4(CC[C@H]5C(C)CC(O)C4OC4(C)C)[C@]23C)C(O)C(O)C1O', 'CHEMBL1974236', Decimal('678.89'), Decimal('2.82')) (1321195, 'CC(=O)OCC1OC(OC2CC3C(C)(C)C(OC(C)=O)CC[C@]3(C)C3CCC45CC4(CC[C@H]5C(C)CC(O)C4OC4(C)C)[C@]23C)C(O)C(O)C1O', 'CHEMBL1999904', Decimal('720.93'), Decimal('3.19')) (1301478, 'COC1(OC)C[C@H](C)[C@@]23O[C@]24C2=CC(=O)C=CC2=N[C@H]3C#CC=CC#C[C@@]41O', 'CHEMBL1980187', Decimal('377.39'), Decimal('1.40')) (1276021, 'C=C(C(=O)[C@H](OC(C)=O)[C@@H](C)[C@H]1[C@@H](OC(C)=O)C[C@@]2(C)[C@@H]3[C@H]4O[C@H]4[C@H]4[C@H](C)C(=O)C=C[C@@]45C[C@@]35CC[C@]12C)[C@@H](C)CO', 'CHEMBL1941159', Decimal('582.72'), Decimal('2.93')) (1288224, 'CC(=O)O[C@H]1C[C@]2(C(C)(C)O)C(=C1C)[C@@H](OC(C)=O)C(OC(C)=O)[C@@]1(C)[C@H]([C@@H]2OC(C)=O)[C@@]2(CO2)C(OC(C)=O)C[C@@H]1OC(C)=O', 'CHEMBL1966933', Decimal('652.68'), Decimal('-0.21')) (1301178, 'Cc1cc2c(c3oc(C4(C)OC4C4OC4C)cc(=O)c13)C(=O)c1c(O)c(C3CC(C)(N(C)C)C(O)C(C)O3)cc(C3CC(N(C)C)C(O)C(C)O3)c1C2=O', 'CHEMBL1979887', Decimal('746.84'), Decimal('2.52')) (1321506, 'CC1CN1C(=O)NCCCCCCNC(=O)N1CC1C', 'CHEMBL2000215', Decimal('282.38'), Decimal('0.92')) (1310842, 'C[C@]12CCC3[C@@H](CCC4=CC(=O)CC[C@@]43C)C1CC[C@@H]2OC(=O)C12OC1CCC2=O', 'CHEMBL1989551', Decimal('412.52'), Decimal('3.59')) (1286420, 'C=C1C(=O)OC2C=C(C)C3OC3C=C(C(=O)OC)C(OC(C)=O)C(OC(=O)C(C)(O)C(C)OC(C)=O)C12', 'CHEMBL1965129', Decimal('522.50'), Decimal('0.91')) (1311072, 'CC(=O)OC[C@]12C[C@H](OC(=O)CC(C)C)C(C)=C[C@H]1OC1[C@H](O)[C@@H](OC(C)=O)[C@@]2(C)[C@@]12CO2', 'CHEMBL1989781', Decimal('466.52'), Decimal('0.99')) (1309747, 'CC1=CC2OC3CC(OC(=O)/C=C/C=C/C(OCCC4=CC(=O)OC4)C(C)O)C(C)(C34CO4)C2(CO)CC1', 
'CHEMBL1988456', Decimal('530.61'), Decimal('1.70')) (1310737, 'CC1CCOC(=O)C=CC=CC(=O)OC2CC3OC4C5OC5(C)C(O)CC4(COC(=O)C1O)C2(C)C31CO1', 'CHEMBL1989446', Decimal('534.55'), Decimal('0.02')) (1312614, 'CC(=O)OCC1=C(C)C[C@H]([C@@H](COC(C)=O)C2CCC3C4C[C@H]5O[C@]56CC=CC(=O)[C@]6(C)C4CC[C@]23C)OC1=O', 'CHEMBL1991323', Decimal('554.67'), Decimal('3.76')) (1314509, 'CNC(=O)C(C)C1C(=O)/C(=C(O)/C=C/C(C)=C/C(C)C2OC3(C)OC(C=CC34CO4)C2C)C(=O)N1C1CCC(O)C(C)O1', 'CHEMBL1993218', Decimal('600.70'), Decimal('1.16')) (1320933, 'CC1=CC2OC3CC4OC(=O)C=CC=CC(C(C)O)OCCC(C)C(O)C(=O)OCC2(CC1O)C4(C)C31CO1', 'CHEMBL1999642', Decimal('548.62'), Decimal('0.80')) (1322083, 'CC1(C)CCCC2OC2CCC(C)(C)C1=O', 'CHEMBL2000792', Decimal('224.34'), Decimal('3.46')) (1314654, 'O=c1sc2ccccc2n1CC1CS1', 'CHEMBL1993363', Decimal('223.31'), Decimal('2.57')) (1301689, 'CC(=O)O[C@H]1[C@@H]2O[C@@]2(C)CCC=C(C)C[C@H](OC(C)=O)[C@H]1C(C)C', 'CHEMBL1980398', Decimal('338.44'), Decimal('2.92')) (1314212, 'O=C(c1ccccc1)C1OC12C(=O)Nc1ccccc12', 'CHEMBL1992921', Decimal('265.26'), Decimal('1.75')) (1300173, 'C=C1C(=O)OC2CC3(C)OC3C3OC3C3=CC(OC3=O)C12', 'CHEMBL1978882', Decimal('290.27'), Decimal('0.13')) (1322034, 'C[C@H](O)[C@H]1C=CC=CC(=O)O[C@@H]2C[C@H]3O[C@@H]4[C@@H]5O[C@]5(C)CCC4(COC(=O)[C@H]4O[C@]4(C)[C@@H](O)CO1)[C@]2(C)[C@]31CO1', 'CHEMBL2000743', Decimal('562.61'), Decimal('-0.21')) (1313881, 'COc1ccc2c(c1)C1(C)CCC(O2)C12CO2', 'CHEMBL1992590', Decimal('232.28'), Decimal('2.11')) (1312125, 'C=CCOC(=O)N1c2ccc(O)cc2[C@@]23O[C@]24[C@@H](C)CC(OC)(OC)[C@@]3(O)C#CC=CC#C[C@H]14', 'CHEMBL1990834', Decimal('463.48'), Decimal('2.66')) (1313562, 'CC(=O)OC(C[C@H](C)[C@@H]1CC[C@]2(C)C3=CCC4C(C)(C)C(=O)CC[C@]4(C)C3CCC12C)C1OC1(C)C', 'CHEMBL1992271', Decimal('498.74'), Decimal('6.14')) (1300502, 'Cc1cn([C@@H]2O[C@H](CO[Si](C)(C)C(C)(C)C)[C@]3(O[C@H]3C(=O)NN)[C@H]2O[Si](C)(C)C(C)(C)C)c(=O)[nH]c1=O', 'CHEMBL1979211', Decimal('556.80'), None) (1313878, 'CC1OC12CC(C)C(C)(O)C(=O)OCC1=CCN(C)CCC(OC2=O)C1=O', 'CHEMBL1992587', 
Decimal('381.42'), Decimal('0.63')) (1313202, 'CC(=O)OC1CC2C(C)(C)C(=O)C=C[C@]2(C)C2CCC3(C)C(c4ccoc4)[C@@H](O)[C@H]4O[C@]43[C@]12C', 'CHEMBL1991911', Decimal('468.58'), Decimal('3.09')) (1302651, 'C=C(C)C12OC1[C@@]1(C)C(=CC2=O)CCC(O)[C@@H]1C', 'CHEMBL1981360', Decimal('248.32'), Decimal('1.99')) (1309841, 'C[C@]12CCC3C4=C(CCC3C1CCC2=O)C(=O)[C@H]1O[C@H]1C4=O', 'CHEMBL1988550', Decimal('300.35'), Decimal('2.02')) (1313550, 'CC1=C[C@H]2O[C@@H]3C[C@H]4OC(=O)C=CC=CC(C(C)O)OCC[C@@H](C)[C@H](O)C(=O)OC[C@@]2(CC1)[C@]4(C)[C@]31CO1', 'CHEMBL1992259', Decimal('532.62'), Decimal('1.90')) (1321613, 'C/C=C/C/C=C/CCC(=O)C1OC1C(N)=O', 'CHEMBL2000322', Decimal('223.27'), Decimal('1.09')) (1309900, 'COc1ccc(/C=C/C(=O)c2c(-c3ccccc3)nn(C)c(=O)c2N2CC2C)cc1', 'CHEMBL1988609', Decimal('401.46'), Decimal('3.43')) (1320419, 'CC1CCC2(C)C(CC=C(C=O)C2(O)C=O)C12CO2', 'CHEMBL1999128', Decimal('264.32'), Decimal('0.99')) (1314159, 'CC(=O)OC1(C#N)CC2OC1C1C2N1C(=O)OC(C)(C)C', 'CHEMBL1992868', Decimal('294.30'), Decimal('0.32'))
# Query structure for the similarity search (sildenafil).
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'
# Similarity search against the stored Morgan fingerprints (column mfp2):
# returns molregno, SMILES and Tanimoto similarity to the query structure.
# `%` and `<%>` are RDKit cartridge operators (similarity filter and ordering).
# The query SMILES is bound once as the named parameter %(smi)s instead of
# being pasted three times as a literal. Because parameters are now passed to
# execute(), every literal percent sign in the SQL has to be doubled, so the
# operators are written `%%` and `<%%>`.
sql2 = """
select molregno,m as smiles,tanimoto_sml(morganbv_fp(%(smi)s::mol),mfp2) as similarity
from fps_rdkit join mols_rdkit using (molregno)
where morganbv_fp(%(smi)s::mol)%%mfp2
order by morganbv_fp(%(smi)s::mol)<%%>mfp2;
"""
cur.execute(sql2, {'smi': smi})
# Iterate the cursor to fetch and print each result row; the call form of
# print works on both Python 2 and Python 3.
for c in cur:
    print(c)
(410802, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 1.0) (1351310, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O', 0.88135593220339) (1351311, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O', 0.88135593220339) (80636, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O', 0.866666666666667) (80694, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O', 0.838709677419355) (488008, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4)CC3)ccc1OCC)[nH]c2=O', 0.825396825396825) (410662, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.8125) (512303, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(C(N)=O)CC3)ccc1OCC)[nH]c2=O', 0.8125) (1334756, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(C)nn(C)c2c(=O)[nH]1', 0.8) (488151, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C4CCCCC4)CC3)ccc1OCC)[nH]c2=O', 0.8) (410656, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(C(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.8) (488072, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccc(F)cc4)CC3)ccc1OCC)[nH]c2=O', 0.8) (488147, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccc(Cl)cc4)CC3)ccc1OCC)[nH]c2=O', 0.787878787878788) (488073, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4Cl)CC3)ccc1OCC)[nH]c2=O', 0.787878787878788) (1351309, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N(CC)CC)ccc1OCC)[nH]c2=O', 0.783333333333333) (488146, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4cccc(Cl)c4)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075) (488010, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4OC)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075) (488009, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4C)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075) (410657, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(CC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075) (488071, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4F)CC3)ccc1OCC)[nH]c2=O', 0.764705882352941) (488149, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4cccc5ccccc54)CC3)ccc1OCC)[nH]c2=O', 0.764705882352941) (1351312, 
'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N(C)C)ccc1OCC)[nH]c2=O', 0.758064516129032) (488148, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccc([N+](=O)[O-])cc4)CC3)ccc1OCC)[nH]c2=O', 0.753623188405797) (410658, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(CCC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.753623188405797) (488152, 'CCCCCCCCCC(=O)OCCN1CCN(S(=O)(=O)c2ccc(OCC)c(-c3nc4c(CCC)nn(C)c4c(=O)[nH]3)c2)CC1', 0.742857142857143) (1334601, 'CCCc1nn(-c2cccnc2)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.742857142857143) (1334602, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(-c3ccccc3)nn(C)c2c(=O)[nH]1', 0.738461538461539) (410664, 'CCCOc1ccc(S(=O)(=O)N2CCC(C(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.735294117647059) (1334603, 'CCCc1n[nH]c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.734375) (410660, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(CCCC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.732394366197183) (283528, 'CCCc1nc(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nn12', 0.727272727272727) (410675, 'CCCOc1ccc(S(=O)(=O)N2CCC(CC(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.72463768115942) (410742, 'CCCOc1ccc(S(=O)(=O)N2CCN(CCP(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.72463768115942) (488011, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4cccc(C(F)(F)F)c4)CC3)ccc1OCC)[nH]c2=O', 0.722222222222222) (410755, 'CCCOc1ccc(S(=O)(=O)N2CCC(CP(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714) (410746, 'CCCOc1ccc(S(=O)(=O)N2CCC(P(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714) (410735, 'CCCOc1ccc(S(=O)(=O)N2CCC(P(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714) (410731, 'CCCOc1ccc(S(=O)(=O)N2CCN(CCP(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714) (488150, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(Cc4ccc5c(c4)OCO5)CC3)ccc1OCC)[nH]c2=O', 0.712328767123288) (410715, 'CCCOc1ccc(S(=O)(=O)N2CCC(P(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.704225352112676) (410737, 
'CCCOc1ccc(S(=O)(=O)N2CCC(CP(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.704225352112676) (1334755, 'CCCc1nn(C)c2c1nc(-c1cccc(S(=O)(=O)N3CCN(C)CC3)c1)[nH]c2=O', 0.698412698412698) (1334754, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2cnn(C)c2c(=O)[nH]1', 0.698412698412698) (410711, 'CCCOc1ccc(S(=O)(=O)N2CCN(CP(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.694444444444444) (410713, 'CCCOc1ccc(S(=O)(=O)N2CCN(CCP(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.694444444444444) (410676, 'CCCOc1ccc(S(=O)(=O)N2CCC(CCC(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.694444444444444) (487042, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCNc3ccccc3)ccc1OCC)[nH]c2=O', 0.691176470588235) (1351313, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCc3ccccc3)ccc1OCC)[nH]c2=O', 0.691176470588235) (410717, 'CCCOc1ccc(S(=O)(=O)N2CCC(CP(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.684931506849315) (488153, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCOC(=O)CCCO[N+](=O)[O-])CC3)ccc1OCC)[nH]c2=O', 0.684210526315789) (1441770, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CCC)c(CC)c(=O)[nH]1', 0.676923076923077) (567449, 'CCCc1c(OC)cc(OC)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.676470588235294) (410677, 'CCCOc1ccc(S(=O)(=O)N2CCC(CCCC(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.675675675675676) (304727, 'CCCn1nc(CC)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.671641791044776) (410679, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)cc3c1OCC3)[nH]c2=O', 0.666666666666667) (80598, 'CCCc1nn(C)c2c1nc(-c1ccccc1OCC)[nH]c2=O', 0.666666666666667) (1441766, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(CCC)c(=O)[nH]1', 0.666666666666667) (140806, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(C)c(=O)[nH]c2=O', 0.666666666666667) (487043, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCNc3cccc4ccccc43)ccc1OCC)[nH]c2=O', 0.666666666666667) (487044, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCNCC(=O)O)ccc1OCC)[nH]c2=O', 0.661971830985915) (487046, 
'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCN(CCO)S(=O)(=O)c3ccc(OCC)c(-c4nc5c(CCC)nn(C)c5c(=O)[nH]4)c3)ccc1OCC)[nH]c2=O', 0.657534246575342) (410681, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)cc3c1OCO3)[nH]c2=O', 0.656716417910448) (283502, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(ncn2CCC)c(=O)[nH]1', 0.656716417910448) (410683, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)cc3c1OCCO3)[nH]c2=O', 0.656716417910448) (553751, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3ccccn32)c(=O)[nH]1', 0.656716417910448) (1441475, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CCC)cc(=O)[nH]1', 0.65625) (555103, 'CCCc1c(OC)cc(O)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.647887323943662) (304716, 'CCCn1nc(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.647058823529412) (1262965, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(Cl)c(=O)[nH]1', 0.646153846153846) (304811, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(c(CC)nn2C2CCCC2)c(=O)[nH]1', 0.642857142857143) (1441765, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(CC)c(=O)[nH]1', 0.636363636363636) (1262967, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(I)c(=O)[nH]1', 0.636363636363636) (1262966, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(Br)c(=O)[nH]1', 0.636363636363636) (1441764, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(C)c(=O)[nH]1', 0.636363636363636) (558511, 'CCCCc1c(OC)cc(OC)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.633802816901408) (80661, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C(N)=O)CC3)ccc1OCC)nc2O', 0.633802816901408) (556780, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Br)c(OC)cc(OC)c2c(=O)[nH]1', 0.632352941176471) (563798, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(C)c(OC)cc(OC)c2c(=O)[nH]1', 0.632352941176471) (1441774, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Cl)c(=O)[nH]1', 0.630769230769231) (1351308, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)Nc3ncccc3C3CCCN3C)ccc1OCC)[nH]c2=O', 0.628205128205128) (140060, 'CCCn1c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c2c(=O)[nH]c1=O', 
0.626865671641791) (1262964, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(F)c(=O)[nH]1', 0.626865671641791) (562127, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Cl)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101) (558510, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(CC)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101) (558503, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(I)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101) (558494, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(F)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101) (560323, 'C=Cc1c(OC)cc(OC)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.623188405797101) (1441771, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.621212121212121) (487045, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCN(CC(=O)O)S(=O)(=O)c3ccc(OCC)c(-c4nc5c(CCC)nn(C)c5c(=O)[nH]4)c3)ccc1OCC)[nH]c2=O', 0.618421052631579) (1376119, 'CCCOc1ccc(S(=O)(=O)NCCC2CCCN2C)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.618421052631579) (140771, 'CCCn1c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c2c(=O)n(C)c1=O', 0.617647058823529) (410694, 'CCCOc1ccc(NC(C)=O)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.617647058823529) (511952, 'CCCCN1C(=O)c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c(=O)c2C1=O', 0.617647058823529) (1441775, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(c(=O)[nH]1)CCC2', 0.617647058823529) (1262963, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)cc(=O)[nH]1', 0.615384615384615) (1441474, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C)cc(=O)[nH]1', 0.615384615384615) (562135, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2cc(OC)cc(OC)c2c(=O)[nH]1', 0.611940298507463) (1441769, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C)c(CC)c(=O)[nH]1', 0.611940298507463) (1441768, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(CC)c(=O)[nH]1', 0.608695652173913) (563835, 'CCCCc1c(OC)cc(O)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.608108108108108) (1441472, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(O)cc(=O)[nH]1', 0.606060606060606) (1441471, 
'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(N)cc(=O)[nH]1', 0.606060606060606) (555082, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Br)c(OC)cc(O)c2c(=O)[nH]1', 0.605633802816901) (139899, 'CCCCCCn1c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c2c(=O)n(C)c1=O', 0.605633802816901) (565660, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(C)c(OC)cc(O)c2c(=O)[nH]1', 0.605633802816901) (1334600, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(-c3ccccc3)n[nH]c2c(=O)[nH]1', 0.605633802816901) (1441761, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Cl)c(=O)[nH]1', 0.602941176470588) (1441480, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(NC(C)=O)c(=O)[nH]1', 0.6) (140172, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)[nH]c2=O', 0.6) (140026, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC1CC1)c(=O)n(C)c2=O', 0.6) (556799, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Cl)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222) (563829, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(CC)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222) (565656, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(F)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222) (562146, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(I)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222) (1441767, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(C)c(=O)[nH]1', 0.594202898550725) (1441763, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(I)c(=O)[nH]1', 0.594202898550725) (1441762, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.594202898550725) (1441473, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(NC(C)=O)cc(=O)[nH]1', 0.594202898550725) (140805, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)(C)C)c(=O)n(C)c2=O', 0.591549295774648) (1441773, 'CCCCCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.591549295774648) (140087, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.591549295774648) (410684, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCC3)[nH]c2=O', 0.589041095890411) (560347, 
'C=Cc1c(OC)cc(O)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.589041095890411) (1441476, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)cc(=O)[nH]1', 0.588235294117647) (1441479, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(F)(F)F)cc(=O)[nH]1', 0.588235294117647) (555093, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2cc(OC)cc(O)c2c(=O)[nH]1', 0.585714285714286) (1441772, 'CCCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.585714285714286) (139960, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC1CCCCC1)c(=O)n(C)c2=O', 0.583333333333333) (140807, 'C=CCCn1c2[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc2c(=O)n(C)c1=O', 0.583333333333333) (140260, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)CC)c(=O)n(C)c2=O', 0.583333333333333) (80559, 'CCCc1nn(C)c2c1nc(-c1ccccc1OCC1CC1)[nH]c2=O', 0.582089552238806) (1441477, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC(C)C)cc(=O)[nH]1', 0.579710144927536) (1334762, 'CCCc1nn(C)c2c1nc(-c1cccnc1OCC)[nH]c2=O', 0.578125) (410698, 'CCCOc1ccc(NC(=O)CCC)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.577464788732394) (410696, 'CCCOc1ccc(NC(=O)CC)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.577464788732394) (1441478, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(-c2ccccc2)cc(=O)[nH]1', 0.571428571428571) (304703, 'CCCc1nc(C)n2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)nc2O', 0.571428571428571) (410702, 'CCCOc1ccc(NC(=O)C(C)C)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.569444444444444) (80558, 'CCCc1nn(C)c2c1nc(-c1ccccc1NS(C)(=O)=O)[nH]c2=O', 0.569230769230769) (140519, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.567567567567568) (410687, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCO3)[nH]c2=O', 0.567567567567568) (410689, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCCO3)[nH]c2=O', 0.567567567567568) (410686, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCCC3)[nH]c2=O', 0.565789473684211) (304580, 'CCCn1cnc2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)nc2O', 0.563380281690141) (304707, 
'CCCc1nc(CC)c2c(O)nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nn12', 0.561643835616438) (140816, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(Cc1ccc(OC)cc1)c(=O)n(C)c2=O', 0.56) (102837, 'CCCOc1ccc(S(=O)(=O)N2CCN(CP(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(O)n1', 0.545454545454545) (325914, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3c(C)cccn32)c(O)n1', 0.540540540540541) (512228, 'CCCc1nn(C)c2c1nc(-c1ccccc1O)[nH]c2=O', 0.53968253968254) (410708, 'CCCOc1ccc(NC(=O)C2CCCCC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.539473684210526) (102529, 'CCCOc1ccc(S(=O)(=O)N2CCN(CP(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(O)n1', 0.538461538461538) (1334759, 'CCCc1nn(C)c2c1nc(-c1cccnc1OC)[nH]c2=O', 0.537313432835821) (1008491, 'CCCc1nn(C)c2c1nc(-c1cc(S(N)(=O)=O)ccc1OCC)nc2O', 0.536231884057971) (326033, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3cc(C)ccn32)c(O)n1', 0.533333333333333) (1334767, 'CCCc1nn(C)c2c1nc(-c1cccnc1O)[nH]c2=O', 0.53030303030303) (325968, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3ccc(Br)cn32)c(O)n1', 0.526315789473684) (1334776, 'CCCc1nn(C)c2c1nc(-c1ccsc1)[nH]c2=O', 0.523809523809524) (1334773, 'CCCc1nn(C)c2c1nc(-c1cn(C)cn1)[nH]c2=O', 0.523809523809524) (80670, 'CCCc1nn(C)c2c1nc(-c1ccccc1[N+](=O)[O-])[nH]c2=O', 0.522388059701492) (1334761, 'CCCc1nn(C)c2c1nc(-c1cccnc1OC(C)C)[nH]c2=O', 0.521739130434783) (971340, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NC)ccc1OCC)nc2O', 0.52112676056338) (800246, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N(CCO)CCO)ccc1OCC)nc2O', 0.520547945205479) (326091, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3ccc(C)cn32)c(O)n1', 0.52) (140606, 'CC(C)COc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.52) (1227092, 'CCCc1nn(C)c2c1nc(-c1cc([S+](=O)([O-])N3CCN(C)CC3)ccc1OCC)nc2O', 0.52) (1353820, 'CCCc1nn(C)c2c1nc(-c1ccc(C)cc1)[nH]c2=O', 0.516129032258065) (512227, 'CCCc1nn(C)c2c1nc(-c1ccccc1)[nH]c2=O', 0.516129032258065) (1353819, 'CCCc1nn(C)c2c1nc(-c1ccc(OC)cc1)[nH]c2=O', 0.515625) (1334771, 'CCCc1nn(C)c2c1nc(-c1cn(C)nc1C)[nH]c2=O', 
0.515625) (1269220, 'CCCn1nc2c(nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)cnc3OCCOC)[nH]c2=O)c1CC', 0.513513513513513) (1212525, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCO)ccc1OCC)nc2O', 0.513513513513513) (140345, 'CCOc1ccc(S(=O)(=O)N2CCN(CC)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.513157894736842) (1353817, 'CCCc1nn(C)c2c1nc(-c1ccc(Br)cc1)[nH]c2=O', 0.507936507936508) (1334775, 'CCCc1nn(C)c2c1nc(-c1ccnn1C)[nH]c2=O', 0.507936507936508) (421326, 'CCCc1nc(C)c2c(=O)nc(-c3cc(S(=O)(=O)N4CCN(CC)CC4)ccc3OCC)[nH]n12', 0.506666666666667) (28710, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(O)c2cc3[nH]cnc3cc2n1', 0.506666666666667) (140382, 'CCOc1ccc(S(=O)(=O)N2CCN(CCN(C)C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.506329113924051) (453288, 'CCCc1c2nc(-c3cc(S(=O)(=O)N4CCN(CC)CC4)cnc3OCCOC)[nH]c(=O)c2nn1C', 0.5) (511950, 'CCCCN1C(=O)C2=C(N=C(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)NC2)C1=O', 0.5) (1353816, 'CCCc1nn(C)c2c1nc(-c1ccc(Cl)cc1)[nH]c2=O', 0.5) (326382, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3cccc(CC)n32)c(O)n1', 0.5) (140451, 'CCOc1ccc(S(=O)(=O)N2CCN(CCO)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.5) (1351307, 'CCOc1ccc(S(=O)(=O)Nc2ccc(O)c(C(=O)O)c2)cc1-c1nc2c(C(C)(C)C)nn(C)c2c(=O)[nH]1', 0.5) (1353821, 'CCCc1nn(C)c2c1nc(-c1cccc(Br)c1)[nH]c2=O', 0.5) (1334770, 'CCCc1nn(C)c2c1nc(-c1ccc(=O)[nH]n1)[nH]c2=O', 0.5) (1334766, 'CCCc1nn(C)c2c1nc(-c1ccc(O)cc1)[nH]c2=O', 0.5) (1353818, 'CCCc1nn(C)c2c1nc(-c1cccc(Cl)c1)[nH]c2=O', 0.5)
from rdkit.Chem import PandasTools
import pandas as pd
# Execute the SQL held in `sql2` over the open connection `conn` and load
# the result set into a DataFrame.
# NOTE(review): `sql2` and `conn` are defined earlier in the notebook.
data = pd.read_sql(sql=sql2, con=conn)
# (rows, columns) of the result.
data.shape
(187, 3)
# Preview the first five rows of the query result.
data.head()
molregno | smiles | similarity | |
---|---|---|---|
0 | 410802 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O | 1.000000 |
1 | 1351310 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O | 0.881356 |
2 | 1351311 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O | 0.881356 |
3 | 80636 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O | 0.866667 |
4 | 80694 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O | 0.838710 |
# Nearest neighbours: keep only the rows whose similarity is at least 0.8.
nn = data.loc[data['similarity'] >= 0.8]
# (rows, columns) of the filtered frame.
nn.shape
(12, 3)
# Preview the first five high-similarity rows.
nn.head()
molregno | smiles | similarity | |
---|---|---|---|
0 | 410802 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O | 1.000000 |
1 | 1351310 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O | 0.881356 |
2 | 1351311 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O | 0.881356 |
3 | 80636 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O | 0.866667 |
4 | 80694 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O | 0.838710 |
# Build an RDKit molecule for every SMILES string in the 'smiles' column and
# store it in a new 'mol' column (the frame is modified in place).
# includeFingerprints=True asks PandasTools to precompute pattern
# fingerprints on the molecules, which the RDKit docs describe as a speed-up
# for later substructure filtering.
PandasTools.AddMoleculeColumnToFrame(
    data,
    smilesCol='smiles',
    molCol='mol',
    includeFingerprints=True,
)
# Preview the frame with the new molecule column.
data.head()
molregno | smiles | similarity | mol | |
---|---|---|---|---|
0 | 410802 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O | 1.000000 | |
1 | 1351310 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O | 0.881356 | |
2 | 1351311 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O | 0.881356 | |
3 | 80636 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O | 0.866667 | |
4 | 80694 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O | 0.838710 | |
# Derive two per-molecule descriptor columns from the 'mol' column:
#   logp - value returned by Descriptors.MolLogP
#   mw   - value returned by Descriptors.MolWt
data['logp'] = data['mol'].apply(Descriptors.MolLogP)
data['mw'] = data['mol'].apply(Descriptors.MolWt)
# Preview the frame with the two new columns.
data.head()
molregno | smiles | similarity | mol | logp | mw | |
---|---|---|---|---|---|---|
0 | 410802 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O | 1.000000 | |
1.6109 | 474.587 |
1 | 1351310 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O | 0.881356 | |
3.2395 | 473.599 |
2 | 1351311 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O | 0.881356 | |
2.8494 | 459.572 |
3 | 80636 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O | 0.866667 | |
1.2687 | 460.560 |
4 | 80694 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O | 0.838710 | |
0.9734 | 504.613 |
# Show the five least-similar hits: ascending sort on 'similarity', then the
# first five rows.
# DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
# sort_values() is the supported replacement and also sorts ascending by
# default, so the displayed rows are selected the same way.
data.sort_values(by=['similarity']).head()
molregno | smiles | similarity | mol | logp | mw | |
---|---|---|---|---|---|---|
186 | 1353818 | CCCc1nn(C)c2c1nc(-c1cccc(Cl)c1)[nH]c2=O | 0.5 | |
2.9295 | 302.765 |
177 | 453288 | CCCc1c2nc(-c3cc(S(=O)(=O)N4CCN(CC)CC4)cnc3OCCOC)[nH]c(=O)c2nn1C | 0.5 | |
1.0225 | 519.628 |
178 | 511950 | CCCCN1C(=O)C2=C(N=C(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)NC2)C1=O | 0.5 | |
0.7942 | 489.598 |
179 | 1353816 | CCCc1nn(C)c2c1nc(-c1ccc(Cl)cc1)[nH]c2=O | 0.5 | |
2.9295 | 302.765 |
180 | 326382 | CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3cccc(CC)n32)c(O)n1 | 0.5 | |
2.5473 | 496.593 |
# Summary statistics (count, mean, std, min, quartiles, max) for the
# molecular-weight and logP columns.
data[['mw', 'logp']].describe()
mw | logp | |
---|---|---|
count | 187.000000 | 187.000000 |
mean | 486.180160 | 2.268858 |
std | 92.688399 | 0.968545 |
min | 268.320000 | -0.048300 |
25% | 448.589000 | 1.566700 |
50% | 496.593000 | 2.292500 |
75% | 537.152000 | 2.902300 |
max | 866.980000 | 4.731700 |
# Set the default matplotlib figure size to 12 x 12 (width, height).
# NOTE(review): the bare `rcParams` name is presumably supplied by an earlier
# `%pylab inline` (or equivalent import) outside this section - confirm.
rcParams['figure.figsize'] = 12,12
# Histogram of the logP values.
data['logp'].hist()
<matplotlib.axes.AxesSubplot at 0x390e490>
# Scatter plot of molecular weight (x) against logP (y).
# NOTE(review): the bare `scatter` name is presumably supplied by an earlier
# `%pylab inline` (or equivalent import) outside this section - confirm.
scatter(data['mw'],data['logp'])
<matplotlib.collections.PathCollection at 0x422b0d0>
# Inspect the heavy outliers: rows with molecular weight above 800, showing
# only the identifier, structure and descriptor columns.
data.loc[data['mw'] > 800, ['molregno', 'mol', 'mw', 'logp']]
molregno | mol | mw | logp | |
---|---|---|---|---|
60 | 487046 | |
852.997 | 2.6245 |
88 | 487045 | |
866.980 | 2.7168 |
# Substructure query: the fused bicyclic core of the sildenafil SMILES used
# at the top of the notebook, with the propyl and aryl substituents removed.
qsmi = 'c1nn(C)c2c1nc[nH]c2=O'
# Parse the query SMILES into an RDKit molecule.
qmol = Chem.MolFromSmiles(qsmi)
# Display the query molecule.
qmol
# Substructure filter: with RDKit's PandasTools loaded, `mol_column >= query`
# is documented as a per-row substructure match, so this keeps the rows
# whose molecule contains `qmol`.
subset = data.loc[data['mol'] >= qmol]
# (rows, columns) of the matching subset.
subset.shape
(90, 6)
# Preview the first five rows that contain the query substructure.
subset.head()
molregno | smiles | similarity | mol | logp | mw | |
---|---|---|---|---|---|---|
0 | 410802 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O | 1.000000 | |
1.6109 | 474.587 |
1 | 1351310 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O | 0.881356 | |
3.2395 | 473.599 |
2 | 1351311 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O | 0.881356 | |
2.8494 | 459.572 |
3 | 80636 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O | 0.866667 | |
1.2687 | 460.560 |
4 | 80694 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O | 0.838710 | |
0.9734 | 504.613 |
# Compare descriptive statistics for rows whose molecule contains the query
# substructure (True) against those that do not (False).
grouped_stats = data.groupby(data['mol'] >= qmol).describe()
# pandas < 0.20 returns one row per (group, statistic) pair, which needs
# unstack() to become one row per group.  pandas >= 0.20 already returns
# that wide layout, where an unconditional unstack() would instead collapse
# the table into a long Series - so only unstack the old long format.
if isinstance(grouped_stats.index, pd.MultiIndex):
    grouped_stats = grouped_stats.unstack()
grouped_stats
molregno | similarity | logp | mw | |||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | |
mol | ||||||||||||||||||||||||||||||||
False | 97 | 741276.649485 | 525340.753036 | 28710 | 304703.0 | 560323.0 | 1441471.0 | 1441775 | 97 | 0.595045 | 0.050741 | 0.5 | 0.567568 | 0.600000 | 0.626866 | 0.734375 | 97 | 2.018649 | 0.740929 | -0.0483 | 1.482000 | 2.08120 | 2.54670 | 4.2180 | 97 | 492.993175 | 45.491272 | 391.453 | 462.61600 | 490.586 | 518.64000 | 624.701 |
True | 90 | 709987.411111 | 449869.213609 | 80558 | 410694.5 | 488009.5 | 1334760.5 | 1376119 | 90 | 0.670030 | 0.113819 | 0.5 | 0.567983 | 0.691176 | 0.753623 | 1.000000 | 90 | 2.538528 | 1.107499 | 0.3034 | 1.620075 | 2.65905 | 3.32205 | 4.7317 | 90 | 478.837244 | 124.968717 | 268.320 | 386.95875 | 504.591 | 558.42875 | 866.980 |
# Store the substructure-match result as a boolean column so it can be used
# as a grouping key (True where the molecule contains `qmol`).
data['containsQ'] = data['mol'] >= qmol
# Preview the first two rows with the new flag column.
data.head(2)
molregno | smiles | similarity | mol | logp | mw | containsQ | |
---|---|---|---|---|---|---|---|
0 | 410802 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O | 1.000000 | |
1.6109 | 474.587 | True |
1 | 1351310 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O | 0.881356 | |
3.2395 | 473.599 | True |
# Box plot of the similarity scores, one box per value of the 'containsQ'
# flag.
data.boxplot(column='similarity', by='containsQ')
<matplotlib.axes.AxesSubplot at 0x393c4d0>
# Close the database connection that was used for the queries.
conn.close()