import requests
import pandas as pd
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import PandasTools
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import DataStructs
from scipy.spatial.distance import *
import numpy as np
from sklearn import manifold
from ggplot import *
# ChEMBL identifier of the target whose bioactivity records are downloaded.
TRGT = 'CHEMBL5024'
# Bound to `response` rather than `re` so the name of the stdlib regex module
# is not shadowed for the rest of the session.
response = requests.get(
    'https://www.ebi.ac.uk/chemblws/targets/{0}/bioactivities.json'.format(TRGT),
    timeout=60,
)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
# One row per bioactivity record returned under the 'bioactivities' key.
data = pd.DataFrame(response.json()['bioactivities'])
data.head()
activity_comment | assay_chemblid | assay_description | assay_type | bioactivity_type | ingredient_cmpd_chemblid | name_in_reference | operator | organism | parent_cmpd_chemblid | reference | target_chemblid | target_confidence | target_name | units | value | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Unspecified | CHEMBL2330631 | Inhibition of ATR-mediated CHK1 phosphorylation at serine 345 in human HT29 cells after 1 hr in presence of 4-nitroquinoline 1-oxide | B | IC50 | CHEMBL2325438 | 36 | = | Homo sapiens | CHEMBL2325438 | J. Med. Chem., (2013) 56:5:2125 | CHEMBL5024 | 9 | Serine-protein kinase ATR | nM | 9200 |
1 | Unspecified | CHEMBL2330631 | Inhibition of ATR-mediated CHK1 phosphorylation at serine 345 in human HT29 cells after 1 hr in presence of 4-nitroquinoline 1-oxide | B | IC50 | CHEMBL2325726 | 35 | = | Homo sapiens | CHEMBL2325726 | J. Med. Chem., (2013) 56:5:2125 | CHEMBL5024 | 9 | Serine-protein kinase ATR | nM | 15000 |
2 | Unspecified | CHEMBL2330631 | Inhibition of ATR-mediated CHK1 phosphorylation at serine 345 in human HT29 cells after 1 hr in presence of 4-nitroquinoline 1-oxide | B | IC50 | CHEMBL2325723 | 32 | = | Homo sapiens | CHEMBL2325723 | J. Med. Chem., (2013) 56:5:2125 | CHEMBL5024 | 9 | Serine-protein kinase ATR | nM | 7200 |
3 | Unspecified | CHEMBL2330631 | Inhibition of ATR-mediated CHK1 phosphorylation at serine 345 in human HT29 cells after 1 hr in presence of 4-nitroquinoline 1-oxide | B | IC50 | CHEMBL2325714 | 22 | > | Homo sapiens | CHEMBL2325714 | J. Med. Chem., (2013) 56:5:2125 | CHEMBL5024 | 9 | Serine-protein kinase ATR | nM | 30000 |
4 | Unspecified | CHEMBL2330631 | Inhibition of ATR-mediated CHK1 phosphorylation at serine 345 in human HT29 cells after 1 hr in presence of 4-nitroquinoline 1-oxide | B | IC50 | CHEMBL2030442 | 5 | = | Homo sapiens | CHEMBL2030442 | J. Med. Chem., (2013) 56:5:2125 | CHEMBL5024 | 9 | Serine-protein kinase ATR | nM | 1100 |
# Keep only the first bioactivity record seen for each parent compound.
is_repeat = data.duplicated(['parent_cmpd_chemblid'])
data = data[~is_repeat]
data.shape
(135, 16)
# Count the bioactivity records reported by each assay.
assays = data[['assay_chemblid', 'target_chemblid']]
assays = assays.groupby('assay_chemblid').count()
# groupby() moves `assay_chemblid` into the index, so the per-assay count is
# read from the remaining `target_chemblid` column instead of the group key.
goodassays = list(assays.index[assays['target_chemblid'] >= 4])
# Keep only rows from assays with at least four records. `.loc` replaces the
# `.ix` indexer, which has been removed from pandas; the copy makes `data` an
# independent frame so that adding columns to it later is unambiguous.
data = data.loc[data['assay_chemblid'].isin(goodassays)].copy()
data.shape
(124, 16)
def fetch_SMILES(chemblid):
    """Return the SMILES string of a ChEMBL compound.

    Queries the ChEMBL compound web service for ``chemblid`` and extracts the
    ``smiles`` field of the returned ``compound`` record.

    Args:
        chemblid: ChEMBL compound identifier, e.g. ``'CHEMBL2325438'``.

    Returns:
        The compound's SMILES as a ``str``.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        KeyError: If the response lacks the ``compound`` or ``smiles`` field.
    """
    url = 'https://www.ebi.ac.uk/chemblws/compounds/{}.json'.format(chemblid)
    # A timeout keeps one stalled request from hanging the whole column map.
    response = requests.get(url, timeout=60)
    # Surface HTTP failures directly rather than as an opaque JSON/KeyError.
    response.raise_for_status()
    return str(response.json()['compound']['smiles'])
# Look up a SMILES string for every remaining compound (one web request each).
data['SMILES'] = data['parent_cmpd_chemblid'].map(fetch_SMILES)
# Build RDKit molecules from the SMILES column; they are read back below from
# the 'ROMol' column.
PandasTools.AddMoleculeColumnToFrame(data, smilesCol='SMILES')
# Take an explicit copy of the columns of interest: `mols` gets new columns
# assigned to it later, and assigning into a plain column slice of `data`
# triggers pandas' SettingWithCopy warning.
mols = data[
    ['parent_cmpd_chemblid', 'name_in_reference', 'SMILES', 'ROMol', 'assay_chemblid']
].copy()
mols.shape
(124, 5)
# Preview the first rows of the compound table.
mols.head()
parent_cmpd_chemblid | name_in_reference | SMILES | ROMol | assay_chemblid | |
---|---|---|---|---|---|
0 | CHEMBL2325438 | 36 | CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cc(cc4[nH]ccc34)C(=O)N)N5CCOCC5 | |
CHEMBL2330631 |
1 | CHEMBL2325726 | 35 | CNC(=O)c1cc(c2nc(cc(n2)C3(CC3)S(=O)(=O)C)N4CCOCC4)c5cc[nH]c5c1 | |
CHEMBL2330631 |
2 | CHEMBL2325723 | 32 | CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]c(cc34)C#N)N5CCOCC5 | |
CHEMBL2330631 |
3 | CHEMBL2325714 | 22 | CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc(N)c3)N4CCOCC4 | |
CHEMBL2330631 |
4 | CHEMBL2030442 | 5 | CS(=O)(=O)Cc1cc(nc(n1)c2cccc3[nH]ccc23)N4CCOCC4 | |
CHEMBL2330631 |
# Morgan fingerprint (radius 2, 2048 bits) for every molecule in the table.
fps = []
for m in mols['ROMol']:
    fps.append(Chem.GetMorganFingerprintAsBitVect(m, 2, nBits=2048))

# Pairwise Jaccard distances, expanded from condensed to square form.
condensed_dist = pdist(fps, 'jaccard')
dist_mat = squareform(condensed_dist)

# Show the matrix labelled by compound id on both axes.
chembl_ids = mols['parent_cmpd_chemblid']
pd.DataFrame(dist_mat, index=chembl_ids, columns=chembl_ids).head()
parent_cmpd_chemblid | CHEMBL2325438 | CHEMBL2325726 | CHEMBL2325723 | CHEMBL2325714 | CHEMBL2030442 | CHEMBL199416 | CHEMBL1095933 | CHEMBL561907 | CHEMBL371968 | CHEMBL2325708 | CHEMBL199050 | CHEMBL1766788 | CHEMBL1766794 | CHEMBL2325715 | CHEMBL1098243 | CHEMBL1766790 | CHEMBL200299 | CHEMBL2325439 | CHEMBL1766799 | CHEMBL2325704 | CHEMBL2325725 | CHEMBL1766793 | CHEMBL1766798 | CHEMBL1766762 | CHEMBL196764 | CHEMBL563985 | CHEMBL2325705 | CHEMBL198740 | CHEMBL1766792 | CHEMBL1766774 | CHEMBL1766802 | CHEMBL2325712 | CHEMBL2325707 | CHEMBL1766784 | CHEMBL113 | CHEMBL1766768 | CHEMBL2325710 | CHEMBL2325440 | CHEMBL1766791 | CHEMBL2325711 | CHEMBL1766769 | CHEMBL1766780 | CHEMBL1765107 | CHEMBL1766803 | CHEMBL2325720 | CHEMBL1766777 | CHEMBL2325713 | CHEMBL1766781 | CHEMBL1766787 | CHEMBL1766796 | CHEMBL1766770 | CHEMBL1766804 | CHEMBL2325698 | CHEMBL197227 | CHEMBL2325709 | CHEMBL1766800 | CHEMBL196759 | CHEMBL1940990 | CHEMBL1766760 | CHEMBL1766795 | CHEMBL2325721 | CHEMBL2321916 | CHEMBL1766759 | CHEMBL1766764 | CHEMBL1766778 | CHEMBL2325703 | CHEMBL2325724 | CHEMBL1766786 | CHEMBL562999 | CHEMBL199051 | CHEMBL436767 | CHEMBL200275 | CHEMBL197439 | CHEMBL1766789 | CHEMBL369969 | CHEMBL1766766 | CHEMBL1095627 | CHEMBL1095626 | CHEMBL1094981 | CHEMBL1766773 | CHEMBL1766765 | CHEMBL2325719 | CHEMBL1098242 | CHEMBL1766801 | CHEMBL1766763 | CHEMBL1940980 | CHEMBL2325699 | CHEMBL2325702 | CHEMBL2030436 | CHEMBL1766785 | CHEMBL2325717 | CHEMBL1766775 | CHEMBL1766761 | CHEMBL1766771 | CHEMBL2325696 | CHEMBL2325700 | CHEMBL1766767 | CHEMBL196964 | CHEMBL2325722 | CHEMBL1766782 | CHEMBL1766783 | CHEMBL1940981 | CHEMBL199216 | CHEMBL1766772 | CHEMBL370108 | CHEMBL198801 | CHEMBL222102 | CHEMBL2325695 | CHEMBL381025 | CHEMBL197239 | CHEMBL1940987 | CHEMBL2325716 | CHEMBL1095932 | CHEMBL199424 | CHEMBL2325706 | CHEMBL2325697 | CHEMBL197219 | CHEMBL1766776 | CHEMBL382501 | CHEMBL1097942 | CHEMBL2325701 | CHEMBL1766797 | CHEMBL2325718 | CHEMBL1766779 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
parent_cmpd_chemblid | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CHEMBL2325438 | 0.000000 | 0.181818 | 0.447368 | 0.457143 | 0.500000 | 0.808989 | 0.786408 | 0.801887 | 0.813953 | 0.388889 | 0.788235 | 0.861702 | 0.863158 | 0.459459 | 0.790476 | 0.877551 | 0.795181 | 0.230769 | 0.848485 | 0.261538 | 0.181818 | 0.872340 | 0.868687 | 0.888889 | 0.811765 | 0.790000 | 0.298507 | 0.795455 | 0.858696 | 0.877778 | 0.869565 | 0.441558 | 0.328571 | 0.850575 | 0.923077 | 0.873563 | 0.441558 | 0.230769 | 0.881720 | 0.410959 | 0.879121 | 0.898876 | 0.900000 | 0.880435 | 0.218750 | 0.866667 | 0.439394 | 0.891304 | 0.881720 | 0.872340 | 0.887640 | 0.844444 | 0.448718 | 0.813187 | 0.418919 | 0.847826 | 0.808989 | 0.904348 | 0.900000 | 0.877551 | 0.347826 | 0.424658 | 0.894118 | 0.898876 | 0.903226 | 0.506667 | 0.264706 | 0.861702 | 0.794118 | 0.811765 | 0.813953 | 0.790698 | 0.804878 | 0.863158 | 0.716049 | 0.881720 | 0.762887 | 0.765957 | 0.809091 | 0.835165 | 0.891304 | 0.323529 | 0.798165 | 0.857143 | 0.880435 | 0.893805 | 0.453333 | 0.646341 | 0.584416 | 0.879121 | 0.394366 | 0.870968 | 0.891566 | 0.875000 | 0.418919 | 0.578947 | 0.842697 | 0.811765 | 0.347222 | 0.900000 | 0.875000 | 0.913043 | 0.813187 | 0.882979 | 0.755814 | 0.815217 | 0.800000 | 0.253731 | 0.808989 | 0.808511 | 0.884956 | 0.416667 | 0.796117 | 0.787234 | 0.328571 | 0.418919 | 0.817204 | 0.879121 | 0.826087 | 0.801887 | 0.589744 | 0.881188 | 0.416667 | 0.855556 |
CHEMBL2325726 | 0.181818 | 0.000000 | 0.461538 | 0.493151 | 0.513158 | 0.813187 | 0.754902 | 0.794393 | 0.818182 | 0.405405 | 0.793103 | 0.852632 | 0.854167 | 0.473684 | 0.783019 | 0.868687 | 0.800000 | 0.253731 | 0.840000 | 0.283582 | 0.205882 | 0.863158 | 0.860000 | 0.879121 | 0.816092 | 0.757576 | 0.318841 | 0.800000 | 0.849462 | 0.868132 | 0.884211 | 0.455696 | 0.347222 | 0.879121 | 0.925000 | 0.863636 | 0.455696 | 0.253731 | 0.872340 | 0.426667 | 0.869565 | 0.888889 | 0.890110 | 0.882979 | 0.242424 | 0.857143 | 0.455882 | 0.881720 | 0.872340 | 0.863158 | 0.877778 | 0.860215 | 0.462500 | 0.817204 | 0.434211 | 0.838710 | 0.813187 | 0.905983 | 0.888889 | 0.868687 | 0.342857 | 0.440000 | 0.883721 | 0.888889 | 0.893617 | 0.519481 | 0.285714 | 0.852632 | 0.786408 | 0.816092 | 0.818182 | 0.795455 | 0.809524 | 0.854167 | 0.722892 | 0.872340 | 0.767677 | 0.770833 | 0.801802 | 0.826087 | 0.881720 | 0.366197 | 0.790909 | 0.872340 | 0.870968 | 0.895652 | 0.467532 | 0.654762 | 0.594937 | 0.818182 | 0.432432 | 0.861702 | 0.880952 | 0.865169 | 0.434211 | 0.589744 | 0.833333 | 0.816092 | 0.428571 | 0.890110 | 0.865169 | 0.914530 | 0.817204 | 0.873684 | 0.775281 | 0.819149 | 0.804598 | 0.275362 | 0.813187 | 0.824742 | 0.886957 | 0.432432 | 0.764706 | 0.778947 | 0.347222 | 0.434211 | 0.821053 | 0.869565 | 0.829787 | 0.794393 | 0.600000 | 0.872549 | 0.432432 | 0.846154 |
CHEMBL2325723 | 0.447368 | 0.461538 | 0.000000 | 0.434783 | 0.520000 | 0.795455 | 0.809524 | 0.824074 | 0.800000 | 0.410959 | 0.743902 | 0.885417 | 0.886598 | 0.394366 | 0.813084 | 0.900000 | 0.780488 | 0.424658 | 0.871287 | 0.287879 | 0.461538 | 0.895833 | 0.901961 | 0.913043 | 0.811765 | 0.813725 | 0.323529 | 0.781609 | 0.882979 | 0.902174 | 0.869565 | 0.461538 | 0.352113 | 0.913043 | 0.923077 | 0.886364 | 0.461538 | 0.424658 | 0.905263 | 0.432432 | 0.891304 | 0.923077 | 0.923913 | 0.880435 | 0.416667 | 0.891304 | 0.415385 | 0.914894 | 0.905263 | 0.895833 | 0.900000 | 0.818182 | 0.468354 | 0.786517 | 0.440000 | 0.884211 | 0.767442 | 0.864865 | 0.926829 | 0.900000 | 0.416667 | 0.357143 | 0.919540 | 0.923077 | 0.879121 | 0.526316 | 0.361111 | 0.885417 | 0.817308 | 0.811765 | 0.813953 | 0.776471 | 0.804878 | 0.886598 | 0.776471 | 0.893617 | 0.800000 | 0.804124 | 0.830357 | 0.860215 | 0.840909 | 0.459459 | 0.819820 | 0.831461 | 0.892473 | 0.913043 | 0.473684 | 0.576923 | 0.654321 | 0.903226 | 0.347826 | 0.894737 | 0.904762 | 0.887640 | 0.440000 | 0.650000 | 0.855556 | 0.797619 | 0.275362 | 0.923913 | 0.912088 | 0.873874 | 0.772727 | 0.858696 | 0.811111 | 0.775281 | 0.771084 | 0.440000 | 0.795455 | 0.782609 | 0.904348 | 0.298507 | 0.819048 | 0.787234 | 0.352113 | 0.440000 | 0.777778 | 0.903226 | 0.813187 | 0.824074 | 0.589744 | 0.902913 | 0.218750 | 0.880435 |
CHEMBL2325714 | 0.457143 | 0.493151 | 0.434783 | 0.000000 | 0.597222 | 0.784810 | 0.776596 | 0.793814 | 0.773333 | 0.507042 | 0.743243 | 0.831325 | 0.833333 | 0.400000 | 0.781250 | 0.863636 | 0.750000 | 0.455882 | 0.818182 | 0.390625 | 0.493151 | 0.843373 | 0.853933 | 0.875000 | 0.786667 | 0.780220 | 0.424242 | 0.753247 | 0.827160 | 0.833333 | 0.810127 | 0.533333 | 0.449275 | 0.860759 | 0.911765 | 0.871795 | 0.552632 | 0.455882 | 0.853659 | 0.527778 | 0.878049 | 0.871795 | 0.858974 | 0.822785 | 0.447761 | 0.820513 | 0.211538 | 0.864198 | 0.853659 | 0.843373 | 0.887500 | 0.810127 | 0.558442 | 0.790123 | 0.534247 | 0.829268 | 0.769231 | 0.884615 | 0.885714 | 0.850575 | 0.447761 | 0.409091 | 0.849315 | 0.871795 | 0.878049 | 0.621622 | 0.478873 | 0.831325 | 0.784946 | 0.802632 | 0.789474 | 0.763158 | 0.777778 | 0.833333 | 0.779221 | 0.880952 | 0.720930 | 0.722892 | 0.801980 | 0.784810 | 0.891566 | 0.424242 | 0.790000 | 0.825000 | 0.865854 | 0.894231 | 0.567568 | 0.597222 | 0.565217 | 0.850000 | 0.400000 | 0.827160 | 0.876712 | 0.873418 | 0.534247 | 0.558824 | 0.837500 | 0.770270 | 0.420290 | 0.873418 | 0.858974 | 0.894231 | 0.775000 | 0.841463 | 0.723684 | 0.777778 | 0.773333 | 0.471429 | 0.769231 | 0.785714 | 0.884615 | 0.400000 | 0.787234 | 0.790698 | 0.449275 | 0.534247 | 0.765432 | 0.835443 | 0.790123 | 0.793814 | 0.591549 | 0.855556 | 0.400000 | 0.822785 |
CHEMBL2030442 | 0.500000 | 0.513158 | 0.520000 | 0.597222 | 0.000000 | 0.777778 | 0.808081 | 0.800000 | 0.782051 | 0.373134 | 0.720000 | 0.850575 | 0.852273 | 0.573333 | 0.811881 | 0.868132 | 0.760000 | 0.478873 | 0.836957 | 0.317460 | 0.513158 | 0.862069 | 0.858696 | 0.892857 | 0.794872 | 0.800000 | 0.328125 | 0.762500 | 0.847059 | 0.853659 | 0.845238 | 0.430556 | 0.358209 | 0.879518 | 0.915493 | 0.848101 | 0.430556 | 0.478873 | 0.885057 | 0.397059 | 0.855422 | 0.890244 | 0.891566 | 0.857143 | 0.471429 | 0.841463 | 0.608696 | 0.882353 | 0.872093 | 0.862069 | 0.864198 | 0.802469 | 0.480000 | 0.737500 | 0.405797 | 0.848837 | 0.746835 | 0.866667 | 0.905405 | 0.855556 | 0.513889 | 0.520548 | 0.884615 | 0.890244 | 0.895349 | 0.306452 | 0.500000 | 0.850575 | 0.804124 | 0.794872 | 0.797468 | 0.756410 | 0.786667 | 0.852273 | 0.689189 | 0.858824 | 0.784946 | 0.775281 | 0.807692 | 0.807229 | 0.869048 | 0.513889 | 0.807692 | 0.817073 | 0.870588 | 0.886792 | 0.506849 | 0.322581 | 0.327869 | 0.869048 | 0.513889 | 0.847059 | 0.896104 | 0.850000 | 0.472222 | 0.316667 | 0.814815 | 0.779221 | 0.526316 | 0.891566 | 0.878049 | 0.886792 | 0.753086 | 0.860465 | 0.765432 | 0.756098 | 0.733333 | 0.493151 | 0.777778 | 0.764706 | 0.866667 | 0.492958 | 0.806122 | 0.770115 | 0.358209 | 0.472222 | 0.728395 | 0.855422 | 0.783133 | 0.811881 | 0.254237 | 0.860215 | 0.492958 | 0.843373 |
# Two-dimensional MDS on the precomputed distance matrix; the fixed
# random_state keeps the layout reproducible between runs.
mds = manifold.MDS(
    n_components=2,
    dissimilarity="precomputed",
    random_state=3,
    n_jobs=2,
)
results = mds.fit(dist_mat)
coords = results.embedding_
# First embedding column becomes X, second becomes Y.
mols['X'] = coords[:, 0]
mols['Y'] = coords[:, 1]
mols.head()
parent_cmpd_chemblid | name_in_reference | SMILES | ROMol | assay_chemblid | X | Y | |
---|---|---|---|---|---|---|---|
0 | CHEMBL2325438 | 36 | CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cc(cc4[nH]ccc34)C(=O)N)N5CCOCC5 | |
CHEMBL2330631 | -0.063416 | -0.440131 |
1 | CHEMBL2325726 | 35 | CNC(=O)c1cc(c2nc(cc(n2)C3(CC3)S(=O)(=O)C)N4CCOCC4)c5cc[nH]c5c1 | |
CHEMBL2330631 | -0.095068 | -0.423555 |
2 | CHEMBL2325723 | 32 | CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]c(cc34)C#N)N5CCOCC5 | |
CHEMBL2330631 | 0.050322 | -0.574499 |
3 | CHEMBL2325714 | 22 | CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc(N)c3)N4CCOCC4 | |
CHEMBL2330631 | 0.017721 | -0.280538 |
4 | CHEMBL2030442 | 5 | CS(=O)(=O)Cc1cc(nc(n1)c2cccc3[nH]ccc23)N4CCOCC4 | |
CHEMBL2330631 | -0.129206 | -0.343458 |
# NOTE(review): `rcParams` and `scatter` are not imported explicitly in this
# file - presumably they come from a pylab-style namespace; confirm.
# Set a 12 x 12 figure size, then plot the MDS coordinates.
rcParams['figure.figsize'] = (12, 12)
scatter(x=mols['X'], y=mols['Y'])
<matplotlib.collections.PathCollection at 0x83f6410>
# Same MDS coordinates, with points coloured by the assay they come from.
assay_aes = aes(x='X', y='Y', colour='assay_chemblid')
ggplot(assay_aes, data=mols) + geom_point()
<ggplot: (8979761)>
#TODO: D3 interactive visualisation