# Let's do our imports first.
import respire,urllib,re
from IPython.display import HTML,JSON
# We just need to monkey-patch the URL join method in this instance,
# since it truncates the URL due to the way ADMESARfari is hosted
def urljoin_patched(base, path):
    """Join *base* and *path* by plain concatenation.

    Installed in place of ``respire.client.urljoin`` because the stock join
    truncates the base URL for this service. No separator is inserted or
    removed: the caller's strings are returned glued together as-is.
    """
    return base + path
# Install the concatenating join before any client is built, then construct
# the client (and its generated methods) from the description at this URL.
respire.client.urljoin = urljoin_patched
_spore_description_url = 'http://wwwdev.ebi.ac.uk/chembl/admesarfari/rest/spore'
api_client = respire.client_from_url(_spore_description_url)
# What methods do we have available?
# Walk the parsed endpoint description and list every non-HEAD method with
# its path and description, rendered as an HTML table.
ts = '<table>'
te = '</table>'
tc = []
for methodname in api_client.description.methods:
    spec = api_client.description.methods[methodname]
    if spec['method'] == 'HEAD':
        continue
    # One heading row per method, followed by a path/description row.
    tc.append('<tr><th colspan="2">%s</th></tr>' % methodname)
    tc.append('<tr><td>%s</td><td>%s</td></tr>'
              % (spec['path'], spec['description']))
h = HTML(ts + "".join(tc) + te)
h
# Let's set up a few lookup dictionaries.
# Taxonomy ID lookup: map each taxid returned by the service to its name.
taxids = {
    entry['taxid']: entry['name']
    for entry in api_client.get_taxids()['results']
}
# Tissues and cell types: the first element of 'results' is used as the
# lookup in each case.
tissues = api_client.get_tissues()['results'][0]
alltissues = str(",".join(tissues.keys()))
cells = api_client.get_celltypes()['results'][0]
# Get expression levels for every tissue id at once (could take a while!)
expressionlevels = api_client.get_expressionmatrix(TISSUE_IDS=alltissues)['expression_matrix']
print("Levels found: %d" % len(expressionlevels))
# Let's use an input compound and predict its ADME profile.
# We'll use Gleevec (CHEMBL941) as our input, given as a molfile connection
# table: the counts line below declares 37 atoms, 41 bonds and the V2000 format.
# NOTE(review): V2000 records are normally fixed-width columns, and the
# whitespace inside this literal looks collapsed -- confirm it still matches
# what the prediction service expects.
gleevec_ctab = """
SciTegic12111210002D
37 41 0 0 0 0 999 V2000
6.9208 -3.0042 0.0000 C 0 0
7.5250 -2.6417 0.0000 N 0 0
3.2167 -3.0417 0.0000 C 0 0
5.6875 -3.0167 0.0000 C 0 0
6.3000 -2.6542 0.0000 N 0 0
3.8292 -2.6792 0.0000 N 0 0
8.1417 -2.9833 0.0000 C 0 0
0.1292 -2.0000 0.0000 N 0 0
5.0667 -2.6667 0.0000 C 0 0
-1.1083 -2.6917 0.0000 N 0 0
6.9250 -3.7167 0.0000 N 0 0
2.6000 -2.6917 0.0000 C 0 0
4.4542 -3.0292 0.0000 C 0 0
8.7542 -2.6125 0.0000 C 0 0
5.6917 -3.7292 0.0000 C 0 0
3.2250 -3.7500 0.0000 O 0 0
9.3458 -1.5375 0.0000 N 0 0
0.7417 -1.6417 0.0000 C 0 0
2.6000 -1.9792 0.0000 C 0 0
1.9875 -3.0500 0.0000 C 0 0
5.0667 -4.0917 0.0000 C 0 0
0.1167 -2.7167 0.0000 C 0 0
-0.4708 -1.6417 0.0000 C 0 0
-0.5000 -3.0542 0.0000 C 0 0
-1.1000 -1.9792 0.0000 C 0 0
8.1583 -3.6958 0.0000 C 0 0
7.5458 -4.0625 0.0000 C 0 0
1.3667 -1.9917 0.0000 C 0 0
4.4542 -3.7417 0.0000 C 0 0
1.9667 -1.6292 0.0000 C 0 0
1.3750 -2.7042 0.0000 C 0 0
8.7417 -1.9083 0.0000 C 0 0
-1.7375 -3.0292 0.0000 C 0 0
9.3750 -2.9625 0.0000 C 0 0
9.9750 -1.8833 0.0000 C 0 0
6.3042 -4.0875 0.0000 C 0 0
9.9917 -2.5958 0.0000 C 0 0
2 1 1 0
3 6 1 0
4 5 1 0
5 1 1 0
6 13 1 0
7 2 2 0
8 18 1 0
9 4 1 0
10 25 1 0
11 1 2 0
12 3 1 0
13 9 2 0
14 7 1 0
15 4 2 0
16 3 2 0
17 32 2 0
18 28 1 0
19 12 2 0
20 12 1 0
21 15 1 0
22 8 1 0
23 8 1 0
24 22 1 0
25 23 1 0
26 7 1 0
27 11 1 0
28 31 1 0
29 21 2 0
30 19 1 0
31 20 2 0
32 14 1 0
33 10 1 0
34 14 2 0
35 37 2 0
36 15 1 0
37 34 1 0
27 26 2 0
29 13 1 0
17 35 1 0
28 30 2 0
24 10 1 0
M END
"""
# Post the URL-quoted connection table to the model predictor and keep the
# 'results' entry of the response.
predictions = api_client.post_modelpredictor2(data=urllib.quote(gleevec_ctab))['results']
# How many ADME targets were predicted?
print(len(predictions))
# Let's view the predictions: one heading row (accession, full name) and one
# detail row (organism name, function) per predicted target.
ts = '<table>'
te = '</table>'
tc = []
for prediction in predictions:
    tc.append('<tr><th>%s</th><th>%s</th></tr>'
              % (prediction['PROTEIN_ACCESSION'], prediction['full_name']))
    # The service may return no function text for a target.
    pfunc = prediction['function']
    if pfunc is None:
        pfunc = 'Unknown'
    # Fall back to a placeholder rather than failing on a taxid that is
    # missing from the lookup table.
    organism = taxids.get(prediction['taxid'], 'Unknown')
    tc.append('<tr><td>%s</td><td>%s</td></tr>' % (organism, pfunc))
h = HTML(ts + "".join(tc) + te)
h
# Now let's look at expression levels of these targets.
# Only entries whose level starts with "High" or "Strong" are kept.
ts = '<table>'
te = '</table>'
tc = []
high_level = re.compile('High|Strong')
wanted_targets = set(prediction['target_id'] for prediction in predictions)

# Scan the expression matrix once, grouping the matching entries by target,
# instead of rescanning every tissue and cell type for each prediction.
expression_by_target = {}
for humexp in expressionlevels:
    for tissue in tissues:
        percell = humexp[str(tissue)]
        for cell in cells:
            cell_key = str(cell)
            if cell_key not in percell:
                continue
            entry = percell[cell_key]
            if entry['target_id'] not in wanted_targets:
                continue
            level = entry['exp_level']
            # An empty or missing level cannot be a high one.
            if not level or not high_level.match(level):
                continue
            # %-formatting accepts non-string field values, which
            # " ".join() on the raw fields would reject.
            expstring = (
                "Tissue = %s , Cell = %s  Level = %s  Type = %s  Reliability = %s"
                % (entry['tissue'], cells[cell_key], level,
                   entry['expression_type'], entry['reliability'])
            )
            expression_by_target.setdefault(entry['target_id'], []).append(expstring)

for prediction in predictions:
    tc.append('<tr><th>%s</th><th>%s</th></tr>'
              % (prediction['PROTEIN_ACCESSION'], prediction['full_name']))
    # One full-width row per high-expression entry for this target.
    for expstring in expression_by_target.get(prediction['target_id'], []):
        tc.append('<tr><td colspan="2">%s</td></tr>' % expstring)
h = HTML(ts + "".join(tc) + te)
h
# Now let's see how many activity points we have per target.
# Targets whose lookup fails are reported on stdout and left out of the table.
ts = '<table>'
te = '</table>'
tc = []
for prediction in predictions:
    try:
        activity = api_client.get_targetbioactivity(TARGET_ID=str(prediction['target_id']))['results']
    except Exception as err:
        # Keep going with the remaining targets, but say which one failed.
        print("Error retrieving data points! (target %s: %s)"
              % (prediction['target_id'], err))
        continue
    tc.append('<tr><th>%s</th><th>%s</th></tr>'
              % (prediction['PROTEIN_ACCESSION'], prediction['full_name']))
    tc.append('<tr><td>Activity points</td><td>%d</td></tr>' % len(activity))
h = HTML(ts + "".join(tc) + te)
h
# How many compounds do we have per target?
ts = '<table>'
te = '</table>'
tc = []
for prediction in predictions:
    try:
        targetcompounds = api_client.get_targetcompounds(TARGET_ID=str(prediction['target_id']))['results']
    except Exception as err:
        # A failed lookup is shown as a zero count, with the cause printed.
        count = 0
        print("Error retrieving data points! (target %s: %s)"
              % (prediction['target_id'], err))
    else:
        count = len(targetcompounds)
    tc.append('<tr><th>%s</th><th>%s</th></tr>'
              % (prediction['PROTEIN_ACCESSION'], prediction['full_name']))
    tc.append('<tr><td>Compounds</td><td>%d</td></tr>' % count)
h = HTML(ts + "".join(tc) + te)
h