# Let's do our imports first.
import re
import urllib

import respire
from IPython.display import HTML, JSON, display

try:
    # Python 2 keeps quote() on the top-level urllib module.
    from urllib import quote as url_quote
except ImportError:
    # Python 3 moved it to urllib.parse.
    from urllib.parse import quote as url_quote


# We just need to monkey-patch the URL join method in this instance,
# since it truncates the URL due to the way ADMESARfari is hosted
def urljoin_patched(base, path):
    """Return ``base`` and ``path`` glued together by plain concatenation."""
    return base + path


respire.client.urljoin = urljoin_patched

# Create our client and associated methods
api_client = respire.client_from_url('http://wwwdev.ebi.ac.uk/chembl/admesarfari/rest/spore')

# NOTE(review): every markup delimiter below (ts, te and the "" / '' pieces
# wrapped around each value) is empty in the source this was restored from --
# presumably tag text that was lost upstream. The empty literals are kept so
# the slots where delimiters belong stay visible; confirm and refill them.

# What methods do we have available?
# Iterate over the parsed endpoint, pulling out applicable methods, the paths and the descriptions.
# We'll add some HTML elements to the output.
tc = []
ts = ''
te = '\n'
for methodname in api_client.description.methods:
    method = api_client.description.methods[methodname]
    # HEAD endpoints are left out of the listing.
    if method['method'] != 'HEAD':
        tc.append("" + methodname + "")
        tc.append('' + method['path'] + '' + method['description'] + '')
h = HTML(ts + "".join(tc) + te)
# A bare ``h`` only renders when it is the last expression of a notebook cell;
# display() renders it wherever this statement runs.
display(h)

# Let's set a few lookup dictionaries
# Taxonomy ID look-up: map each taxid to its name.
taxids = {entry['taxid']: entry['name'] for entry in api_client.get_taxids()['results']}

# Get tissues
tissues = api_client.get_tissues()['results'][0]
alltissues = str(",".join(tissues.keys()))
# Get cell types
cells = api_client.get_celltypes()['results'][0]

# Get Human expression levels (Could take a while!)
expressionlevels = api_client.get_expressionmatrix(TISSUE_IDS=alltissues)['expression_matrix']
print("Levels found: %d" % len(expressionlevels))

# Let's use an input compound and predict its ADME profile
# We'll use Gleevec (CHEMBL941) as our input
# NOTE(review): the record below is laid out in the fixed-width V2000 molfile
# columns (blank title line, program line, blank comment line, counts line,
# 37 atom lines, 41 bond lines). Verify it against the original record.
gleevec_ctab = """
  SciTegic12111210002D

 37 41  0  0  0  0            999 V2000
    6.9208   -3.0042    0.0000 C   0  0
    7.5250   -2.6417    0.0000 N   0  0
    3.2167   -3.0417    0.0000 C   0  0
    5.6875   -3.0167    0.0000 C   0  0
    6.3000   -2.6542    0.0000 N   0  0
    3.8292   -2.6792    0.0000 N   0  0
    8.1417   -2.9833    0.0000 C   0  0
    0.1292   -2.0000    0.0000 N   0  0
    5.0667   -2.6667    0.0000 C   0  0
   -1.1083   -2.6917    0.0000 N   0  0
    6.9250   -3.7167    0.0000 N   0  0
    2.6000   -2.6917    0.0000 C   0  0
    4.4542   -3.0292    0.0000 C   0  0
    8.7542   -2.6125    0.0000 C   0  0
    5.6917   -3.7292    0.0000 C   0  0
    3.2250   -3.7500    0.0000 O   0  0
    9.3458   -1.5375    0.0000 N   0  0
    0.7417   -1.6417    0.0000 C   0  0
    2.6000   -1.9792    0.0000 C   0  0
    1.9875   -3.0500    0.0000 C   0  0
    5.0667   -4.0917    0.0000 C   0  0
    0.1167   -2.7167    0.0000 C   0  0
   -0.4708   -1.6417    0.0000 C   0  0
   -0.5000   -3.0542    0.0000 C   0  0
   -1.1000   -1.9792    0.0000 C   0  0
    8.1583   -3.6958    0.0000 C   0  0
    7.5458   -4.0625    0.0000 C   0  0
    1.3667   -1.9917    0.0000 C   0  0
    4.4542   -3.7417    0.0000 C   0  0
    1.9667   -1.6292    0.0000 C   0  0
    1.3750   -2.7042    0.0000 C   0  0
    8.7417   -1.9083    0.0000 C   0  0
   -1.7375   -3.0292    0.0000 C   0  0
    9.3750   -2.9625    0.0000 C   0  0
    9.9750   -1.8833    0.0000 C   0  0
    6.3042   -4.0875    0.0000 C   0  0
    9.9917   -2.5958    0.0000 C   0  0
  2  1  1  0
  3  6  1  0
  4  5  1  0
  5  1  1  0
  6 13  1  0
  7  2  2  0
  8 18  1  0
  9  4  1  0
 10 25  1  0
 11  1  2  0
 12  3  1  0
 13  9  2  0
 14  7  1  0
 15  4  2  0
 16  3  2  0
 17 32  2  0
 18 28  1  0
 19 12  2  0
 20 12  1  0
 21 15  1  0
 22  8  1  0
 23  8  1  0
 24 22  1  0
 25 23  1  0
 26  7  1  0
 27 11  1  0
 28 31  1  0
 29 21  2  0
 30 19  1  0
 31 20  2  0
 32 14  1  0
 33 10  1  0
 34 14  2  0
 35 37  2  0
 36 15  1  0
 37 34  1  0
 27 26  2  0
 29 13  1  0
 17 35  1  0
 28 30  2  0
 24 10  1  0
M  END
"""
predictions = api_client.post_modelpredictor2(data=url_quote(gleevec_ctab))['results']

# How many ADME targets were predicted?
print(len(predictions))

# Let's view the predictions
tc = []
ts = ''
te = '\n'
for prediction in predictions:
    tc.append("" + prediction['PROTEIN_ACCESSION'] + "" + prediction['full_name'] + "")
    # Fall back to a placeholder when the service reports no function text.
    if prediction['function'] is not None:
        pfunc = prediction['function']
    else:
        pfunc = 'Unknown'
    tc.append('' + taxids[prediction['taxid']] + '' + pfunc + '')
h = HTML(ts + "".join(tc) + te)
display(h)

# Now let's look at expression levels of these targets
# Select only HIGH expression levels
high_level = re.compile('High|Strong')
tc = []
ts = ''
te = '\n'
for prediction in predictions:
    tc.append("" + prediction['PROTEIN_ACCESSION'] + "" + prediction['full_name'] + "")
    # Scan every tissue / cell-type entry of the expression matrix and keep
    # the ones that belong to this target and carry a High/Strong level.
    targetexpression = []
    for humexp in expressionlevels:
        for tissue in tissues:
            # The per-tissue record does not depend on the cell, so look it
            # up once per tissue instead of once per cell.
            percell = humexp[str(tissue)]
            for cell in cells:
                if str(cell) not in percell:
                    continue
                entry = percell[str(cell)]
                if entry['target_id'] != prediction['target_id']:
                    continue
                if high_level.match(entry['exp_level']):
                    # Kept as a tuple of fragments; joined with spaces below.
                    targetexpression.append((
                        "Tissue =", entry['tissue'],
                        ", Cell =", cells[str(cell)],
                        " Level =", entry['exp_level'],
                        " Type =", entry['expression_type'],
                        " Reliability =", entry['reliability'],
                    ))
    for exp in targetexpression:
        tc.append('' + " ".join(exp) + '')
h = HTML(ts + "".join(tc) + te)
display(h)

# Now let's see how many activity points we have per target
# The counts are printed; no rows are added to the HTML fragment here, so it
# renders with only the (empty) opening and closing delimiters.
tc = []
ts = ''
te = '\n'
for prediction in predictions:
    try:
        activity = api_client.get_targetbioactivity(TARGET_ID=str(prediction['target_id']))['results']
    except Exception:
        # Best effort: report the failure and carry on with the next target.
        print("Error retrieving data points!")
    else:
        print("%d  activity points" % len(activity))
h = HTML(ts + "".join(tc) + te)
display(h)

# How many compounds do we have per target?
tc = []
ts = ''
te = '\n'
for prediction in predictions:
    try:
        targetcompounds = api_client.get_targetcompounds(TARGET_ID=str(prediction['target_id']))['results']
    except Exception:
        # Best effort: show a zero count for targets whose lookup failed.
        count = 0
        print("Error retrieving data points!")
    else:
        count = len(targetcompounds)
    tc.append("" + prediction['PROTEIN_ACCESSION'] + "" + prediction['full_name'] + "")
    tc.append('Compounds' + str(count) + '')
h = HTML(ts + "".join(tc) + te)
display(h)