import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
# Figures written with savefig use 300 dpi.
matplotlib.rc('savefig', dpi=300)
# Optional IPython magic (left disabled): render inline figures as SVG.
#%config InlineBackend.figure_format = 'svg'
# Base endpoint; the query templates below are appended to it.
api_url = "http://api.brain-map.org/api/v2/data/"

# Expression query template: filled with a comma-separated probe id list and
# a quoted donor name via str.format.
query_expression = (
    "query.json?criteria=service::human_microarray_expression"
    "[probes$eq{probes}]"
    "[donor$eq{donor}]"
)

# Probe lookup template: filled with a comma-separated list of quoted gene
# acronyms via str.format; the response is restricted to probe ids.
query_probes = (
    "query.xml?criteria=model::Probe,"
    "rma::criteria,[probe_type$eq'DNA'],"
    "products[abbreviation$eq'HumanMA'],"
    "gene[acronym$in{geneid}],"
    "rma::options[only$eq'probes.id']"
)

# Both values carry their own single quotes because they are substituted
# verbatim into the query strings above.
donor = "'H035.2001'"
gene_ids = ",".join(["'SLC6A2'", "'SCN1A'"])
from urllib2 import urlopen
from contextlib import closing
import json
from lxml import etree
# Step 1: look up the probe ids that belong to the requested genes.
request_url = api_url + query_probes.format(geneid=gene_ids)
with closing(urlopen(request_url)) as response:
    xml_data = response.read()
tree = etree.fromstring(xml_data)
probe_ids = [node.text for node in tree.xpath('//probe/id')]
if not probe_ids:
    # Without any probe id the expression query below would be malformed and
    # only fail later with an unrelated error, so stop here with a clear one.
    raise ValueError("no probes found for genes %s" % gene_ids)
probes = ','.join(probe_ids)

# Step 2: fetch the expression data of those probes for the selected donor.
# Only the 'msg' member of the JSON response is kept.
request_url = api_url + query_expression.format(probes=probes, donor=donor)
with closing(urlopen(request_url)) as response:
    probe_data = json.load(response)['msg']
# One column of float expression levels per probe, keyed by probe name.
# A list comprehension (rather than map) yields a concrete list on both
# Python 2 and Python 3, so the DataFrame receives real sequences.
expr_lvls = {prb['name']: [float(level) for level in prb['expression_level']]
             for prb in probe_data['probes']}

# Per-sample annotations: abbreviation of the top-level structure and the id
# of the sampled structure.
structures = [s['top_level_structure']['abbreviation']
              for s in probe_data['samples']]
structure_id = [s['structure']['id']
                for s in probe_data['samples']]

# Probe name -> gene symbol.
genes = {prb['name']: prb['gene-symbol']
         for prb in probe_data['probes']}

# Attach the annotations as extra columns next to the probe columns.
# Assumes each probe has one expression level per sample -- TODO confirm.
expr_lvls.update({'top_level_structure': structures,
                  'structure_id': structure_id})
df = pd.DataFrame(expr_lvls)
# Write the table to disk and read it back, so the cells below operate on the
# CSV contents; then preview the first ten rows.
csv_path = '../data/allen_brain_atlas.csv'
df.to_csv(csv_path, index=False)
df = pd.read_csv(csv_path)
df.head(10)
 | A_23_P28224 | A_23_P358345 | CUST_16472_PI416261804 | CUST_17139_PI416261804 | CUST_546_PI416408490 | structure_id | top_level_structure |
---|---|---|---|---|---|---|---|
0 | 8.8037 | 1.6248 | 2.3772 | 5.9888 | 6.8569 | 4055 | FL |
1 | 8.9927 | 1.5055 | 1.7143 | 5.8315 | 6.5077 | 4079 | FL |
2 | 8.6063 | 1.5778 | 2.8595 | 6.0884 | 6.5789 | 4079 | FL |
3 | 8.5581 | 2.0678 | 1.9892 | 6.3405 | 6.6196 | 4079 | FL |
4 | 8.7339 | 1.4767 | 1.4731 | 5.8716 | 6.4472 | 4080 | FL |
5 | 8.4503 | 1.8223 | 2.2234 | 5.7434 | 6.4456 | 4080 | FL |
6 | 8.7355 | 1.5104 | 1.6819 | 5.9797 | 6.8184 | 4890 | FL |
7 | 8.9026 | 1.5253 | 1.7685 | 6.2411 | 6.6980 | 4890 | FL |
8 | 8.7765 | 2.9994 | 1.7939 | 6.2621 | 6.8061 | 4048 | FL |
9 | 8.9004 | 1.7628 | 1.7252 | 6.3940 | 6.9415 | 4048 | FL |
# Scatter the second probe column against the third, using every other row.
# Positional .iloc replaces the deprecated (and later removed) .ix indexer.
plt.plot(df.iloc[::2, 1], df.iloc[::2, 2], '.', ms=1)
plt.xlabel(df.columns[1])
plt.ylabel(df.columns[2])
<matplotlib.text.Text at 0x10edf6750>
# Mean expression of every probe within each top-level structure; the
# averaged structure_id column is dropped afterwards.
df_aggregated = (df.groupby('top_level_structure')
                   .mean()
                   .drop('structure_id', axis=1))

# Square grid with one row and one column per probe. squeeze=False keeps
# `axes` two-dimensional even if only a single probe column is present.
nx = ny = df_aggregated.shape[1]
fig, axes = plt.subplots(nx, ny,
                         sharex='col', sharey='row',
                         figsize=(8, 6),
                         squeeze=False)
for i in range(nx):
    for j in range(ny):
        ax = axes[j, i]
        if j != i:
            # Off-diagonal panel: probe i on x against probe j on y.
            ax.plot(df_aggregated.iloc[:, i],
                    df_aggregated.iloc[:, j], '.', ms=3)
        else:
            # Diagonal panel: no axes, only the gene symbol of the probe.
            ax.set_axis_off()
            ax.text(0.5, 0.5, genes[df_aggregated.columns[i]],
                    transform=ax.transAxes,
                    va='center',
                    ha='center')

# Reduce the ticks to the axis limits on the first column (y) and first
# row (x) of panels.
for ax in axes[:, 0]:
    ax.set_yticks(ax.get_ylim())
for ax in axes[0, :]:
    ax.set_xticks(ax.get_xlim())
[[<matplotlib.axis.XTick at 0x10f72e810>, <matplotlib.axis.XTick at 0x10ee51f90>], [<matplotlib.axis.XTick at 0x10fea75d0>, <matplotlib.axis.XTick at 0x10fea7ed0>], [<matplotlib.axis.XTick at 0x10f73d750>, <matplotlib.axis.XTick at 0x10f7476d0>], [<matplotlib.axis.XTick at 0x10ff91b90>, <matplotlib.axis.XTick at 0x10ff7e350>], [<matplotlib.axis.XTick at 0x10fffe250>, <matplotlib.axis.XTick at 0x10fffead0>]]
# Keep every column except the last two, then z-score each remaining column
# (subtract the column mean, divide by the column standard deviation).
# Positional .iloc replaces the deprecated (and later removed) .ix indexer.
df_pure = df.iloc[:, :-2]
data = np.array(df_pure)
zscore = (data - data.mean(0)) / data.std(0)

# Heatmap with one row per column of df_pure and one pixel column per sample.
plt.figure(figsize=(12, 1))
plt.imshow(zscore.T, aspect='auto', interpolation='nearest', cmap='seismic')
plt.yticks(np.arange(len(df_pure.columns)), df_pure.columns)
([<matplotlib.axis.YTick at 0x110562f50>, <matplotlib.axis.YTick at 0x1107fdd50>, <matplotlib.axis.YTick at 0x10ff9e510>, <matplotlib.axis.YTick at 0x10fe93ed0>, <matplotlib.axis.YTick at 0x10f72efd0>], <a list of 5 Text yticklabel objects>)
# NOTE: this import rebinds the name `data`, which held the expression
# matrix in the cell above.
from skimage import data

# `lena` is missing from newer scikit-image releases; fall back to the
# `astronaut` sample image there instead of raising AttributeError.
if hasattr(data, 'lena'):
    image = data.lena()
else:
    image = data.astronaut()
plt.imshow(image)
<matplotlib.image.AxesImage at 0x1134fd310>