import matplotlib import matplotlib.pyplot as plt import numpy as np import pandas as pd %matplotlib inline matplotlib.rcParams['savefig.dpi'] = 300 #%config InlineBackend.figure_format = 'svg' api_url = "http://api.brain-map.org/api/v2/data/" query_expression = "query.json?criteria=service::human_microarray_expression[probes$eq{probes}][donor$eq{donor}]" query_probes = "query.xml?criteria=model::Probe,rma::criteria,[probe_type$eq'DNA'],products[abbreviation$eq'HumanMA'],gene[acronym$in{geneid}],rma::options[only$eq'probes.id']" donor = "'H035.2001'" gene_ids="'SLC6A2','SCN1A'" from urllib2 import urlopen from contextlib import closing import json from lxml import etree request_url = api_url + query_probes.format(geneid=gene_ids) with closing(urlopen(request_url)) as response: xml_data = response.read() tree = etree.fromstring(xml_data) probes = ','.join([t.text for t in tree.xpath('//probe/id')]) request_url = api_url + query_expression.format(probes=probes, donor=donor) with closing(urlopen(request_url)) as response: probe_data = json.load(response)['msg'] expr_lvls = {prb['name']: map(float, prb['expression_level']) for prb in probe_data['probes']} structures = [s['top_level_structure']['abbreviation'] for s in probe_data['samples']] structure_id = [s['structure']['id'] for s in probe_data['samples']] genes = {prb['name']: prb['gene-symbol'] for prb in probe_data['probes']} expr_lvls.update({'top_level_structure' : structures, 'structure_id' : structure_id}) df = pd.DataFrame(expr_lvls) df.to_csv('../data/allen_brain_atlas.csv', index=False) df = pd.read_csv('../data/allen_brain_atlas.csv') df[:10] plt.plot(df.ix[::2,1], df.ix[::2,2], '.', ms=1) plt.xlabel(df.columns[1]) plt.ylabel(df.columns[2]) df_aggregated = df.groupby('top_level_structure').mean().drop('structure_id', 1) nx = ny = df_aggregated.shape[1] fig, axes = plt.subplots(nx, ny, sharex='col', sharey='row', figsize=(8,6)) for i in range(nx): for j in range(ny): if j != i: axes[j, i].plot(df_aggregated.ix[:, i], df_aggregated.ix[:, j], '.', ms=3) else: axes[j,i].set_axis_off() axes[j,i].text(0.5, 0.5, genes[df_aggregated.columns[i]], transform=axes[j,i].transAxes, va='center', ha='center') [ax.set_yticks(ax.get_ylim()) for ax in axes[:,0]] [ax.set_xticks(ax.get_xlim()) for ax in axes[0,:]] df_pure = df.ix[:,:-2] data = (np.array(df_pure)) zscore = (data - data.mean(0))/data.std(0) plt.figure(figsize=(12,1)) plt.imshow(zscore.T, aspect='auto', interpolation='nearest', cmap='seismic') plt.yticks(np.arange(len(df_pure.columns)), df_pure.columns) from skimage import data plt.imshow(data.lena())