import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
matplotlib.rcParams['savefig.dpi'] = 300
df = pd.read_csv('../data/allen_brain_atlas.csv')
data = np.array(df.ix[:, :-2])
data = data - data.mean(0)
corr = np.dot(data.T, data)/data.shape[0]
evals, evecs = np.linalg.eig(corr)
pcs = np.dot(data, evecs[:, :])
fig, axes = plt.subplots(5,5, sharex='col', sharey='row', figsize=(10,10))
for i in range(5):
for j in range(5):
ax = axes[i,j]
if i==j:
ax.set_axis_off()
else:
ax.plot(pcs[::10,i], pcs[::10,j], '.', ms=5)
from sklearn import cluster
W = evecs.dot(np.diag(1./np.sqrt(evals))).dot(evecs.T)
whitened = np.dot(W, data.T)
C_W = np.dot(whitened, whitened.T)/whitened.shape[1]
plt.imshow(C_W, interpolation='nearest')
<matplotlib.image.AxesImage at 0x10b41a8d0>
centers, labels, _ = cluster.k_means(whitened.T, 4)
plt.scatter(pcs[:,0], pcs[:,1], c=labels, cmap='hsv')
plt.xlabel("PC1")
plt.ylabel("PC2")
ax = plt.gca()
plt.text(0.95, 0.05, 'Data source: Allen Brain Institute',
transform=ax.transAxes, va='bottom', ha='right')
<matplotlib.text.Text at 0x10c81b490>
from matplotlib import colors, cm
def get_colors(labels, cmap):
norm_labels = colors.Normalize()(labels)
cmap_func = cm.get_cmap(cmap)
class_colors = cmap_func(norm_labels)
return class_colors
class_colors = get_colors(np.unique(labels), 'hsv')
from lxml import etree
with file('../data/brain_atlas.svg') as fid:
svg = etree.fromstring(fid.read())
with file('../data/ontology.xml') as fid:
ontology = etree.fromstring(fid.read())
def color_svg(svg, ontology, data_ids, labels, colors):
show = []
unique_data_ids = np.unique(data_ids)
paths = svg.xpath('//*[@structure_id]')
ulabels = np.unique(labels)
class_members = [data_ids[labels==l] for l in ulabels]
n_members = np.array([len(c) for c in class_members])
for p in paths:
sid = p.get('structure_id')
el = ontology.xpath('//structure[id={}]'.format(sid))[0]
descendants = []
for i in unique_data_ids:
if i==sid or el.xpath('descendant::structure[id={}]'.format(i)):
descendants.append(i)
if descendants:
descendants = np.array(descendants)
n_probes = np.array([np.sum(np.in1d(cl, descendants))
for cl in class_members])
frac = n_probes*1./n_members
dominating = np.argmax(frac)
r, g, b = (np.array(colors[dominating])*255).astype(int)[:3]
p.set('style', 'fill: rgb({},{},{}); stroke: none'.format(r,g,b))
else:
p.set('style', 'fill: rgb(255,255,255); stroke: none')
return svg
from IPython.display import SVG
new_svg = color_svg(svg, ontology, df.structure_id, labels, class_colors)
SVG(etree.tostring(new_svg))