import cPickle as pickle
import pandas
import numpy as np
from sklearn import metrics
import irm
!pwd
/Users/jonas/projects/connectivitymotifs/paper/experiments/mouseretina
cd "/Users/jonas/projects/connectivitymotifs/paper/experiments/mouseretina"
/Users/jonas/projects/connectivitymotifs/paper/experiments/mouseretina
input_d = pickle.load(open("dummy.pickle"))
tgtdf = input_d['tgtdf']
aucsdf = input_d['aucsdf']
a = tgtdf.reset_index()
a[a['filename'] == 'data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data-fixed_20_100-anneal_slow_1000']
filename | chain_i | ami | ami_coarse | ari | ari_coarse | cluster_n | jaccard | jaccard_coarse | n11 | score | vars | scoreidx | model | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
195 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 0 | 0.426804 | 0.232166 | 0.409607 | 0.205555 | 20 | 0.286231 | 0.196213 | 14801 | 273645.824873 | x z y ... | 7 | srm_clist_xsoma |
196 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 1 | 0.426418 | 0.268067 | 0.404078 | 0.246080 | 17 | 0.282641 | 0.226732 | 15582 | 268271.896876 | x z y ... | 13 | srm_clist_xsoma |
197 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 2 | 0.441127 | 0.259618 | 0.407079 | 0.243275 | 19 | 0.284956 | 0.224916 | 15672 | 270263.829635 | x z y ... | 8 | srm_clist_xsoma |
198 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 3 | 0.434899 | 0.260511 | 0.406366 | 0.243142 | 19 | 0.284326 | 0.224194 | 15535 | 269343.285515 | x z y ... | 11 | srm_clist_xsoma |
199 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 4 | 0.443571 | 0.257487 | 0.417198 | 0.215775 | 20 | 0.291918 | 0.200598 | 14722 | 275103.874787 | x z y ... | 4 | srm_clist_xsoma |
200 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 5 | 0.432163 | 0.271608 | 0.402876 | 0.247838 | 17 | 0.281822 | 0.228729 | 15700 | 268053.299178 | x z y ... | 14 | srm_clist_xsoma |
201 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 6 | 0.439053 | 0.244361 | 0.425360 | 0.214022 | 21 | 0.298356 | 0.199531 | 14973 | 276032.968149 | x z y ... | 1 | srm_clist_xsoma |
202 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 7 | 0.433552 | 0.248396 | 0.420741 | 0.216462 | 19 | 0.294761 | 0.201443 | 14904 | 275125.178026 | x z y ... | 3 | srm_clist_xsoma |
203 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 8 | 0.432953 | 0.263155 | 0.404065 | 0.245741 | 19 | 0.282598 | 0.226245 | 15533 | 269285.843420 | x z y ... | 12 | srm_clist_xsoma |
204 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 9 | 0.440152 | 0.249292 | 0.420875 | 0.214630 | 20 | 0.294836 | 0.200090 | 14868 | 276052.419145 | x z y ... | 0 | srm_clist_xsoma |
205 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 10 | 0.437492 | 0.250682 | 0.419373 | 0.216143 | 19 | 0.293731 | 0.201601 | 14923 | 275070.487525 | x z y ... | 5 | srm_clist_xsoma |
206 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 11 | 0.434332 | 0.251475 | 0.402234 | 0.240069 | 19 | 0.281321 | 0.223800 | 15669 | 269579.728646 | x z y ... | 9 | srm_clist_xsoma |
207 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 12 | 0.431826 | 0.263013 | 0.405154 | 0.245028 | 18 | 0.283445 | 0.225841 | 15577 | 269349.256254 | x z y ... | 10 | srm_clist_xsoma |
208 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 13 | 0.428078 | 0.254506 | 0.401204 | 0.241857 | 19 | 0.280530 | 0.224944 | 15639 | 268040.635772 | x z y ... | 15 | srm_clist_xsoma |
209 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 14 | 0.421101 | 0.245758 | 0.388756 | 0.235512 | 18 | 0.271367 | 0.223878 | 15726 | 266375.513458 | x z y ... | 17 | srm_clist_xsoma |
210 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 15 | 0.440828 | 0.240761 | 0.418502 | 0.212640 | 20 | 0.293126 | 0.200067 | 15001 | 274330.370798 | x z y ... | 6 | srm_clist_xsoma |
211 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 16 | 0.428239 | 0.268882 | 0.390985 | 0.251393 | 18 | 0.273095 | 0.234146 | 15860 | 264374.387337 | x z y ... | 19 | srm_clist_xsoma |
212 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 17 | 0.429703 | 0.259224 | 0.399407 | 0.242628 | 18 | 0.279115 | 0.225139 | 15528 | 267910.540292 | x z y ... | 16 | srm_clist_xsoma |
213 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 18 | 0.434876 | 0.240284 | 0.425648 | 0.210913 | 19 | 0.298425 | 0.196444 | 14775 | 275634.871257 | x z y ... | 2 | srm_clist_xsoma |
214 | data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data... | 19 | 0.417871 | 0.254619 | 0.381751 | 0.239626 | 17 | 0.266132 | 0.226883 | 15561 | 265211.530660 | x z y ... | 18 | srm_clist_xsoma |
20 rows × 14 columns
# compute the variance and put it in a reasonable format
def vars_xyz_to_std(v):
depth = np.mean(np.sqrt(v['x']))
layer = np.mean(np.sqrt(v['y'] + v['z']))
return pandas.Series({'depth_std': depth, 'layer_std' : layer})
vars = tgtdf['vars'].apply(lambda x : vars_xyz_to_std(x))
tgtdf['depth_std'] = vars['depth_std']
tgtdf['layer_std'] = vars['layer_std']
# perform the join
c = aucsdf.join(tgtdf)
d = c
#d = d[d['filename'].str.contains("retina.1")]
d = c[['retina.1' in s for s, y in c.index]]
d.head()
auc | distvar | srm | ami | ami_coarse | ari | ari_coarse | cluster_n | jaccard | jaccard_coarse | n11 | score | vars | scoreidx | model | depth_std | layer_std | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
filename | chain_i | |||||||||||||||||
data/retina.1.bb.data-fixed_20_100-anneal_slow_1000 | 0 | 0.798857 | False | 0.183250 | 0.193096 | 0.116296 | 0.125572 | 17 | 0.089089 | 0.128823 | 4272 | -234158.760661 | x z y ... | 19 | None | 13.252070 | 22.694408 | |
1 | 0.771818 | False | 0.196149 | 0.190073 | 0.126009 | 0.112358 | 18 | 0.093478 | 0.116116 | 4178 | -231736.163411 | x z y ... | 3 | None | 13.376183 | 21.511076 | ||
2 | 0.795967 | False | 0.187646 | 0.187982 | 0.123853 | 0.116700 | 18 | 0.092125 | 0.117928 | 4091 | -231498.983188 | x z y ... | 2 | None | 12.797314 | 21.494607 | ||
3 | 0.723369 | False | 0.187560 | 0.199280 | 0.108872 | 0.131282 | 18 | 0.084488 | 0.129794 | 3957 | -233347.867884 | x z y ... | 13 | None | 12.895936 | 22.821845 | ||
4 | 0.777250 | False | 0.184247 | 0.190873 | 0.121231 | 0.121629 | 17 | 0.091161 | 0.123114 | 4178 | -232763.976439 | x z y ... | 7 | None | 12.956295 | 21.925124 |
5 rows × 17 columns
e = aucsdf.reset_index()
f = e[e['filename'] == 'data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data-fixed_20_100-anneal_slow_1000']
print len(f)
20
def g_auc(g):
g = g.sort('pred_thold', ascending=False)
# under estimate by using rectangles and anchoring left corner
widths = np.diff(g['fp'])
areas = g['tp'][:-1] * widths
auc = np.sum(areas)
return pandas.Series({'auc' : auc})
ground_truth_aucs = {}
for model in ['ld', 'bb']:
tpl = pickle.load(open('truth.1.%s.predlinks.df.pickle' % model, 'r'))['df']
tpl['tp'] = tpl['t_t'] / tpl['t_tot']
tpl['fp'] = tpl['f_t'] / tpl['f_tot']
truth_auc = g_auc(tpl)['auc']
ground_truth_aucs[model] = truth_auc
df_vb = pickle.load(open("../others/vblpcm.results.pickle", 'r'))
df_vb_data = pickle.load(open("../others/data/data.pickle", 'r'))
vb_cells_with_data = df_vb_data['cells'][df_vb_data['have_edges_i']]
df_vb['ari'] = df_vb['assignments'].map(lambda x: metrics.adjusted_rand_score(irm.util.canonicalize_assignment(vb_cells_with_data['type_id']),
irm.util.canonicalize_assignment(x)))
df_vb_conv = df_vb[df_vb['conv']]
def f(x):
#print x.index.values
c = vb_cells_with_data.copy()
c['cluster'] = np.array(x['assignments'])
a = c.groupby("cluster").var()
return a
v = []
for row_i, row in df_vb.iterrows():
v.append({'vars': f(row)})
vb_vars_df = pandas.DataFrame(v)
df_vb['vars'] = vb_vars_df
df_vb.head()
# compute the variance and put it in a reasonable format
vars = df_vb['vars'].apply(vars_xyz_to_std)
df_vb['depth_std'] = vars['depth_std']
df_vb['layer_std'] = vars['layer_std']
d = d[d['distvar'] == 'xyz']
d_srm = d[d['srm']]
d_ld = d[d['srm'] == False]
d_bb = c[['bb' in s for s, y in c.index]]
d_clist = c[['1.clist' in s for s, y in c.index]]
f = pylab.figure(figsize=(10, 10))
ax = f.add_subplot(1, 1, 1)
ax.scatter(d_srm['ari'], d_srm['auc'], edgecolor='none', c='r', label="conn, dist, syn, depth")
ax.scatter(d_ld['ari'], d_ld['auc'], edgecolor='none', c='b', label="conn, dist")
ax.scatter(df_vb['ari'], df_vb['auc'], edgecolor='none', c='c', label="LPCM", s=30)
ax.scatter(d_bb['ari'], d_bb['auc'], edgecolor='none', c='g', label="conn only")
ax.scatter(d_clist['ari'], d_clist['auc'], edgecolor='none', c='y', s=100, label="soma depth only")
ax.set_xlim(-0.01, 1.0)
ax.set_ylim(0.5, 1.0)
ax.grid()
ax.set_xlabel("Adjusted Rand Index (ARI)")
ax.set_ylabel("Area Under ROC Curve (AUC)")
for model_name, model, ls in [("human, with dist", 'ld', '-.'), ("human, no dist", 'bb', '--')]:
ax.axhline(ground_truth_aucs[model], c='k', label=model_name, linestyle = ls)
ax.legend(loc="lower right")
f.savefig("/tmp/comparison.pdf")
# and then, in my nightmares, a bar graph for konrad
fig = pylab.figure(figsize=(6, 8))
ax1 = fig.add_subplot(2, 1, 1)
keys = ['human \n dist', 'human \n no dist', 'conn only', 'conn, dist', 'conn, dist\nsyn, depth', 'lpcm']
vals = [ground_truth_aucs['ld'], ground_truth_aucs['bb'], np.mean(d_simplebb['auc']), np.mean(d_ld['auc']), np.mean(d_srm['auc']),
np.mean(df_vb['auc'])]
vals_error = [0, 0, np.std(d_simplebb['auc']), np.std(d_ld['auc']), np.std(d_srm['auc']), np.std(df_vb['auc'])]
rects1 = ax1.bar(np.arange(len(keys)) + 0.5, vals, 0.8, color='w', yerr=vals_error)
ax1.set_ylabel('AUC')
ax1.set_ylim(0.0, 1.2)
ax1.axhline(1.0, linestyle='--')
ax1.set_xticks([])
ax2 = fig.add_subplot(2, 1, 2)
ari_vals = [1.0, 1.0, np.mean(d_bb['ari']), np.mean(d_ld['ari']), np.mean(d_srm['ari']), np.mean(df_vb['ari'])]
ari_vals_error = [0, 0, np.std(d_bb['ari']), np.std(d_ld['ari']), np.std(d_srm['ari']), np.std(df_vb['ari'])]
rects1 = ax2.bar(np.arange(len(keys)) + 0.5, ari_vals, 0.8, color='w', yerr=ari_vals_error)
ax2.set_xticks(np.arange(len(keys)) + 0.5+0.4)
ax2.set_xticklabels(keys, rotation=90)
ax2.set_ylim(-0.1, 1.2)
ax2.axhline(1.0, linestyle='--')
ax2.set_ylabel("ARI")
fig.savefig('/tmp/bar.pdf')
# plot type 2 to make k-rad happy
# and then, in my nightmares, a bar graph for konrad
fig = pylab.figure(figsize=(12, 6))
ax1 = fig.add_subplot(1, 1, 1)
keys = ['human \n dist', 'human \n no dist', 'conn only', 'conn, dist', 'conn, dist\nsyn, depth', 'lpcm']
vals = [ground_truth_aucs['ld'], ground_truth_aucs['bb'], np.mean(d_simplebb['auc']), np.mean(d_ld['auc']), np.mean(d_srm['auc']),
np.mean(df_vb['auc'])]
vals_error = [0, 0, np.std(d_simplebb['auc']), np.std(d_ld['auc']), np.std(d_srm['auc']), np.std(df_vb['auc'])]
bar_space = 2.4
bar_width = 0.8
bpos = np.arange(len(keys))*bar_space + 0.5
bpos[0] += bar_width*0.5
bpos[1] += bar_width*0.6
rects1 = ax1.bar(bpos, vals, bar_width, color='r', yerr=vals_error)
ari_vals = [ np.mean(d_bb['ari']), np.mean(d_ld['ari']), np.mean(d_srm['ari']), np.mean(df_vb['ari'])]
ari_vals_error = [np.std(d_bb['ari']), np.std(d_ld['ari']), np.std(d_srm['ari']), np.std(df_vb['ari'])]
ax2 = ax1.twinx()
bpos = np.arange(len(keys))*bar_space + bar_width + 0.1 + 0.5
rects1 = ax2.bar(bpos[2:],
ari_vals, bar_width, color='b', yerr=ari_vals_error)
ax1.set_ylim(-0.02, 1.1)
ax2.set_ylim(-0.02, 1.1)
ax1.axhline(1.0, linestyle='--')
ax1.set_xticks(np.arange(len(keys)) * bar_space + bar_width + 0.55)
ax1.set_xticklabels(keys, rotation=90, fontsize=20)
ax1.set_xlim(0, 15)
ax2.set_ylabel('cluster accuracy (ARI)', color='b', fontsize=30)
for tl in ax2.get_yticklabels():
tl.set_color('b')
ax1.set_ylabel('link accuracy (AUC)', color='R', fontsize=30)
for tl in ax1.get_yticklabels():
tl.set_color('r')
fig.tight_layout()
fig.savefig("allmethods.ari_auc.pdf")
# compute the best chain :
a = d_srm.reset_index()
b = a.groupby(['filename', 'chain_i']).mean()
c = b.sort("ari", ascending=False)
c.head()
auc | srm | ami | ami_coarse | ari | ari_coarse | cluster_n | jaccard | jaccard_coarse | n11 | score | scoreidx | depth_std | layer_std | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
filename | chain_i | ||||||||||||||
data/retina.1.srm_clist_xsoma.3.1.xyz.1.2.data-fixed_20_100-anneal_slow_1000 | 15 | 0.896901 | True | 0.456304 | 0.240620 | 0.462856 | 0.162004 | 19 | 0.327185 | 0.157334 | 14119 | 275024.529538 | 0 | 7.447901 | 34.274542 |
data/retina.1.srm_clist_xsoma.3.0.xyz.0.2.data-fixed_20_100-anneal_slow_1000 | 18 | 0.896758 | True | 0.434876 | 0.240284 | 0.425648 | 0.210913 | 19 | 0.298425 | 0.196444 | 14775 | 275634.871257 | 2 | 9.825360 | 33.784155 |
6 | 0.897575 | True | 0.439053 | 0.244361 | 0.425360 | 0.214022 | 21 | 0.298356 | 0.199531 | 14973 | 276032.968149 | 1 | 8.369991 | 33.475891 | |
data/retina.1.srm_clist_xsoma.3.1.xyz.0.2.data-fixed_20_100-anneal_slow_1000 | 16 | 0.897493 | True | 0.443885 | 0.251158 | 0.423840 | 0.216224 | 22 | 0.297103 | 0.200507 | 14862 | 276116.450670 | 0 | 9.134416 | 34.054052 |
5 | 0.898561 | True | 0.445864 | 0.262367 | 0.423064 | 0.220007 | 19 | 0.296643 | 0.203975 | 15038 | 276004.809569 | 1 | 8.390067 | 34.056878 |
5 rows × 14 columns
truth_space_var = df_vb_data['cells'].groupby('type_id').var()
truth_space_depth_std = np.sqrt(truth_space_var['x'])
truth_space_layer_std = np.sqrt(truth_space_var['y'] + truth_space_var['z'])
truth_space_df = pandas.DataFrame({'layer_std' : truth_space_layer_std, 'depth_std' : truth_space_depth_std})
print truth_space_df.mean()
depth_std 1.981348 layer_std 34.349065 dtype: float64
data_sets = [truth_space_df, d_bb, d_ld, d_srm, df_vb]
dists = np.array([(np.mean(x['layer_std']), np.std(x['layer_std'])) for x in data_sets])
fig = pylab.figure(figsize=(8, 6))
ax1 = fig.add_subplot(1, 1, 1)
keys = ['human', 'conn only', 'conn, dist', 'conn, dist\nsyn, depth', 'lpcm', ]
vals = dists[:, 0]
vals_error = dists[:, 1]
bar_space = 2.4
bar_width = 0.8
rects1 = ax1.bar(np.arange(len(keys))*bar_space + 0.5, vals, bar_width, color='r', yerr=vals_error)
ax2 = ax1.twinx()
dists = np.array([(np.mean(x['depth_std']), np.std(x['depth_std'])) for x in data_sets])
rects1 = ax2.bar(np.arange(len(keys))*bar_space + bar_width + 0.1 + 0.5, dists[:, 0], bar_width, color='b', yerr=dists[:, 1])
ax1.set_xticks(np.arange(len(keys)) * bar_space + bar_width + 0.55)
ax1.set_xticklabels(keys, rotation=90, fontsize=20)
#ax1.set_xlim(0, 10)
ax2.set_ylabel('layer type extent', color='b', fontsize=30)
for tl in ax2.get_yticklabels():
tl.set_color('b')
ax1.set_ylabel('depth type extent', color='R', fontsize=30)
for tl in ax1.get_yticklabels():
tl.set_color('r')
fig.tight_layout()
fig.savefig("allmethods.spatial.pdf")