This Notebook reproduces results of the manuscript http://dx.doi.org/10.1016/j.neuroimage.2014.04.041. It also allows to inspect the data in more detail.
import sys
import os
from collections import defaultdict
from scipy.spatial.distance import pdist,squareform
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib
%pylab inline
#the datamaker and ImageAnalysisComponents utilities reside two levels above this notebook.
utility_path = os.path.realpath(os.path.join(os.path.pardir, os.path.pardir))
sys.path.append(utility_path)
from regnmf import datamaker
from regnmf import ImageAnalysisComponents as ia
from regnmf.progress_bar import ProgressBar
sys.modules['datamaker'] = datamaker
import pickle
Define the location of the data. Data can be downloaded from zenodo.org at http://dx.doi.org/10.5281/zenodo.12352 .
datapath = 'Data' #on my machine it's '../../../../Data'
For biological data it is generally difficult to asses factorization performance. To obtain nevertheless a detailed picture on the terms of performance for regularized NMF and sICA we constructed a parameterized surrogate dataset. With this dataset we could address two important questions: First, what is the influence of method inherent parameters and their optimal choice? And second, what is the application domain of both methods with respect to strength of pixel noise and size of measured stimuli set?
load_paperdata = True # load the surrogate dataset that was used in the manuscript
param = {'act_time': [0.01, 0.1, 0.3, 0.8, 1.0, 1.0],
'cov': 0.3,
'latents': 40,
'mean': 0.2,
'no_samples': 50,
'noisevar': 0.2,
'shape': (50, 50),
'width':0.1,
'var': 0.08}
if load_paperdata :
with open(os.path.join(datapath,'surrogate','data.pik'),'rb') as f:
# python 2 -> 3 pickle fix
# https://stackoverflow.com/questions/11305790/pickle-incompatibility-of-numpy-arrays-between-python-2-and-3
data = pickle.load(f, encoding='latin1')
else:
data = datamaker.Dataset(param)
os.mkdir(datapath)
# fontsizes
global_fs = 10
fs_num = 10
matplotlib.rcParams['axes.linewidth']=0.4
matplotlib.rcParams['axes.edgecolor']='k'
matplotlib.rcParams['xtick.major.size']=1
matplotlib.rcParams['xtick.minor.size']=0.5
matplotlib.rcParams['xtick.major.width']=0.3
matplotlib.rcParams['xtick.minor.width']=0.3
matplotlib.rcParams['ytick.major.size']=1
matplotlib.rcParams['ytick.minor.size']=0.5
matplotlib.rcParams['ytick.major.width']=0.3
matplotlib.rcParams['ytick.minor.width']=0.3
matplotlib.rcParams['xtick.labelsize']=global_fs
matplotlib.rcParams['ytick.labelsize']=global_fs
matplotlib.rcParams['text.usetex']=False
matplotlib.rcParams['mathtext.default']='regular'
# parameter for visualizing sources
cmap_tmp = matplotlib.colors.LinearSegmentedColormap.from_list('tmp', ['w',np.array([147,112,219])/256.,'m','b'])
rgb_list = [cmap_tmp(i) for i in np.linspace(0,1,100)]+[plt.cm.jet(i) for i in np.linspace(0,1,101)]
cmap_bases = matplotlib.colors.LinearSegmentedColormap.from_list('bases', rgb_list)
param_imshow = {'cmap':cmap_bases, 'interpolation':'none', 'vmax':1, 'vmin':-1}
# axes layout for time courses
def mydecor(ax):
ax.set_ylabel('activation', fontsize=global_fs, labelpad=-0.2)
ax.yaxis.set_tick_params(labelsize=global_fs, labelright=True, labelleft=False)
ax.yaxis.set_label_position('right')
ax.xaxis.set_tick_params(labelsize=global_fs, pad=2)
ax.set_xticks(range(60, 6*50, 60))
# timecourses plotter
splitnum = param['no_samples']
xparts = np.hsplit(np.arange(splitnum*len(param['act_time'])), splitnum)
def timeplot(ax, data):
ax.plot(data, 'o', ms=1.5, mfc='k', mec='none')
for x in xparts:
ax.plot(x, data[x], '--', dashes=(2,2), linewidth=0.4, color='k')
#ax.set_ylabel('activation', fontsize=global_fs, labelpad=-0.2)
ax.yaxis.set_tick_params(labelsize=global_fs, labelright=True, labelleft=False)
ax.yaxis.set_label_position('right')
ax.xaxis.set_tick_params(labelsize=global_fs, pad=2)
ax.set_xticks(range(60, 6*50, 60))
ax.set_yticks([0,1])
def no_ticks(ax):
ax.set_xticks([])
ax.set_yticks([])
def little_spines(ax):
ax.spines['top'].set_color('none')
ax.spines['right'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
def no_spines(ax):
ax.spines['top'].set_color('none')
ax.spines['right'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
no_ticks(ax)
To obtain useful conclusions, we've constructed sources resembling the main characteristics of the biological case: Glomeruli are arranged side by side with slightly overlapping spatial signal distribution (Fig. 1a) The response spectra of surrogate glomeruli are narrowly tuned and correlated (Fig. 1b) and each glomeruli rises to peak activation with a model time-course (Fig. 1c). The data to enter factorization is the concurrent observation of forty glomeruli in response to $n_{stim}$ stimuli (e.g. odours) corrupted by additional pixel noise $\sigma_{\text{noise}}$.
fig_dim = np.array((3.54, 1.1))*4
fig1 = plt.figure(figsize=fig_dim)
fig_ratio = fig_dim[0]/fig_dim[1]
panel_width = 0.18
panel_bottom = 0.23
# ================================================================================
# Panel (a)
# ================================================================================
ax = fig1.add_axes([0.05, panel_bottom, panel_width, panel_width*fig_ratio])
ax.text(0.08, 1.03, '(a)', transform=ax.transAxes,
fontsize=fs_num, fontweight='bold', va='bottom', ha='right')
# contours
for c in data.spt_sources:
ax.contour(c.reshape((50,50))[::-1], [0.5] ,colors=['k'], label='0.3', linewidths=0.4)
# centers
p = ax.scatter(np.array(data.points)[:param['latents'],1], 49-np.array(data.points)[:param['latents'],0],
marker='x', s=4, label='max', linewidths=0.4)
ax.set_xlim([0.5,49.5])
ax.set_ylim([-0.5,48.5])
ax.set_xticks([])
ax.set_yticks([])
# =================================================================================
# Panel (b)
# =================================================================================
leftpos = 0.365
ax = fig1.add_axes([leftpos, panel_bottom,0.2, panel_width*fig_ratio])
ax.text(-0.3, 1.03, '(b)', transform=ax.transAxes,
fontsize=fs_num, fontweight='bold', va='bottom', ha='right')
# plot marginal
ax.plot(np.arange(1E-2,4,0.001), datamaker.adjusted_gamma(param['mean'], param['var']).pdf(np.arange(1E-2,4,0.001)))
ax.set_yscale('log')
ax.set_xlabel('$a_{peak}$', fontsize=global_fs, labelpad=0.5)
ax.set_xticks([0,1,2,3,4])
ax.set_ylabel('pdf', fontsize=global_fs, labelpad=0.5)
# plot covariance
ax = fig1.add_axes([leftpos+0.1, 0.5, 0.12, 0.12*fig_ratio])
im = ax.imshow(data.cov, interpolation='none', vmin=0, vmax=1, cmap=plt.cm.binary)
ax.set_xticks([])
ax.set_yticks([])
axbar = fig1.add_axes([leftpos+0.11+0.12,0.5,0.012,0.12*fig_ratio])
# plot covariance colorscale
cbar = fig1.colorbar(im, axbar)
cbar.set_ticks([0,1])
cbar.set_label('$\\rho$', size=global_fs, labelpad=-0.5)
# ===================================================================================
# panel (c)
# ===================================================================================
ax = fig1.add_axes([0.77,panel_bottom,0.2,panel_width*fig_ratio])
ax.text(-0.3, 1.03, '(c)', transform=ax.transAxes,
fontsize=fs_num, fontweight='bold', va='bottom', ha='right')
ax.plot(param['act_time'], 'o--', ms=2, mfc='k', linewidth=0.6, color='k', dashes=(2,2))
ax.set_xlabel('sample points', fontsize=global_fs, labelpad=1)
ax.set_ylim((-0.05, 1.05))
ax.set_ylabel('% of $a_{peak}$', fontsize=global_fs, labelpad=-3)
ax.set_yticks([0,0.5,1])
ax.set_yticklabels([0,50,100])
plt.show()
Fig. 1 Surrogate data (a) 40 Gaussian shaped sources were randomly placed on a regular $9\times 9$ grid in a $50\times 50\text{px}$ image. Crosses mark centres of pixel participation and lines indicate half maximum (HM). (b) The peak activations for each stimulus were drawn from a gamma distribution ($\mu=0.2$, $\sigma=0.28$). Inset shows covariance $\rho$ introduced via a Gaussian copula into the sources' activation. (c) Each stimulus activation was extended to a 6 point model time-course.
fig_dim = np.array((3.54, 1.8))*4
fig = plt.figure(figsize=fig_dim, dpi=200)
for i in range(50):
ax = fig.add_subplot(5,10,i+1)
ax.imshow(data.observed[(i+1)*len(param['act_time'])-1].reshape(param['shape']), interpolation='none', vmin=-2.5, vmax=2.5)
ax.set_xlabel('t=%s'%((i+1)*6), size=global_fs, labelpad=0.5)
ax.set_xticks([])
ax.set_yticks([])
plt.show()
Fig. 2 Observation of the combined signal at peak activation for different stimuli. Signal is corrupted with pixel noise ($\sigma_{\text{noise}}=0.2$).
We started our analysis with a dataset roughly mimicking the properties of our IOS data with $n_{stim}=50$ stimulus observations and a noise level of $\sigma_{noise}=0.2$. Figure 3 shows exemplary recovered sources from both regularized NMF ($\alpha_{sm}=2$, $\alpha_{sp}=0.5$) and sICA, illustrating the general characteristics of the methods. Regularized NMF indeed showed the desired properties of a localized, sparse and smooth pixel participation, accurately reproducing the spatial and temporal characteristics of the source. In contrast plain sICA (with no additional processing applied) generates more holistic pixel participations, containing global noise contributions besides the local source contribution. This implies a more noisy reconstruction of the activation courses by sICA, especially for weaker signals.
# general NMF param
anal_param = {'sparse_param': 0.5,
'factors': 80,
'smooth_param': 2,
'init':'convex',
'sparse_fct':'global_sparse',
'verbose':0
}
input_data = ia.TimeSeries(data.observed, shape=param['shape'])
nmf = ia.NNMF(maxcount=50, num_components=anal_param['factors'], **anal_param)
nmf_out = nmf(input_data)
sica = ia.sICA(num_components=anal_param['factors'])
sica_out = sica(input_data)
nmf_match, nmf_cor, _ = data.cor2source(nmf_out)
sica_match, sica_cor, _ = data.cor2source(sica_out)
figsize = np.array((20,40))
fig = plt.figure(figsize=figsize)
gs = gridspec.GridSpec(40,3)
gs.update(left = 0.01, top = 0.96, bottom = 0.01)
for source_ind in range(40):
gs_temp = matplotlib.gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs[source_ind,0], width_ratios=[1,4], wspace=0.01)
ax = fig.add_subplot(gs_temp[0])
im = ax.imshow(data.spt_sources[source_ind].reshape((50,50)), **param_imshow)
ax.set_axis_off()
ax = fig.add_subplot(gs_temp[1])
timeplot(ax, data.activation[:,source_ind])
gs_temp = matplotlib.gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs[source_ind,1], width_ratios=[1,4], wspace=0.01)
ax = fig.add_subplot(gs_temp[0])
im = ax.imshow(nmf_out.base.shaped2D()[nmf_match[source_ind]], **param_imshow)
ax.set_axis_off()
ax = fig.add_subplot(gs_temp[1])
timeplot(ax, nmf_out._series[:,nmf_match[source_ind]])
ax.text(10, ax.get_ylim()[1], r'$r^{tmp} = %.2f$'%nmf_cor[source_ind], size=7, va='top')
gs_temp = matplotlib.gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs[source_ind,2], width_ratios=[1,4], wspace=0.01)
ax = fig.add_subplot(gs_temp[0])
im = ax.imshow(sica_out.base.shaped2D()[sica_match[source_ind]], **param_imshow)
ax.set_axis_off()
ax = fig.add_subplot(gs_temp[1])
timeplot(ax, sica_out._series[:,sica_match[source_ind]])
ax.text(10, ax.get_ylim()[1], r'$r^{tmp} = %.2f$'%sica_cor[source_ind], size=7, va='top')
# colorbar
axbar = fig.add_axes([0.01, 0.992, 0.2, 0.003])
cbar = fig.colorbar(im, axbar, orientation='horizontal')
cbar.set_ticks([-1,0,1])
cbar.set_label('pixel participation', size=global_fs, labelpad=2)
cbar.ax.xaxis.set_tick_params(labelsize=global_fs, pad=2)
fig.text(0.15,0.97,'source')
fig.text(0.45,0.97,'NMF')
fig.text(0.75,0.97,'sICA')
plt.show()