In this IPython notebook, we replicate the simulations from the 'in limbo' paper in an easily readable format. The code is adapted from the original code used for the paper, available on GitHub.
# Import rpy2 (R bridge), plus numpy and matplotlib for the analyses below
import numpy as np
import matplotlib.pyplot as plt
from rpy2.robjects import r
from rpy2.robjects import FloatVector, IntVector, ListVector
# Import neuRosim
r['library']('neuRosim')
Loading required package: deSolve This is neuRosim 0.2-10 neuRosim is BETA software! Please report any bugs.
<StrVector - Python:0x4e32f80 / R:0x4369548> [str, str, str, ..., str, str, str]
def simulate_data(onsets, # Onsets per condition, per region
nscans=100, # Number of scans
TR=2.0, # Repetition time (TR)
region_centers = [[20, 20], [60, 60]], # Centers of regions
region_radiuses = [15, 10], # Size of regions
effect_sizes = [[2.0], [.75]], # Effect sizes per region, per condition:
# [[r1c1, r1c2], [r2c1, r2c2]]
durations = [[2.0], [2.0]], # Durations per region, per condition
SNR = 15, # SNR
FWHM = 5, # Amount of smoothing
n_regions = 2, # Number of regions
fading = 0.1, # Amount of fading from regions to outside
size = [80, 80], # Total size of images
):
# Get total duration of scan
total_time = nscans * TR
# Convert input to R objects
onsets = [[FloatVector(ol_cond) for ol_cond in ol_region] for ol_region in onsets]
region_centers = [FloatVector(rc) for rc in region_centers]
size = IntVector(size)
# Set up design (neuRosim)
design = r['simprepTemporal'](regions=n_regions,
onsets=onsets,
durations=durations,
TR=TR,
totaltime=total_time,
effectsize=effect_sizes)
# Set up the spatial map (neuRosim)
spatial = r['simprepSpatial'](regions=n_regions,
coord=region_centers,
radius=region_radiuses,
form="sphere",
fading=fading)
# Get the simulated data (neuRosim)
sim_data = r['simVOLfmri'](design=design,
image=spatial,
SNR=SNR,
noise="mixture",
dim=size,
w=FloatVector([0.05,0.1,0.01,0.09,0.05,0.7]),
FWHM=FWHM,
spat='gammaRF')
return np.array(sim_data)
Now we can get some simulated data. For simplicity we have only one task, which we will contrast against the implicit baseline.
Let's make two regions: a 'blue' region with effect size 3.0 and a 'green' one with effect size 1.5.
We do 750 scans with TR = 2, corresponding to a pretty long scan of 25 minutes.
There's a trial every 15 seconds.
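A minimal arithmetic check of these numbers (the variables simply mirror the next cell):
n_scans = 750
TR = 2.0
print(n_scans * TR / 60.0)                   # total scan length: 25.0 minutes
print(len(np.arange(10, n_scans * TR, 15)))  # number of trials: 100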
n_scans = 750
TR = 2.0
onsets = np.arange(10, n_scans*TR, 15)
data = simulate_data([[onsets]] *2, # For both regions, for one condition...
effect_sizes=[[3.0], [1.5]],
nscans=n_scans,
FWHM=5,
fading=0.01)
Now plot it.
import seaborn as sns # plotting library
plt.figure(figsize=(16,6))
plt.subplot(121)
plt.grid(False)
plt.imshow(data.mean(-1).T, cmap=plt.cm.hot)
regions = [[20, 20], [40,40], [60, 60]]
current_palette = sns.color_palette()
effect_sizes = [3., 0, 1.5]
for region, color in zip(regions, current_palette):
plt.scatter(region[0], region[1], c=color, s=150)
plt.title('Mean bold activity')
plt.subplot(122)
for region, color, effect_size in zip(regions, current_palette, effect_sizes):
# Only first 100 seconds
plt.title('Time course in three voxels')
plt.plot(data[region[0],region[1],:50], c=color)
plt.xlabel('Time in TRs')
plt.axhline(effect_size, c=color, ls='--')
Now we have to set up a design matrix and cut it into pieces. For this we use nipy.
import nipy.modalities.fmri.design_matrix as dm
from nipy.modalities.fmri.experimental_paradigm import EventRelatedParadigm
# Set up paradigm
paradigm = EventRelatedParadigm(['task']*len(onsets), onsets)
# Get the scanning times of every scan
TR = 2.0
frametimes = np.arange(0, data.shape[-1] * TR, TR)
X, names = dm.dmtx_light(frametimes, paradigm, drift_model='polynomial',
hfcut=128, hrf_model='canonical')
Now we have a design matrix X with $n = 3$ columns (regressors) and $m = 750$ rows (scans). The first regressor is the expected task-related activity, the second is a linear drift term to correct for scanner drift, and the third is a constant.
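To confirm this, we can inspect the shape of X and the regressor names (a quick check; the exact drift-regressor names can differ slightly between nipy versions):
print(X.shape)   # (750, 3): one row per scan, one column per regressor
print(names)     # something like ['task', 'drift_1', 'constant']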
for label, regressor in zip(names, X.T):
plt.plot(regressor[:100], label=label)
plt.legend(loc='best', fontsize=14)
<matplotlib.legend.Legend at 0x20b34d10>
Now we cut the design matrix and data in $q = 25$ pieces of 30 scans.
q = 25
cuts = np.linspace(0, data.shape[-1], q+1).astype(int)  # block boundaries as integer indices
data_cut = np.array([data[:, :, cuts[i]:cuts[i+1]] for i in np.arange(q)])
X_cut = np.array([X[cuts[i]:cuts[i+1], :] for i in np.arange(q)])
Now we have $Y_{ik}$ for every voxel $i$ and replication $k$.
Now we apply equation 6 from the paper:
$\hat{\beta}_{i, \mbox{sandwich}} = \frac{1}{q} \sum_{k=1}^{q} (X_k'X_k)^{-1}X_k'Y_{ik}$
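The vectorized einsum in the cell below can be hard to read; for a single voxel, equation 6 is simply the average of the per-block OLS estimates. A slower but equivalent sketch for an example voxel at (20, 20), using X_cut, data_cut, and q from above:
voxel = (20, 20)
beta_voxel = np.zeros(X.shape[1])
for x, block in zip(X_cut, data_cut):
    y_k = block[voxel]  # this voxel's time course in block k
    beta_voxel += np.linalg.pinv(x.T.dot(x)).dot(x.T).dot(y_k)
beta_voxel /= q  # should match beta[:, 20, 20] below (up to floating-point error)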
data.shape
(80, 80, 750)
beta = np.zeros((X.shape[1],)+ data.shape[:-1]) # (num_regressors, size_x, size_y)
predicted = np.zeros_like(data_cut)
residuals = np.zeros_like(data_cut)
for i, x in enumerate(X_cut):
# einsum (or tensordot) is necessary to indicate which axes need to be 'summed' to get beta
# beta += np.tensordot(np.linalg.pinv(x.T.dot(x)).dot(x.T), Y_.T, (1, 0))
beta += np.einsum('ij,klj', np.linalg.pinv(x.T.dot(x)).dot(x.T), data_cut[i])
beta /= q
for i, x in enumerate(X_cut):
# Now we can get predicted timecourses
predicted[i] = np.tensordot(beta, x, (0, 1))
# ... and residuals
residuals[i] = data_cut[i] - predicted[i]
_ = plt.plot(data_cut[0][20, 20], label='data')
_ = plt.plot(predicted[0][20, 20], label='predicted')
plt.legend()
<matplotlib.legend.Legend at 0x139d7890>
When we plot the residuals of the individual chunks, we can observe autocorrelation
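As a rough numerical check, the lag-1 autocorrelation of one voxel's residuals can be computed directly (a sketch; the exact value varies between simulation runs):
r_block = residuals[0, 20, 20, :]
print(np.corrcoef(r_block[:-1], r_block[1:])[0, 1])  # expected to be positive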
plt.figure(figsize=(18,6))
plt.subplot(141)
_ = plt.plot(residuals[0, 20, 20, :].T, c='k')
plt.subplot(142)
plt.acorr(residuals[0, 20, 20, :])
plt.subplot(143)
_ = plt.plot(residuals[1, 20, 20, :].T, c='k')
plt.subplot(144)
plt.acorr(residuals[1, 20, 20, :])
Now we will estimate the covariance structure of these residuals (to use as a correction for the autocorrelation):
$W_i = \frac{1}{q - 1} \sum_{k=1}^{q} \hat{r}_{ik}\hat{r}'_{ik}$
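For a single voxel, this is just the average outer product of that voxel's residual time course with itself across blocks; a sketch for the same example voxel at (20, 20):
voxel = (20, 20)
W_voxel = np.zeros((residuals.shape[-1],) * 2)  # (30, 30)
for residual in residuals:
    r_k = residual[voxel]  # residual time course in block k
    W_voxel += np.outer(r_k, r_k)
W_voxel /= (q - 1)  # should match W[:, :, 20, 20] below (up to floating-point error)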
W = np.zeros((residuals.shape[-1],) * 2 + data.shape[:-1]) # (length_block, length_block, width, height)
for residual in residuals:
W += np.einsum('...i, ...j->ij...', residual, residual) # do outer product over last two dimensions, broadcast the rest
W /= (q - 1)
(W < 0).sum()
263482
Having $W$, we can calculate the variance estimate using:
$\hat{V}_{i, \mbox{sandwich}} = \frac{1}{q} \sum_{k=1}^{q} (X_k'X_k)^{-1}X_k' W_i X_k(X_k'X_k)^{-1}$
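The loop in the next cell applies this to all voxels at once with tensordot; for a single voxel it reduces to plain matrix products. A sketch that takes the voxel's W from the array computed above:
W_voxel = W[:, :, 20, 20]  # (30, 30) residual covariance for this voxel
V_voxel = np.zeros((X.shape[1],) * 2)  # (3, 3)
for x in X_cut:
    bread = np.linalg.pinv(x.T.dot(x)).dot(x.T)  # (X_k'X_k)^{-1} X_k'
    V_voxel += bread.dot(W_voxel).dot(bread.T)
V_voxel /= q  # should match V[:, :, 20, 20] below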
V = np.zeros((X.shape[1],) * 2 + data.shape[:-1]) # (num_regressor, num_regressor, width, height)
for i, x in enumerate(X_cut):
sandwich1 = np.linalg.pinv(x.T.dot(x)).dot(x.T)
sandwich2 = x.dot(np.linalg.pinv(x.T.dot(x)))
# Apply first part sandwich
print sandwich1.shape, W.shape
v = np.tensordot(sandwich1, W, axes=((1), (0)))
print v.shape
# Apply second part
print sandwich2.shape, v.shape
v = np.tensordot(v, sandwich2, (1, 0))
# Roll axis to get correct shape
v = np.rollaxis(v, -1, 1)
V += v
V /= q
(3, 30) (30, 30, 80, 80)
(3, 30, 80, 80)
(30, 3) (3, 30, 80, 80)
(... the same shapes are printed for each of the 25 blocks)
residual.shape
(80, 80, 30)
(V[2, 2, :, :].ravel() < 0 ).sum()
0
W.shape, residuals.shape
((30, 30, 80, 80), (25, 80, 80, 30))
See what the parameter estimate $\beta$ and variance $V$ of the first (task) regressor look like.
plt.figure(figsize=(16,6))
plt.subplot(121)
plt.title('beta 1 values')
plt.imshow(beta[0].T, cmap=plt.cm.hot)
plt.grid(False)
plt.colorbar()
plt.subplot(122)
plt.title('Variance estimates beta 1')
plt.imshow(V[0, 0].T, cmap=plt.cm.hot)
plt.grid(False)
plt.colorbar()
<matplotlib.colorbar.Colorbar instance at 0x2284a0e0>
Now we can calculate t-values and threshold at $\alpha = 0.01$
import scipy as sp
from scipy import stats
dof = q - 1
t = beta[0] / (np.sqrt(V[0,0]))
threshold = sp.stats.t(dof).ppf(.99)
plt.figure(figsize=(14,6))
plt.subplot(121)
plt.title('t-map')
plt.imshow(t.T, cmap=plt.cm.hot, interpolation='nearest')
plt.grid(False)
plt.subplot(122)
plt.title('Thresholded at t=%.2f' % threshold)
plt.imshow(np.ma.masked_array(t, t < threshold).T, cmap=plt.cm.hot, interpolation='nearest')
plt.grid(False)
Convert to z-values
z = sp.stats.norm.ppf(sp.stats.t(dof).cdf(t))
# Correct for numerical rounding errors
z[z == np.inf] = z[z != np.inf].max()
plt.imshow(z.T, cmap=plt.cm.hot, interpolation='nearest')
plt.grid(False)
plt.colorbar()
<matplotlib.colorbar.Colorbar instance at 0xcd6f7e8>
Use FSL and its Nipype interface to do cluster-based thresholding at z = 3.1.
from nipype.interfaces import fsl
import nibabel as nb
nb.save(nb.Nifti1Image(z, np.identity(4)), 'z_simulation.nii.gz')
nb.save(nb.Nifti1Image(np.ones_like(z), np.identity(4)), 'z_mask.nii.gz')
dlh = fsl.SmoothEstimate(zstat_file='z_simulation.nii.gz',
mask_file='z_mask.nii.gz').run().outputs.dlh
cluster = fsl.Cluster()
cluster.inputs.in_file = 'z_simulation.nii.gz'
cluster.inputs.threshold = 3.1
cluster.inputs.dlh = dlh
cluster.inputs.out_threshold_file = 'z_simulation_threshold.nii.gz'
_ = cluster.run()
FSL cluster output:
DLH 0.0753853
VOLUME 6400
RESELS 36.7789
Cluster Index  Voxels  MAX   MAX X (vox)  MAX Y (vox)  MAX Z (vox)  COG X (vox)  COG Y (vox)  COG Z (vox)
5              7       3.64  22           18           0            21           18.7         0
4              2       3.5   20           26           0            20           26.5         0
3              2       3.41  23           13           0            23           12.5         0
2              1       3.12  18           18           0            18           18           0
1              1       3.12  19           12           0            19           12           0
Plot thresholded map and find 'Least significantly activated voxel' (comparison voxel).
Plot a histogram to get a flavour of how extreme this voxel is.
z_threshold = nb.load('z_simulation_threshold.nii.gz').get_data()
plt.figure(figsize=(16,6))
plt.subplot(121)
comparison_index = np.unravel_index(np.ma.argmin(np.ma.masked_array(z_threshold, z_threshold == 0)), z.shape)
plt.imshow(z_threshold.T,
cmap=plt.cm.hot)
plt.grid(False)
plt.scatter(*comparison_index, marker='o', s=500, c='r', lw=5, facecolors='none', label='comparison voxel')
plt.title('Comparison voxel: %s, (z=%.2f)' % (comparison_index, z_threshold[comparison_index]))
plt.subplot(122)
n, bins, patches = plt.hist(beta[0].ravel(), bins=75)
## MAKE HISTOGRAM LOOK NICE
cm = plt.cm.get_cmap('hot')
bin_centers = 0.5 * (bins[:-1] + bins[1:])
# scale values to interval [0,1]
col = bin_centers - min(bin_centers)
col /= max(col)
for c, p in zip(col, patches):
plt.setp(p, 'facecolor', cm(c))
plt.title('effect size')
plt.axvline(beta[(0,) + comparison_index], c='r', label='comparison voxel')
plt.legend()
<matplotlib.legend.Legend at 0x276a39d0>
(You can actually see that the distribution is bimodal and that the threshold is maybe a bit low.)
Now we use the sandwich estimator to get the covariance between this comparison voxel and every other voxel, using
$\hat{V}_{ij} = \frac{1}{q} \sum_{k=1}^{q} (X_k'X_k)^{-1}X_k' W_{ij} X_k(X_k'X_k)^{-1},$ (formula 7)
with $W_{ij} = \frac{1}{q - 1} \sum_{k=1}^{q} \hat{r}_{ik}\hat{r}'_{jk}$ (formula 8) .
residuals_comparison_voxel = residuals[(Ellipsis,) + comparison_index + (Ellipsis,)]
W_ij = np.zeros((residuals.shape[-1],) * 2 + data.shape[:-1]) # (length_block, length_block, width, height)
for comp_residual, residual in zip(residuals_comparison_voxel, residuals):
W_ij += np.einsum('...i, ...j->ij...', comp_residual, residual) # do outer product over last two dimensions, broadcast the rest
W_ij /= (q - 1)
V_ij = np.zeros((X.shape[1],) * 2 + data.shape[:-1]) # (num_regressor, num_regressor, width, height)
for i, x in enumerate(X_cut):
sandwich1 = np.linalg.pinv(x.T.dot(x)).dot(x.T)
sandwich2 = x.dot(np.linalg.pinv(x.T.dot(x)))
# Apply first part sandwich
v = np.tensordot(sandwich1, W_ij, axes=((1), (0)))
# Apply second part
v = np.tensordot(v, sandwich2, (1, 0))
# Roll axis to get correct shape
v = np.rollaxis(v, -1, 1)
V_ij += v
V_ij /= q
We see that the covariance for the task regressor is especially high around the comparison voxel.
plt.imshow(V_ij[0, 0].T, cmap=plt.cm.hot)
plt.scatter(*comparison_index, marker='o', s=500, c='w', lw=5, facecolors='none', label='comparison voxel')
plt.grid(False)
Now we can do the in limbo t-test: $t(c\hat{\beta}_i - c\hat{\beta}_v) = \frac{c\hat{\beta}_i - c\hat{\beta}_v}{\sqrt{c\hat{V}_i c' + c\hat{V}_v c' - 2 c\hat{V}_{iv} c'}}$ (equation 11).
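The next cell computes this for all voxels at once; for a single voxel $i$ it is just scalar arithmetic. A sketch for an example voxel at (20, 20), with the contrast selecting the task regressor:
c_vec = np.array([1., 0., 0.])  # contrast: task regressor only
i0, i1 = 20, 20                 # example voxel i
v0, v1 = comparison_index       # comparison voxel v
num = c_vec.dot(beta[:, i0, i1]) - c_vec.dot(beta[:, v0, v1])
var_i = c_vec.dot(V[:, :, i0, i1]).dot(c_vec)      # c V_i c'
var_v = c_vec.dot(V[:, :, v0, v1]).dot(c_vec)      # c V_v c'
cov_iv = c_vec.dot(V_ij[:, :, i0, i1]).dot(c_vec)  # c V_iv c'
print(num / np.sqrt(var_i + var_v - 2 * cov_iv))   # should match t_in_limbo[20, 20] below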
c = np.array([[1, 0, 0]])
t_in_limbo = np.tensordot(c, beta, (1, 0)).squeeze()
t_in_limbo -= np.tensordot(c, beta[(Ellipsis,) + comparison_index], (1, 0)).squeeze()
V1 = np.tensordot(np.tensordot(c, V, (1,0)), c.T, (1, 0))
V2 = np.tensordot(np.tensordot(c, V[(Ellipsis, Ellipsis,) + comparison_index], (1,0)), c.T, (1, 0))
V3 = np.tensordot(np.tensordot(c, V_ij, (1,0)), c.T, (1, 0))
t_in_limbo /= np.sqrt(V1 + V2 - 2*V3).squeeze()
plt.imshow(t_in_limbo, cmap=plt.cm.hot)
plt.colorbar()
plt.grid(False)
plt.scatter(*comparison_index, marker='o', s=500, c='w', lw=5, facecolors='none', label='comparison voxel')
<matplotlib.collections.PathCollection at 0x28334b90>
Get the threshold for $\alpha = 0.05$.
threshold = sp.stats.t(dof).ppf(0.05)
threshold
-1.7108820799094282
plt.figure(figsize=(16,6))
cmap = plt.cm.hot
cmap.set_bad('black')
plt.subplot(131)
plt.imshow(z, cmap=cmap, vmin=z.min(), vmax=z.max())
plt.grid(False)
plt.title('Unthresholded z-map', fontsize=20)
plt.subplot(132)
plt.imshow(np.ma.masked_array(z_threshold, z_threshold == 0), cmap=cmap, vmin=z.min(), vmax=z.max())
plt.grid(False)
plt.title('Thresholded z-map', fontsize=20)
plt.subplot(133)
plt.imshow(np.ma.masked_where((z_threshold != 0) | ((z_threshold == 0) & (t_in_limbo < threshold)), z),
cmap=cmap,
vmin=z.min(), vmax=z.max() )
plt.title('In limbo regions', fontsize=20)
plt.grid(False)