In this notebook, we compare the accuracy of the DTM and the SFM to each other directly in each b value.
First, we import some modules:
import scipy.stats as stats
import nibabel as ni
import osmosis.viz.mpl as viz
import osmosis.utils as ozu
import osmosis.model.sparse_deconvolution as ssd
import osmosis.model.dti as dti
import osmosis.model.analysis as oza
We set the data-path to point to the installation-specific location where data might be found:
import os
import osmosis as oz
import osmosis.io as oio
oio.data_path = os.path.join(oz.__path__[0], 'data')
Point to the data-files of this subject:
subject = 'SUB1'
data_1k_1, data_1k_2 = oio.get_dwi_data(1000, subject)
data_2k_1, data_2k_2 = oio.get_dwi_data(2000, subject)
data_4k_1, data_4k_2 = oio.get_dwi_data(4000, subject)
Get the white-matter mask:
mask = np.zeros(ni.load(data_1k_1[0]).shape[:3])
wm_nifti = ni.load(oio.data_path + '/%s/%s_wm_mask.nii.gz'%(subject, subject)).get_data()
mask[np.where(wm_nifti==1)] = 1
We set the solver parameters for Elastic Net according to the calculations in the AppendixA notebook:
# This is the best according to rRMSE across bvals:
l1_ratio = 0.8
alpha = 0.0005
solver_params = dict(l1_ratio=l1_ratio, alpha=alpha, fit_intercept=False, positive=True)
Initialize the class instances of the SFM objects. Use AD/RD derived from the corpus callosum ROI distribution for each b value. Parameters are not saved, or read from a file (hence params_file=temp
).
ad_rd = oio.get_ad_rd(subject, 1000)
SD_1k_1 = ssd.SparseDeconvolutionModel(*data_1k_1, mask=mask, solver_params=solver_params, params_file = 'temp', axial_diffusivity=ad_rd[0]['AD'], radial_diffusivity=ad_rd[0]['RD'])
SD_1k_2 = ssd.SparseDeconvolutionModel(*data_1k_2, mask=mask, solver_params=solver_params, params_file = 'temp', axial_diffusivity=ad_rd[1]['AD'], radial_diffusivity=ad_rd[1]['RD'])
ad_rd = oio.get_ad_rd(subject, 2000)
SD_2k_1 = ssd.SparseDeconvolutionModel(*data_2k_1, mask=mask, solver_params=solver_params, params_file = 'temp', axial_diffusivity=ad_rd[0]['AD'], radial_diffusivity=ad_rd[0]['RD'])
SD_2k_2 = ssd.SparseDeconvolutionModel(*data_2k_2, mask=mask, solver_params=solver_params, params_file = 'temp', axial_diffusivity=ad_rd[1]['AD'], radial_diffusivity=ad_rd[1]['RD'])
ad_rd = oio.get_ad_rd(subject, 4000)
SD_4k_1 = ssd.SparseDeconvolutionModel(*data_4k_1, mask=mask, solver_params=solver_params, params_file = 'temp', axial_diffusivity=ad_rd[0]['AD'], radial_diffusivity=ad_rd[0]['RD'])
SD_4k_2 = ssd.SparseDeconvolutionModel(*data_4k_2, mask=mask, solver_params=solver_params, params_file = 'temp', axial_diffusivity=ad_rd[1]['AD'], radial_diffusivity=ad_rd[1]['RD'])
Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_1.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_1.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_2.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_2.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_1.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_1.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_2.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_2.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_1.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_1.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_2.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_2.bvecs
Likewise, initialize the DTM objects with the same data
TM_1k_1 = dti.TensorModel(*data_1k_1, mask=mask, params_file='temp')
TM_1k_2 = dti.TensorModel(*data_1k_2, mask=mask, params_file='temp')
TM_2k_1 = dti.TensorModel(*data_2k_1, mask=mask, params_file='temp')
TM_2k_2 = dti.TensorModel(*data_2k_2, mask=mask, params_file='temp')
TM_4k_1 = dti.TensorModel(*data_4k_1, mask=mask, params_file='temp')
TM_4k_2 = dti.TensorModel(*data_4k_2, mask=mask, params_file='temp')
Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_1.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_1.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_2.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b1000_2.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_1.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_1.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_2.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b2000_2.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_1.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_1.bvecs Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_2.bvals Loading from file: /home/arokem/usr/lib/python2.7/site-packages/osmosis/data/SUB1/SUB1_b4000_2.bvecs
Calculate the rRMSE for each b value and each model, based on cross-prediction: a model is fit to one data-set and used to predict the other data-set from the same b value. The rRMSE is the average of the two folds of this process (1=>2 and 2=>1).
dtm_rrmse_1k = oza.cross_predict(TM_1k_1, TM_1k_2)
dtm_rrmse_2k = oza.cross_predict(TM_2k_1, TM_2k_2)
dtm_rrmse_4k = oza.cross_predict(TM_4k_1, TM_4k_2)
sfm_rrmse_1k = oza.cross_predict(SD_1k_1, SD_1k_2)
sfm_rrmse_2k = oza.cross_predict(SD_2k_1, SD_2k_2)
sfm_rrmse_4k = oza.cross_predict(SD_4k_1, SD_4k_2)
SparseDeconvolutionModel.model_params [****************100%******************] 68693 of 68694 complete
/home/arokem/usr/local/lib/python2.7/site-packages/numexpr/necompiler.py:716: DeprecationWarning: using `oa_ndim == 0` when `op_axes` is NULL is deprecated. Use `oa_ndim == -1` or the MultiNew iterator for NumPy <1.8 compatibility return compiled_ex(*arguments, **kwargs) /home/arokem/usr/local/lib/python2.7/site-packages/numexpr/necompiler.py:716: DeprecationWarning: using `oa_ndim == 0` when `op_axes` is NULL is deprecated. Use `oa_ndim == -1` or the MultiNew iterator for NumPy <1.8 compatibility return compiled_ex(*arguments, **kwargs) /home/arokem/usr/local/lib/python2.7/site-packages/numexpr/necompiler.py:716: DeprecationWarning: using `oa_ndim == 0` when `op_axes` is NULL is deprecated. Use `oa_ndim == -1` or the MultiNew iterator for NumPy <1.8 compatibility return compiled_ex(*arguments, **kwargs)
In each b value, we will plot a 2D, or image histogram of all voxels and their rRMSE values in the two models:
idx = np.logical_and(np.logical_and(np.isfinite(dtm_rrmse_1k), dtm_rrmse_1k<1.5), np.isfinite(sfm_rrmse_1k))
fig, cbar = viz.scatter_density(np.hstack([0.5, dtm_rrmse_1k[idx], 1.5]), np.hstack([0.5, sfm_rrmse_1k[idx], 1.5]),
return_cbar=True, vmin=0, vmax=3, cmap=matplotlib.cm.gray_r)
fig.axes[0].plot([0,100],[100,0], 'k--')
fig.axes[0].plot([100,0],[50,50], 'k--')
fig.axes[0].plot([50,50],[100,0], 'k--')
cbar.set_ticks([0,1,2,3])
cbar.set_ticklabels([1,10,100,1000])
fig.set_size_inches([10,10])
fig.savefig('figures/scatter_rrmse_1k.svg')
fig_hist = viz.probability_hist(dtm_rrmse_1k[idx] - sfm_rrmse_1k[idx])
fig_hist.set_size_inches([8,6])
-c:1: RuntimeWarning: invalid value encountered in less /home/arokem/usr/lib/python2.7/site-packages/osmosis/viz/mpl.py:353: RuntimeWarning: divide by zero encountered in log10 imax = ax.matshow(np.log10(np.flipud(data_arr.T)), cmap=cmap, **kwargs)
idx = np.logical_and(np.logical_and(np.isfinite(dtm_rrmse_2k), dtm_rrmse_2k<1.5), np.isfinite(sfm_rrmse_2k))
fig, cbar = viz.scatter_density(np.hstack([0.5, dtm_rrmse_2k[idx], 1.5]), np.hstack([0.5, sfm_rrmse_2k[idx], 1.5]),
return_cbar=True, vmin=0, vmax=3, cmap=matplotlib.cm.gray_r)
fig.axes[0].plot([0,100],[100,0], 'k--')
fig.axes[0].plot([100,0],[50,50], 'k--')
fig.axes[0].plot([50,50],[100,0], 'k--')
cbar.set_ticks([0,1,2,3])
cbar.set_ticklabels([1,10,100,1000])
fig.set_size_inches([10,10])
fig.savefig('figures/scatter_rrmse_2k.svg')
idx = np.logical_and(np.logical_and(np.isfinite(dtm_rrmse_4k), dtm_rrmse_4k<1.5), np.isfinite(sfm_rrmse_4k))
fig, cbar = viz.scatter_density(np.hstack([0.5, dtm_rrmse_4k[idx], 1.5]), np.hstack([0.5, sfm_rrmse_4k[idx], 1.5]),
return_cbar=True, vmin=0, vmax=3, cmap=matplotlib.cm.gray_r)
fig.axes[0].plot([0,100],[100,0], 'k--')
fig.axes[0].plot([100,0],[50,50], 'k--')
fig.axes[0].plot([50,50],[100,0], 'k--')
cbar.set_ticks([0,1,2,3])
cbar.set_ticklabels([1,10,100,1000])
fig.set_size_inches([10,10])
fig.savefig('figures/scatter_rrmse_4k.svg')
We define a function used for permutation testing of the statistical significance of the differences between the two models.
def permutation_test_rel(arr1, arr2, n_perms=1000):
"""
Test the difference between two nd arrays
"""
# Linearize and get the finite elements:
arr1 = arr1[np.isfinite(arr1)]
arr2 = arr2[np.isfinite(arr2)] # We assume that the indices are good for both
orig_arr = np.vstack([arr1, arr2])
orig_diff = np.median(arr1 - arr2)
perm_diff = np.empty(1000)
for ii in xrange(n_perms):
idx1 = np.random.randint(0, 2, orig_arr.shape[-1])
idx2 = np.mod(idx1+1, 2)
perm_arr1 = orig_arr[idx1, np.arange(idx1.shape[0])]
perm_arr2 = orig_arr[idx2, np.arange(idx2.shape[0])]
perm_diff[ii] = np.median(perm_arr1 - perm_arr2)
return orig_diff, perm_diff
The test is applied to each b value between DTM and SFM:
orig_diff1k, medians1k = permutation_test_rel(dtm_rrmse_1k, sfm_rrmse_1k)
orig_diff2k, medians2k = permutation_test_rel(dtm_rrmse_2k, sfm_rrmse_2k)
orig_diff4k, medians4k = permutation_test_rel(dtm_rrmse_4k, sfm_rrmse_4k)
We also derive from the permutations a boot-strap confidence interval on the difference of the medians:
ci_1k = stats.scoreatpercentile(medians1k, 97.5) - stats.scoreatpercentile(medians1k, 2.5)
ci_2k = stats.scoreatpercentile(medians2k, 97.5) - stats.scoreatpercentile(medians2k, 2.5)
ci_4k = stats.scoreatpercentile(medians4k, 97.5) - stats.scoreatpercentile(medians4k, 2.5)
The following function finds the boot-strap distribution of the individual medians
def boots_strap_medians(arr, n_iters=1000):
"""
Produce a boot-strap distribution of the median of an array
"""
# Linearize and exclude nan/infs:
arr = arr[np.isfinite(arr)]
medians = np.empty(n_iters)
for ii in xrange(n_iters):
this_arr = arr[np.random.random_integers(0, arr.shape[0]-1, arr.shape[0])]
medians[ii] = np.median(this_arr)
return medians
Compute the boot-strap confidence intervals of the median rRMSE in each model/b-value
medians_dtm_1k = boots_strap_medians(dtm_rrmse_1k)
medians_dtm_2k = boots_strap_medians(dtm_rrmse_2k)
medians_dtm_4k = boots_strap_medians(dtm_rrmse_4k)
medians_sfm_1k = boots_strap_medians(sfm_rrmse_1k)
medians_sfm_2k = boots_strap_medians(sfm_rrmse_2k)
medians_sfm_4k = boots_strap_medians(sfm_rrmse_4k)
dtm_ci_1k = (stats.scoreatpercentile(medians_dtm_1k, 97.5) - stats.scoreatpercentile(medians_dtm_1k, 2.5))
sfm_ci_1k = (stats.scoreatpercentile(medians_sfm_1k, 97.5) - stats.scoreatpercentile(medians_sfm_1k, 2.5))
dtm_ci_2k = (stats.scoreatpercentile(medians_dtm_2k, 97.5) - stats.scoreatpercentile(medians_dtm_2k, 2.5))
sfm_ci_2k = (stats.scoreatpercentile(medians_sfm_2k, 97.5) - stats.scoreatpercentile(medians_sfm_2k, 2.5))
dtm_ci_4k = (stats.scoreatpercentile(medians_dtm_4k, 97.5) - stats.scoreatpercentile(medians_dtm_4k, 2.5))
sfm_ci_4k = (stats.scoreatpercentile(medians_sfm_4k, 97.5) - stats.scoreatpercentile(medians_sfm_4k, 2.5))
We display the median +/- 95% confidence interval in each condition
fig, ax = plt.subplots(1)
ax.bar([1,2],
[np.median(dtm_rrmse_1k[np.isfinite(dtm_rrmse_1k)]),
np.median(sfm_rrmse_1k[np.isfinite(sfm_rrmse_1k)])],
yerr=[dtm_ci_1k, sfm_ci_1k], color=[0.8, 0.8, 0.8], ecolor='k')
ax.bar([4,5],
[np.median(dtm_rrmse_2k[np.isfinite(dtm_rrmse_2k)]),
np.median(sfm_rrmse_2k[np.isfinite(sfm_rrmse_2k)])],
yerr=[dtm_ci_2k, sfm_ci_2k], color=[0.59, 0.59, 0.59], ecolor='k')
ax.bar([7,8],
[np.median(dtm_rrmse_4k[np.isfinite(dtm_rrmse_4k)]),
np.median(sfm_rrmse_4k[np.isfinite(sfm_rrmse_4k)])],
yerr=[dtm_ci_4k, sfm_ci_4k], ecolor='k', color=[0.32, 0.32, 0.32])
ax.set_ylim([1/np.sqrt(2), 0.8])
ax.set_xlim([0.8, 9])
ax.grid()
ax.set_xticks([0])
fig.set_size_inches([10,6])
fig.savefig('figures/rrmse_bars.svg')
We also print the values to the notebook, supporting the text in the paper manuscript:
print [np.median(dtm_rrmse_1k[np.isfinite(dtm_rrmse_1k)]),
np.median(sfm_rrmse_1k[np.isfinite(sfm_rrmse_1k)])]
print [dtm_ci_1k, sfm_ci_1k]
print [np.median(dtm_rrmse_2k[np.isfinite(dtm_rrmse_2k)]),
np.median(sfm_rrmse_2k[np.isfinite(sfm_rrmse_2k)])]
print [dtm_ci_2k, sfm_ci_2k]
print [np.median(dtm_rrmse_4k[np.isfinite(dtm_rrmse_4k)]),
np.median(sfm_rrmse_4k[np.isfinite(sfm_rrmse_4k)])]
print [dtm_ci_1k, sfm_ci_1k]
[0.77969796018681747, 0.77403867631975576] [0.0011072940946956766, 0.00097989156449773684] [0.78027668620035762, 0.75709104161134455] [0.0010044846549480679, 0.00077619529484007632] [0.78807302716479444, 0.7581707734866665] [0.0011072940946956766, 0.00097989156449773684]