%pylab inline
import numpy as np
import numpy.linalg as npl
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from scipy.stats import t as t_dist, f as f_dist, gamma


def scale_design_mtx(X):
    """ Utility to scale the design matrix for display

    This scales each column to its own range so we can see the variation
    across rows for every column, regardless of the scaling of the column.
    """
    mi, ma = X.min(axis=0), X.max(axis=0)
    col_neq = (ma - mi) > 1.e-8
    Xs = np.ones_like(X)
    mi = mi[col_neq]
    ma = ma[col_neq]
    Xs[:, col_neq] = (X[:, col_neq] - mi) / (ma - mi)
    return Xs


def show_design(X, design_title, **kwargs):
    """ Show the design matrix nicely """
    plt.figure()
    plt.gray()  # Gray colormap
    plt.imshow(scale_design_mtx(X), interpolation='nearest', **kwargs)
    plt.title(design_title)


def fit_ols(Y, X):
    r""" Betas, fitted data, and residuals from OLS linear fit

    This is OLS estimation; we assume the errors $\epsilon_i$ to be
    independent and identically normally distributed around zero (i.i.d.).
    """
    Y = np.asarray(Y)
    X = np.asarray(X)
    # Get the estimated betas
    betah = npl.pinv(X).dot(Y)
    fitted = X.dot(betah)
    resid = Y - fitted
    return betah, fitted, resid


def t_stat(Y, X, C):
    """ Betas, t statistic and significance test given data, design matrix, contrast

    Ordinary least squares estimation - see the `fit_ols` function.
    """
    Y = np.asarray(Y)
    X = np.asarray(X)
    C = np.atleast_2d(C)
    # Calculate the parameters
    B, fitted, resid = fit_ols(Y, X)
    # Residual sum of squares
    RSS = (resid ** 2).sum(axis=0)
    # Degrees of freedom: number of observations - number of fitted parameters
    df = X.shape[0] - npl.matrix_rank(X)
    # Mean residual sum of squares
    MRSS = RSS / df
    # Standard error of contrast estimate C.dot(B)
    SE = np.sqrt(MRSS * C.dot(npl.pinv(X.T.dot(X)).dot(C.T)))
    t = C.dot(B) / SE
    ltp = t_dist(df).cdf(t)  # lower tail p
    p = 1 - ltp  # upper tail p
    return B, t, df, p

a_block = np.array([-0.1, 0, 0.1])
baseline = 10
activation = 11
on_off = np.hstack((a_block + baseline, a_block + activation))
n_on_off = 3
e = np.tile(a_block, (2 * n_on_off,))
y = np.tile(on_off, (n_on_off,))
plt.plot(y)

x_on = np.hstack((np.zeros(len(a_block)), np.ones(len(a_block))))
x_off = 1 - x_on
X_over_part = np.column_stack((x_on, x_off, np.ones_like(x_on)))
X_over = np.tile(X_over_part, (n_on_off, 1))
show_design(X_over, 'over parametrized')

X_well_part = np.column_stack((x_on, np.ones_like(x_on)))
X_well = np.tile(X_well_part, (n_on_off, 1))
show_design(X_well, 'well parametrized')

X_times2 = X_well.copy()
X_times2[:, 0] *= 2
show_design(X_times2, 'times 2')

B_over = npl.pinv(X_over).dot(y)
B_over

# Print betas, t statistic, df, p value for different designs, contrasts
print(t_stat(y, X_over, [1, 0, 0]))
print(t_stat(y, X_well, [1, 0]))
print(t_stat(y, X_times2, [1, 0]))
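# Not in the original notebook: a quick check related to the comparison above,
# using only the functions already defined.  The over-parametrized and
# well-parametrized designs span the same column space (x_off = 1 - x_on), so
# the fitted values should be identical even though the betas differ.
_, fitted_over, _ = fit_ols(y, X_over)
_, fitted_well, _ = fit_ols(y, X_well)
print(np.allclose(fitted_over, fitted_well))  # expect True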
""" C = np.atleast_2d(C) rankX = npl.matrix_rank(X) return rankX == npl.matrix_rank(np.vstack((C, X))) is_estimable(X_over, [1, 0, 0]) is_estimable(X_over, [1, -1, 0]) import sympy X = sympy.Matrix(X_over) sympy.pretty_print(X.nullspace()) [u, s, vt] = npl.svd(X_over, full_matrices=False) tol = s.max() * max(X_over.shape) * np.finfo(s.dtype).eps nz = np.where(s > tol)[0] print nz vt[np.abs(vt) < tol] = 0 print vt.T[:,nz] print "\nNormalize the columns \n" print vt.T[:,nz].dot(np.diagflat([1./vt[0,0], 1./vt[1,1]])) def f_stat(Y, X, col): """ betas, F statistic and significance test given data, design and column to test We do the F statistic long hand, by fitting the model with and without the column to test. This is OLS estimation; we assume the errors to have independent and identical normal distributions around zero for each $i$ in $\Epsilon_i$ (i.i.d). """ Y = np.asarray(Y) full_X = np.asarray(X) # Delete column to make reduced design reduced_X = np.delete(full_X, col, 1) # fit both designs _, _, full_resid = fit_ols(Y, full_X) _, _, reduced_resid = fit_ols(Y, reduced_X) n_obs = Y.shape[0] # Number of parameters used by each design n_p_full = npl.matrix_rank(full_X) n_p_reduced = npl.matrix_rank(reduced_X) # Extra sum of squares full_SS = (full_resid ** 2).sum() reduced_SS = (reduced_resid ** 2).sum() extra_SS = (reduced_SS - full_SS) / (n_p_full - n_p_reduced) # F statistic F = extra_SS / (full_SS / (n_obs - n_p_full)) ltp = f_dist((n_p_full - n_p_reduced), n_obs - n_p_full).cdf(F) # lower tail p p = 1 - ltp # upper tail p return F, p f_stat(y, X_well, 0) f_stat(y, X_over, 0) # Remind ourselves about the t statistic on the over-parametrized design print(t_stat(y, X_over, [0, 0, 1])) # Add a tiny bit of noise to the over-parametrized design X_over_tweaked = X_over.copy() X_over_tweaked[0, 2] = 1e-14 print(t_stat(y, X_over_tweaked, [0, 0, 1])) # first a quick function to yield two correlated regressor with correlation c def correlated(c, n=20, seed=None): np.random.seed(seed) # To get predictable random numbers # generate two uncorrelated regressors Y = np.random.normal(0,1,(n,2)) #make them correlated with mixing matrix M: #we want a correlation c between Y[0] and Y[1], with unit variance #this is the mixing matrix needed: c1 = np.sqrt( (1 + np.sqrt(1-c*c))/2 ) c2 = .5 * c / c1 M = np.asarray([[c1, c2],[c2, c1]]) Yc = Y.dot(M) # Make Yc = MY return Yc n = 20 Xc = correlated(.8, n) x1c = Xc[:,0]; x2c = Xc[:,1]; print np.corrcoef(Xc.T)[0,1] y_corr = x1c + 1.5*np.random.normal(size=n) + 10 #x1c = np.random.normal(size=n) #x2c = np.random.normal(size=n) X_corr = np.column_stack((x1c, x2c, np.ones_like(x2c))) C = np.array([1, 0, 0]) # The contrast t_stat(y_corr, X_corr, C) XtX = X_corr.T.dot(X_corr) XtX npl.pinv(XtX) C.dot(npl.pinv(XtX)).dot(C) #Xc = correlated(.0, n, seed=42) #y_corr = Xc[:,0] + 1.*np.random.normal(size=n) + 10 n=20 x1c = np.random.normal(size=n) x2c = np.random.normal(size=n) y_corr = x1c + 1.*np.random.normal(size=n) + 10 # if you make the data exactly the same, for the # experiment to work, you would have to make the correlated regressor # different (use the correlated function with the seed not fixed). # Otherwise, you are not going to change your t since # the amount of the first regressor that is in the data is just going # to be scaled, and the scaling doesnt change the t-value. In other # words you are not changing the amount of data variance that can be # explained by the part of x1 that is orthogonal to x2, since x1 and # x2 directions remains exactly the same. 
x2c_corrs = np.linspace(0.1, 0.999, 15)
dtype = np.dtype(dict(names=['t', 'mss', 'b0', 'b1', 'b2', 'xvar', 'df'],
                      formats=['f8'] * 7))
res = np.zeros(len(x2c_corrs), dtype=dtype)
for i, x2c_corr in enumerate(x2c_corrs):
    new_x1c = (1 - x2c_corr) * x1c + x2c_corr * x2c
    # Xc = correlated(x2c_corr, n, seed=42)
    # new_x1c = Xc[:, 0]; x2c = Xc[:, 1]
    # print(np.corrcoef(new_x1c, x2c)[0, 1])
    X_corr = np.column_stack((new_x1c, x2c, np.ones_like(x2c)))
    y_corr = new_x1c + 1. * np.random.normal(size=n) + 10
    betas, res[i]['t'], res[i]['df'], p = t_stat(y_corr, X_corr, C)
    res[i]['b0'], res[i]['b1'], res[i]['b2'] = betas[:]
    fitted = X_corr.dot(betas)
    res[i]['mss'] = ((y_corr - fitted) ** 2).sum() / res[i]['df']
    res[i]['xvar'] = C.dot(npl.pinv(X_corr.T.dot(X_corr)).dot(C))
print(mlab.rec2txt(res))


def spm_hrf(t):
    """ Return SPM HRF sampled at times `t` """
    # gamma.pdf is only defined for t > 0
    hrf = np.zeros_like(t, dtype=float)
    hrf[t > 0] = gamma.pdf(t[t > 0], 6, 0, 1) - gamma.pdf(t[t > 0], 16, 0, 1) / 6.
    return hrf / np.sum(hrf)


def spm_hrf_d(t):
    """ Return temporal derivative of SPM HRF sampled at times `t` """
    # This is what SPM does!
    return spm_hrf(t) - spm_hrf(t - 1)

t = np.arange(24)
plt.plot(t, spm_hrf(t), label='HRF')
plt.plot(t, spm_hrf_d(t), label='TD')
plt.legend()

block_length = 24
on_off = np.hstack((np.zeros(block_length), np.ones(block_length)))
off_on = 1 - on_off
X_hrf_over = np.column_stack((on_off, off_on, np.ones_like(on_off)))
X_hrf_over = np.tile(X_hrf_over, (10, 1))
show_design(X_hrf_over, 'before convolution', aspect=0.01)

S = npl.svd(X_hrf_over, compute_uv=False)
print('Smallest singular value (if close to 0, matrix may be rank deficient)')
print(S.min())
# Is [-1, -1, 1] still in the null space?  Yes.
np.allclose(X_hrf_over.dot([-1, -1, 1]), 0)

hrf = spm_hrf(t)
x0 = np.convolve(hrf, X_hrf_over[:, 0], mode='same')
x1 = np.convolve(hrf, X_hrf_over[:, 1], mode='same')
X_conv_over = np.column_stack((x0, x1, np.ones_like(x0)))
show_design(X_conv_over, 'Convolved design', aspect=0.01)

S = npl.svd(X_conv_over, compute_uv=False)
print('Smallest singular value (if close to 0, matrix may be rank deficient)')
print(S.min())
# Is [-1, -1, 1] still in the null space?
np.allclose(X_conv_over.dot([-1, -1, 1]), 0)
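# Not in the original notebook: another way of asking the same question,
# assuming numpy's default rank tolerance.  After convolution, the 'same' mode
# edge effects mean that x0 + x1 is no longer exactly constant, so the
# convolved design should now be full rank, with no exact null space.
print(npl.matrix_rank(X_conv_over))  # expect 3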
x0_hrf = X_conv_over[:, 0]
X_conv_well = np.column_stack((x0_hrf, np.ones_like(x0)))
y = (x0_hrf - x0_hrf.mean()) * 1 + 10
npl.pinv(X_conv_well).dot(y)

x0_hrf.mean()

X_conv_orth = np.column_stack((x0_hrf - x0_hrf.mean(), np.ones_like(x0)))
npl.pinv(X_conv_orth).dot(y)

dhrf = spm_hrf_d(np.arange(24))
x0_dhrf = np.convolve(dhrf, x0, mode='same')

# Basis functions, before convolution with the block - nearly orthogonal
hrf.dot(dhrf)

# After convolution with the block - not orthogonal
x0_hrf.dot(x0_dhrf)


def orth_x_wrt_y(x, y):
    """ Orthogonalize vector `x` with respect to matrix `y` """
    y_in_x_beta = npl.pinv(y).dot(x)
    y_in_x = y.dot(y_in_x_beta)
    return x - y_in_x

x0_dhrf_orth = orth_x_wrt_y(x0_dhrf, X_conv_orth)
x0_dhrf_orth.T.dot(X_conv_orth)

x0_hrf.dot(x0_dhrf_orth)

X_dconv_orth = np.column_stack((X_conv_orth[:, 0], x0_dhrf_orth, np.ones_like(x0)))
npl.pinv(X_dconv_orth).dot(y)

ye = y + np.random.normal(size=y.shape)
print(npl.pinv(X_conv_orth).dot(ye))
print(npl.pinv(X_dconv_orth).dot(ye))

# Design in which the HRF regressor is not orthogonal to the constant
partial_orthed = orth_x_wrt_y(x0_dhrf, x0_hrf[:, None])
X_colinear = np.column_stack((x0_hrf, np.ones_like(x0_hrf)))
X_mixed = np.column_stack((x0_hrf, partial_orthed, np.ones_like(x0_hrf)))
X_mixed.T.dot(X_mixed)

print(npl.pinv(X_colinear).dot(ye))
print(npl.pinv(X_mixed).dot(ye))
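# Not in the original notebook: a hedged check on the comparison just above.
# If we orthogonalize the derivative regressor against *both* columns of
# X_colinear (the HRF regressor and the constant), adding it to the design
# leaves the estimates for those two columns unchanged.
fully_orthed = orth_x_wrt_y(x0_dhrf, X_colinear)
X_fixed = np.column_stack((x0_hrf, fully_orthed, np.ones_like(x0_hrf)))
print(npl.pinv(X_colinear).dot(ye))
print(npl.pinv(X_fixed).dot(ye)[[0, 2]])  # expect the same two values as above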