import numpy as np
import matplotlib.pyplot as plt


def linear_function(x, a, b):
    """ A linear function

    Parameters
    ----------
    x : ndarray
        Input variable
    a : float
        offset parameter
    b : float
        slope parameter
    """
    return a + b * x

N = 100
x = np.linspace(-np.pi, np.pi, N)
a = np.random.randn() * 10
b = np.random.randn() * 10
y = linear_function(x, a, b)
fig, ax = plt.subplots(1)
ax.plot(x, y, '.')
ax.set_xlabel('x')
ax.set_ylabel('y')

t = np.linspace(-np.pi, np.pi, N)
x = np.sin(t)
fig, ax = plt.subplots(1)
ax.plot(x)

y = linear_function(x, a, b)
fig, ax = plt.subplots(1)
ax.plot(y, '.')

fig, ax = plt.subplots(1)
ax.plot(x, y, '.')
ax.set_xlabel('x')
ax.set_ylabel('y')

p = 10  # Number of inputs
# Preallocate a matrix (make it a matrix of ones to account for beta_0):
X = np.ones((N, p))
for ii in range(1, p):
    X[:, ii] = np.sin((ii + 1) * t)

fig, ax = plt.subplots(1)
ax.plot(t, X)
ax.set_ylim([-1.1, 1.1])
ax.set_xlim([-np.pi, np.pi])
ax.set_xlabel('t')
ax.set_ylabel('x')


def linear_model(X, beta):
    """ The linear model

    Parameters
    ----------
    X : 2d array
        The design matrix: a matrix of regressors
    beta : 1d array
        Model coefficients
    """
    return np.dot(X, beta)

beta = np.random.randn(p)
f = linear_model(X, beta)
fig, ax = plt.subplots(1)
ax.plot(t, f)
ax.set_xlim([-np.pi, np.pi])

fig, ax = plt.subplots(1)
ax.plot(X[:, 1], y, '.')

# Add noise around the model prediction to simulate measured data:
y = f + 3 * np.random.randn(*f.shape)
fig, ax = plt.subplots(1)
ax.plot(f, label='f')
ax.plot(y, label='y')
plt.legend()

import scipy.linalg as lin


def ols(X):
    """ The matrix which solves the OLS regression problem for full-rank X """
    return np.dot(lin.pinv(np.dot(X.T, X)), X.T)

beta_hat = np.dot(ols(X), y)
plt.plot(beta, beta_hat, 'o')
plt.xlabel('Real parameter value')
plt.ylabel('Estimated parameter value')

y_hat = np.dot(X, beta_hat)
fig, ax = plt.subplots(1)
ax.plot(y, label='$y$')
ax.plot(y_hat, label=r'$\hat{y}$')
plt.legend()

# The "hat" matrix takes the data straight to the fitted values:
H = np.dot(X, ols(X))
print(H.shape)
y_hat = np.dot(H.T, y)  # H is symmetric, so H.T == H
plt.plot(y, y_hat, 'o')
plt.xlabel('The original output')
plt.ylabel('Linear estimate of the output')
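As a quick sanity check (a small addition to these notes), the OLS solution above should match NumPy's least-squares solver, and the hat matrix should be symmetric and idempotent, as the theory predicts:

# Cross-check our ols() solution against numpy's least-squares solver:
beta_check, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
print(np.allclose(beta_hat, beta_check))             # expect True
# The hat matrix of a full-rank design is symmetric and idempotent:
print(np.allclose(H, H.T), np.allclose(H.dot(H), H))  # expect True True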
First, let's find a NIfTI (.nii) or Analyze (.img) image on disk. Let's assume we have some data in our current directory:

import nibabel as nib
import os
from os.path import join as pjoin

this_course = 'pna'


def load_images(course):
    if course == 'pna':
        PNA_DATA_PATH = pjoin(os.path.expanduser('~'), 'data', 'qcpna', 'bolddata')
        img_fname = pjoin(PNA_DATA_PATH, 'smallbold.nii.gz')
        if os.path.isfile(img_fname):
            img = nib.load(img_fname)
            print(img.shape)
            return img
        else:
            print("where is this image: " + img_fname + "\n")
            return None
    else:
        files_1 = [pjoin('ds107', 'sub' + '%03d' % i, 'BOLD',
                         'task001_run001', 'meanabold.nii')
                   for i in range(1, 15)]
        print([(f, os.path.isfile(f)) for f in files_1])
        files = [f for f in files_1 if os.path.isfile(f)]
        return [nib.load(f) for f in files]

img = load_images('pna')

# transformation from voxel indices to mm
try:
    M = img.affine
except AttributeError:  # load_images may have returned a list of images
    M = img[0].affine
# mm to voxel indices
iM = np.linalg.inv(M)
# coordinates of the voxel of interest in mm (MNI space)
posmm = [-20.0, -42, 34]
(x, y, z) = tuple(posmm)
# coordinates in voxel space (in homogeneous coordinates)
posvox = np.dot(iM, posmm + [1])
# We grab the spatial part of the output. Since we want to use it as an
# index, we need to make it a tuple of integers
i, j, k = tuple(np.round(posvox[:3]).astype(int))
print(i, j, k)
vdata = img.get_fdata()[i, j, k + 2, :]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
ax1.plot(vdata)
ax1.set_title('the time series')
# show the axial slice through this voxel at time t = 0
t = 0
ax2.imshow(img.get_fdata()[:, :, k, t], interpolation='nearest')
ax2.set_title('the slice at k = %d (z = %d mm)' % (k, z))

x0 = np.asarray([1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1])
x1 = np.ones(12)
X = np.vstack((x0, x1)).T
print(X)
X_firstmodel = X.copy()

# take only the first 12 values of the data;
# scale the voxel data for better visual display: make its mean == 3.0
Y = vdata[:12]
Y = (Y - Y.mean()) / np.std(Y) + 3.0
vdata = Y
pinvX = lin.pinv(X)
betah = pinvX.dot(Y)
Yfitted = X.dot(betah)
resid = Y - Yfitted
print("Y:\n", Y, "\nresid:\n", resid, "\nbetah:\n", betah)
print("mean of Y: ", np.mean(Y), "\t mean of resid: ", np.mean(resid))


# make this a little function as we will be reusing it:
def glm(X, Y):
    """ A simple GLM function returning the estimated parameters and residuals """
    betah = lin.pinv(X).dot(Y)
    Yfitted = X.dot(betah)
    resid = Y - Yfitted
    return betah, Yfitted, resid

# Plot the results
x = np.arange(12)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.plot(x, Y, 'r-', x, Yfitted, 'b-.')
ax2.plot(x, resid, 'g--')


# Again, we will want to reuse this code, so let's make a tiny function:
def plot_glm(Y, Yf, r):
    x = np.arange(Y.shape[0])
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    ax1.plot(x, Y, 'r-', x, Yf, 'b-.')
    ax1.set_title('Y (red) Y fitted (blue)')
    ax2.plot(x, r, 'g--')
    ax2.set_title('residuals')

vdata = vdata[:12]
Y = vdata * 1.
print(np.mean(Y))

x0 = np.asarray([1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1])
x1 = np.hstack((np.ones(6), np.zeros(6)))
x2 = np.hstack((np.zeros(6), np.ones(6)))
x3 = np.ones(12)
X = np.vstack((x0, x1, x2, x3)).T
print(X)
second_X = X

# Add some signal to our voxel, for the sake of the example:
Y += 0.5 * np.mean(Y) * x1 - 0.5 * np.mean(Y) * x2
print(Y, np.mean(Y))
beta, Yfitted, resid = glm(X, Y)
print(beta, np.mean(Y))  # (beta[1]/2 + beta[2]/2 + beta[3])

# Plot the results
plot_glm(Y, Yfitted, resid)

print(X_firstmodel)
glm_result = glm(X_firstmodel, Y)
plot_glm(Y, glm_result[1], glm_result[2])
print(glm_result[0])  # the betas

X = X_firstmodel
x2 = np.hstack((np.ones(6), -np.ones(6)))
X_ = np.vstack((X.T, x2)).T
print(X_.T.dot(X_))
glm_result = glm(X_, Y)  # original data
plot_glm(Y, glm_result[1], glm_result[2])
print(glm_result[0])  # the betas
X = second_X

f, (a1, a2, a3) = plt.subplots(1, 3, figsize=(10, 3))
a1.imshow(Y[:, np.newaxis], interpolation='nearest', cmap='gray')
a2.imshow(X, interpolation='nearest', cmap='gray')
a3.imshow(resid[:, np.newaxis], interpolation='nearest', cmap='gray')
titles = ['Y', 'X', 'e']
for ix, a in enumerate([a1, a2, a3]):
    a.set_xticklabels([])
    a.set_title(titles[ix])
    # a.set_yticklabels([])

print(X)
betah, Yfitted, resid = glm(X, Y)  # fit the model
RSS = np.sum((Y - Yfitted) ** 2)   # same as np.sum(resid ** 2)
MRSS = RSS / (len(Y) - np.linalg.matrix_rank(X))
print("RSS :", RSS)
print("MRSS :", MRSS)
print(betah)  # checking

cT = np.asarray([0, 1, -1, 0])
c = cT[:, np.newaxis]
pXTX = np.linalg.pinv(X.T.dot(X))
t_num = cT.dot(betah)
SE = np.sqrt(MRSS * cT.dot(pXTX).dot(c))
t = t_num / SE
print(t)

from scipy.stats import t as tdist, norm as ndist
# degrees of freedom: number of observations minus the rank of the design
df = len(Y) - np.linalg.matrix_rank(X)
pvalue = 1.0 - tdist.cdf(t, df)
print("t-value = ", t, "p-value = ", pvalue)
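Since we will be testing more contrasts, here is a small reusable helper (an addition to the original notes; the name t_contrast is ours) that mirrors the computation above:

def t_contrast(cvec, betah, resid, X):
    """ t statistic and one-sided p-value for the contrast cvec.dot(betah) == 0 """
    cvec = np.asarray(cvec)
    df = len(resid) - np.linalg.matrix_rank(X)
    MRSS = np.sum(resid ** 2) / df
    SE = np.sqrt(MRSS * cvec.dot(np.linalg.pinv(X.T.dot(X))).dot(cvec))
    tstat = cvec.dot(betah) / SE
    # double the tail probability of |t| for a two-sided test
    return tstat, 1.0 - tdist.cdf(tstat, df)

print(t_contrast([0, 1, -1, 0], betah, resid, X))  # should reproduce t and p above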
# what if t had been 1.8?
t = 1.8
pvalue = 1.0 - tdist.cdf(t, df)
print("t-value = ", t, "p-value = ", pvalue)

print(X)
X0 = X[:, [0, 3]]
betah, Yfitted, resid = glm(X, Y)
betah0, Yfitted0, resid0 = glm(X0, Y)
PX = X.dot(lin.pinv(X))
RX = np.eye(X.shape[0]) - PX
PX0 = X0.dot(lin.pinv(X0))
F_num = np.sum(resid0 ** 2) - np.sum(resid ** 2)
nu1 = np.trace(PX - PX0)
F_den = np.sum(resid ** 2)
nu2 = np.trace(RX)
F = (F_num / nu1) / (F_den / nu2)
print(F, nu1, nu2, F_num, F_den)

from scipy.stats import f as Fdist
print(Fdist.sf(F, nu1, nu2))


def t_test(betah, resid, X):
    """ Test the parameters betah one by one - this assumes they are
    estimable (X full rank)

    betah : (p, 1) estimated parameters
    resid : (n, 1) estimated residuals
    X : design matrix
    """
    RSS = np.sum(resid ** 2)
    n = resid.shape[0]
    q = np.linalg.matrix_rank(X)
    df = n - q
    MRSS = RSS / df
    XTX = np.linalg.pinv(X.T.dot(X))
    tval = np.zeros(betah.shape)
    pval = np.zeros(betah.shape)
    for idx, beta in enumerate(betah):
        c = np.zeros(betah.shape)
        c[idx] = 1
        t_num = c.T.dot(betah)
        SE = np.sqrt(MRSS * c.T.dot(XTX).dot(c))
        tval[idx] = t_num / SE
        pval[idx] = 1.0 - tdist.cdf(tval[idx], df)
    return tval, pval

# print(t_test(betah, resid, X))

n = 20
x = np.random.randn(n, 1)
X = np.hstack((x, np.ones(x.shape)))
print(X.shape)
m_y = 3.14156
e = np.random.randn(n, 1)
y = 1.4 * x + m_y + e
plt.plot(x, y, 'o')

betah, Yfitted, resid = glm(X, y)
t, p = t_test(betah, resid, X)
print(" betah = \n", betah)
print(" t = \n", t)
print(" p = \n", p)

# do this the other way: let's have y be the explanatory variable:
X2 = np.hstack((y - m_y, np.ones(x.shape)))
y2 = x
betah2, Yfitted2, resid2 = glm(X2, y2)
print("compare the slopes : \t\t", 1. / betah2[0, 0], betah[0, 0])
print("compare the residual variance:\t",
      resid.T.dot(resid)[0, 0], resid2.T.dot(resid2)[0, 0])
t2, p2 = t_test(betah2, resid2, X2)
print("betah2 : \t\t\t", betah2[:, 0])
print("compare t values : \n", np.hstack((t, t2)))
print("compare p values : \n", np.hstack((p, p2)))


# Fisher z transform of the correlation coefficient:
def fisher_transf(rho):
    """ Take a coefficient of correlation and z-transform it;
    see en.wikipedia.org/wiki/Fisher_transformation
    """
    return 0.5 * np.log((1. + rho) / (1. - rho))

corr = np.corrcoef(x[:, 0], y[:, 0])
z = fisher_transf(corr[0, 1])
p_fisher = 1. - ndist.cdf(z, 0, 1. / np.sqrt(n - 3))
print(p_fisher)
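As a cross-check (another small addition), the slope t statistic of a simple regression with intercept is tied to the correlation coefficient by the standard identity t = r * sqrt(n - 2) / sqrt(1 - r**2):

r = corr[0, 1]
t_from_r = r * np.sqrt(n - 2) / np.sqrt(1 - r ** 2)
print(t_from_r, t[0, 0])  # the two t values should agree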
# An example of a t-test with 20 "voxels"
from IPython import display as d


def F_test_reducedX(X, X0, Y):
    """ F test comparing the full model X against the reduced model X0 """
    betah, Yfitted, resid = glm(X, Y)
    betah0, Yfitted0, resid0 = glm(X0, Y)
    PX = X.dot(lin.pinv(X))
    RX = np.eye(X.shape[0]) - PX
    PX0 = X0.dot(lin.pinv(X0))
    F_num = np.sum(resid0 ** 2) - np.sum(resid ** 2)
    nu1 = np.trace(PX - PX0)
    F_den = np.sum(resid ** 2)
    nu2 = np.trace(RX)
    F = (F_num / nu1) / (F_den / nu2)
    return F, Fdist.sf(F, nu1, nu2), nu1, nu2

# Set up a model with two regressors - first uncorrelated
n = 20
age = np.random.randn(n, 1)
likes_pies = np.random.randn(n, 1)
x0 = age
# make likes_pies orthogonal to age: regress out the age component
x1 = likes_pies - (likes_pies.T.dot(age) / (age.T.dot(age))) * age
print(x0.T.dot(x1))  # check orthogonality: should be ~0
x2 = np.ones((n, 1))
X = np.hstack((x0, x1, x2))
print(X.shape)
m_y = np.pi
sig_e = 1.5
e = sig_e * np.random.normal(size=(n, 1))
y = X.dot(np.array([1.0, 1.0, m_y])[:, np.newaxis]) + e
betah, yfitted, resid = glm(X, y)
plot_glm(y, yfitted, resid)
t, p = t_test(betah, resid, X)
F, pF, nu1, nu2 = F_test_reducedX(X, X[:, [2]], y)
print(betah, "\n", t, "\n", p, "\n", F, nu1, nu2, pF)

# Set up a model with two regressors - now correlated
n = 20
age = np.random.randn(n, 1)
likes_pies = np.random.randn(n, 1)
x0 = age + .1 * likes_pies
x1 = age - .1 * likes_pies
# can you tell what the expected correlation is?
print(np.corrcoef(x0.T, x1.T)[0, 1])
x2 = np.ones((n, 1))
X = np.hstack((x0, x1, x2))
m_y = np.pi
sig_e = 1.5
e = sig_e * np.random.normal(size=(n, 1))
y = X.dot(np.array([1.0, 1.0, m_y])[:, np.newaxis]) + e
betah, yfitted, resid = glm(X, y)
plot_glm(y, yfitted, resid)
t, p = t_test(betah, resid, X)
F, pF, nu1, nu2 = F_test_reducedX(X, x2, y)
print(betah, "\n", t, "\n", p, "\n", F, nu1, nu2, pF)

HOME = os.path.expanduser('~')
d.Image(filename=pjoin(HOME, 'code', 'pna-notebooks', 't_F_test.jpg'))
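Why does the correlated design behave so differently? Here is a small simulation sketch (our addition, reusing the glm function from above; the helper name beta0_spread is ours) comparing the sampling spread of the first parameter estimate under the two designs:

def beta0_spread(make_x01, n=20, n_sims=1000, sig_e=1.5):
    """ Standard deviation of the first estimated beta over n_sims
    simulated datasets; make_x01 builds the two regressors. """
    betas = np.zeros(n_sims)
    for s in range(n_sims):
        age = np.random.randn(n, 1)
        pies = np.random.randn(n, 1)
        x0_s, x1_s = make_x01(age, pies)
        X_sim = np.hstack((x0_s, x1_s, np.ones((n, 1))))
        y_sim = X_sim.dot(np.array([1.0, 1.0, np.pi])[:, np.newaxis])
        y_sim = y_sim + sig_e * np.random.normal(size=(n, 1))
        betas[s] = glm(X_sim, y_sim)[0][0, 0]
    return betas.std()

# orthogonalized pair vs the deliberately correlated pair used above:
print(beta0_spread(lambda a, pies: (a, pies - (pies.T.dot(a) / a.T.dot(a)) * a)))
print(beta0_spread(lambda a, pies: (a + .1 * pies, a - .1 * pies)))
# expect a much larger spread for the correlated design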