# Tutorial script: least-squares GLM analyses of example PET scans at
# single voxels (task-difficulty covariate, added covariate, two
# conditions, ANCOVA for the global signal).
#
# NOTE(review): exported from an IPython notebook.  The "%pylab inline"
# magic has been removed in favour of explicit numpy / matplotlib.pyplot
# imports, and the Python 2 print statements / urllib calls have been
# ported to Python 3.  nibabel's removed get_affine()/get_data() are
# replaced by .affine / get_fdata().

import os
import tarfile
import urllib.request

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import nibabel as nib
from scipy.stats import t as tdist, norm as normdist

matplotlib.rcParams['image.cmap'] = 'gray'
matplotlib.rcParams['image.interpolation'] = 'nearest'

# Grand mean to which the scaled data are normalized.
GM = 50


def fetch_scans():
    """Fetch and unpack the example scans; return the 12 .img filenames.

    If all files are already present locally, nothing is downloaded.
    """
    # Names of the files in the data bundle.
    scans_dir = 'egscans'
    files = [os.path.join(scans_dir, 'snn03055dy%i.img' % i)
             for i in range(1, 13)]
    # The remote url and filename for local storage.
    data_url = 'http://imaging.mrc-cbu.cam.ac.uk/downloads/Tutscans/egscans.tar.gz'
    tarball = 'egscans.tar.gz'
    # First, check that we don't already have the files.
    if all(map(os.path.isfile, files)):
        print('All files already present, nothing to download.')
        return files
    # If we don't have them, make the storage directory, fetch (if
    # needed) and unpack, skipping the .mat files in the tarball.
    if not os.path.isdir(scans_dir):
        os.mkdir(scans_dir)
    if not os.path.isfile(tarball):
        print("Fetching remote tarball, this may take some time depending on your network...")
        urllib.request.urlretrieve(data_url, tarball)
    print("Unpacking scans, omitting .mat files")
    # Context manager closes the tarfile (the original leaked the handle).
    with tarfile.open(tarball, 'r:gz') as tf:
        image_files = [m for m in tf.getmembers()
                       if not m.name.endswith('.mat')]
        tf.extractall(scans_dir, image_files)
    print("All files unpacked into", scans_dir, ', done!')
    return files


files = fetch_scans()
# A list (not a lazy Python 3 map) — the images are indexed and
# iterated several times below.
images = [nib.load(fname) for fname in files]

# Transformation from voxel indices to mm.
M = images[0].affine
# mm to voxel indices.
iM = np.linalg.inv(M)

# Coordinates of the voxel of interest in mm (MNI space).
posmm = [-20.0, -42, 34]
# Coordinates in voxel space (using homogeneous coordinates).
posvox = np.dot(iM, posmm + [1])
# Keep only the spatial part, as an integer tuple usable as an index.
posvox = tuple(posvox[:3].astype(int))


def global_mean(img):
    """Compute a "sorted global mean" of an image.

    This returns the mean of the image voxels that are in the range
    above 1/8 of the mean.
    """
    m = img.mean() / 8.0
    return img[img > m].mean()


nimgs = len(images)
vdata = np.zeros(nimgs)   # value of the voxel of interest, per scan
gdata = np.zeros(nimgs)   # global (thresholded) mean, per scan
for i, V in enumerate(images):
    d = V.get_fdata()
    vdata[i] = d[posvox]
    gdata[i] = global_mean(d)

# Show an axial slice of the last scan.
last_image = images[-1]
last_data = last_image.get_fdata()
plt.imshow(last_data[:, :, 30])

plt.plot(gdata, vdata, 'x')
plt.xlabel('TMVV for scan')
plt.ylabel('Voxel value for -20 -42 34')
plt.title('The relationship of overall signal to values for an individual voxel')

# Proportionally scale the data, then scale to a global mean of GM.
pvdata = vdata / gdata
Y = pvdata * GM

# Our covariate: task difficulty for each of the 12 scans.
ourcov = np.array([5, 4, 4, 2, 3, 1, 6, 3, 1, 6, 5, 2])

# Plot the data against task difficulty.
plt.plot(ourcov, Y, 'x')
plt.xlabel('Task difficulty')
plt.ylabel('PS voxel value')
plt.xlim(0, 7)
plt.title('The relationship of task difficulty to voxel value')

# Guess an intercept and slope, and the fitted points they imply.
guessic = 55
guesslope = 0.5
guessPts = guessic + guesslope * ourcov

minx = 0
maxx = ourcov.max() + 1  # min/max x for plots
axs = np.array([minx, maxx, Y.min() * 0.98, Y.max() * 1.02])

# Plot data with the guessed intercept and slope.
plt.plot(ourcov, Y, 'x')
plt.plot(axs[:2], guessic + guesslope * axs[:2], ':')  # guessed line
# Plot the guessed residuals as vertical red segments.
for i, c in enumerate(ourcov):
    plt.plot([c, c], [Y[i], guessPts[i]], color='r')
plt.axis(axs)
plt.xlabel('Task difficulty')
plt.ylabel('PS voxel value')
plt.xlim(0, 7)
plt.title('A first guess at a linear relationship, and its residuals')

# Residuals from the guessed slope.
guessRes = Y - guessPts

# Design matrix: task-difficulty covariate plus a constant column.
X = np.column_stack([ourcov, np.ones(nimgs)])


def scale_design_mtx(X):
    """Utility to scale the design matrix for display.

    This scales each column to its own range so we can see the
    variation across the column for all the columns, regardless of the
    scaling of the column.  (Near-)constant columns are shown as ones.
    """
    mi, ma = X.min(axis=0), X.max(axis=0)
    # Columns with effectively zero range cannot be rescaled.
    col_neq = (ma - mi) > 1.e-8
    Xs = np.ones_like(X)
    mi = mi[col_neq]
    ma = ma[col_neq]
    Xs[:, col_neq] = (X[:, col_neq] - mi) / (ma - mi)
    return Xs


def show_design(X, design_title):
    """Show the design matrix nicely as a grayscale image."""
    plt.figure()
    plt.gray()
    plt.imshow(scale_design_mtx(X), interpolation='nearest')
    plt.title(design_title)


# Display using our fancy function.
show_design(X, 'Design matrix for the first analysis')


def matrix_rank(X):
    """Rank of X: number of singular values above a small threshold."""
    U, S, V = np.linalg.svd(X)
    return np.sum(S > 1e-5)


# Degrees of freedom: observations minus rank of the design.
df = nimgs - matrix_rank(X)
# Mean sum of squares of the residuals for the guessed slope.
guessRSS = (guessRes ** 2).sum() / df

# The least-squares analysis, giving slope and constant.
B = np.dot(np.linalg.pinv(X), Y)
print(B)

# Plot data with the new least-squares line.
plt.plot(ourcov, Y, 'x')
bound = np.array([minx, maxx])
plt.plot(bound, bound * B[0] + B[1], 'r')
plt.axis(axs)
plt.xlabel('Task difficulty')
plt.ylabel('PS voxel value')
plt.title('The least squares linear relationship')

# Contrast selecting the task-difficulty effect.
C = np.array([1, 0])
# t statistic and significance test.
RSS = ((Y - np.dot(X, B)) ** 2).sum()
MRSS = RSS / df
SE = np.sqrt(MRSS * np.dot(C, np.dot(np.linalg.pinv(np.dot(X.T, X)), C.T)))
t = np.dot(C, B) / SE
ltp = tdist(df).cdf(t)  # lower tail p
p = 1 - ltp             # upper tail p
# Print results to window.
print('First TD analysis: t= %2.2f, p= %0.6f' % (t, p))


def t_stat(Y, X, C):
    """Betas, t statistic and significance test given data, design
    matrix, contrast.

    Also works on column-stacked data for several voxels at once
    (sums are taken over axis 0).
    """
    # Calculate the parameters.
    B = np.dot(np.linalg.pinv(X), Y)
    RSS = ((Y - np.dot(X, B)) ** 2).sum(axis=0)
    # Recalculate df for this design.
    df = X.shape[0] - matrix_rank(X)
    MRSS = RSS / df
    SE = np.sqrt(MRSS * np.dot(C, np.dot(np.linalg.pinv(np.dot(X.T, X)), C.T)))
    t = np.dot(C, B) / SE
    ltp = tdist(df).cdf(t)  # lower tail p
    p = 1 - ltp             # upper tail p
    return B, t, df, p


# Results are the same as the hand computation above.
print('First TD analysis again: B = %s, t= %2.2f, df= %d, p= %0.6f'
      % t_stat(Y, X, C))

# Save data for analysis in some package that reads csv type files.
np.savetxt("voxdata.txt", Y)

# Design matrix with an added (scan-order) covariate.
X = np.column_stack([ourcov, np.arange(1, nimgs + 1), np.ones(nimgs)])
show_design(X, 'Design matrix for added covariate')
C = np.array([1, 0, 0])
# The analysis.
B, t, df, p = t_stat(Y, X, C)
print('Added covariate analysis: t= %2.2f, p= %0.6f' % (t, p))

# Design for two conditions: first 6 scans vs last 6 scans.
X = np.zeros((nimgs, 2))
X[0:6, 0] = 1
X[6:, 1] = 1
show_design(X, 'Design matrix for two conditions')
# The betas here are simply the two condition means.
B = np.linalg.pinv(X).dot(Y)
# Contrast for activation minus rest.
C = np.array([-1, 1])
B, t, df, p = t_stat(Y, X, C)
print('Analysis for two conditions: t= %2.2f, p= %0.6f' % (t, p))

# Do an ANCOVA for globals, instead of proportional scaling:
# grand-mean scale the raw voxel data and model the global signal as
# a covariate.
Yanc = vdata * GM / np.mean(gdata)
X = np.column_stack((ourcov, gdata, np.ones(nimgs)))
show_design(X, 'Design with ANCOVA for global signal')
# Contrast for the task covariate only.
C = np.array([1, 0, 0])
B, t, df, p = t_stat(Yanc, X, C)
print('TD analysis with ANCOVA for global: t= %2.2f, p= %0.6f' % (t, p))

# A second voxel of interest, in mm (MNI space) and voxel indices.
posmm2 = [6, 0, 6]
posvox2 = np.dot(iM, posmm2 + [1])
posvox2 = tuple(posvox2[:3].astype(int))

# Get the data for this voxel.
vdata2 = np.zeros(nimgs)
for i, V in enumerate(images):
    vdata2[i] = V.get_fdata()[posvox2]

# Proportional and global mean scale, then the first analysis again.
Y2 = vdata2 / gdata * 50
X = np.column_stack((ourcov, np.ones(nimgs)))
C = np.array([1, 0])
B, t, df, p = t_stat(Y2, X, C)
print('First TD analysis second voxel: t= %2.2f, p= %0.6f' % (t, p))

# Both voxels at once: t_stat handles column-stacked data, returning
# one t and p per column.
Y_both = np.column_stack((Y, Y2))
B, t, df, p = t_stat(Y_both, X, C)
print('First TD analysis first voxel: t= %2.2f, p= %0.6f' % (t[0], p[0]))
print('First TD analysis second voxel: t= %2.2f, p= %0.6f' % (t[1], p[1]))