# Tutorial script: least-squares GLM analyses of example PET scans at
# single voxels (task-difficulty covariate, added covariate, two
# conditions, ANCOVA for the global signal).
#
# NOTE(review): exported from an IPython notebook.  The "%pylab inline"
# magic has been removed in favour of explicit numpy / matplotlib.pyplot
# imports, and the Python 2 print statements / urllib calls have been
# ported to Python 3.  nibabel's removed get_affine()/get_data() are
# replaced by .affine / get_fdata().

import os
import tarfile
import urllib.request

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import nibabel as nib
from scipy.stats import t as tdist, norm as normdist

matplotlib.rcParams['image.cmap'] = 'gray'
matplotlib.rcParams['image.interpolation'] = 'nearest'

# Grand mean to which the scaled data are normalized.
GM = 50


def fetch_scans():
    """Fetch and unpack the example scans; return the 12 .img filenames.

    If all files are already present locally, nothing is downloaded.
    """
    # Names of the files in the data bundle.
    scans_dir = 'egscans'
    files = [os.path.join(scans_dir, 'snn03055dy%i.img' % i)
             for i in range(1, 13)]
    # The remote url and filename for local storage.
    data_url = 'http://imaging.mrc-cbu.cam.ac.uk/downloads/Tutscans/egscans.tar.gz'
    tarball = 'egscans.tar.gz'
    # First, check that we don't already have the files.
    if all(map(os.path.isfile, files)):
        print('All files already present, nothing to download.')
        return files
    # If we don't have them, make the storage directory, fetch (if
    # needed) and unpack, skipping the .mat files in the tarball.
    if not os.path.isdir(scans_dir):
        os.mkdir(scans_dir)
    if not os.path.isfile(tarball):
        print("Fetching remote tarball, this may take some time depending on your network...")
        urllib.request.urlretrieve(data_url, tarball)
    print("Unpacking scans, omitting .mat files")
    # Context manager closes the tarfile (the original leaked the handle).
    with tarfile.open(tarball, 'r:gz') as tf:
        image_files = [m for m in tf.getmembers()
                       if not m.name.endswith('.mat')]
        tf.extractall(scans_dir, image_files)
    print("All files unpacked into", scans_dir, ', done!')
    return files


files = fetch_scans()
# A list (not a lazy Python 3 map) — the images are indexed and
# iterated several times below.
images = [nib.load(fname) for fname in files]

# Transformation from voxel indices to mm.
M = images[0].affine
# mm to voxel indices.
iM = np.linalg.inv(M)

# Coordinates of the voxel of interest in mm (MNI space).
posmm = [-20.0, -42, 34]
# Coordinates in voxel space (using homogeneous coordinates).
posvox = np.dot(iM, posmm + [1])
# Keep only the spatial part, as an integer tuple usable as an index.
posvox = tuple(posvox[:3].astype(int))


def global_mean(img):
    """Compute a "sorted global mean" of an image.

    This returns the mean of the image voxels that are in the range
    above 1/8 of the mean.
    """
    m = img.mean() / 8.0
    return img[img > m].mean()


nimgs = len(images)
vdata = np.zeros(nimgs)   # value of the voxel of interest, per scan
gdata = np.zeros(nimgs)   # global (thresholded) mean, per scan
for i, V in enumerate(images):
    d = V.get_fdata()
    vdata[i] = d[posvox]
    gdata[i] = global_mean(d)

# Show an axial slice of the last scan.
last_image = images[-1]
last_data = last_image.get_fdata()
plt.imshow(last_data[:, :, 30])

plt.plot(gdata, vdata, 'x')
plt.xlabel('TMVV for scan')
plt.ylabel('Voxel value for -20 -42 34')
plt.title('The relationship of overall signal to values for an individual voxel')

# Proportionally scale the data, then scale to a global mean of GM.
pvdata = vdata / gdata
Y = pvdata * GM

# Our covariate: task difficulty for each of the 12 scans.
ourcov = np.array([5, 4, 4, 2, 3, 1, 6, 3, 1, 6, 5, 2])

# Plot the data against task difficulty.
plt.plot(ourcov, Y, 'x')
plt.xlabel('Task difficulty')
plt.ylabel('PS voxel value')
plt.xlim(0, 7)
plt.title('The relationship of task difficulty to voxel value')

# Guess an intercept and slope, and the fitted points they imply.
guessic = 55
guesslope = 0.5
guessPts = guessic + guesslope * ourcov

minx = 0
maxx = ourcov.max() + 1  # min/max x for plots
axs = np.array([minx, maxx, Y.min() * 0.98, Y.max() * 1.02])

# Plot data with the guessed intercept and slope.
plt.plot(ourcov, Y, 'x')
plt.plot(axs[:2], guessic + guesslope * axs[:2], ':')  # guessed line
# Plot the guessed residuals as vertical red segments.
for i, c in enumerate(ourcov):
    plt.plot([c, c], [Y[i], guessPts[i]], color='r')
plt.axis(axs)
plt.xlabel('Task difficulty')
plt.ylabel('PS voxel value')
plt.xlim(0, 7)
plt.title('A first guess at a linear relationship, and its residuals')

# Residuals from the guessed slope.
guessRes = Y - guessPts

# Design matrix: task-difficulty covariate plus a constant column.
X = np.column_stack([ourcov, np.ones(nimgs)])


def scale_design_mtx(X):
    """Utility to scale the design matrix for display.

    This scales each column to its own range so we can see the
    variation across the column for all the columns, regardless of the
    scaling of the column.  (Near-)constant columns are shown as ones.
    """
    mi, ma = X.min(axis=0), X.max(axis=0)
    # Columns with effectively zero range cannot be rescaled.
    col_neq = (ma - mi) > 1.e-8
    Xs = np.ones_like(X)
    mi = mi[col_neq]
    ma = ma[col_neq]
    Xs[:, col_neq] = (X[:, col_neq] - mi) / (ma - mi)
    return Xs


def show_design(X, design_title):
    """Show the design matrix nicely as a grayscale image."""
    plt.figure()
    plt.gray()
    plt.imshow(scale_design_mtx(X), interpolation='nearest')
    plt.title(design_title)


# Display using our fancy function.
show_design(X, 'Design matrix for the first analysis')


def matrix_rank(X):
    """Rank of X: number of singular values above a small threshold."""
    U, S, V = np.linalg.svd(X)
    return np.sum(S > 1e-5)


# Degrees of freedom: observations minus rank of the design.
df = nimgs - matrix_rank(X)
# Mean sum of squares of the residuals for the guessed slope.
guessRSS = (guessRes ** 2).sum() / df

# The least-squares analysis, giving slope and constant.
B = np.dot(np.linalg.pinv(X), Y)
print(B)

# Plot data with the new least-squares line.
plt.plot(ourcov, Y, 'x')
bound = np.array([minx, maxx])
plt.plot(bound, bound * B[0] + B[1], 'r')
plt.axis(axs)
plt.xlabel('Task difficulty')
plt.ylabel('PS voxel value')
plt.title('The least squares linear relationship')

# Contrast selecting the task-difficulty effect.
C = np.array([1, 0])
# t statistic and significance test.
RSS = ((Y - np.dot(X, B)) ** 2).sum()
MRSS = RSS / df
SE = np.sqrt(MRSS * np.dot(C, np.dot(np.linalg.pinv(np.dot(X.T, X)), C.T)))
t = np.dot(C, B) / SE
ltp = tdist(df).cdf(t)  # lower tail p
p = 1 - ltp             # upper tail p
# Print results to window.
print('First TD analysis: t= %2.2f, p= %0.6f' % (t, p))


def t_stat(Y, X, C):
    """Betas, t statistic and significance test given data, design
    matrix, contrast.

    Also works on column-stacked data for several voxels at once
    (sums are taken over axis 0).
    """
    # Calculate the parameters.
    B = np.dot(np.linalg.pinv(X), Y)
    RSS = ((Y - np.dot(X, B)) ** 2).sum(axis=0)
    # Recalculate df for this design.
    df = X.shape[0] - matrix_rank(X)
    MRSS = RSS / df
    SE = np.sqrt(MRSS * np.dot(C, np.dot(np.linalg.pinv(np.dot(X.T, X)), C.T)))
    t = np.dot(C, B) / SE
    ltp = tdist(df).cdf(t)  # lower tail p
    p = 1 - ltp             # upper tail p
    return B, t, df, p


# Results are the same as the hand computation above.
print('First TD analysis again: B = %s, t= %2.2f, df= %d, p= %0.6f'
      % t_stat(Y, X, C))

# Save data for analysis in some package that reads csv type files.
np.savetxt("voxdata.txt", Y)

# Design matrix with an added (scan-order) covariate.
X = np.column_stack([ourcov, np.arange(1, nimgs + 1), np.ones(nimgs)])
show_design(X, 'Design matrix for added covariate')
C = np.array([1, 0, 0])
# The analysis.
B, t, df, p = t_stat(Y, X, C)
print('Added covariate analysis: t= %2.2f, p= %0.6f' % (t, p))

# Design for two conditions: first 6 scans vs last 6 scans.
X = np.zeros((nimgs, 2))
X[0:6, 0] = 1
X[6:, 1] = 1
show_design(X, 'Design matrix for two conditions')
# The betas here are simply the two condition means.
B = np.linalg.pinv(X).dot(Y)
# Contrast for activation minus rest.
C = np.array([-1, 1])
B, t, df, p = t_stat(Y, X, C)
print('Analysis for two conditions: t= %2.2f, p= %0.6f' % (t, p))

# Do an ANCOVA for globals, instead of proportional scaling:
# grand-mean scale the raw voxel data and model the global signal as
# a covariate.
Yanc = vdata * GM / np.mean(gdata)
X = np.column_stack((ourcov, gdata, np.ones(nimgs)))
show_design(X, 'Design with ANCOVA for global signal')
# Contrast for the task covariate only.
C = np.array([1, 0, 0])
B, t, df, p = t_stat(Yanc, X, C)
print('TD analysis with ANCOVA for global: t= %2.2f, p= %0.6f' % (t, p))

# A second voxel of interest, in mm (MNI space) and voxel indices.
posmm2 = [6, 0, 6]
posvox2 = np.dot(iM, posmm2 + [1])
posvox2 = tuple(posvox2[:3].astype(int))

# Get the data for this voxel.
vdata2 = np.zeros(nimgs)
for i, V in enumerate(images):
    vdata2[i] = V.get_fdata()[posvox2]

# Proportional and global mean scale, then the first analysis again.
Y2 = vdata2 / gdata * 50
X = np.column_stack((ourcov, np.ones(nimgs)))
C = np.array([1, 0])
B, t, df, p = t_stat(Y2, X, C)
print('First TD analysis second voxel: t= %2.2f, p= %0.6f' % (t, p))

# Both voxels at once: t_stat handles column-stacked data, returning
# one t and p per column.
Y_both = np.column_stack((Y, Y2))
B, t, df, p = t_stat(Y_both, X, C)
print('First TD analysis first voxel: t= %2.2f, p= %0.6f' % (t[0], p[0]))
print('First TD analysis second voxel: t= %2.2f, p= %0.6f' % (t[1], p[1]))