import numpy as np
import matplotlib.pyplot as plt


def linear_function(x, a, b):
    """ A linear function

    Parameters
    ----------
    x : ndarray
        Input variable
    a : float
        offset parameter
    b : float
        slope parameter
    """
    return a + b * x

N = 100
x = np.linspace(-np.pi, np.pi, N)
a = np.random.randn() * 10
b = np.random.randn() * 10
y = linear_function(x, a, b)
fig, ax = plt.subplots(1)
ax.plot(x, y, '.')
ax.set_xlabel('x')
ax.set_ylabel('y')

t = np.linspace(-np.pi, np.pi, N)
x = np.sin(t)
fig, ax = plt.subplots(1)
ax.plot(x)

y = linear_function(x, a, b)
fig, ax = plt.subplots(1)
ax.plot(y, '.')

fig, ax = plt.subplots(1)
ax.plot(x, y, '.')
ax.set_xlabel('x')
ax.set_ylabel('y')

p = 10  # Number of inputs
# Preallocate a matrix (make it a matrix of ones to account for beta_0):
X = np.ones((N, p))
for ii in range(1, p):
    X[:, ii] = np.sin((ii + 1) * t)

fig, ax = plt.subplots(1)
ax.plot(t, X)
ax.set_ylim([-1.1, 1.1])
ax.set_xlim([-np.pi, np.pi])
ax.set_xlabel('t')
ax.set_ylabel('x')


def linear_model(X, beta):
    """ The linear model

    Parameters
    ----------
    X : 2d array
        The design matrix: a matrix of regressors
    beta : 1d array
        Model coefficients
    """
    return np.dot(X, beta)

beta = np.random.randn(p)
f = linear_model(X, beta)
fig, ax = plt.subplots(1)
ax.plot(t, f)
ax.set_xlim([-np.pi, np.pi])

fig, ax = plt.subplots(1)
ax.plot(X[:, 1], y, '.')

# Add noise around the model prediction to simulate measured data:
y = f + 3 * np.random.randn(*f.shape)
fig, ax = plt.subplots(1)
ax.plot(f, label='f')
ax.plot(y, label='y')
plt.legend()

import scipy.linalg as lin


def ols(X):
    """ The matrix which solves the OLS regression problem for full-rank X """
    return np.dot(lin.pinv(np.dot(X.T, X)), X.T)

beta_hat = np.dot(ols(X), y)
plt.plot(beta, beta_hat, 'o')
plt.xlabel('Real parameter value')
plt.ylabel('Estimated parameter value')

y_hat = np.dot(X, beta_hat)
fig, ax = plt.subplots(1)
ax.plot(y, label='$y$')
ax.plot(y_hat, label=r'$\hat{y}$')
plt.legend()

# The "hat" matrix takes the data straight to the fitted values:
H = np.dot(X, ols(X))
print(H.shape)
y_hat = np.dot(H.T, y)  # H is symmetric, so H.T == H
plt.plot(y, y_hat, 'o')
plt.xlabel('The original output')
plt.ylabel('Linear estimate of the output')
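As a quick sanity check (a small addition to these notes), the OLS solution above should match NumPy's least-squares solver, and the hat matrix should be symmetric and idempotent, as the theory predicts:

# Cross-check our ols() solution against numpy's least-squares solver:
beta_check, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
print(np.allclose(beta_hat, beta_check))             # expect True
# The hat matrix of a full-rank design is symmetric and idempotent:
print(np.allclose(H, H.T), np.allclose(H.dot(H), H))  # expect True True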
First, let's find a NIfTI (.nii) or Analyze (.img) image on disk. Let's assume we have some data in our current directory:

import nibabel as nib
import os
from os.path import join as pjoin

this_course = 'pna'


def load_images(course):
    if course == 'pna':
        PNA_DATA_PATH = pjoin(os.path.expanduser('~'), 'data', 'qcpna', 'bolddata')
        img_fname = pjoin(PNA_DATA_PATH, 'smallbold.nii.gz')
        if os.path.isfile(img_fname):
            img = nib.load(img_fname)
            print(img.shape)
            return img
        else:
            print("where is this image: " + img_fname + "\n")
            return None
    else:
        files_1 = [pjoin('ds107', 'sub' + '%03d' % i, 'BOLD',
                         'task001_run001', 'meanabold.nii')
                   for i in range(1, 15)]
        print([(f, os.path.isfile(f)) for f in files_1])
        files = [f for f in files_1 if os.path.isfile(f)]
        return [nib.load(f) for f in files]

img = load_images('pna')

# transformation from voxel indices to mm
try:
    M = img.affine
except AttributeError:  # load_images may have returned a list of images
    M = img[0].affine
# mm to voxel indices
iM = np.linalg.inv(M)
# coordinates of the voxel of interest in mm (MNI space)
posmm = [-20.0, -42, 34]
(x, y, z) = tuple(posmm)
# coordinates in voxel space (in homogeneous coordinates)
posvox = np.dot(iM, posmm + [1])
# We grab the spatial part of the output. Since we want to use it as an
# index, we need to make it a tuple of integers
i, j, k = tuple(np.round(posvox[:3]).astype(int))
print(i, j, k)
vdata = img.get_fdata()[i, j, k + 2, :]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
ax1.plot(vdata)
ax1.set_title('the time series')
# show the axial slice through this voxel at time t = 0
t = 0
ax2.imshow(img.get_fdata()[:, :, k, t], interpolation='nearest')
ax2.set_title('the slice at k = %d (z = %d mm)' % (k, z))

x0 = np.asarray([1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1])
x1 = np.ones(12)
X = np.vstack((x0, x1)).T
print(X)
X_firstmodel = X.copy()

# take only the first 12 values of the data;
# scale the voxel data for better visual display: make its mean == 3.0
Y = vdata[:12]
Y = (Y - Y.mean()) / np.std(Y) + 3.0
vdata = Y
pinvX = lin.pinv(X)
betah = pinvX.dot(Y)
Yfitted = X.dot(betah)
resid = Y - Yfitted
print("Y:\n", Y, "\nresid:\n", resid, "\nbetah:\n", betah)
print("mean of Y: ", np.mean(Y), "\t mean of resid: ", np.mean(resid))


# make this a little function as we will be reusing it:
def glm(X, Y):
    """ A simple GLM function returning the estimated parameters and residuals """
    betah = lin.pinv(X).dot(Y)
    Yfitted = X.dot(betah)
    resid = Y - Yfitted
    return betah, Yfitted, resid

# Plot the results
x = np.arange(12)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.plot(x, Y, 'r-', x, Yfitted, 'b-.')
ax2.plot(x, resid, 'g--')


# Again, we will want to reuse this code, so let's make a tiny function:
def plot_glm(Y, Yf, r):
    x = np.arange(Y.shape[0])
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    ax1.plot(x, Y, 'r-', x, Yf, 'b-.')
    ax1.set_title('Y (red) Y fitted (blue)')
    ax2.plot(x, r, 'g--')
    ax2.set_title('residuals')

vdata = vdata[:12]
Y = vdata * 1.
print(np.mean(Y))

x0 = np.asarray([1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1])
x1 = np.hstack((np.ones(6), np.zeros(6)))
x2 = np.hstack((np.zeros(6), np.ones(6)))
x3 = np.ones(12)
X = np.vstack((x0, x1, x2, x3)).T
print(X)
second_X = X

# Add some signal to our voxel, for the sake of the example:
Y += 0.5 * np.mean(Y) * x1 - 0.5 * np.mean(Y) * x2
print(Y, np.mean(Y))
beta, Yfitted, resid = glm(X, Y)
print(beta, np.mean(Y))  # (beta[1]/2 + beta[2]/2 + beta[3])

# Plot the results
plot_glm(Y, Yfitted, resid)

print(X_firstmodel)
glm_result = glm(X_firstmodel, Y)
plot_glm(Y, glm_result[1], glm_result[2])
print(glm_result[0])  # the betas

X = X_firstmodel
x2 = np.hstack((np.ones(6), -np.ones(6)))
X_ = np.vstack((X.T, x2)).T
print(X_.T.dot(X_))
glm_result = glm(X_, Y)  # original data
plot_glm(Y, glm_result[1], glm_result[2])
print(glm_result[0])  # the betas
X = second_X

f, (a1, a2, a3) = plt.subplots(1, 3, figsize=(10, 3))
a1.imshow(Y[:, np.newaxis], interpolation='nearest', cmap='gray')
a2.imshow(X, interpolation='nearest', cmap='gray')
a3.imshow(resid[:, np.newaxis], interpolation='nearest', cmap='gray')
titles = ['Y', 'X', 'e']
for ix, a in enumerate([a1, a2, a3]):
    a.set_xticklabels([])
    a.set_title(titles[ix])
    # a.set_yticklabels([])

print(X)
betah, Yfitted, resid = glm(X, Y)  # fit the model
RSS = np.sum((Y - Yfitted) ** 2)   # same as np.sum(resid ** 2)
MRSS = RSS / (len(Y) - np.linalg.matrix_rank(X))
print("RSS :", RSS)
print("MRSS :", MRSS)
print(betah)  # checking

cT = np.asarray([0, 1, -1, 0])
c = cT[:, np.newaxis]
pXTX = np.linalg.pinv(X.T.dot(X))
t_num = cT.dot(betah)
SE = np.sqrt(MRSS * cT.dot(pXTX).dot(c))
t = t_num / SE
print(t)

from scipy.stats import t as tdist, norm as ndist
# degrees of freedom: number of observations minus the rank of the design
df = len(Y) - np.linalg.matrix_rank(X)
pvalue = 1.0 - tdist.cdf(t, df)
print("t-value = ", t, "p-value = ", pvalue)
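Since we will be testing more contrasts, here is a small reusable helper (an addition to the original notes; the name t_contrast is ours) that mirrors the computation above:

def t_contrast(cvec, betah, resid, X):
    """ t statistic and one-sided p-value for the contrast cvec.dot(betah) == 0 """
    cvec = np.asarray(cvec)
    df = len(resid) - np.linalg.matrix_rank(X)
    MRSS = np.sum(resid ** 2) / df
    SE = np.sqrt(MRSS * cvec.dot(np.linalg.pinv(X.T.dot(X))).dot(cvec))
    tstat = cvec.dot(betah) / SE
    # double the tail probability of |t| for a two-sided test
    return tstat, 1.0 - tdist.cdf(tstat, df)

print(t_contrast([0, 1, -1, 0], betah, resid, X))  # should reproduce t and p above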
# what if t had been 1.8?
t = 1.8
pvalue = 1.0 - tdist.cdf(t, df)
print("t-value = ", t, "p-value = ", pvalue)

print(X)
X0 = X[:, [0, 3]]
betah, Yfitted, resid = glm(X, Y)
betah0, Yfitted0, resid0 = glm(X0, Y)
PX = X.dot(lin.pinv(X))
RX = np.eye(X.shape[0]) - PX
PX0 = X0.dot(lin.pinv(X0))
F_num = np.sum(resid0 ** 2) - np.sum(resid ** 2)
nu1 = np.trace(PX - PX0)
F_den = np.sum(resid ** 2)
nu2 = np.trace(RX)
F = (F_num / nu1) / (F_den / nu2)
print(F, nu1, nu2, F_num, F_den)

from scipy.stats import f as Fdist
print(Fdist.sf(F, nu1, nu2))


def t_test(betah, resid, X):
    """ Test the parameters betah one by one - this assumes they are
    estimable (X full rank)

    betah : (p, 1) estimated parameters
    resid : (n, 1) estimated residuals
    X : design matrix
    """
    RSS = np.sum(resid ** 2)
    n = resid.shape[0]
    q = np.linalg.matrix_rank(X)
    df = n - q
    MRSS = RSS / df
    XTX = np.linalg.pinv(X.T.dot(X))
    tval = np.zeros(betah.shape)
    pval = np.zeros(betah.shape)
    for idx, beta in enumerate(betah):
        c = np.zeros(betah.shape)
        c[idx] = 1
        t_num = c.T.dot(betah)
        SE = np.sqrt(MRSS * c.T.dot(XTX).dot(c))
        tval[idx] = t_num / SE
        pval[idx] = 1.0 - tdist.cdf(tval[idx], df)
    return tval, pval

# print(t_test(betah, resid, X))

n = 20
x = np.random.randn(n, 1)
X = np.hstack((x, np.ones(x.shape)))
print(X.shape)
m_y = 3.14156
e = np.random.randn(n, 1)
y = 1.4 * x + m_y + e
plt.plot(x, y, 'o')

betah, Yfitted, resid = glm(X, y)
t, p = t_test(betah, resid, X)
print(" betah = \n", betah)
print(" t = \n", t)
print(" p = \n", p)

# do this the other way: let's have y be the explanatory variable:
X2 = np.hstack((y - m_y, np.ones(x.shape)))
y2 = x
betah2, Yfitted2, resid2 = glm(X2, y2)
print("compare the slopes : \t\t", 1. / betah2[0, 0], betah[0, 0])
print("compare the residual variance:\t",
      resid.T.dot(resid)[0, 0], resid2.T.dot(resid2)[0, 0])
t2, p2 = t_test(betah2, resid2, X2)
print("betah2 : \t\t\t", betah2[:, 0])
print("compare t values : \n", np.hstack((t, t2)))
print("compare p values : \n", np.hstack((p, p2)))


# Fisher z transform of the correlation coefficient:
def fisher_transf(rho):
    """ Take a coefficient of correlation and z-transform it;
    see en.wikipedia.org/wiki/Fisher_transformation
    """
    return 0.5 * np.log((1. + rho) / (1. - rho))

corr = np.corrcoef(x[:, 0], y[:, 0])
z = fisher_transf(corr[0, 1])
p_fisher = 1. - ndist.cdf(z, 0, 1. / np.sqrt(n - 3))
print(p_fisher)
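As a cross-check (another small addition), the slope t statistic of a simple regression with intercept is tied to the correlation coefficient by the standard identity t = r * sqrt(n - 2) / sqrt(1 - r**2):

r = corr[0, 1]
t_from_r = r * np.sqrt(n - 2) / np.sqrt(1 - r ** 2)
print(t_from_r, t[0, 0])  # the two t values should agree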
# An example of a t-test with 20 "voxels"
from IPython import display as d


def F_test_reducedX(X, X0, Y):
    """ F test comparing the full model X against the reduced model X0 """
    betah, Yfitted, resid = glm(X, Y)
    betah0, Yfitted0, resid0 = glm(X0, Y)
    PX = X.dot(lin.pinv(X))
    RX = np.eye(X.shape[0]) - PX
    PX0 = X0.dot(lin.pinv(X0))
    F_num = np.sum(resid0 ** 2) - np.sum(resid ** 2)
    nu1 = np.trace(PX - PX0)
    F_den = np.sum(resid ** 2)
    nu2 = np.trace(RX)
    F = (F_num / nu1) / (F_den / nu2)
    return F, Fdist.sf(F, nu1, nu2), nu1, nu2

# Set up a model with two regressors - first uncorrelated
n = 20
age = np.random.randn(n, 1)
likes_pies = np.random.randn(n, 1)
x0 = age
# make likes_pies orthogonal to age: regress out the age component
x1 = likes_pies - (likes_pies.T.dot(age) / (age.T.dot(age))) * age
print(x0.T.dot(x1))  # check orthogonality: should be ~0
x2 = np.ones((n, 1))
X = np.hstack((x0, x1, x2))
print(X.shape)
m_y = np.pi
sig_e = 1.5
e = sig_e * np.random.normal(size=(n, 1))
y = X.dot(np.array([1.0, 1.0, m_y])[:, np.newaxis]) + e
betah, yfitted, resid = glm(X, y)
plot_glm(y, yfitted, resid)
t, p = t_test(betah, resid, X)
F, pF, nu1, nu2 = F_test_reducedX(X, X[:, [2]], y)
print(betah, "\n", t, "\n", p, "\n", F, nu1, nu2, pF)

# Set up a model with two regressors - now correlated
n = 20
age = np.random.randn(n, 1)
likes_pies = np.random.randn(n, 1)
x0 = age + .1 * likes_pies
x1 = age - .1 * likes_pies
# can you tell what the expected correlation is?
print(np.corrcoef(x0.T, x1.T)[0, 1])
x2 = np.ones((n, 1))
X = np.hstack((x0, x1, x2))
m_y = np.pi
sig_e = 1.5
e = sig_e * np.random.normal(size=(n, 1))
y = X.dot(np.array([1.0, 1.0, m_y])[:, np.newaxis]) + e
betah, yfitted, resid = glm(X, y)
plot_glm(y, yfitted, resid)
t, p = t_test(betah, resid, X)
F, pF, nu1, nu2 = F_test_reducedX(X, x2, y)
print(betah, "\n", t, "\n", p, "\n", F, nu1, nu2, pF)

HOME = os.path.expanduser('~')
d.Image(filename=pjoin(HOME, 'code', 'pna-notebooks', 't_F_test.jpg'))
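Why does the correlated design behave so differently? Here is a small simulation sketch (our addition, reusing the glm function from above; the helper name beta0_spread is ours) comparing the sampling spread of the first parameter estimate under the two designs:

def beta0_spread(make_x01, n=20, n_sims=1000, sig_e=1.5):
    """ Standard deviation of the first estimated beta over n_sims
    simulated datasets; make_x01 builds the two regressors. """
    betas = np.zeros(n_sims)
    for s in range(n_sims):
        age = np.random.randn(n, 1)
        pies = np.random.randn(n, 1)
        x0_s, x1_s = make_x01(age, pies)
        X_sim = np.hstack((x0_s, x1_s, np.ones((n, 1))))
        y_sim = X_sim.dot(np.array([1.0, 1.0, np.pi])[:, np.newaxis])
        y_sim = y_sim + sig_e * np.random.normal(size=(n, 1))
        betas[s] = glm(X_sim, y_sim)[0][0, 0]
    return betas.std()

# orthogonalized pair vs the deliberately correlated pair used above:
print(beta0_spread(lambda a, pies: (a, pies - (pies.T.dot(a) / a.T.dot(a)) * a)))
print(beta0_spread(lambda a, pies: (a + .1 * pies, a - .1 * pies)))
# expect a much larger spread for the correlated design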