def linear_function(x,a,b):
    """ 
    A linear function
    
    Parameters
    ----------
    x : ndarray
        Input variable
    
    a : float
        offset parameter
    
    b : float 
        slope parameter
    """
    return a + b * x 


N = 100
x = np.linspace(-pi, pi, N)

a = np.random.randn() * 10
b = np.random.randn() * 10
y = linear_function(x, a, b)

fig, ax = plt.subplots(1)
ax.plot(x, y, '.')
ax.set_xlabel('x')
ax.set_ylabel('y')


t = np.linspace(-pi, pi, N)
x = np.sin(t)

fig, ax = plt.subplots(1)
ax.plot(x)

y = linear_function(x, a, b)
fig, ax = plt.subplots(1)
ax.plot(y, '.')

fig, ax = plt.subplots(1)
ax.plot(x, y, '.')
ax.set_xlabel('x')
ax.set_ylabel('y')

p = 10  # Number of inputs

# Preallocate a matrix (make it a matrix of ones to account for beta_0):
X = np.ones((N, p)) 
for ii in range(1, p):
    X[:, ii] = np.sin((ii+1)*t)
    

fig, ax = plt.subplots(1) 
ax.plot(t,X)
ax.set_ylim([-1.1, 1.1])
ax.set_xlim([-pi, pi])
ax.set_xlabel('t')
ax.set_ylabel('x')

def linear_model(X, beta):
    """ 
    The linear model

    Parameters
    ----------
    X : 2d array
        The design matrix : a matrix of regressors
    
    beta : 1d array 
        Model coefficients
    """ 
    return np.dot(X, beta)
    

A = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print A.shape
B = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
print B.shape

C = np.dot(A,B)
print C.shape
print C[2,1]

D = np.dot(B,A)
print D.shape
print D[2,1]

I = np.eye(4)
IB = np.dot(I, B)

beta = np.random.randn(p)
f = linear_model(X, beta)
fig, ax = plt.subplots(1)
ax.plot(t, f)
ax.set_xlim([-pi, pi])

fig, ax = plt.subplots(1)
ax.plot(X[:, 1], y, '.')

y = f + np.random.randn(*f.shape)

fig, ax = plt.subplots(1)
ax.plot(f, label='f')
ax.plot(y, label='y')
plt.legend()

import scipy.linalg as la

U,D,V = la.svd(X, full_matrices = False) # We call full_matrices = False, to conform with the conventions in the text

# We convert D to a diagonal matrix:
D = np.diag(D)
fig, ax = plt.subplots(1) 
im = ax.matshow(D)
fig.colorbar(im)

fig, ax = plt.subplots(1) 
im = ax.matshow(np.dot(U.T, U))
fig.colorbar(im)

fig, ax = plt.subplots(1) 
im = ax.matshow(np.dot(V.T, V))
fig.colorbar(im)

np.allclose(np.dot(np.dot(U, D), V), X)

def ols(X):
    """
    The matrix which solves the OLS regression problem for full-rank X
    """
    return np.dot(la.pinv(np.dot(X.T, X)), X.T)

beta_hat = np.dot(ols(X), y)

plot(beta, beta_hat, 'o')
plt.xlabel('Real parameter value')
plt.ylabel('Estimate parameter value')

y_hat = np.dot(X, beta_hat)
fig, ax = plt.subplots(1)
ax.plot(y, label='$y$')
ax.plot(y_hat, label='$\hat{y}$')
plt.legend()

H = np.dot(X, ols(X))
H.shape
y_hat = np.dot(H.T, y)

plot(y, y_hat, 'o')
plt.xlabel('The original output') 
plt.ylabel('Linear estimate of the output')

y_hat = []

for ii in range(1000):
    y = f + np.random.randn(*f.shape)
    y_hat.append(np.dot(H, y))


plot(f)
plot(np.mean(y_hat,0))

y_hat = []

for ii in range(1000):
    y = f + np.random.rand(*f.shape)
    y_hat.append(np.dot(H, y))


plot(f)
plot(np.mean(y_hat,0))

plot(np.std(y_hat,0))

def ridge_regression_matrix(X, L):
    """" 
    Calculate the matrix for ridge regression
    
    Parameters 
    ----------
    X : 2d array
        The design matrix
    
    L : float
        ridge parameter for regularization    
    """
    return np.dot(la.pinv(np.dot(X.T,X) + L * np.eye(X.shape[-1])), X.T)

beta_ridge = np.dot(ridge_regression_matrix(X,10), y) 

fig, ax = plt.subplots(1)
ax.plot(beta_hat, beta_ridge, 'o')
ax.plot([-2, 2],[-2, 2],'k--')
ax.plot([-2, 2],[0, 0],'k--')
ax.plot([0, 0],[-2, 2],'k--')
ax.set_xlabel(r'$\beta(OLS)$')
ax.set_ylabel(r'$\beta(Ridge)$')


H_ridge = np.dot(X, ridge_regression_matrix(X,10))

y_hat = []
y_hat_ridge = []

for ii in range(1000):
    y = f + np.random.randn(*f.shape)
    y_hat.append(np.dot(H, y))
    y_hat_ridge.append(np.dot(H_ridge, y))

plot(f)
plot(np.mean(y_hat,0))
plot(np.mean(y_hat_ridge, 0))

plot(np.std(y_hat,0))
plot(np.std(y_hat_ridge,0))


x_beta_hat = np.dot(np.dot(U, U.T), y)
plot(y)
plot(x_beta_hat)

y_coords_in_U = np.dot(U.T, y)
plot(y_coords_in_U, beta_hat, '.')

plot(U)

matshow(np.dot(U.T, U)) # Shows that U is an orthonormal basis set

D_sq = np.dot(D, D)
lamb = 10
UD = np.dot(U,D)
Dsq_LI_inv = la.pinv(D_sq + lamb*np.eye(D_sq.shape[0]))
DUt = np.dot(D, U.T)
svdRR = np.dot(np.dot(UD, Dsq_LI_inv), DUt)
x_b_ridge_svd = np.dot(svdRR, y)

plot(f)
plot(x_b_ridge_svd)