from __future__ import division  # so that / is true division even under Python 2
import numpy as np
import matplotlib.pyplot as pl
%matplotlib inline
"""
This is code for simple GP regression. It assumes a zero-mean GP prior.
""";

# This is the true unknown function we are trying to approximate.
f = lambda x: np.sin(0.9*x).flatten()  # flatten makes the output 1-D
#f = lambda x: (0.25*(x**2)).flatten()

np.random.seed(123)  # fix the seed for a reproducible experiment; comment out for fresh draws

# Define the kernel
def kernel(x, xp):
    """GP squared-exponential kernel; theta is the squared length-scale."""
    theta = 0.1
    # squared Euclidean distance between every row of x and every row of xp
    sqdist = np.sum(x**2, 1).reshape(-1, 1) + np.sum(xp**2, 1) - 2*np.dot(x, xp.T)
    return np.exp(-.5 * (1/theta) * sqdist)

# quick sanity checks: both calls should return a (2, 3) kernel matrix,
# for 1-dimensional and 3-dimensional input points respectively
kernel(np.array([[1], [2]]), np.array([[3], [2], [1]]))
kernel(np.array([[1, 0, 0], [2, 0, 0]]), np.array([[3, 0, 0], [2, 0, 0], [1, 0, 0]]))

n = 10    # number of training points
N = 200   # number of test points (the "starred" points)
s = 0.05  # noise standard deviation (so s**2 is the noise variance)

# Sample some input points and noisy versions of the function evaluated at
# these points.
X = np.random.uniform(-5, 5, size=(n, 1))  # "training" inputs
y = f(X) + s*np.random.randn(n)

K = kernel(X, X)

# Points we're going to make predictions at.
Xtest = np.linspace(-5, 5, N).reshape(-1, 1)
Ktest = kernel(X, Xtest)  # K_*, shape (n, N)

# Noise variance added to the diagonal; it is part of the model and it also
# stabilises the Cholesky factorisation numerically.
L = np.linalg.cholesky(K + s**2*np.eye(n))

# Compute the mean at our test points.
u = np.linalg.solve(L, Ktest)  # solves L u = K_*, i.e. u = L^{-1} K_*
v = np.linalg.solve(L, y)      # v = L^{-1} y
mu = np.dot(u.T, v)            # K_*^T (K + s^2 I)^{-1} y
# Another way to calculate the same thing:
#w = np.linalg.solve(L.T, v)
#mu = np.dot(Ktest.T, w)

# Compute the variance at our test points:
# var = diag(K_**) - diag(K_*^T (K + s^2 I)^{-1} K_*); since u = L^{-1} K_*,
# the second term is diag(u^T u), i.e. the column-wise sum of u**2.
Kstarstar = kernel(Xtest, Xtest)
s2 = np.diag(Kstarstar) - np.sum(u**2, axis=0)
ss = np.sqrt(s2)

# PLOTS:
import seaborn as sns  # note: sns.tsplot was removed in recent seaborn versions
pl.figure(1)
pl.clf()
pl.gca().fill_between(Xtest.flat, mu - 2*ss, mu + 2*ss, color="#dddddd")
pl.scatter(X, y, s=200, marker='+', c='black')
pl.plot(Xtest, f(Xtest), 'b-')
pl.plot(Xtest, mu, 'r--', lw=2)
pl.title('Mean prediction plus/minus 2 standard deviations')
pl.axis([-5, 5, -3, 3])
#pl.savefig('predictive.png', bbox_inches='tight')
pl.show()

# Draw samples from the prior at our test points.
npr_samples = 20
L = np.linalg.cholesky(Kstarstar + 1e-6*np.eye(N))  # jitter keeps the matrix positive definite
f_prior = np.dot(L, np.random.normal(size=(N, npr_samples)))  # notice no data goes into f_prior
pl.figure(2)
pl.clf()
#pl.plot(Xtest, f_prior)
sns.tsplot(f_prior.T, time=Xtest.flatten(), err_style='unit_points')
pl.title(str(npr_samples) + ' samples from the GP prior')
pl.axis([-5, 5, -3, 3])
#pl.savefig('prior.png', bbox_inches='tight')
pl.show()

# Draw samples from the posterior at the test points.
# Commented out is a more direct (but less stable) way to get L:
#L = np.linalg.cholesky(Kstarstar
#                       - np.dot(np.dot(kernel(X, Xtest).T,
#                                       np.linalg.inv(K + s**2*np.eye(n))),
#                                kernel(X, Xtest))
#                       + 1e-6*np.eye(N))
nps_samples = 20
L = np.linalg.cholesky(Kstarstar + 1e-6*np.eye(N) - np.dot(u.T, u))
f_post = mu.reshape(-1, 1) + np.dot(L, np.random.normal(size=(N, nps_samples)))
pl.figure(3)
pl.clf()
pl.plot(Xtest, f(Xtest), 'b-')
#pl.plot(Xtest, f_post)  # plain matplotlib alternative
sns.tsplot(f_post.T, time=Xtest.flatten(), err_style='unit_points')
# thicker line width and a high zorder make the '+' markers visible on top of the samples
pl.scatter(X, y, s=1000, marker='+', c='red', linewidths=3, zorder=10, alpha=1)
#pl.plot(Xtest, mu, 'r--', lw=2)
#^^^ the theoretical mu is close to the mean estimated by tsplot from the samples
pl.title(str(nps_samples) + ' samples from the GP posterior')
pl.axis([-5, 5, -3, 3])
#pl.savefig('post.png', bbox_inches='tight')
pl.show()
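
# Optional cross-check (not part of the original demo): the Cholesky-based mean and
# variance above should match the textbook closed-form expressions computed with an
# explicit matrix inverse. This is only a sanity-check sketch; it reuses K, Ktest,
# Kstarstar, y, s, n, mu and s2 from above, and the inverse-based route is slower and
# less numerically stable, so it is not how you would compute the posterior in practice.
Kinv = np.linalg.inv(K + s**2*np.eye(n))          # (K + s^2 I)^{-1}
mu_direct = np.dot(np.dot(Ktest.T, Kinv), y)      # K_*^T (K + s^2 I)^{-1} y
cov_direct = Kstarstar - np.dot(np.dot(Ktest.T, Kinv), Ktest)
s2_direct = np.diag(cov_direct)                   # pointwise predictive variance
print(np.allclose(mu, mu_direct), np.allclose(s2, s2_direct, atol=1e-8))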