%pylab inline
import urllib
url = ("http://staffwww.dcs.shef.ac.uk/"
       + "people/N.Lawrence/dataset_mirror/"
       + "olympic_marathon_men/olympicMarathonTimes.csv")
urllib.urlretrieve(url, 'olympicMarathonTimes.csv')
olympics = np.loadtxt('olympicMarathonTimes.csv', delimiter=',')
x = olympics[:, 0:1]
y = olympics[:, 1:2]
plt.plot(x, y, 'rx')

# set prior variance on w
alpha = 4.
# set the order of the polynomial basis set
order = 5
# set the noise variance
sigma2 = 0.01

num_data = x.shape[0]
num_pred_data = 100 # how many points to use for plotting predictions
x_pred = linspace(1890, 2016, num_pred_data)[:, None] # input locations for predictions

# build the basis set
Phi = np.zeros((num_data, order+1))
Phi_pred = np.zeros((num_pred_data, order+1))
for i in xrange(0, order+1):
    Phi[:, i:i+1] = x**i
    Phi_pred[:, i:i+1] = x_pred**i

w_vec = np.random.normal(size=200)
print 'w sample mean is ', w_vec.mean()
print 'w sample variance is ', w_vec.var()

phi = 7
f_vec = phi*w_vec
print 'True mean should be phi*0 = 0.'
print 'True variance should be phi*phi*1 = ', phi*phi
print 'f sample mean is ', f_vec.mean()
print 'f sample variance is ', f_vec.var()

mu = 4. # mean of the distribution
alpha = 2. # variance of the distribution (a float, to avoid integer division below)
w_vec = np.random.normal(size=200)*np.sqrt(alpha) + mu
print 'w sample mean is ', w_vec.mean()
print 'w sample variance is ', w_vec.var()

# first the standard normal; by convention, in statistics, z is often
# used to denote samples from the standard normal
z_vec = np.random.normal(size=1000)
w_vec = z_vec*np.sqrt(alpha) + mu
# plot normalized histogram of w, and then normalized histogram of z on top
plt.hist(w_vec, bins=30, normed=True)
plt.hist(z_vec, bins=30, normed=True)
plt.legend(('$w$', '$z$'))

K = order + 1
z_vec = np.random.normal(size=K)
w_sample = z_vec*np.sqrt(alpha)
print w_sample

f_sample = np.dot(Phi_pred, w_sample)
plt.plot(x_pred.flatten(), f_sample.flatten(), 'r-')

# rescale the inputs to lie in [-1, 1] so the polynomial basis is better conditioned
span = np.max(x) - np.min(x)
offset = np.min(x)
x -= offset
x_pred -= offset
x /= span # x is now between zero and 1
x_pred /= span
x = x*2 - 1 # x is now between -1 and 1
x_pred = x_pred*2 - 1

# rebuild the basis set
Phi = np.zeros((num_data, order+1))
Phi_pred = np.zeros((num_pred_data, order+1))
for i in xrange(0, order+1):
    Phi[:, i:i+1] = x**i
    Phi_pred[:, i:i+1] = x_pred**i

f_sample = np.dot(Phi_pred, w_sample)
plt.plot(x_pred.flatten(), f_sample.flatten(), 'r-')

num_samples = 10
K = order + 1
for i in xrange(num_samples):
    z_vec = np.random.normal(size=K)
    w_sample = z_vec*np.sqrt(alpha)
    f_sample = np.dot(Phi_pred, w_sample)
    plt.plot(x_pred.flatten(), f_sample.flatten())

# compute the posterior covariance and mean
w_cov = np.linalg.inv(1/sigma2*np.dot(Phi.T, Phi) + 1/alpha*np.eye(order+1))
w_mean = np.dot(w_cov, 1/sigma2*np.dot(Phi.T, y))

w_sample = np.random.multivariate_normal(w_mean.flatten(), w_cov)
f_sample = np.dot(Phi_pred, w_sample)
plt.plot(x_pred.flatten(), f_sample.flatten(), 'r-')
plt.plot(x, y, 'rx') # plot data to show fit.

for i in xrange(num_samples):
    w_sample = np.random.multivariate_normal(w_mean.flatten(), w_cov)
    f_sample = np.dot(Phi_pred, w_sample)
    plt.plot(x_pred.flatten(), f_sample.flatten())
plt.plot(x, y, 'rx') # plot data to show fit.
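# A quick numerical aside (a sketch, not part of the original lab): the
# posterior above is formed with an explicit matrix inverse; solving the
# linear system with np.linalg.solve is usually more stable. This assumes
# Phi, y, alpha and sigma2 as defined above.
A = 1/sigma2*np.dot(Phi.T, Phi) + 1/alpha*np.eye(order+1)
w_mean_solve = np.linalg.solve(A, 1/sigma2*np.dot(Phi.T, y))
print 'largest difference from the inverse-based mean: ', np.abs(w_mean_solve - w_mean).max()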
K = 10 # how many Gaussians to add.
num_samples = 1000 # how many samples to have in y_vec
mus = np.linspace(0, 5, K) # mean values generated linearly spaced between 0 and 5
sigmas = np.linspace(0.5, 2, K) # sigmas generated linearly spaced between 0.5 and 2
y_vec = np.zeros(num_samples)
for mu, sigma in zip(mus, sigmas):
    z_vec = np.random.normal(size=num_samples) # z is from standard normal
    y_vec += z_vec*sigma + mu # add z*sigma + mu to y
# now y_vec is the sum of each scaled and offset z.
print 'Sample mean is ', y_vec.mean(), ' and sample variance is ', y_vec.var()
print 'True mean should be ', mus.sum()
print 'True variance should be ', (sigmas**2).sum(), ' standard deviation ', np.sqrt((sigmas**2).sum())

plt.hist(y_vec, bins=30, normed=True)
plt.legend(['$y$'])

# compute mean under posterior density
f_pred_mean = np.dot(Phi_pred, w_mean)

# plot the predictions
plt.plot(x_pred, f_pred_mean)
plt.plot(x, y, 'rx')
ax = plt.gca()
ax.set_title('Predictions for Order ' + str(order))
ax.set_xlabel('year')
ax.set_ylabel('pace (min/km)')

# compute the posterior mean at the training points
f_mean = np.dot(Phi, w_mean)

# compute the sum of squares term
sum_squares = ((y - f_mean)**2).sum()
# compute the error (the negative log likelihood, up to a constant); note
# num_data/2. uses float division to avoid Python 2 integer division
error = (num_data/2.*np.log(sigma2) + sum_squares/(2*sigma2))
print 'The error is: ', error

# compute the error bars
f_pred_cov = np.dot(Phi_pred, np.dot(w_cov, Phi_pred.T))
f_pred_var = np.diag(f_pred_cov)[:, None]
f_pred_std = np.sqrt(f_pred_var)

# plot mean, and error bars at 2 standard deviations
plt.plot(x_pred.flatten(), f_pred_mean.flatten(), 'b-')
plt.plot(x_pred.flatten(), (f_pred_mean + 2*f_pred_std).flatten(), 'b--')
plt.plot(x_pred.flatten(), (f_pred_mean - 2*f_pred_std).flatten(), 'b--')
plt.plot(x, y, 'rx')

# set prior variance on w
alpha = 4.
# set the order of the polynomial basis set
order = 5
# set the noise variance
sigma2 = 0.01

span = np.max(x) - np.min(x)
offset = np.min(x)
x -= offset
x /= span # x is now between zero and 1
x = x*2 - 1 # x is now between -1 and 1

num_data = x.shape[0]
num_pred_data = 100 # how many points to use for plotting predictions
x_pred = linspace(-1.2, 1.2, num_pred_data)[:, None] # input locations for predictions

# build the basis set
Phi = np.zeros((num_data, order+1))
Phi_pred = np.zeros((num_pred_data, order+1))
for i in xrange(0, order+1):
    Phi[:, i:i+1] = x**i
    Phi_pred[:, i:i+1] = x_pred**i

num_samples = 10
K = order + 1
for i in xrange(num_samples):
    z_vec = np.random.normal(size=K)
    w_sample = z_vec*np.sqrt(alpha)
    f_sample = np.dot(Phi_pred, w_sample)
    plt.plot(x_pred.flatten(), f_sample.flatten())

# sample directly in function space: the covariance of f = Phi w is alpha*Phi Phi^T
K = alpha*np.dot(Phi_pred, Phi_pred.T)
for i in xrange(10):
    f_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), f_sample.flatten())

plt.imshow(K)
plt.colorbar()

# add noise to obtain samples of y rather than f
K = alpha*np.dot(Phi_pred, Phi_pred.T) + sigma2*np.eye(x_pred.size)
for i in xrange(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())
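# A quick sanity check (a sketch, not part of the original lab): samples
# drawn in weight space, f = np.dot(Phi_pred, w) with w ~ N(0, alpha*I),
# should have covariance alpha*np.dot(Phi_pred, Phi_pred.T). We confirm
# this empirically with a large batch of weight samples.
num_check = 10000
W = np.random.normal(size=(order+1, num_check))*np.sqrt(alpha)
F = np.dot(Phi_pred, W) # each column is one sampled function
K_empirical = np.cov(F) # rows of F are variables, columns are samples
K_weightspace = alpha*np.dot(Phi_pred, Phi_pred.T)
print 'largest deviation from the weight-space covariance: ', np.abs(K_empirical - K_weightspace).max()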
# increase the noise variance and sample y again
sigma2 = 1.
K = alpha*np.dot(Phi_pred, Phi_pred.T) + sigma2*np.eye(x_pred.size)
for i in xrange(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())

# reload the olympic marathon data
import urllib
url = ("http://staffwww.dcs.shef.ac.uk/"
       + "people/N.Lawrence/dataset_mirror/"
       + "olympic_marathon_men/olympicMarathonTimes.csv")
urllib.urlretrieve(url, 'olympicMarathonTimes.csv')
olympics = np.loadtxt('olympicMarathonTimes.csv', delimiter=',')
x = olympics[:, 0:1]

#x_pred = linspace(1892, 2016, num_pred_data)[:, None]
x_pred = linspace(0, 4, num_pred_data)[:, None]

# a covariance function combining exponentiated quadratic, Brownian (min)
# and linear terms; note the variance argument is unused --- the weights
# of the three terms are hard coded
def kern(x, xprime, variance=1.0, lengthscale=1.0):
    return 64*np.exp((-0.5*(x - xprime)**2)/lengthscale**2) + 16*min(x, xprime) + 64*x*xprime

alpha = 1.0
lengthscale = 1.
K = np.zeros((x_pred.size, x_pred.size))
for i in xrange(x_pred.size):
    for j in xrange(x_pred.size):
        K[i, j] = kern(x_pred[i], x_pred[j], variance=alpha, lengthscale=lengthscale)

plt.imshow(K, interpolation='none')
plt.colorbar()

for i in xrange(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())

import urllib
url = ("http://staffwww.dcs.shef.ac.uk/"
       + "people/N.Lawrence/dataset_mirror/"
       + "olympic_marathon_men/olympicMarathonTimes.csv")
urllib.urlretrieve(url, 'olympicMarathonTimes.csv')
olympics = np.loadtxt('olympicMarathonTimes.csv', delimiter=',')
x = olympics[:, 0:1]
x_pred = linspace(1892, 2016, num_pred_data)[:, None]

alpha = 1.0
lengthscale = 4.
K = np.zeros((x_pred.size, x_pred.size))
for i in xrange(x_pred.size):
    for j in xrange(x_pred.size):
        K[i, j] = kern(x_pred[i], x_pred[j], variance=alpha, lengthscale=lengthscale)

for i in xrange(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())

# set covariance function parameters
alpha = 16.0
lengthscale = 8.
# set noise variance
sigma2 = 0.05

# compute covariance for training data, x
K = np.zeros((x.size, x.size))
for i in xrange(x.size):
    for j in xrange(x.size):
        K[i, j] = kern(x[i], x[j], variance=alpha, lengthscale=lengthscale)

# compute covariance between training data, x, and test data, x_pred
K_star = np.zeros((x.size, x_pred.size))
for i in xrange(x.size):
    for j in xrange(x_pred.size):
        K_star[i, j] = kern(x[i], x_pred[j], variance=alpha, lengthscale=lengthscale)

# compute covariance for test data, x_pred
K_starstar = np.zeros((x_pred.size, x_pred.size))
for i in xrange(x_pred.size):
    for j in xrange(x_pred.size):
        K_starstar[i, j] = kern(x_pred[i], x_pred[j], variance=alpha, lengthscale=lengthscale)

full_K = np.vstack([np.hstack([K, K_star]),
                    np.hstack([K_star.T, K_starstar])])
plt.imshow(full_K)
plt.colorbar()

# compute the posterior mean and covariance of the function at the test inputs
a = np.dot(np.linalg.inv(K + sigma2*np.eye(x.size)), K_star)
mu_f = np.dot(a.T, y)
C_f = K_starstar - np.dot(a.T, K_star)

plt.imshow(C_f)
plt.colorbar()

plt.plot(x, y, 'rx')
plt.plot(x_pred, mu_f, 'b-')

var_f = np.diag(C_f)[:, None]
std_f = np.sqrt(var_f)

plt.plot(x, y, 'rx')
plt.plot(x_pred, mu_f, 'b-')
plt.plot(x_pred, mu_f + 2*std_f, 'b--')
plt.plot(x_pred, mu_f - 2*std_f, 'b--')

%pylab inline
import numpy as np
import pylab as pb
import GPy

d = 1 # input dimension
var = 1. # variance
theta = 0.2 # lengthscale
k = GPy.kern.rbf(d, variance=var, lengthscale=theta) + GPy.kern.Brownian(1, variance=10)
print k
k.plot()
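# A small aside (a sketch, not part of the original lab): a GPy kernel
# object can be evaluated directly as a covariance matrix via k.K. Here we
# print the covariance of the summed kernel above on a hypothetical grid
# of five inputs.
Xgrid = np.linspace(0., 1., 5)[:, None]
print k.K(Xgrid, Xgrid) # 5x5 covariance under the rbf + Brownian kernel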
k = GPy.kern.rbf(d) # by default, the parameters are set to 1.
theta = np.asarray([0.2, 0.5, 1., 2., 4.])
for t in theta:
    k['.*lengthscale'] = t
    k.plot()
pb.legend(theta)

# Exercise 1 a) answer

# Exercise 1 b) answer

kb = GPy.kern.Brownian(input_dim=1)
inputs = np.array([2., 4.])
for x in inputs:
    kb.plot(x, plot_limits=[0, 5])
pb.legend(inputs)
pb.ylim(-0.1, 5.1)

k = GPy.kern.Matern52(input_dim=2)
X = np.random.rand(50, 2) # 50*2 matrix of iid uniform draws on [0, 1]
C = k.K(X, X)
eigvals = np.linalg.eigvals(C) # computes the eigenvalues of a matrix
pb.bar(np.arange(len(eigvals)), eigvals)
pb.title('Eigenvalues of the Matern 5/2 Covariance')

# Exercise 2 a) answer

# Exercise 2 b) answer

kern1 = GPy.kern.rbf(1, variance=1., lengthscale=2.)
kern2 = GPy.kern.Matern52(1, variance=2., lengthscale=4.)
kern = kern1 + kern2
print kern
kern.plot()

kern = kern1*kern2
print kern
kern.plot()

# the kernel choice here was left incomplete in the source; an
# exponentiated quadratic is assumed as a placeholder
k = GPy.kern.rbf(input_dim=1, lengthscale=0.2)
X = np.linspace(0., 1., 500) # 500 input points (assumed, to match the 500 means below)
X = X[:, None] # reshape X to make it n*p --- we try to use 'design matrices' in GPy
mu = np.zeros((500)) # vector of the means --- we could use a mean function here, but here it is just zero.
C = k.K(X, X) # compute the covariance matrix associated with inputs X
# generate 20 separate sample paths from a Gaussian with mean mu and covariance C
Z = np.random.multivariate_normal(mu, C, 20)
pb.figure() # open a new plotting window
for i in range(2): # plot the first two of the 20 sample paths
    pb.plot(X[:], Z[i, :])
pb.matshow(C)

# Try plotting sample paths here

# Exercise 3 answer

np.random.normal?

X = np.linspace(0.05, 0.95, 10)[:, None]
Y = -np.cos(np.pi*X) + np.sin(4*np.pi*X) + np.random.normal(loc=0.0, scale=0.1, size=(10, 1))
pb.figure()
pb.plot(X, Y, 'kx', mew=1.5)

k = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
m = GPy.models.GPRegression(X, Y, k)
print m
m.plot()

# Exercise 4 a) answer

# Exercise 4 b) answer

# Exercise 4 c) answer

m.constrain_positive('.*') # constrains all parameters matching .* to be positive, i.e. everything
m.optimize()
m.plot()
print m

GPy.util.datasets.authorize_download = True # prevents requesting authorization for download.
data = GPy.util.datasets.olympic_marathon_men()
print data['details']

X = data['X']
Y = data['Y']
pb.plot(X, Y, 'bx')
pb.xlabel('year')
pb.ylabel('marathon pace min/km')

# Exercise 5 a) answer
kern = GPy.kern.rbf(1, lengthscale=80)*GPy.kern.Matern52(1, lengthscale=10) + GPy.kern.bias(1)
model = GPy.models.GPRegression(X, Y, kern)
model.optimize()
model.plot()

# Exercise 5 d) answer
model.log_likelihood()
print model

# Exercise 5 b) answer

# Exercise 5 c) answer

# Exercise 5 e) answer

# Definition of the Branin test function
def branin(X):
    y = (X[:, 1] - 5.1/(4*np.pi**2)*X[:, 0]**2 + 5*X[:, 0]/np.pi - 6)**2
    y += 10*(1 - 1/(8*np.pi))*np.cos(X[:, 0]) + 10
    return y

# Training set defined as a 5*5 grid:
xg1 = np.linspace(-5, 10, 5)
xg2 = np.linspace(0, 15, 5)
X = np.zeros((xg1.size * xg2.size, 2))
for i, x1 in enumerate(xg1):
    for j, x2 in enumerate(xg2):
        X[i + xg1.size*j, :] = [x1, x2]
Y = branin(X)[:, None]

# Fit a GP
# Create an exponentiated quadratic plus bias covariance function
kg = GPy.kern.rbf(input_dim=2, ARD=True)
kb = GPy.kern.bias(input_dim=2)
k = kg + kb

# Build a GP model
m = GPy.models.GPRegression(X, Y, k, normalize_Y=True)

# constrain parameters to be bounded
m.constrain_bounded('.*rbf_var', 1e-3, 1e5)
m.constrain_bounded('.*bias_var', 1e-3, 1e5)
m.constrain_bounded('.*rbf_len', .1, 200.)

# fix the noise variance
m.constrain_fixed('.*noise', 1e-5)

# Randomize the model and optimize
m.randomize()
m.optimize()
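# A quick check (a sketch, not part of the original lab): compare the
# fitted model's predictions with the true Branin values at a few
# hypothetical inputs that do not lie on the 5*5 training grid; m.predict
# returns the predictive mean as its first output, as used below.
X_check = np.array([[2.5, 7.5], [-2.5, 3.75], [7.5, 11.25]])
print 'true Branin values: ', branin(X_check)
print 'GP predictive means:', m.predict(X_check)[0].flatten()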
# Plot the resulting approximation to Branin
# Here you get a two-d plot because the function is two dimensional.
m.plot()

# Compute the mean of the model prediction on 1e5 Monte Carlo samples
Xp = np.random.uniform(size=(int(1e5), 2))
Xp[:, 0] = Xp[:, 0]*15 - 5
Xp[:, 1] = Xp[:, 1]*15
Yp = m.predict(Xp)[0]
np.mean(Yp)
# (a rough error bar on this estimate is sketched after the exercise stubs below)

# Exercise 6 a) answer

# Exercise 6 b) answer

# Exercise 7 a) answer

# Exercise 7 b) answer

# Exercise 7 c) answer

# Exercise 8 a) answer

# Exercise 8 b) answer
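# A rough error bar for the Monte Carlo estimate above (a sketch, not part
# of the original lab): the standard error of a sample mean is std/sqrt(n),
# so a two-standard-error interval around the estimate is
print 'MC estimate of the mean: ', np.mean(Yp), ' +/- ', 2*np.std(Yp)/np.sqrt(Yp.size)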