%pylab inline
import numpy as np
import pylab as pb
import GPy

GPy.util.datasets.authorize_download = lambda x: True # prevents requesting authorization for download.
data = GPy.util.datasets.olympic_sprints()
X = data['X']
y = data['Y']
print data['info'], data['details']

print data['citation']

print data['output_info']

print 'First column of X contains the olympic years.'
print data['X'][:, 0]
print 'Second column of X contains the event index.'
print data['X'][:, 1]

# One matplotlib format string per event index (column 1 of X takes 0..5).
markers = ['bo', 'ro', 'bx', 'rx', 'bs', 'rs']
for i, marker in enumerate(markers):
    # Select the rows of this event with a boolean mask. Indexing with the
    # tuple returned by np.nonzero would give arrays of shape (1, n), which
    # matplotlib draws as n separate one-point series instead of one series.
    is_event = X[:, 1] == i
    x_event = X[is_event, 0]
    y_event = y[is_event, 0]
    pb.plot(x_event, y_event, marker)
pb.title('Olympic Sprint Times')
# Use the explicit pylab namespace, as for pb.title above, rather than relying
# on the names injected by the %pylab magic.
pb.xlabel('year')
pb.ylabel('time/s')

# IPython-only syntax: the trailing "?" asks the notebook to display the help
# for this object. The line is not valid in a plain Python script.
GPy.kern.coregionalize?

# RBF kernel combined with a rank-5 coregionalization over the 6 outputs.
# NOTE(review): `**` between GPy kernels is presumably a product in which each
# factor acts on its own input column -- confirm for the GPy version in use.
kern = (GPy.kern.rbf(1, lengthscale=80)
        ** GPy.kern.coregionalize(output_dim=6, rank=5))

model = GPy.models.GPRegression(X, y, kern)
model.optimize()

# One plot call per event index, all drawn on figure 1, with input column 1
# held fixed at that index.
for event in range(6):
    model.plot(fignum=1, fixed_inputs=[(1, event)])
pb.xlabel('years')
pb.ylabel('time/s')

# Sum of two terms, each with its own rank-1 coregionalization over the 6
# outputs: one built on an RBF kernel, one built on a bias kernel.
kern1 = (GPy.kern.rbf(1, lengthscale=80)
         ** GPy.kern.coregionalize(output_dim=6, rank=1))
kern2 = (GPy.kern.bias(1)
         ** GPy.kern.coregionalize(output_dim=6, rank=1))
kern = kern1 + kern2

model = GPy.models.GPRegression(X, y, kern)
model.optimize()

# Overlay the fit for every event index on figure 1.
for event in range(6):
    model.plot(fignum=1, fixed_inputs=[(1, event)])
pb.xlabel('years')
pb.ylabel('time/s')

# Here the RBF and bias kernels are added first, and the sum is then combined
# with a single rank-5 coregionalization over the 6 outputs.
kern1 = GPy.kern.rbf(1, lengthscale=80) + GPy.kern.bias(1)
kern2 = GPy.kern.coregionalize(output_dim=6, rank=5)
kern = kern1 ** kern2

model = GPy.models.GPRegression(X, y, kern)
model.optimize()

# Overlay the fit for every event index on figure 1.
for event in range(6):
    model.plot(fignum=1, fixed_inputs=[(1, event)])
pb.xlabel('years')
pb.ylabel('time/s')

# Question 1 answer here

X2 = np.zeros((X.shape[0], 3))
X2[:, 0] = X[:, 0]
X2[:, 1] = np.remainder(X[:, 1],2) == 1
X2[:, 2] = np.floor(X[:, 1]/2)
print X2[:, 2]

# Kernel over X2, whose columns are [year, sex index, event index]:
#   * an RBF over the year, coregionalized over the sex and event columns;
#   * a bias term coregionalized over the same two index columns.
# The sex column only holds 0/1, so its coregionalization uses output_dim=2
# in both terms; the event column uses output_dim=3.
kern = ((GPy.kern.rbf(1, lengthscale=80)
         ** GPy.kern.coregionalize(output_dim=2, rank=2)
         ** GPy.kern.coregionalize(output_dim=3, rank=2))
        + (GPy.kern.bias(1)
           ** GPy.kern.coregionalize(output_dim=2, rank=0)
           ** GPy.kern.coregionalize(output_dim=3, rank=0)))
model = GPy.models.GPRegression(X2, y, kern)

model.optimize()

for sex in range(2):
    for event in range(3):
        model.plot(fignum=1,fixed_inputs=[(1, sex), (2, event)])
xlabel('years')
ylabel('time/s')

print model

# 50 input locations drawn uniformly from [0, 12) and sorted in ascending order.
X = np.sort(12 * np.random.rand(50, 1), axis=0)
k = GPy.kern.rbf(1)
K = k.K(X)
K += 0.01 * np.eye(50)  # add some independence (noise) to K
# One zero-mean joint draw with covariance K, stored as a column vector.
y = np.random.multivariate_normal(np.zeros(50), K).reshape(50, 1)

# Fit a GP regression with the default kernel, then mark mean +/- two standard
# deviations of the prediction at each training input with red vertical lines.
m = GPy.models.GPRegression(X, y)
m.optimize()
m.plot()
mu, var = m._raw_predict(X)  # this fetches the posterior of f
two_sd = 2. * np.sqrt(var[:, 0])
pb.vlines(X[:, 0], mu[:, 0] - two_sd, mu[:, 0] + two_sd, color='r')

# Exercise 2 answer here

Z = np.random.rand(3,1)*12
m = GPy.models.SparseGPRegression(X,y,Z=Z)
print m

mu, var = m._raw_predict(Z) 
pb.vlines(Z[:,0], mu[:,0]-2.*np.sqrt(var[:,0]), mu[:,0]+2.*np.sqrt(var[:,0]),color='r')

# Exercise 3 a answer
# Exercise 3 b answer
%pylab inline
import numpy as np
import GPy
import pylab as pb
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import cPickle as pickle
import urllib

# Fetch the pickled data set into the working directory.
timor_url = 'http://staffwww.dcs.sheffield.ac.uk/people/M.Zwiessele/gpss/lab2/EastTimor.pickle'
urllib.urlretrieve(timor_url, 'EastTimor2.pickle')

# Load the data: the pickle holds a 3-tuple (X, y, polygons).
# NOTE(review): this unpickles a file downloaded over plain HTTP; unpickling
# can execute arbitrary code, so only run this against a trusted source.
with open("./EastTimor2.pickle", "rb") as pickle_file:
    X, y, polygons = pickle.load(pickle_file)

# Visualize a map of East-Timor: every polygon is added to the axes as its own
# single-patch collection, filled with the same colour.
fig = pb.figure()
ax = fig.add_subplot(111)
for outline in polygons:
    patch = PatchCollection([Polygon(outline)], facecolor="#F4A460")
    ax.add_collection(patch)
# Fix the visible window and label the axes.
ax.set_xlim(124., 127.5)
ax.set_ylim(-9.6, -8.1)
ax.set_xlabel("longitude")
ax.set_ylabel("latitude")

#Define the model
# GP classifier on (X, y) with an RBF kernel constructed with argument 2.
# NOTE(review): presumably 2 is the input dimension (longitude, latitude) --
# confirm against the data.
kern = GPy.kern.rbf(2)
m = GPy.models.GPClassification(X,y, kernel=kern)
# NOTE(review): presumably this recomputes the approximation to the
# non-Gaussian likelihood for the current parameters -- confirm in GPy docs.
m.update_likelihood_approximation() #Unlike regression we need to add this step.
print m
# Plot the model before optimization on the existing map axes `ax`.
m.plot(ax=ax)

# Refresh the approximation again, optimize, then print and plot the result.
# This plot call is given no axes, unlike the one above.
m.update_likelihood_approximation() #Unlike regression we need to add this step.
m.optimize()
print m
m.plot()

# Exercise 5 answer here

# Download the men's olympic marathon data again and plot it.
# The authorization hook is replaced so the download does not prompt.
GPy.util.datasets.authorize_download = lambda x: True
data = GPy.util.datasets.olympic_marathon_men()
X, Y = data['X'], data['Y']

# Raw observations as blue crosses.
pb.plot(X, Y, 'bx')
pb.xlabel('year')
pb.ylabel('marathon pace min/km')

# IPython-only syntax: a trailing "?" displays the help for the object in the
# notebook. These two lines are not valid in a plain Python script.
GPy.likelihoods.noise_model_constructors.student_t?

GPy.models.GPRegression?

# make a student t likelihood with standard parameters
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=5, sigma2=2)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)

kern = GPy.kern.rbf(1, lengthscale=10) + GPy.kern.bias(1)
model = GPy.models.GPRegression(X, Y, kernel=kern, likelihood=stu_t_likelihood)
model.constrain_positive('t_noise')

model.optimize()
model.plot()
#model.log_likelihood()
print model

# Exercise 6 answer

import urllib
from scipy import io
import pylab as pb
import numpy as np
import pylab as pb
import GPy
# Download the face data set and read the MATLAB file.
faces_url = 'http://www.cs.nyu.edu/~roweis/data/olivettifaces.mat'
urllib.urlretrieve(faces_url, 'faces.mat')
face_data = io.loadmat('faces.mat')

# Transpose the 'faces' array, then show row 120 as a 64x64 grayscale image
# filled in column-major (Fortran) order.
faces = face_data['faces'].T
example_face = faces[120].reshape(64, 64, order='F')
pb.imshow(example_face, interpolation='nearest', cmap=pb.cm.gray)

# Download the labels and load them with numpy.
urllib.urlretrieve(
    'http://staffwww.dcs.sheffield.ac.uk/people/J.Hensman/gpsummer/datasets/has_glasses.np',
    'has_glasses.np')
y = np.load('has_glasses.np')
# Encode the label 'y' as 1 and every other value as 0, as a column vector.
y = np.where(y == 'y', 1, 0).reshape(-1, 1)

# Random split: the first num_training shuffled rows are used for training
# and the remaining rows for testing.
index = np.random.permutation(faces.shape[0])
num_training = 200
train_idx = index[:num_training]
test_idx = index[num_training:]
Xtrain, Xtest = faces[train_idx, :], faces[test_idx, :]
ytrain, ytest = y[train_idx, :], y[test_idx]

from scipy import cluster
# Number of k-means centroids requested below.
M = 8
# k-means on the training rows; the resulting centroids are passed as Z to
# the sparse model below.
Z, distortion = cluster.vq.kmeans(Xtrain,M)

# RBF plus white-noise kernel. 4096 equals 64*64, the image size used when the
# faces are reshaped for display.
k = GPy.kern.rbf(4096,lengthscale=50) + GPy.kern.white(4096,0.001)
m = GPy.models.SparseGPClassification(Xtrain, ytrain, kernel=k, Z=Z, normalize_X=True)
# NOTE(review): presumably recomputes the likelihood approximation before the
# optimizer runs -- confirm against the GPy docs.
m.update_likelihood_approximation()
m.optimize()

# Show the first row of m.dL_dZ() as a 64x64 grayscale image (column-major
# fill) in a new figure.
pb.figure()
first_row_image = m.dL_dZ()[0].reshape(64, 64, order='F')
pb.imshow(first_row_image, interpolation='nearest', cmap=pb.cm.gray)
# Exercise 7 a) answer here
# Exercise 7 b) answer here
# Exercise 7 c) answer here