%pylab inline import numpy as np import pylab as pb import GPy GPy.util.datasets.authorize_download = lambda x: True # prevents requesting authorization for download. data = GPy.util.datasets.olympic_sprints() X = data['X'] y = data['Y'] print data['info'], data['details'] print data['citation'] print data['output_info'] print 'First column of X contains the olympic years.' print data['X'][:, 0] print 'Second column of X contains the event index.' print data['X'][:, 1] markers = ['bo', 'ro', 'bx', 'rx', 'bs', 'rs'] for i in range(6): # extract the event x_event = X[np.nonzero(X[:, 1]==i), 0] y_event = y[np.nonzero(X[:, 1]==i), 0] pb.plot(x_event, y_event, markers[i]) pb.title('Olympic Sprint Times') xlabel('year') ylabel('time/s') GPy.kern.coregionalize? kern = GPy.kern.rbf(1, lengthscale=80)**GPy.kern.coregionalize(output_dim=6, rank=5) model = GPy.models.GPRegression(X, y, kern) model.optimize() for i in range(6): model.plot(fignum=1,fixed_inputs=[(1, i)]) xlabel('years') ylabel('time/s') kern1 = GPy.kern.rbf(1, lengthscale=80)**GPy.kern.coregionalize(output_dim=6, rank=1) kern2 = GPy.kern.bias(1)**GPy.kern.coregionalize(output_dim=6, rank=1) kern = kern1 + kern2 model = GPy.models.GPRegression(X, y, kern) model.optimize() for i in range(6): model.plot(fignum=1,fixed_inputs=[(1, i)]) xlabel('years') ylabel('time/s') kern1 = GPy.kern.rbf(1, lengthscale=80) + GPy.kern.bias(1) kern2 = GPy.kern.coregionalize(output_dim=6, rank=5) kern = kern1**kern2 model = GPy.models.GPRegression(X, y, kern) model.optimize() for i in range(6): model.plot(fignum=1,fixed_inputs=[(1, i)]) xlabel('years') ylabel('time/s') # Question 1 answer here X2 = np.zeros((X.shape[0], 3)) X2[:, 0] = X[:, 0] X2[:, 1] = np.remainder(X[:, 1],2) == 1 X2[:, 2] = np.floor(X[:, 1]/2) print X2[:, 2] kern = ((GPy.kern.rbf(1, lengthscale=80) **GPy.kern.coregionalize(output_dim=2, rank=2) **GPy.kern.coregionalize(output_dim=3,rank=2)) + 
GPy.kern.bias(1)**GPy.kern.coregionalize(output_dim=3,rank=0)**GPy.kern.coregionalize(output_dim=3,rank=0)) #+ (GPy.kern.bias(1) # **GPy.kern.coregionalize(output_dim=2, rank=0) # **GPy.kern.coregionalize(output_dim=3,rank=0))) model = GPy.models.GPRegression(X2, y, kern) model.optimize() for sex in range(2): for event in range(3): model.plot(fignum=1,fixed_inputs=[(1, sex), (2, event)]) xlabel('years') ylabel('time/s') print model X = np.sort(np.random.rand(50,1)*12,0) k = GPy.kern.rbf(1) K = k.K(X) K+= np.eye(50)*0.01 # add some independence (noise) to K y = np.random.multivariate_normal(np.zeros(50), K).reshape(50,1) m = GPy.models.GPRegression(X,y) m.optimize() m.plot() mu, var = m._raw_predict(X) # this fetches the posterior of f pb.vlines(X[:,0], mu[:,0]-2.*np.sqrt(var[:,0]), mu[:,0]+2.*np.sqrt(var[:,0]),color='r') # Exercise 2 answer here Z = np.random.rand(3,1)*12 m = GPy.models.SparseGPRegression(X,y,Z=Z) print m mu, var = m._raw_predict(Z) pb.vlines(Z[:,0], mu[:,0]-2.*np.sqrt(var[:,0]), mu[:,0]+2.*np.sqrt(var[:,0]),color='r') # Exercise 3 a answer # Exercise 3 b answer %pylab inline import numpy as np import GPy import pylab as pb from matplotlib.patches import Polygon from matplotlib.collections import PatchCollection import cPickle as pickle import urllib urllib.urlretrieve('http://staffwww.dcs.sheffield.ac.uk/people/M.Zwiessele/gpss/lab2/EastTimor.pickle', 'EastTimor2.pickle') #Load the data with open("./EastTimor2.pickle","rb") as f: X,y,polygons = pickle.load(f) #Visualize a map of East-Timor fig = pb.figure() ax = fig.add_subplot(111) for p in polygons: ax.add_collection(PatchCollection([Polygon(p)],facecolor="#F4A460")) ax.set_xlim(124.,127.5) ax.set_ylim(-9.6,-8.1) ax.set_xlabel("longitude") ax.set_ylabel("latitude") #Define the model kern = GPy.kern.rbf(2) m = GPy.models.GPClassification(X,y, kernel=kern) m.update_likelihood_approximation() #Unlike regression we need to add this step. 
print m m.plot(ax=ax) m.update_likelihood_approximation() #Unlike regression we need to add this step. m.optimize() print m m.plot() # Exercise 5 answer here # redownload the marathon data from yesterday and plot GPy.util.datasets.authorize_download = lambda x: True # prevents requesting authorization for download. data = GPy.util.datasets.olympic_marathon_men() X = data['X'] Y = data['Y'] pb.plot(X, Y, 'bx') pb.xlabel('year') pb.ylabel('marathon pace min/km') GPy.likelihoods.noise_model_constructors.student_t? GPy.models.GPRegression? # make a student t likelihood with standard parameters t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=5, sigma2=2) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution) kern = GPy.kern.rbf(1, lengthscale=10) + GPy.kern.bias(1) model = GPy.models.GPRegression(X, Y, kernel=kern, likelihood=stu_t_likelihood) model.constrain_positive('t_noise') model.optimize() model.plot() #model.log_likelihood() print model # Exercise 6 answer import urllib from scipy import io import pylab as pb import numpy as np import pylab as pb import GPy urllib.urlretrieve('http://www.cs.nyu.edu/~roweis/data/olivettifaces.mat', 'faces.mat') face_data = io.loadmat('faces.mat') faces = face_data['faces'].T pb.imshow(faces[120].reshape(64,64,order='F'), interpolation='nearest',cmap=pb.cm.gray) urllib.urlretrieve(\ 'http://staffwww.dcs.sheffield.ac.uk/people/J.Hensman/gpsummer/datasets/has_glasses.np', 'has_glasses.np') y = np.load('has_glasses.np') y = np.where(y=='y',1,0).reshape(-1,1) index = np.random.permutation(faces.shape[0]) num_training = 200 Xtrain = faces[index[:num_training],:] Xtest = faces[index[num_training:],:] ytrain = y[index[:num_training],:] ytest = y[index[num_training:]] from scipy import cluster M = 8 Z, distortion = cluster.vq.kmeans(Xtrain,M) k = GPy.kern.rbf(4096,lengthscale=50) + GPy.kern.white(4096,0.001) m = GPy.models.SparseGPClassification(Xtrain, ytrain, kernel=k, Z=Z, normalize_X=True) 
# Fit the sparse classification model built above.
m.update_likelihood_approximation()
m.optimize()

# Show the first row of m.dL_dZ() as a 64x64 image (column-major order,
# matching how the face images are displayed earlier in the file).
pb.figure()
pb.imshow(m.dL_dZ()[0].reshape(64, 64, order='F'), interpolation='nearest', cmap=pb.cm.gray)

# Exercise 7 a) answer here

# Exercise 7 b) answer here

# Exercise 7 c) answer here