from __future__ import division  # so that / is true division even under Python 2
import numpy as np
import matplotlib.pyplot as pl
%matplotlib inline
"""
This is code for simple GP regression. It assumes a zero-mean GP prior.
""";

# This is the true unknown function we are trying to approximate.
f = lambda x: np.sin(0.9*x).flatten()  # flatten makes the output 1-D
#f = lambda x: (0.25*(x**2)).flatten()

np.random.seed(123)  # fix the seed for a reproducible experiment; comment out for fresh draws

# Define the kernel
def kernel(x, xp):
    """GP squared-exponential kernel; theta is the squared length-scale."""
    theta = 0.1
    # squared Euclidean distance between every row of x and every row of xp
    sqdist = np.sum(x**2, 1).reshape(-1, 1) + np.sum(xp**2, 1) - 2*np.dot(x, xp.T)
    return np.exp(-.5 * (1/theta) * sqdist)

# quick sanity checks: both calls should return a (2, 3) kernel matrix,
# for 1-dimensional and 3-dimensional input points respectively
kernel(np.array([[1], [2]]), np.array([[3], [2], [1]]))
kernel(np.array([[1, 0, 0], [2, 0, 0]]), np.array([[3, 0, 0], [2, 0, 0], [1, 0, 0]]))

n = 10    # number of training points
N = 200   # number of test points (the "starred" points)
s = 0.05  # noise standard deviation (so s**2 is the noise variance)

# Sample some input points and noisy versions of the function evaluated at
# these points.
X = np.random.uniform(-5, 5, size=(n, 1))  # "training" inputs
y = f(X) + s*np.random.randn(n)

K = kernel(X, X)

# Points we're going to make predictions at.
Xtest = np.linspace(-5, 5, N).reshape(-1, 1)
Ktest = kernel(X, Xtest)  # K_*, shape (n, N)

# Noise variance added to the diagonal; it is part of the model and it also
# stabilises the Cholesky factorisation numerically.
L = np.linalg.cholesky(K + s**2*np.eye(n))

# Compute the mean at our test points.
u = np.linalg.solve(L, Ktest)  # solves L u = K_*, i.e. u = L^{-1} K_*
v = np.linalg.solve(L, y)      # v = L^{-1} y
mu = np.dot(u.T, v)            # K_*^T (K + s^2 I)^{-1} y
# Another way to calculate the same thing:
#w = np.linalg.solve(L.T, v)
#mu = np.dot(Ktest.T, w)

# Compute the variance at our test points:
# var = diag(K_**) - diag(K_*^T (K + s^2 I)^{-1} K_*); since u = L^{-1} K_*,
# the second term is diag(u^T u), i.e. the column-wise sum of u**2.
Kstarstar = kernel(Xtest, Xtest)
s2 = np.diag(Kstarstar) - np.sum(u**2, axis=0)
ss = np.sqrt(s2)

# PLOTS:
import seaborn as sns  # note: sns.tsplot was removed in recent seaborn versions
pl.figure(1)
pl.clf()
pl.gca().fill_between(Xtest.flat, mu - 2*ss, mu + 2*ss, color="#dddddd")
pl.scatter(X, y, s=200, marker='+', c='black')
pl.plot(Xtest, f(Xtest), 'b-')
pl.plot(Xtest, mu, 'r--', lw=2)
pl.title('Mean prediction plus/minus 2 standard deviations')
pl.axis([-5, 5, -3, 3])
#pl.savefig('predictive.png', bbox_inches='tight')
pl.show()

# Draw samples from the prior at our test points.
npr_samples = 20
L = np.linalg.cholesky(Kstarstar + 1e-6*np.eye(N))  # jitter keeps the matrix positive definite
f_prior = np.dot(L, np.random.normal(size=(N, npr_samples)))  # notice no data goes into f_prior
pl.figure(2)
pl.clf()
#pl.plot(Xtest, f_prior)
sns.tsplot(f_prior.T, time=Xtest.flatten(), err_style='unit_points')
pl.title(str(npr_samples) + ' samples from the GP prior')
pl.axis([-5, 5, -3, 3])
#pl.savefig('prior.png', bbox_inches='tight')
pl.show()

# Draw samples from the posterior at the test points.
# Commented out is a more direct (but less stable) way to get L:
#L = np.linalg.cholesky(Kstarstar
#                       - np.dot(np.dot(kernel(X, Xtest).T,
#                                       np.linalg.inv(K + s**2*np.eye(n))),
#                                kernel(X, Xtest))
#                       + 1e-6*np.eye(N))
nps_samples = 20
L = np.linalg.cholesky(Kstarstar + 1e-6*np.eye(N) - np.dot(u.T, u))
f_post = mu.reshape(-1, 1) + np.dot(L, np.random.normal(size=(N, nps_samples)))
pl.figure(3)
pl.clf()
pl.plot(Xtest, f(Xtest), 'b-')
#pl.plot(Xtest, f_post)  # plain matplotlib alternative
sns.tsplot(f_post.T, time=Xtest.flatten(), err_style='unit_points')
# thicker line width and a high zorder make the '+' markers visible on top of the samples
pl.scatter(X, y, s=1000, marker='+', c='red', linewidths=3, zorder=10, alpha=1)
#pl.plot(Xtest, mu, 'r--', lw=2)
#^^^ the theoretical mu is close to the mean estimated by tsplot from the samples
pl.title(str(nps_samples) + ' samples from the GP posterior')
pl.axis([-5, 5, -3, 3])
#pl.savefig('post.png', bbox_inches='tight')
pl.show()
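
# Optional cross-check (not part of the original demo): the Cholesky-based mean and
# variance above should match the textbook closed-form expressions computed with an
# explicit matrix inverse. This is only a sanity-check sketch; it reuses K, Ktest,
# Kstarstar, y, s, n, mu and s2 from above, and the inverse-based route is slower and
# less numerically stable, so it is not how you would compute the posterior in practice.
Kinv = np.linalg.inv(K + s**2*np.eye(n))          # (K + s^2 I)^{-1}
mu_direct = np.dot(np.dot(Ktest.T, Kinv), y)      # K_*^T (K + s^2 I)^{-1} y
cov_direct = Kstarstar - np.dot(np.dot(Ktest.T, Kinv), Ktest)
s2_direct = np.diag(cov_direct)                   # pointwise predictive variance
print(np.allclose(mu, mu_direct), np.allclose(s2, s2_direct, atol=1e-8))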