# Theano / GPy warm-up: build symbolic functions and gradients, a Gaussian
# regression negative log-likelihood, and an RBF kernel from theano
# expressions. (Flattened from a notebook; bare "display" expressions were
# turned into prints so the script produces output when run.)
import theano
import GPy
from theano import tensor as T
import numpy as np

# --- Basics: a symbolic matrix and a scalar function of it ----------------

# Create a two dimensional symbolic array.
X = T.matrix()

# Sum of the squared elements (well defined even when X is empty).
f = T.sum(T.square(X))

# Compile a callable from the symbolic graph.
my_func = theano.function([X], f)

# Generate some values for X and evaluate.
X_values = np.random.randn(3, 4)
print(my_func(X_values))

# Symbolic gradient of f with respect to X.
g = theano.grad(f, X)

# Compile value and gradient together.
val_and_grad_func = theano.function([X], [f, g])

# Theano derivatives ...
print(val_and_grad_func(X_values))
# ... should match the exact derivative d/dX sum(X^2) = 2*X.
print(X_values * 2)

# --- Linear-regression negative log-likelihood ----------------------------

w = T.dvector()       # regression weights
Xw = T.dot(X, w)      # model predictions
y = T.dvector()       # observed targets (symbolic!)
error = T.sum(T.square(y - Xw))
sigma = T.dscalar()   # noise standard deviation

# Gaussian NLL: 0.5*N*log(2*pi) + 0.5*N*log(sigma^2) + 0.5*||y - Xw||^2/sigma^2
neg_log_lik = (0.5 * y.size * np.log(2 * np.pi)
               + 0.5 * y.size * T.log(sigma ** 2)
               + 0.5 * error / sigma ** 2)

# Objective value plus its gradients with respect to w and sigma.
nll_func = theano.function(
    [X, y, sigma, w],
    [neg_log_lik, theano.grad(neg_log_lik, w), theano.grad(neg_log_lik, sigma)])

# Test with synthetic data.
# NOTE: the data array is stored in y_values (not y) so the symbolic
# variable y is not clobbered by a numpy array (the original assigned
# both, shadowing the theano variable with a dead duplicate).
w_true = np.random.randn(2)
X_values = np.random.randn(200, 2)
y_values = np.dot(X_values, w_true) + np.random.randn(200) * 0.01

# Evaluate the likelihood and the gradients w.r.t. w and sigma.
print(nll_func(X_values, y_values, 0.02, np.array([1, 1])))

# --- Elements of an RBF kernel --------------------------------------------

X = T.matrix()
Z = T.matrix()
len_scale = T.dscalar()
sigma2 = T.dscalar()

# Squared distances between every row of X and every row of Z after
# rescaling by the lengthscale: r2[i, j] = ||X[i]/l - Z[j]/l||^2.
r2 = ((X[:, None, :] / len_scale - Z[None, :, :] / len_scale) ** 2).sum(2)
rbf_kernel = sigma2 * T.exp(-0.5 * r2)

# Compiled evaluators for the distances and the kernel.
# (Each returns a one-element list because the outputs are given as lists.)
r2_eval = theano.function([X, Z, len_scale], [r2])
kern_eval = theano.function([X, Z, len_scale, sigma2], [rbf_kernel])

# Generate some data to evaluate r2 and the kernel.
X_val = np.random.randn(4, 2)
Z_val = np.random.randn(6, 2)
sigma2_val = 1
len_scale_val = 2
# Distances evaluation. The order of the arguments must match the order
# used when the theano function was compiled.
# (The original had this sentence as bare prose — a syntax error in a .py
# file — now a proper comment.)
print(r2_eval(X_val, Z_val, len_scale_val))

# Kernel evaluation.
print(kern_eval(X_val, Z_val, len_scale_val, sigma2_val))

# Check that it matches the equivalent GPy RBF kernel.
kernelGPy = GPy.kern.RBF(2, variance=sigma2_val, lengthscale=len_scale_val)
print(kernelGPy.K(X_val, Z_val))

# --- Kernel gradients -----------------------------------------------------
# For an arbitrary objective L, GPy supplies dL/dK; the chain rule through
# h = sum(dL_dK * K) yields dL w.r.t. each kernel parameter and input.
dL_dK = T.matrix()
h = T.sum(dL_dK * rbf_kernel)

# Gradient with respect to sigma2.
grads_wrt_sigma2 = theano.grad(h, sigma2)
grads_wrt_sigma2_eval = theano.function(
    [X, Z, len_scale, sigma2, dL_dK], grads_wrt_sigma2)

# Gradient with respect to the lengthscale.
grads_wrt_lengthscale = theano.grad(h, len_scale)
grads_wrt_lengthscale_eval = theano.function(
    [X, Z, len_scale, sigma2, dL_dK], grads_wrt_lengthscale)

# Gradient with respect to X.
grads_wrt_X = theano.grad(h, X)
grads_wrt_X_eval = theano.function(
    [X, Z, len_scale, sigma2, dL_dK], grads_wrt_X)

# All gradients together in a single compiled function.
grads_all_eval = theano.function(
    [X, Z, len_scale, sigma2, dL_dK],
    [grads_wrt_sigma2, grads_wrt_lengthscale, grads_wrt_X])

# --- Baseline: a plain GPy model ------------------------------------------
# (GPy is already imported at the top of the file; the duplicate mid-file
# import was dropped.)
from matplotlib import pyplot as plt

# Some 1-d data.
x = np.linspace(-np.pi, np.pi, 201)[:, None]
# NOTE(review): np.random.rand gives uniform [0, 1) "noise"; randn
# (Gaussian) was probably intended — kept as-is to preserve behavior.
y = np.sin(x) + np.random.rand(201)[:, None]

# '%pylab inline' is an IPython magic, not valid Python in a .py file;
# it was removed. Plotting goes through matplotlib/GPy directly.
model = GPy.models.GPRegression(x, y)
model.optimize()
model.plot()


# --- A GPy kernel backed by the compiled theano functions -----------------
class TheanoKern(GPy.kern.Kern):
    """GPy kernel whose covariance and gradients are computed by the
    compiled theano functions defined above (an RBF with `variance`
    and `lengthscale` parameters).

    Parameters
    ----------
    input_dim : int
        Dimensionality of the inputs.
    variance : float
        Initial kernel variance (sigma2).
    lengthscale : float
        Initial lengthscale.
    """

    def __init__(self, input_dim, variance=1., lengthscale=1.):
        GPy.kern.Kern.__init__(self, input_dim, active_dims=None,
                               name='theanokern')
        self.var = GPy.core.Param('variance', variance)
        self.ls = GPy.core.Param('lengthscale', lengthscale)
        # Register the parameters so GPy optimizes them.
        self.link_parameters(self.var, self.ls)

    def K(self, X, X2=None):
        """Covariance matrix between X and X2 (X2 defaults to X)."""
        if X2 is None:
            X2 = X
        # kern_eval returns a one-element list of outputs; unwrap it.
        return kern_eval(X, X2, self.ls[0], self.var[0])[0]

    def Kdiag(self, X):
        """Diagonal of K(X, X).

        NOTE(review): this builds the full O(n^2) matrix just to read its
        diagonal; for this RBF the diagonal is simply the variance.
        Kept for simplicity and identical results.
        """
        return np.diag(self.K(X))

    def update_gradients_full(self, dL_dK, X, X2=None):
        """Store gradients of the objective w.r.t. variance/lengthscale."""
        if X2 is None:
            X2 = X
        dvar, dl, dX = grads_all_eval(X, X2, self.ls[0], self.var[0], dL_dK)
        self.var.gradient = dvar
        self.ls.gradient = dl

    def gradients_X(self, dL_dK, X, X2=None):
        """Gradient of the objective w.r.t. the inputs X."""
        if X2 is None:
            X2 = X
        return grads_wrt_X_eval(X, X2, self.ls[0], self.var[0], dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        # NOTE(review): stub — leaves parameter gradients untouched.
        # Fine for plain GPRegression (which uses update_gradients_full),
        # but would silently give stale gradients in sparse models.
        pass


# Create our theano kernel (2-d inputs) and sanity-check K.
k = TheanoKern(2)
print(k.K(X_val))

# Build a model on the 1-d data above using the theano kernel.
m = GPy.models.GPRegression(x, y, kernel=TheanoKern(1))

# Check the analytic gradients against finite differences (verbose mode).
m.checkgrad(1)