!date
import matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline
Sun Jan 18 14:37:28 PST 2015
Experiment to use the PyMC2 Matern covariance function in the sklearn GP model:
import pymc.gp, sklearn
import numpy as np
from sklearn import gaussian_process
def f(x):
return x * np.sin(x)
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
y = f(X).ravel()
gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1, nugget=.00001)
gp.fit(X, y)
x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)
plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)
[<matplotlib.lines.Line2D at 0x7fbd55606210>]
sklearn has a nice gaussian process implementation, but doesn't have the fanciest Matern covariance functions.
gp = gaussian_process.GaussianProcess(corr=sklearn.gaussian_process.correlation_models.generalized_exponential,
theta0=[1e-2,10], thetaL=[1e-4,10], thetaU=[1e-1,10], nugget=1)
gp.fit(X, y)
x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)
plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)
[<matplotlib.lines.Line2D at 0x7fbd4f36e3d0>]
PyMC has a very flexible GP implementation, but it has very spartan documentation, and is a bit too much for many common situations.
I will use the PyMC Matern covariance in the sklearn GP code:
pymc.gp.cov_funs.matern.euclidean(X, np.array([0]), diff_degree=1.4, amp=0.4, scale=10)
matrix([[ 0.15555892], [ 0.13196298], [ 0.10330798], [ 0.08970638], [ 0.0771805 ], [ 0.06590167]])
def iso_matern(theta, d):
"""
Matern correlation model.
theta, d --> r(diff_degree=theta[0], amp=theta[1], scale=theta[2]; d)
Parameters
----------
theta : array_like
An array with shape 3 (isotropic) giving the
autocorrelation parameters (diff_degree, amp, scale)
d : array_like
An array with shape (n_eval, n_features) giving the componentwise
distances between locations x and x' at which the correlation model
should be evaluated.
Returns
-------
r : array_like
An array with shape (n_eval, ) with the values of the autocorrelation
model.
"""
print theta
#return sklearn.gaussian_process.correlation_models.generalized_exponential(theta[:2], d)
theta = np.asarray(theta, dtype=np.float).reshape(3)
#theta[0] = 2
#theta[1] = .4
d = np.asarray(d, dtype=np.float)
assert theta.shape == (3,), 'expected theta.shape == (3,), with diff_degree=theta[0], amp=theta[1], scale=theta[2]'
r = pymc.gp.cov_funs.matern.euclidean(
d, np.array([0]),
diff_degree=theta[0], amp=theta[1], scale=theta[2])
return np.array(r).reshape(d.shape[0],)
iso_matern([1.4,.4,10], X).shape
[1.4, 0.4, 10]
(6,)
gp = gaussian_process.GaussianProcess(corr=iso_matern,
theta0=[1.5,1,10], nugget=.001)
gp.fit(X, y)
x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)
plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)
[[ 1.5 1. 10. ]] [[ 1.5 1. 10. ]]
[<matplotlib.lines.Line2D at 0x7fbd4cf99250>]
gp = gaussian_process.GaussianProcess(corr=iso_matern,
theta0=[2.5,.5,1], thetaL=[1., .1, 1], thetaU=[10., 1., 1], nugget=y/10.)
gp.fit(X, y)
x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)
plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)
[ 2.5 0.5 1. ] [ 25. 0.5 1. ] [ 2.5 5. 1. ] [ 2.5 0.5 10. ] [ nan nan nan]
--------------------------------------------------------------------------- error Traceback (most recent call last) <ipython-input-155-9965b7b21862> in <module>() 1 gp = gaussian_process.GaussianProcess(corr=iso_matern, 2 theta0=[2.5,.5,1], thetaL=[1., .1, 1], thetaU=[10., 1., 1], nugget=y/10.) ----> 3 gp.fit(X, y) 4 5 x = np.atleast_2d(np.linspace(0, 10, 1000)).T /homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in fit(self, X, y) 337 "autocorrelation parameters...") 338 self.theta_, self.reduced_likelihood_function_value_, par = \ --> 339 self._arg_max_reduced_likelihood_function() 340 if np.isinf(self.reduced_likelihood_function_value_): 341 raise Exception("Bad parameter region. " /homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in _arg_max_reduced_likelihood_function(self) 726 optimize.fmin_cobyla(minus_reduced_likelihood_function, 727 np.log10(theta0), constraints, --> 728 iprint=0) 729 except ValueError as ve: 730 print("Optimization failed. Try increasing the ``nugget``") /homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in fmin_cobyla(func, x0, cons, args, consargs, rhobeg, rhoend, iprint, maxfun, disp, catol) 169 170 sol = _minimize_cobyla(func, x0, args, constraints=con, --> 171 **opts) 172 if iprint > 0 and not sol['success']: 173 print("COBYLA failed to find a solution: %s" % (sol.message,)) /homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in _minimize_cobyla(fun, x0, args, constraints, rhobeg, tol, iprint, maxiter, disp, catol, **unknown_options) 244 xopt, info = _cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg, 245 rhoend=rhoend, iprint=iprint, maxfun=maxfun, --> 246 dinfo=info) 247 248 if info[3] > catol: /homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in calcfc(x, con) 236 237 def calcfc(x, con): --> 238 f = fun(x, *args) 239 for k, c in enumerate(constraints): 240 con[k] = c['fun'](x, *c['args']) /homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in minus_reduced_likelihood_function(log10t) 698 def minus_reduced_likelihood_function(log10t): 699 return - self.reduced_likelihood_function( --> 700 theta=10. ** log10t)[0] 701 702 constraints = [] /homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in reduced_likelihood_function(self, theta) 593 594 # Set up R --> 595 r = self.corr(theta, D) 596 R = np.eye(n_samples) * (1. + self.nugget) 597 R[ij[:, 0], ij[:, 1]] = r <ipython-input-100-f61aaf39ca0a> in iso_matern(theta, d) 31 r = pymc.gp.cov_funs.matern.euclidean( 32 d, np.array([0]), ---> 33 diff_degree=theta[0], amp=theta[1], scale=theta[2]) 34 return np.array(r).reshape(d.shape[0],) 35 iso_matern([1.4,.4,10], X).shape /homes/abie/anaconda/lib/python2.7/site-packages/pymc/gp/cov_funs/cov_utils.pyc in __call__(self, x, y, amp, scale, symm, *args, **kwargs) 170 171 if n_threads <= 1: --> 172 targ(C,x,y,0,-1,symm) 173 else: 174 thread_args = [(C,x,y,bounds[i],bounds[i+1],symm) for i in xrange(n_threads)] /homes/abie/anaconda/lib/python2.7/site-packages/pymc/gp/cov_funs/cov_utils.pyc in targ(C, x, y, cmin, cmax, symm, d_kwargs, c_args, c_kwargs) 166 self.cov_fun(C,x,y,cmin=cmin, cmax=cmax,symm=symm,*c_args,**c_kwargs) 167 else: --> 168 self.cov_fun(C, cmin=cmin, cmax=cmax,symm=symm, *c_args, **c_kwargs) 169 imul(C, amp*amp, cmin=cmin, cmax=cmax, symm=symm) 170 error: (diff_degree>0) failed for 2nd argument diff_degree: matern:diff_degree=nan