In [8]:
!date
import matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline

Sun Jan 18 14:37:28 PST 2015


Experiment to use the PyMC2 Matern covariance function in the sklearn GP model:

In [9]:
import pymc.gp, sklearn

In [20]:
import numpy as np
from sklearn import gaussian_process

def f(x):
return x * np.sin(x)
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
y = f(X).ravel()

gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1, nugget=.00001)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)

Out[20]:
[<matplotlib.lines.Line2D at 0x7fbd55606210>]

sklearn has a nice gaussian process implementation, but doesn't have the fanciest Matern covariance functions.

In [58]:
gp = gaussian_process.GaussianProcess(corr=sklearn.gaussian_process.correlation_models.generalized_exponential,
theta0=[1e-2,10], thetaL=[1e-4,10], thetaU=[1e-1,10], nugget=1)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)

Out[58]:
[<matplotlib.lines.Line2D at 0x7fbd4f36e3d0>]

PyMC has a very flexible GP implementation, but it has very spartan documentation, and is a bit too much for many common situations.

I will use the PyMC Matern covariance in the sklearn GP code:

In [59]:
pymc.gp.cov_funs.matern.euclidean(X, np.array([0]), diff_degree=1.4, amp=0.4, scale=10)

Out[59]:
matrix([[ 0.15555892],
[ 0.13196298],
[ 0.10330798],
[ 0.08970638],
[ 0.0771805 ],
[ 0.06590167]])
In [100]:
def iso_matern(theta, d):
"""
Matern correlation model.

theta, d --> r(diff_degree=theta[0], amp=theta[1], scale=theta[2]; d)

Parameters
----------
theta : array_like
An array with shape 3 (isotropic) giving the
autocorrelation parameters (diff_degree, amp, scale)
d : array_like
An array with shape (n_eval, n_features) giving the componentwise
distances between locations x and x' at which the correlation model
should be evaluated.
Returns
-------
r : array_like
An array with shape (n_eval, ) with the values of the autocorrelation
model.
"""
print theta
#return sklearn.gaussian_process.correlation_models.generalized_exponential(theta[:2], d)
theta = np.asarray(theta, dtype=np.float).reshape(3)
#theta[0] = 2
#theta[1] = .4
d = np.asarray(d, dtype=np.float)

assert theta.shape == (3,), 'expected theta.shape == (3,),  with diff_degree=theta[0], amp=theta[1], scale=theta[2]'

r = pymc.gp.cov_funs.matern.euclidean(
d, np.array([0]),
diff_degree=theta[0], amp=theta[1], scale=theta[2])
return np.array(r).reshape(d.shape[0],)
iso_matern([1.4,.4,10], X).shape

[1.4, 0.4, 10]

Out[100]:
(6,)
In [153]:
gp = gaussian_process.GaussianProcess(corr=iso_matern,
theta0=[1.5,1,10], nugget=.001)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')

plt.plot(x, y_pred)

[[  1.5   1.   10. ]]
[[  1.5   1.   10. ]]

Out[153]:
[<matplotlib.lines.Line2D at 0x7fbd4cf99250>]
In [155]:
gp = gaussian_process.GaussianProcess(corr=iso_matern,
theta0=[2.5,.5,1], thetaL=[1., .1, 1], thetaU=[10., 1., 1], nugget=y/10.)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')

plt.plot(x, y_pred)

[ 2.5  0.5  1. ]
[ 25.    0.5   1. ]
[ 2.5  5.   1. ]
[  2.5   0.5  10. ]
[ nan  nan  nan]

---------------------------------------------------------------------------
error                                     Traceback (most recent call last)
<ipython-input-155-9965b7b21862> in <module>()
1 gp = gaussian_process.GaussianProcess(corr=iso_matern,
2     theta0=[2.5,.5,1], thetaL=[1., .1, 1], thetaU=[10., 1., 1], nugget=y/10.)
----> 3 gp.fit(X, y)
4
5 x = np.atleast_2d(np.linspace(0, 10, 1000)).T

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in fit(self, X, y)
337                       "autocorrelation parameters...")
338             self.theta_, self.reduced_likelihood_function_value_, par = \
--> 339                 self._arg_max_reduced_likelihood_function()
340             if np.isinf(self.reduced_likelihood_function_value_):
341                 raise Exception("Bad parameter region. "

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in _arg_max_reduced_likelihood_function(self)
726                         optimize.fmin_cobyla(minus_reduced_likelihood_function,
727                                              np.log10(theta0), constraints,
--> 728                                              iprint=0)
729                 except ValueError as ve:
730                     print("Optimization failed. Try increasing the nugget")

/homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in fmin_cobyla(func, x0, cons, args, consargs, rhobeg, rhoend, iprint, maxfun, disp, catol)
169
170     sol = _minimize_cobyla(func, x0, args, constraints=con,
--> 171                            **opts)
172     if iprint > 0 and not sol['success']:
173         print("COBYLA failed to find a solution: %s" % (sol.message,))

/homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in _minimize_cobyla(fun, x0, args, constraints, rhobeg, tol, iprint, maxiter, disp, catol, **unknown_options)
244     xopt, info = _cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg,
245                                   rhoend=rhoend, iprint=iprint, maxfun=maxfun,
--> 246                                   dinfo=info)
247
248     if info[3] > catol:

/homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in calcfc(x, con)
236
237     def calcfc(x, con):
--> 238         f = fun(x, *args)
239         for k, c in enumerate(constraints):
240             con[k] = c['fun'](x, *c['args'])

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in minus_reduced_likelihood_function(log10t)
698             def minus_reduced_likelihood_function(log10t):
699                 return - self.reduced_likelihood_function(
--> 700                     theta=10. ** log10t)[0]
701
702             constraints = []

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in reduced_likelihood_function(self, theta)
593
594         # Set up R
--> 595         r = self.corr(theta, D)
596         R = np.eye(n_samples) * (1. + self.nugget)
597         R[ij[:, 0], ij[:, 1]] = r

<ipython-input-100-f61aaf39ca0a> in iso_matern(theta, d)
31     r = pymc.gp.cov_funs.matern.euclidean(
32         d, np.array([0]),
---> 33         diff_degree=theta[0], amp=theta[1], scale=theta[2])
34     return np.array(r).reshape(d.shape[0],)
35 iso_matern([1.4,.4,10], X).shape

/homes/abie/anaconda/lib/python2.7/site-packages/pymc/gp/cov_funs/cov_utils.pyc in __call__(self, x, y, amp, scale, symm, *args, **kwargs)
170
--> 172             targ(C,x,y,0,-1,symm)
173         else:
error: (diff_degree>0) failed for 2nd argument diff_degree: matern:diff_degree=nan