In [8]:
!date
import matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline
Sun Jan 18 14:37:28 PST 2015

Experiment to use the PyMC2 Matern covariance function in the sklearn GP model:

In [9]:
import pymc.gp, sklearn
In [20]:
import numpy as np
from sklearn import gaussian_process

def f(x):
    return x * np.sin(x)
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
y = f(X).ravel()

gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1, nugget=.00001)
gp.fit(X, y)  

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)
Out[20]:
[<matplotlib.lines.Line2D at 0x7fbd55606210>]

sklearn has a nice gaussian process implementation, but doesn't have the fanciest Matern covariance functions.

In [58]:
gp = gaussian_process.GaussianProcess(corr=sklearn.gaussian_process.correlation_models.generalized_exponential,
    theta0=[1e-2,10], thetaL=[1e-4,10], thetaU=[1e-1,10], nugget=1)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')
plt.plot(x, y_pred)
Out[58]:
[<matplotlib.lines.Line2D at 0x7fbd4f36e3d0>]

PyMC has a very flexible GP implementation, but it has very spartan documentation, and is a bit too much for many common situations.

I will use the PyMC Matern covariance in the sklearn GP code:

In [59]:
pymc.gp.cov_funs.matern.euclidean(X, np.array([0]), diff_degree=1.4, amp=0.4, scale=10)
Out[59]:
matrix([[ 0.15555892],
        [ 0.13196298],
        [ 0.10330798],
        [ 0.08970638],
        [ 0.0771805 ],
        [ 0.06590167]])
In [100]:
def iso_matern(theta, d):
    """
    Matern correlation model.

        theta, d --> r(diff_degree=theta[0], amp=theta[1], scale=theta[2]; d)

    Parameters
    ----------
    theta : array_like
        An array with shape 3 (isotropic) giving the
        autocorrelation parameters (diff_degree, amp, scale)
    d : array_like
        An array with shape (n_eval, n_features) giving the componentwise
        distances between locations x and x' at which the correlation model
        should be evaluated.
    Returns
    -------
    r : array_like
        An array with shape (n_eval, ) with the values of the autocorrelation
        model.
    """
    print theta
    #return sklearn.gaussian_process.correlation_models.generalized_exponential(theta[:2], d)
    theta = np.asarray(theta, dtype=np.float).reshape(3)
    #theta[0] = 2
    #theta[1] = .4
    d = np.asarray(d, dtype=np.float)

    assert theta.shape == (3,), 'expected theta.shape == (3,),  with diff_degree=theta[0], amp=theta[1], scale=theta[2]'

    r = pymc.gp.cov_funs.matern.euclidean(
        d, np.array([0]),
        diff_degree=theta[0], amp=theta[1], scale=theta[2])
    return np.array(r).reshape(d.shape[0],)
iso_matern([1.4,.4,10], X).shape
[1.4, 0.4, 10]
Out[100]:
(6,)
In [153]:
gp = gaussian_process.GaussianProcess(corr=iso_matern,
    theta0=[1.5,1,10], nugget=.001)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')

plt.plot(x, y_pred)
[[  1.5   1.   10. ]]
[[  1.5   1.   10. ]]
Out[153]:
[<matplotlib.lines.Line2D at 0x7fbd4cf99250>]
In [155]:
gp = gaussian_process.GaussianProcess(corr=iso_matern,
    theta0=[2.5,.5,1], thetaL=[1., .1, 1], thetaU=[10., 1., 1], nugget=y/10.)
gp.fit(X, y)

x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)
sigma_pred = np.sqrt(sigma2_pred)

plt.plot(X.ravel(), y, 's')

plt.plot(x, y_pred)
[ 2.5  0.5  1. ]
[ 25.    0.5   1. ]
[ 2.5  5.   1. ]
[  2.5   0.5  10. ]
[ nan  nan  nan]
---------------------------------------------------------------------------
error                                     Traceback (most recent call last)
<ipython-input-155-9965b7b21862> in <module>()
      1 gp = gaussian_process.GaussianProcess(corr=iso_matern,
      2     theta0=[2.5,.5,1], thetaL=[1., .1, 1], thetaU=[10., 1., 1], nugget=y/10.)
----> 3 gp.fit(X, y)
      4 
      5 x = np.atleast_2d(np.linspace(0, 10, 1000)).T

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in fit(self, X, y)
    337                       "autocorrelation parameters...")
    338             self.theta_, self.reduced_likelihood_function_value_, par = \
--> 339                 self._arg_max_reduced_likelihood_function()
    340             if np.isinf(self.reduced_likelihood_function_value_):
    341                 raise Exception("Bad parameter region. "

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in _arg_max_reduced_likelihood_function(self)
    726                         optimize.fmin_cobyla(minus_reduced_likelihood_function,
    727                                              np.log10(theta0), constraints,
--> 728                                              iprint=0)
    729                 except ValueError as ve:
    730                     print("Optimization failed. Try increasing the ``nugget``")

/homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in fmin_cobyla(func, x0, cons, args, consargs, rhobeg, rhoend, iprint, maxfun, disp, catol)
    169 
    170     sol = _minimize_cobyla(func, x0, args, constraints=con,
--> 171                            **opts)
    172     if iprint > 0 and not sol['success']:
    173         print("COBYLA failed to find a solution: %s" % (sol.message,))

/homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in _minimize_cobyla(fun, x0, args, constraints, rhobeg, tol, iprint, maxiter, disp, catol, **unknown_options)
    244     xopt, info = _cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg,
    245                                   rhoend=rhoend, iprint=iprint, maxfun=maxfun,
--> 246                                   dinfo=info)
    247 
    248     if info[3] > catol:

/homes/abie/anaconda/lib/python2.7/site-packages/scipy/optimize/cobyla.pyc in calcfc(x, con)
    236 
    237     def calcfc(x, con):
--> 238         f = fun(x, *args)
    239         for k, c in enumerate(constraints):
    240             con[k] = c['fun'](x, *c['args'])

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in minus_reduced_likelihood_function(log10t)
    698             def minus_reduced_likelihood_function(log10t):
    699                 return - self.reduced_likelihood_function(
--> 700                     theta=10. ** log10t)[0]
    701 
    702             constraints = []

/homes/abie/anaconda/lib/python2.7/site-packages/sklearn/gaussian_process/gaussian_process.pyc in reduced_likelihood_function(self, theta)
    593 
    594         # Set up R
--> 595         r = self.corr(theta, D)
    596         R = np.eye(n_samples) * (1. + self.nugget)
    597         R[ij[:, 0], ij[:, 1]] = r

<ipython-input-100-f61aaf39ca0a> in iso_matern(theta, d)
     31     r = pymc.gp.cov_funs.matern.euclidean(
     32         d, np.array([0]),
---> 33         diff_degree=theta[0], amp=theta[1], scale=theta[2])
     34     return np.array(r).reshape(d.shape[0],)
     35 iso_matern([1.4,.4,10], X).shape

/homes/abie/anaconda/lib/python2.7/site-packages/pymc/gp/cov_funs/cov_utils.pyc in __call__(self, x, y, amp, scale, symm, *args, **kwargs)
    170 
    171         if n_threads <= 1:
--> 172             targ(C,x,y,0,-1,symm)
    173         else:
    174             thread_args = [(C,x,y,bounds[i],bounds[i+1],symm) for i in xrange(n_threads)]

/homes/abie/anaconda/lib/python2.7/site-packages/pymc/gp/cov_funs/cov_utils.pyc in targ(C, x, y, cmin, cmax, symm, d_kwargs, c_args, c_kwargs)
    166                 self.cov_fun(C,x,y,cmin=cmin, cmax=cmax,symm=symm,*c_args,**c_kwargs)
    167             else:
--> 168                 self.cov_fun(C, cmin=cmin, cmax=cmax,symm=symm, *c_args, **c_kwargs)
    169             imul(C, amp*amp, cmin=cmin, cmax=cmax, symm=symm)
    170 

error: (diff_degree>0) failed for 2nd argument diff_degree: matern:diff_degree=nan
In [ ]: