In [27]:
!date
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline
sns.set_context('poster')
sns.set_style('darkgrid')
Sat Jun 13 20:40:45 PDT 2015

Laplace approximation in PyMC3

Following the approach in http://www.seanet.com/~bradbell/tmb.htm, use the example from http://www.seanet.com/~bradbell/marginal_example.pdf

In [28]:
import pymc3 as pm, theano.tensor as tt, theano.tensor.slinalg, scipy.optimize
In [29]:
def sim_data(n, p, q, seed):
    """Simulate data to fit with Laplace approximation
    
    Parameters
    ----------
    n : int, rows of data
    p : int, number of fixed effect coeffs
    q : int, number of random effect coeffs
    
    Results
    -------
    returns dict  PyMC3 model
    """
    q=n
    
    # sim data for random effect model
    np.random.seed(seed)  # set random seed for reproducibility

    # ground truth for model parameters
    beta_true = np.linspace(-1,1,p)

    u_true = np.linspace(-1,1,q)
    sigma_true = .1

    # covariates
    X = np.random.normal(size=(n,p))
    Z = np.eye(q)

    # observed data
    y = np.dot(X, beta_true) + np.dot(Z, u_true) + np.random.normal(0., sigma_true, size=n)

    return locals()
data = sim_data(n=100, p=2, q=None, seed=12345)
In [30]:
def make_model(data):
    """Create a random effects model from simulated data
    
    Parameters
    ----------
    data : dict, as returned by sim_data function
    
    Results
    -------
    returns PyMC3 model for data
    """
    
    m = pm.Model()
    with m:
        beta  = pm.Normal('beta', mu=0., sd=1., shape=data['p'])
        u     = pm.Normal('u', mu=0., sd=1., shape=data['q'])
        sigma = pm.HalfNormal('sigma', sd=1.)

        y_obs = pm.Normal('y_obs', mu=tt.dot(data['X'], beta) + tt.dot(data['Z'], u), sd=sigma, observed=data['y'])
        
    return m

m = make_model(data)
In [31]:
# find MAP
with m:
    %time map_pt = pm.find_MAP()
CPU times: user 2.34 s, sys: 4 ms, total: 2.35 s
Wall time: 2.43 s

find Laplace approximation, by approximating integral of random effects with determinant of Hessian

In [32]:
H_uu = pm.hessian(m.logpt, [m.u])

# use Cholesky factorization to calculate determinant
L_H_uu = theano.tensor.slinalg.Cholesky()(H_uu)

# calculate log determinant of H_uu by summing log of diagonal of Cholesky factorization
log_det_H_uu = tt.sum(tt.log(tt.diag(L_H_uu)))

# make that into a theano function
f_log_det_H_uu = m.fn(log_det_H_uu)
In [33]:
def argmax_u(m, start):
    """Find u that maximizes model posterior for fixed beta, sigma
    
    Parameter
    ---------
    start : dict, with keys for all (transformed) model, e.g.
      start={'u':np.zeros(q), 'beta':beta_val, 'sigma_log':np.log(sigma_val)}
      
    Results
    -------
    return u_hat as an array
    """
    
    with m:
        pt = pm.find_MAP(start=start, vars=[m.u])
    return pt['u']

u_hat = argmax_u(m, {'u':np.zeros(data['q']), 'beta':.5*np.ones(data['p']), 'sigma_log':np.log(.1)})
In [34]:
u_hat = np.zeros(data['q'])

def approx_marginal_neg_log_posterior(theta):
    """Laplace approximation of the marginal negative log posterior of m
    
    Parameters
    ----------
    theta : array of shape (p+1,), with first p for beta, and final entry for sigma
    
    Results
    -------
    returns approx -logp as a float
    """
    global u_hat
    global m
    beta = theta[:-1]
    
    sigma = theta[-1]
    # prevent optimizer from going astray
    if sigma <= 1e-6:
        sigma = 1e-6
    sigma_log = np.log(sigma)
    
    pt = {'u': u_hat, 'beta': beta, 'sigma_log':sigma_log}
    u_hat = argmax_u(m, pt)
    pt['u'] = u_hat
    det_H_logp = f_log_det_H_uu(pt)
    
    return .5*det_H_logp - m.logp(pt)
    

approx_marginal_neg_log_posterior(.5*np.ones(data['p']+1))
Out[34]:
271.52666734010722
In [35]:
%time theta = scipy.optimize.fmin_bfgs(approx_marginal_neg_log_posterior, .1*np.ones(data['p']+1))
Warning: Desired error not necessarily achieved due to precision loss.
         Current function value: 184.814175
         Iterations: 0
         Function evaluations: 167
         Gradient evaluations: 31
CPU times: user 1.51 s, sys: 2 ┬Ás, total: 1.51 s
Wall time: 1.51 s

Compare the results

In [36]:
plt.plot(map_pt['u'], u_hat, 'o')
plt.plot([-1,1], [-1,1], 'k--')

plt.xlabel('Random Effect Coeffs from MAP')
plt.ylabel('Random Effect Coeffs from LA')
Out[36]:
<matplotlib.text.Text at 0x7f36f8cf7550>
In [37]:
plt.plot(map_pt['beta'], theta[:-1], 'o')
plt.plot([-1,1], [-1,1], 'k--')
plt.xlabel('Fixed Effect Coeffs from MAP')
plt.ylabel('Fixed Effect Coeffs from LA')
Out[37]:
<matplotlib.text.Text at 0x7f36f8c56ed0>
In [38]:
pd.Series({'Spread of RE from MAP': np.exp(map_pt['sigma_log']),
          'Spread of RE from LA': theta[-1]}).plot(kind='barh')
Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f36f8c3a490>
In [ ]: