In [1]:
!date
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline
sns.set_context('poster')
sns.set_style('darkgrid')

Thu Jun 11 10:52:33 PDT 2015


# Laplace approximation in PyMC3¶

In [2]:
import pymc3 as pm, theano.tensor as tt, theano.tensor.slinalg, scipy.optimize

Couldn't import dot_parser, loading of dot files will not be possible.

In [3]:
def sim_data(n, p, q, seed):
"""Simulate data to fit with Laplace approximation

Parameters
----------
n : int, rows of data
p : int, number of fixed effect coeffs
q : int, number of random effect coeffs

Results
-------
returns dict  PyMC3 model
"""

# sim data for random effect model
np.random.seed(seed)  # set random seed for reproducibility

# ground truth for model parameters
beta_true = np.linspace(-1,1,p)

u_true = np.linspace(-1,1,q)
sigma_true = .1

# covariates
X = np.random.normal(size=(n,p))
Z = np.random.uniform(0, 1, size=(n,q))
Z = 1. * (Z < .1)

# observed data
y = np.dot(X, beta_true) + np.dot(Z, u_true) + np.random.normal(0., sigma_true, size=n)

return locals()
data = sim_data(n=1000, p=5, q=500, seed=12345)

In [4]:
def make_model(data):
"""Create a random effects model from simulated data

Parameters
----------
data : dict, as returned by sim_data function

Results
-------
returns PyMC3 model for data
"""

m = pm.Model()
with m:
beta  = pm.Normal('beta', mu=0., sd=1., shape=data['p'])
u     = pm.Normal('u', mu=0., sd=1., shape=data['q'])
sigma = pm.HalfNormal('sigma', sd=1.)

y_obs = pm.Normal('y_obs', mu=tt.dot(data['X'], beta) + tt.dot(data['Z'], u), sd=sigma, observed=data['y'])

return m

m = make_model(data)

WARNING (theano.gof.compilelock): Overriding existing lock by dead process '32184' (I am process '24906')
WARNING:theano.gof.compilelock:Overriding existing lock by dead process '32184' (I am process '24906')

Cannot compute test value: input 0 (<TensorType(float32, matrix)>) of Op Dot22(<TensorType(float32, matrix)>, <TensorType(float32, matrix)>) missing default value

In [5]:
# find MAP
with m:
%time map_pt = pm.find_MAP()

CPU times: user 34.3 s, sys: 268 ms, total: 34.5 s
Wall time: 34.7 s


# find Laplace approximation, by approximating integral of random effects with determinant of Hessian¶

In [6]:
H_uu = pm.hessian(m.logpt, [m.u])

# use Cholesky factorization to calculate determinant
L_H_uu = theano.tensor.slinalg.Cholesky()(H_uu)

# calculate log determinant of H_uu by summing log of diagonal of Cholesky factorization
log_det_H_uu = tt.sum(tt.log(tt.diag(L_H_uu)))

# make that into a theano function
f_log_det_H_uu = m.fn(log_det_H_uu)

/homes/abie/anaconda/envs/testenv/lib/python2.7/site-packages/theano/scan_module/scan_perform_ext.py:133: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
from scan_perform.scan_perform import *

In [7]:
def argmax_u(m, start):
"""Find u that maximizes model posterior for fixed beta, sigma

Parameter
---------
start : dict, with keys for all (transformed) model, e.g.
start={'u':np.zeros(q), 'beta':beta_val, 'sigma_log':np.log(sigma_val)}

Results
-------
return u_hat as an array
"""

with m:
pt = pm.find_MAP(start=start, vars=[m.u])
return pt['u']

u_hat = argmax_u(m, {'u':np.zeros(data['q']), 'beta':.5*np.ones(data['p']), 'sigma_log':np.log(.1)})

In [8]:
u_hat = np.zeros(data['q'])

def approx_marginal_neg_log_posterior(theta):
"""Laplace approximation of the marginal negative log posterior of m

Parameters
----------
theta : array of shape (p+1,), with first p for beta, and final entry for sigma

Results
-------
returns approx -logp as a float
"""
global u_hat
global m
beta = theta[:-1]

sigma = theta[-1]
# prevent optimizer from going astray
if sigma <= 1e-6:
sigma = 1e-6
sigma_log = np.log(sigma)

pt = {'u': u_hat, 'beta': beta, 'sigma_log':sigma_log}
u_hat = argmax_u(m, pt)
pt['u'] = u_hat
det_H_logp = f_log_det_H_uu(pt)

return .5*det_H_logp - m.logp(pt)

approx_marginal_neg_log_posterior(.5*np.ones(data['p']+1))

Out[8]:
4704.9488202994035
In [9]:
%time theta = scipy.optimize.fmin_bfgs(approx_marginal_neg_log_posterior, .1*np.ones(data['p']+1))

Warning: Desired error not necessarily achieved due to precision loss.
Current function value: 496.161295
Iterations: 66
Function evaluations: 862
CPU times: user 26min 17s, sys: 11.2 s, total: 26min 28s
Wall time: 26min 29s


# Compare the results¶

In [10]:
plt.plot(map_pt['u'], u_hat, 'o')
plt.plot([-1,1], [-1,1], 'k--')

plt.xlabel('Random Effect Coeffs from MAP')
plt.ylabel('Random Effect Coeffs from LA')

Out[10]:
<matplotlib.text.Text at 0x7fc67d90b250>
In [11]:
plt.plot(map_pt['beta'], theta[:-1], 'o')
plt.plot([-1,1], [-1,1], 'k--')
plt.xlabel('Fixed Effect Coeffs from MAP')
plt.ylabel('Fixed Effect Coeffs from LA')

Out[11]:
<matplotlib.text.Text at 0x7fc67d782250>
In [12]:
pd.Series({'Spread of RE from MAP': np.exp(map_pt['sigma_log']),
'Spread of RE from LA': theta[-1]}).plot(kind='barh')

Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc67d723210>
In [12]: