Seeds: Random effect logistic regression

Ported to PyMC3 by Abraham Flaxman, 2/16/2014 from PyMC2 version, based on http://www.openbugs.info/Examples/Seeds.html

In [1]:
import numpy as np, pymc as pm, theano.tensor as T
Warning: statsmodels and/or patsy not found, not importing glm submodule.
In [2]:
### data - same as PyMC2 version
# germinated seeds
r =  np.array([10, 23, 23, 26, 17, 5, 53, 55, 32, 46, 10, 8, 10, 8, 23, 0, 3, 22, 15, 32, 3])

# total seeds
n =  np.array([39, 62, 81, 51, 39, 6, 74, 72, 51, 79, 13, 16, 30, 28, 45, 4, 12, 41, 30, 51, 7])

# seed type
x1 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

# root type
x2 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

# number of plates
N =  x1.shape[0]
In [3]:
### model - major differences from PyMC2:
###   * with model idiom
###   * initial values are not specified when constructing stochastics
###   * keyword `shape` instead of `size`
###   * Theano tensors instead of Lambda deterministics
###   * `observed` parameter takes a value instead of a boolean

with pm.Model() as m:
    ### hyperpriors
    tau = pm.Gamma('tau', 1.e-3, 1.e-3)
    sigma = tau**-.5
    
    ### parameters
    # fixed effects
    alpha_0 = pm.Normal('alpha_0', 0., 1e-6)
    alpha_1 = pm.Normal('alpha_1', 0., 1e-6) 
    alpha_2 = pm.Normal('alpha_2', 0., 1e-6) 
    alpha_12 = pm.Normal('alpha_12', 0., 1e-6)  
    
    # random effect
    b = pm.Normal('b', 0., tau,  shape=(N,))
    
    # expected parameter
    logit_p =  (alpha_0 + alpha_1*x1 + alpha_2*x2 + alpha_12*x1*x2 + b)
    p = T.exp(logit_p) / (1 + T.exp(logit_p))
    
    ### likelihood
    obs = pm.Binomial('obs', n, p, observed=r)
WARNING (theano.gof.compilelock): Overriding existing lock by dead process '3067' (I am process '3935')
In [4]:
%%time

### inference - major differences from PyMC2:
###   * find_MAP function instead of MAP object
###   * initial values specified in inference functions
###   * selection of MCMC step methods is more explicit
###   * sampling from parallel chains is super-simple

n = 2000

with m:
    start = pm.find_MAP({'tau': 10., 'alpha_0': 0., 'alpha_1': 0., 'alpha_2': 0., 'alpha_12': 0., 'b': np.zeros(N)})
    step = pm.HamiltonianMC(scaling=start)
    
    ptrace = pm.psample(n, step, start, progressbar=False, threads=4)
/homes/abie/anaconda/lib/python2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/scan_module/scan_perform_ext.py:85: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
  from scan_perform.scan_perform import *
CPU times: user 1min 3s, sys: 10.2 s, total: 1min 13s
Wall time: 1min 55s

BUGS results:

A burn in of 1000 updates followed by a further 10000 updates gave the following parameter estimates:

           mean    sd    
 alpha_0   -0.55   0.19  
 alpha_1    0.08   0.30  
 alpha_12  -0.82   0.41  
 alpha_2    1.35   0.26  
 sigma      0.27   0.15  
In [5]:
burn = 1000
pm.summary(ptrace[burn:])
tau:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  1106.713         660.327          38.079           [368.521, 2503.479]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  411.338        648.379        950.014        1300.858       3030.646


alpha_0:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  -0.574           0.127            0.009            [-0.814, -0.292]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  -0.885         -0.647         -0.567         -0.478         -0.342


alpha_1:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  0.175            0.219            0.014            [-0.276, 0.599]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  -0.282         0.040          0.175          0.328          0.597


alpha_2:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  1.353            0.167            0.010            [0.984, 1.627]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  1.003          1.242          1.351          1.468          1.671


alpha_12:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  -0.836           0.305            0.020            [-1.413, -0.226]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  -1.413         -1.048         -0.851         -0.631         -0.226


b:
 
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  -0.000           0.030            0.002            [-0.060, 0.063]
  -0.002           0.032            0.002            [-0.058, 0.065]
  -0.008           0.030            0.002            [-0.070, 0.047]
  0.008            0.032            0.002            [-0.051, 0.068]
  0.010            0.032            0.002            [-0.044, 0.073]
  0.003            0.034            0.003            [-0.069, 0.064]
  -0.001           0.032            0.002            [-0.061, 0.069]
  0.009            0.034            0.003            [-0.058, 0.086]
  -0.001           0.034            0.003            [-0.058, 0.067]
  -0.012           0.031            0.002            [-0.068, 0.053]
  0.003            0.030            0.002            [-0.062, 0.056]
  0.001            0.030            0.002            [-0.053, 0.063]
  0.002            0.032            0.002            [-0.055, 0.070]
  -0.006           0.025            0.002            [-0.057, 0.039]
  0.003            0.031            0.002            [-0.055, 0.063]
  0.002            0.035            0.003            [-0.052, 0.085]
  -0.002           0.032            0.002            [-0.069, 0.052]
  -0.001           0.033            0.003            [-0.062, 0.067]
  -0.003           0.032            0.002            [-0.066, 0.056]
  0.003            0.031            0.002            [-0.063, 0.062]
  0.003            0.033            0.002            [-0.062, 0.058]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  -0.066         -0.021         -0.000         0.018          0.062
  -0.085         -0.020         -0.003         0.019          0.060
  -0.070         -0.029         -0.006         0.013          0.049
  -0.050         -0.014         0.005          0.029          0.073
  -0.059         -0.011         0.012          0.033          0.065
  -0.069         -0.022         0.003          0.023          0.072
  -0.082         -0.017         0.001          0.019          0.057
  -0.073         -0.011         0.012          0.027          0.079
  -0.064         -0.024         -0.001         0.023          0.067
  -0.080         -0.035         -0.011         0.010          0.047
  -0.062         -0.017         0.004          0.022          0.059
  -0.060         -0.020         0.003          0.024          0.058
  -0.055         -0.017         -0.002         0.020          0.070
  -0.053         -0.022         -0.004         0.010          0.048
  -0.057         -0.019         0.004          0.025          0.063
  -0.071         -0.019         0.000          0.021          0.085
  -0.069         -0.021         -0.003         0.022          0.061
  -0.069         -0.024         0.001          0.020          0.065
  -0.067         -0.025         -0.001         0.018          0.056
  -0.062         -0.017         0.002          0.022          0.070
  -0.062         -0.018         0.003          0.026          0.058

Gelman-Rubin convergence diagnostics:

In [6]:
pm.gelman_rubin(ptrace[burn:])
Out[6]:
{'alpha_0': 1.0194130588867218,
 'alpha_1': 1.0042532712404557,
 'alpha_12': 1.0041204872403353,
 'alpha_2': 1.0037001462197992,
 'b': array([ 1.01304092,  1.01520162,  1.01626442,  1.03409233,  1.01195989,
         1.01161977,  1.06471208,  1.00517131,  1.04701133,  1.01351903,
         1.02305173,  1.01105302,  1.00656117,  1.00287305,  1.04569727,
         1.0372103 ,  1.02239126,  1.01456888,  1.01328597,  1.04200797,
         1.09234026]),
 'tau': 1.0064134954958157}