Attempt at an ordered probit model

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import pprint
import logging

import numpy as np

import pymc3 as pm
import pymc3.theanof

import theano.tensor as tt
In [2]:
print(pm.__version__)
3.8

Define an OrderedProbit distribution

In [3]:
# modelled on the pm.OrderedLogistic implementation
class OrderedProbit(pm.Categorical):
    def __init__(self, mu, sigma, cutpoints, *args, **kwargs):

        mu = tt.as_tensor_variable(pymc3.theanof.floatX(mu))
        sigma = tt.as_tensor_variable(pymc3.theanof.floatX(sigma))
        cutpoints = tt.as_tensor_variable(cutpoints)

        # the probability of each category is the normal CDF mass
        # between its pair of adjacent cutpoints
        p = (
            self.phi(x=cutpoints[:, 1:], mu=mu, sigma=sigma) -
            self.phi(x=cutpoints[:, :-1], mu=mu, sigma=sigma)
        )

        # clamp so the probabilities are strictly positive and the
        # categorical log-probability stays finite
        p = tt.where(p <= 0, 1e-50, p)

        super().__init__(p=p, *args, **kwargs)

    @staticmethod
    def phi(x, mu, sigma):
        # CDF of a normal distribution with mean mu and SD sigma
        p = 0.5 * (1.0 + pm.math.erf(((x - mu) / sigma) / pm.math.sqrt(2)))
        return p
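
This is the standard ordered-probit construction: with cutpoints $c_0 < c_1 < \dots < c_K$ and $\Phi$ the normal CDF, the probability of category $k$ is $P(y = k) = \Phi((c_{k+1} - \mu) / \sigma) - \Phi((c_k - \mu) / \sigma)$, and the sum over categories telescopes to one when the outer cutpoints are $-\infty$ and $+\infty$.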

Define the response scale and cutpoints

In [4]:
# can choose 1 to 10
response_options = np.arange(1, 11)

# cutpoints halfway between adjacent response options, with
# -inf and +inf as the outer boundaries
cutpoints = np.concatenate(([-np.Inf], (response_options - 0.5)[1:], [+np.Inf]))[np.newaxis, :]
pprint.pprint(cutpoints)
array([[-inf,  1.5,  2.5,  3.5,  4.5,  5.5,  6.5,  7.5,  8.5,  9.5,  inf]])
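
As a quick sanity check (a minimal sketch, assuming scipy is installed; check_mu and check_sigma are arbitrary illustrative values, not part of the model), the same cutpoints can be pushed through a plain-NumPy version of phi to confirm that the category probabilities sum to one:

from scipy.stats import norm

# NumPy equivalent of OrderedProbit.phi, for checking only
check_mu, check_sigma = 5.5, 1.0
check_p = (
    norm.cdf(cutpoints[0, 1:], loc=check_mu, scale=check_sigma) -
    norm.cdf(cutpoints[0, :-1], loc=check_mu, scale=check_sigma)
)
print(check_p.sum())  # expect 1.0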

Define the model

In [5]:
n_trials = 50
In [6]:
def get_model(y=None):

    with pm.Model() as model:

        # priors on the latent mean and spread of the responses
        mu = pm.Normal("mu", mu=np.mean(response_options), sigma=2.0)
        sigma = pm.Lognormal("sigma", mu=0.5, sigma=0.5)

        # observed responses; with y=None this supports prior prediction
        OrderedProbit("y", mu=mu, sigma=sigma, cutpoints=cutpoints, observed=y, shape=n_trials)

    return model
In [7]:
model = get_model()

Draw a sample from the prior

In [8]:
with model:
    draw = pm.sample_prior_predictive(samples=1, random_seed=1234)
In [9]:
pprint.pprint(draw)
{'mu': array(6.44287033),
 'sigma': array(0.90892941),
 'sigma_log__': -0.0954878473532322,
 'y': array([5, 6, 6, 5, 5, 6, 7, 6, 5, 5, 6, 6, 5, 6, 5, 3, 6, 7, 5, 6, 4, 5,
       7, 6, 5, 6, 5, 6, 6, 5, 6, 4, 6, 6, 5, 7, 5, 7, 4, 5, 4, 6, 6, 6,
       4, 6, 5, 5, 4, 6])}
In [10]:
# pm.Categorical draws are 0-based category indices; +1 maps them
# back onto the 1-10 response scale
plt.hist(draw["y"] + 1);
plt.xlim([response_options[0], response_options[-1]]);

Define the model with the prior draw as observed data

In [11]:
obs_model = get_model(y=draw["y"])

Try sampling

In [12]:
with obs_model:
    trace = pm.sample(cores=1, chains=1)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [sigma, mu]
Sampling chain 0, 0 divergences:  10%|█         | 103/1000 [00:02<00:17, 50.59it/s]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-12-8b6cf7bec66f> in <module>
      1 with obs_model:
----> 2     trace = pm.sample(cores=1, chains=1)

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
    487             _log.info("Sequential sampling ({} chains in 1 job)".format(chains))
    488             _print_step_hierarchy(step)
--> 489             trace = _sample_many(**sample_args)
    490 
    491     discard = tune if discard_tuned_samples else 0

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in _sample_many(draws, chain, chains, start, random_seed, step, **kwargs)
    531     traces = []
    532     for i in range(chains):
--> 533         trace = _sample(
    534             draws=draws,
    535             chain=chain + i,

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in _sample(chain, progressbar, random_seed, start, draws, step, trace, tune, model, **kwargs)
    603     try:
    604         strace = None
--> 605         for it, (strace, diverging) in enumerate(sampling):
    606             if it >= skip_first:
    607                 trace = MultiTrace([strace])

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/tqdm/std.py in __iter__(self)
   1125 
   1126         try:
-> 1127             for obj in iterable:
   1128                 yield obj
   1129                 # Update and possibly print the progressbar.

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in _iter_sample(draws, step, start, trace, chain, tune, model, random_seed)
    698                 step = stop_tuning(step)
    699             if step.generates_stats:
--> 700                 point, stats = step.step(point)
    701                 if strace.supports_sampler_stats:
    702                     strace.record(point, stats)

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/step_methods/arraystep.py in step(self, point)
    245 
    246         if self.generates_stats:
--> 247             apoint, stats = self.astep(array)
    248             point = self._logp_dlogp_func.array_to_full_dict(apoint)
    249             return point, stats

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/step_methods/hmc/base_hmc.py in astep(self, q0)
    128                 (np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)
    129             ]
--> 130             self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
    131             message_energy = (
    132                 "Bad initial energy, check any log probabilities that "

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/step_methods/hmc/quadpotential.py in raise_ok(self, vmap)
    229                 errmsg.append('The derivative of RV `{}`.ravel()[{}]'
    230                               ' is zero.'.format(*name_slc[ii]))
--> 231             raise ValueError('\n'.join(errmsg))
    232 
    233         if np.any(~np.isfinite(self._stds)):

ValueError: Mass matrix contains zeros on the diagonal. 
The derivative of RV `sigma_log__`.ravel()[0] is zero.
The derivative of RV `mu`.ravel()[0] is zero.
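
A plausible contributor is the pair of infinite outer cutpoints: the normal CDF saturates at an infinite argument, so the erf terms involving them carry no gradient information back to mu or sigma. A minimal sketch of the effect, using the same erf construction as phi (test_mu is just an illustrative variable, not part of the model):

import theano

test_mu = tt.dscalar("test_mu")
cdf_at_inf = 0.5 * (1.0 + tt.erf((np.inf - test_mu) / np.sqrt(2.0)))
grad_at_inf = theano.grad(cdf_at_inf, test_mu)

# the CDF is pinned at 1, so its derivative with respect to mu
# evaluates to (negative) zero
print(grad_at_inf.eval({test_mu: 0.0}))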

Try replacing the np.infs with large finite values

In [17]:
# replace the infinite outer cutpoints with very large finite values
cutpoints = np.concatenate(([-10e50], (response_options - 0.5)[1:], [+10e50]))[np.newaxis, :]
pprint.pprint(cutpoints)

obs_model = get_model(y=draw["y"])

with obs_model:
    trace = pm.sample(cores=1, chains=1)
array([[-1.0e+51,  1.5e+00,  2.5e+00,  3.5e+00,  4.5e+00,  5.5e+00,
         6.5e+00,  7.5e+00,  8.5e+00,  9.5e+00,  1.0e+51]])
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [sigma, mu]
Sampling chain 0, 0 divergences: 100%|██████████| 1000/1000 [00:00<00:00, 1997.36it/s]
Only one chain was sampled, this makes it impossible to run some convergence checks
In [18]:
pm.summary(trace)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 500), minimum_shape: (chains=2, draws=4)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 500), minimum_shape: (chains=2, draws=4)
Out[18]:
        mean     sd  hpd_3%  hpd_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  ess_bulk  ess_tail  r_hat
mu     6.439  0.128   6.223    6.682      0.007    0.005     372.0   372.0     380.0     244.0    NaN
sigma  0.899  0.105   0.723    1.100      0.006    0.004     334.0   334.0     292.0     320.0    NaN
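
For reference, the generating values from the prior draw were mu ≈ 6.44 and sigma ≈ 0.91, so the posterior means recover them closely (though with only a single chain, r_hat is unavailable).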
In [19]:
pm.traceplot(trace);

Try sampling using pm.sample_smc()

In [20]:
with obs_model:
    trace = pm.sample_smc()
Sample initial stage: ...
Stage:   0 Beta: 0.040 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.129 Steps:  25 Acce: 0.531
Stage:   2 Beta: 0.363 Steps:   6 Acce: 0.440
Stage:   3 Beta: 1.000 Steps:   7 Acce: 0.359
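
(Beta here is the SMC tempering exponent, annealed from 0 at the prior towards 1 at the full posterior; Acce is the average acceptance rate of the mutation steps.)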
In [21]:
pm.summary(trace)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 1000), minimum_shape: (chains=2, draws=4)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 1000), minimum_shape: (chains=2, draws=4)
Out[21]:
        mean     sd  hpd_3%  hpd_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  ess_bulk  ess_tail  r_hat
mu     6.461  0.135   6.223    6.721      0.004    0.003     914.0   914.0     918.0     828.0    NaN
sigma  0.901  0.101   0.727    1.092      0.003    0.002     979.0   979.0     913.0     932.0    NaN
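
The SMC estimates (mu ≈ 6.46, sigma ≈ 0.90) again sit close to the generating values of 6.44 and 0.91.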
In [22]:
pm.traceplot(trace);

Use pm.sample_smc() a few times to check parameter recovery

In [23]:
# run it a few times to check it wasn't just lucky
n_reps = 20
In [24]:
with model:
    draws = pm.sample_prior_predictive(samples=n_reps, random_seed=12345)
In [25]:
# temporarily turn off the output
logger = logging.getLogger("pymc3")
start_level = logger.level
logger.setLevel(logging.CRITICAL)

est_mus = np.full(n_reps, np.nan)
est_sigmas = np.full(n_reps, np.nan)

for i_rep in range(n_reps):

    # condition the model on this repetition's prior-predictive draw
    obs_model = get_model(y=draws["y"][i_rep, :])

    with obs_model:
        trace = pm.sample_smc()

    # use the posterior means as point estimates
    est_mus[i_rep] = np.mean(trace["mu"])
    est_sigmas[i_rep] = np.mean(trace["sigma"])
    
logger.setLevel(start_level)
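
Alongside the recovery plots below, a quick numeric summary of the fits can be computed (a minimal sketch; none of these names are part of the model itself):

# correlation and root-mean-square error between true and estimated values
for name, true, est in [("mu", draws["mu"], est_mus), ("sigma", draws["sigma"], est_sigmas)]:
    r = np.corrcoef(true, est)[0, 1]
    rmse = np.sqrt(np.mean((true - est) ** 2))
    print(f"{name}: r = {r:.3f}, RMSE = {rmse:.3f}")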
In [26]:
plt.scatter(draws["mu"], est_mus);
plt.xlim([0, 10]);
plt.ylim([0, 10]);
plt.plot([0, 10], [0, 10]);
plt.xlabel("True $\mu$");
plt.ylabel("Estimated $\mu$");
In [27]:
plt.scatter(draws["sigma"], est_sigmas);
plt.xlim([0, 10]);
plt.ylim([0, 10]);
plt.plot([0, 10], [0, 10]);
plt.xlabel("True $\sigma$");
plt.ylabel("Estimated $\sigma$");