Attempt at an ordered probit model

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import pprint
import logging

import numpy as np

import pymc3 as pm
import pymc3.theanof

import theano.tensor as tt
In [2]:
print(pm.__version__)
3.8

Define an OrderedProbit distribution

In [3]:
# modelled on the pm.OrderedLogistic implementation
class OrderedProbit(pm.Categorical):
    def __init__(self, mu, sigma, cutpoints, *args, **kwargs):

        mu = tt.as_tensor_variable(pymc3.theanof.floatX(mu))
        sigma = tt.as_tensor_variable(pymc3.theanof.floatX(sigma))
        cutpoints = tt.as_tensor_variable(cutpoints)

        # the probability of each category is the normal CDF mass
        # between its pair of adjacent cutpoints
        p = (
            self.phi(x=cutpoints[:, 1:], mu=mu, sigma=sigma) -
            self.phi(x=cutpoints[:, :-1], mu=mu, sigma=sigma)
        )

        # clamp so the probabilities are strictly positive and the
        # categorical log-probability stays finite
        p = tt.where(p <= 0, 1e-50, p)

        super().__init__(p=p, *args, **kwargs)

    @staticmethod
    def phi(x, mu, sigma):
        # CDF of a normal distribution with mean mu and SD sigma
        p = 0.5 * (1.0 + pm.math.erf(((x - mu) / sigma) / pm.math.sqrt(2)))
        return p
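
This is the standard ordered-probit construction: with cutpoints $c_0 < c_1 < \dots < c_K$ and $\Phi$ the normal CDF, the probability of category $k$ is $P(y = k) = \Phi((c_{k+1} - \mu) / \sigma) - \Phi((c_k - \mu) / \sigma)$, and the sum over categories telescopes to one when the outer cutpoints are $-\infty$ and $+\infty$.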

Define the response scale and cutpoints

In [4]:
# can choose 1 to 10
response_options = np.arange(1, 11)

# cutpoints halfway between adjacent response options, with
# -inf and +inf as the outer boundaries
cutpoints = np.concatenate(([-np.Inf], (response_options - 0.5)[1:], [+np.Inf]))[np.newaxis, :]
pprint.pprint(cutpoints)
array([[-inf,  1.5,  2.5,  3.5,  4.5,  5.5,  6.5,  7.5,  8.5,  9.5,  inf]])
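
As a quick sanity check (a minimal sketch, assuming scipy is installed; check_mu and check_sigma are arbitrary illustrative values, not part of the model), the same cutpoints can be pushed through a plain-NumPy version of phi to confirm that the category probabilities sum to one:

from scipy.stats import norm

# NumPy equivalent of OrderedProbit.phi, for checking only
check_mu, check_sigma = 5.5, 1.0
check_p = (
    norm.cdf(cutpoints[0, 1:], loc=check_mu, scale=check_sigma) -
    norm.cdf(cutpoints[0, :-1], loc=check_mu, scale=check_sigma)
)
print(check_p.sum())  # expect 1.0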

Define the model

In [5]:
n_trials = 50
In [6]:
def get_model(y=None):

    with pm.Model() as model:

        # priors on the latent mean and spread of the responses
        mu = pm.Normal("mu", mu=np.mean(response_options), sigma=2.0)
        sigma = pm.Lognormal("sigma", mu=0.5, sigma=0.5)

        # observed responses; with y=None this supports prior prediction
        OrderedProbit("y", mu=mu, sigma=sigma, cutpoints=cutpoints, observed=y, shape=n_trials)

    return model
In [7]:
model = get_model()

Draw a sample from the prior

In [8]:
with model:
    draw = pm.sample_prior_predictive(samples=1, random_seed=1234)
In [9]:
pprint.pprint(draw)
{'mu': array(6.44287033),
 'sigma': array(0.90892941),
 'sigma_log__': -0.0954878473532322,
 'y': array([5, 6, 6, 5, 5, 6, 7, 6, 5, 5, 6, 6, 5, 6, 5, 3, 6, 7, 5, 6, 4, 5,
       7, 6, 5, 6, 5, 6, 6, 5, 6, 4, 6, 6, 5, 7, 5, 7, 4, 5, 4, 6, 6, 6,
       4, 6, 5, 5, 4, 6])}
In [10]:
# pm.Categorical draws are 0-based category indices; +1 maps them
# back onto the 1-10 response scale
plt.hist(draw["y"] + 1);
plt.xlim([response_options[0], response_options[-1]]);

Define the model with the prior draw as observed data

In [11]:
obs_model = get_model(y=draw["y"])

Try sampling

In [12]:
with obs_model:
    trace = pm.sample(cores=1, chains=1)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [sigma, mu]
Sampling chain 0, 0 divergences:  10%|█         | 103/1000 [00:02<00:17, 50.59it/s]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-12-8b6cf7bec66f> in <module>
      1 with obs_model:
----> 2     trace = pm.sample(cores=1, chains=1)

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
    487             _log.info("Sequential sampling ({} chains in 1 job)".format(chains))
    488             _print_step_hierarchy(step)
--> 489             trace = _sample_many(**sample_args)
    490 
    491     discard = tune if discard_tuned_samples else 0

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in _sample_many(draws, chain, chains, start, random_seed, step, **kwargs)
    531     traces = []
    532     for i in range(chains):
--> 533         trace = _sample(
    534             draws=draws,
    535             chain=chain + i,

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in _sample(chain, progressbar, random_seed, start, draws, step, trace, tune, model, **kwargs)
    603     try:
    604         strace = None
--> 605         for it, (strace, diverging) in enumerate(sampling):
    606             if it >= skip_first:
    607                 trace = MultiTrace([strace])

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/tqdm/std.py in __iter__(self)
   1125 
   1126         try:
-> 1127             for obj in iterable:
   1128                 yield obj
   1129                 # Update and possibly print the progressbar.

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/sampling.py in _iter_sample(draws, step, start, trace, chain, tune, model, random_seed)
    698                 step = stop_tuning(step)
    699             if step.generates_stats:
--> 700                 point, stats = step.step(point)
    701                 if strace.supports_sampler_stats:
    702                     strace.record(point, stats)

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/step_methods/arraystep.py in step(self, point)
    245 
    246         if self.generates_stats:
--> 247             apoint, stats = self.astep(array)
    248             point = self._logp_dlogp_func.array_to_full_dict(apoint)
    249             return point, stats

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/step_methods/hmc/base_hmc.py in astep(self, q0)
    128                 (np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)
    129             ]
--> 130             self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
    131             message_energy = (
    132                 "Bad initial energy, check any log probabilities that "

~/venv/sound_field_vr_ana/lib/python3.8/site-packages/pymc3/step_methods/hmc/quadpotential.py in raise_ok(self, vmap)
    229                 errmsg.append('The derivative of RV `{}`.ravel()[{}]'
    230                               ' is zero.'.format(*name_slc[ii]))
--> 231             raise ValueError('\n'.join(errmsg))
    232 
    233         if np.any(~np.isfinite(self._stds)):

ValueError: Mass matrix contains zeros on the diagonal. 
The derivative of RV `sigma_log__`.ravel()[0] is zero.
The derivative of RV `mu`.ravel()[0] is zero.
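
A plausible contributor is the pair of infinite outer cutpoints: the normal CDF saturates at an infinite argument, so the erf terms involving them carry no gradient information back to mu or sigma. A minimal sketch of the effect, using the same erf construction as phi (test_mu is just an illustrative variable, not part of the model):

import theano

test_mu = tt.dscalar("test_mu")
cdf_at_inf = 0.5 * (1.0 + tt.erf((np.inf - test_mu) / np.sqrt(2.0)))
grad_at_inf = theano.grad(cdf_at_inf, test_mu)

# the CDF is pinned at 1, so its derivative with respect to mu
# evaluates to (negative) zero
print(grad_at_inf.eval({test_mu: 0.0}))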

Try replacing the np.infs with large finite values

In [17]:
# replace the infinite outer cutpoints with very large finite values
cutpoints = np.concatenate(([-10e50], (response_options - 0.5)[1:], [+10e50]))[np.newaxis, :]
pprint.pprint(cutpoints)

obs_model = get_model(y=draw["y"])

with obs_model:
    trace = pm.sample(cores=1, chains=1)
array([[-1.0e+51,  1.5e+00,  2.5e+00,  3.5e+00,  4.5e+00,  5.5e+00,
         6.5e+00,  7.5e+00,  8.5e+00,  9.5e+00,  1.0e+51]])
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [sigma, mu]
Sampling chain 0, 0 divergences: 100%|██████████| 1000/1000 [00:00<00:00, 1997.36it/s]
Only one chain was sampled, this makes it impossible to run some convergence checks
In [18]:
pm.summary(trace)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 500), minimum_shape: (chains=2, draws=4)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 500), minimum_shape: (chains=2, draws=4)
Out[18]:
        mean     sd  hpd_3%  hpd_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  ess_bulk  ess_tail  r_hat
mu     6.439  0.128   6.223    6.682      0.007    0.005     372.0   372.0     380.0     244.0    NaN
sigma  0.899  0.105   0.723    1.100      0.006    0.004     334.0   334.0     292.0     320.0    NaN
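
For reference, the generating values from the prior draw were mu ≈ 6.44 and sigma ≈ 0.91, so the posterior means recover them closely (though with only a single chain, r_hat is unavailable).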
In [19]:
pm.traceplot(trace);

Try sampling using pm.sample_smc()

In [20]:
with obs_model:
    trace = pm.sample_smc()
Sample initial stage: ...
Stage:   0 Beta: 0.040 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.129 Steps:  25 Acce: 0.531
Stage:   2 Beta: 0.363 Steps:   6 Acce: 0.440
Stage:   3 Beta: 1.000 Steps:   7 Acce: 0.359
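
(Beta here is the SMC tempering exponent, annealed from 0 at the prior towards 1 at the full posterior; Acce is the average acceptance rate of the mutation steps.)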
In [21]:
pm.summary(trace)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 1000), minimum_shape: (chains=2, draws=4)
arviz.stats.stats_utils - WARNING - Shape validation failed: input_shape: (1, 1000), minimum_shape: (chains=2, draws=4)
Out[21]:
        mean     sd  hpd_3%  hpd_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  ess_bulk  ess_tail  r_hat
mu     6.461  0.135   6.223    6.721      0.004    0.003     914.0   914.0     918.0     828.0    NaN
sigma  0.901  0.101   0.727    1.092      0.003    0.002     979.0   979.0     913.0     932.0    NaN
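
The SMC estimates (mu ≈ 6.46, sigma ≈ 0.90) again sit close to the generating values of 6.44 and 0.91.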
In [22]:
pm.traceplot(trace);

Use pm.sample_smc() a few times to check parameter recovery

In [23]:
# run it a few times to check it wasn't just lucky
n_reps = 20
In [24]:
with model:
    draws = pm.sample_prior_predictive(samples=n_reps, random_seed=12345)
In [25]:
# temporarily turn off the output
logger = logging.getLogger("pymc3")
start_level = logger.level
logger.setLevel(logging.CRITICAL)

est_mus = np.full(n_reps, np.nan)
est_sigmas = np.full(n_reps, np.nan)

for i_rep in range(n_reps):

    # condition the model on this repetition's prior-predictive draw
    obs_model = get_model(y=draws["y"][i_rep, :])

    with obs_model:
        trace = pm.sample_smc()

    # use the posterior means as point estimates
    est_mus[i_rep] = np.mean(trace["mu"])
    est_sigmas[i_rep] = np.mean(trace["sigma"])
    
logger.setLevel(start_level)
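
Alongside the recovery plots below, a quick numeric summary of the fits can be computed (a minimal sketch; none of these names are part of the model itself):

# correlation and root-mean-square error between true and estimated values
for name, true, est in [("mu", draws["mu"], est_mus), ("sigma", draws["sigma"], est_sigmas)]:
    r = np.corrcoef(true, est)[0, 1]
    rmse = np.sqrt(np.mean((true - est) ** 2))
    print(f"{name}: r = {r:.3f}, RMSE = {rmse:.3f}")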
In [26]:
plt.scatter(draws["mu"], est_mus);
plt.xlim([0, 10]);
plt.ylim([0, 10]);
plt.plot([0, 10], [0, 10]);
plt.xlabel("True $\mu$");
plt.ylabel("Estimated $\mu$");
In [27]:
plt.scatter(draws["sigma"], est_sigmas);
plt.xlim([0, 10]);
plt.ylim([0, 10]);
plt.plot([0, 10], [0, 10]);
plt.xlabel("True $\sigma$");
plt.ylabel("Estimated $\sigma$");