A PyMC3 port of the Stan case study "Identifying Bayesian Mixture Models": http://mc-stan.org/users/documentation/case-studies/identifying_mixture_models.html
%matplotlib inline
import numpy as np
import pymc3 as pm
import matplotlib.pylab as plt
import theano.tensor as tt
import theano
%config InlineBackend.figure_format = 'retina'
# Simulate N draws from a two-component Gaussian mixture: component 1
# (mean 2.75) is chosen with probability lam, component 0 (mean -2.75)
# otherwise; both components have unit standard deviation.
mu0 = np.asarray([-2.75, 2.75])
sigma0 = np.asarray([1, 1])
lam = 0.4
N = 1000
z = np.random.binomial(1, lam, size=(N,))
y = np.random.normal(loc=mu0[z], scale=sigma0[z])
plt.hist(y, 100);
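Since z = 1 with probability lam, component 1 carries weight 0.4 and component 0 carries weight 0.6. As a quick visual check (an addition to the original notebook, assuming scipy is available), the true density can be overlaid on the histogram:

from scipy import stats

# True mixture density on a grid; the weights follow from P(z=1) = lam.
grid = np.linspace(-6, 6, 500)
pdf = ((1 - lam) * stats.norm.pdf(grid, mu0[0], sigma0[0])
       + lam * stats.norm.pdf(grid, mu0[1], sigma0[1]))
plt.hist(y, 100, normed=True, alpha=.5)
plt.plot(grid, pdf);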
# A naive mixture model: exchangeable priors on both component means,
# so the posterior has two symmetric modes (label switching).
with pm.Model() as model:
    theta = pm.Beta('theta', 5., 5.)
    mu = pm.Normal('mu', 0, 2, shape=2)
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
    trace = pm.sample(1000, tune=1000, njobs=4)
pm.traceplot(trace);
plt.figure()
plt.scatter(trace['mu'][:,0], trace['mu'][:,1], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu1')
plt.ylabel('mu2');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu, theta_logodds__]
100%|██████████| 2000/2000 [00:12<00:00, 160.61it/s]
The gelman-rubin statistic is larger than 1.4 for some parameters. The sampler did not converge.
The estimated number of effective samples is smaller than 200 for some parameters.
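The large Gelman-Rubin statistic is the classic symptom of label switching: with exchangeable priors, each chain settles into one of the two symmetric modes. Comparing per-chain posterior means makes this visible (an added diagnostic, not part of the original notebook):

# Chains that swapped labels report the component means in opposite order.
for c in trace.chains:
    print('chain %d: mu =' % c, trace.get_values('mu', chains=[c]).mean(axis=0))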
# Break the symmetry with strongly informative priors that pin each
# component mean to one side.
with pm.Model() as model:
    theta = pm.Beta('theta', 5., 5.)
    mu0 = pm.Normal('mu0', 4, .5)
    mu1 = pm.Normal('mu1', -4, .5)
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, [mu0, mu1], sd=sd, observed=y)
    trace = pm.sample(1000, tune=1000, njobs=4)
pm.traceplot(trace);
plt.figure()
plt.scatter(trace['mu0'], trace['mu1'], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu0')
plt.ylabel('mu1');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu1, mu0, theta_logodds__]
100%|██████████| 2000/2000 [00:18<00:00, 107.28it/s]
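No convergence warnings this time: the informative priors break the symmetry and all four chains land in the same mode. A posterior summary confirms it (an added check, not part of the original notebook):

# Summarize the posterior of the two means and the mixture weight.
print(pm.summary(trace, varnames=['mu0', 'mu1', 'theta']))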
class Ordered(pm.distributions.transforms.ElemwiseTransform):
    """Bijection between an increasing vector and an unconstrained one:
    keep the first element, log-transform the successive differences."""
    name = "ordered"

    def forward(self, x):
        out = tt.zeros(x.shape)
        out = tt.inc_subtensor(out[0], x[0])
        out = tt.inc_subtensor(out[1:], tt.log(x[1:] - x[:-1]))
        return out

    def forward_val(self, x, point=None):
        x, = pm.distributions.distribution.draw_values([x], point=point)
        return self.forward(x)

    def backward(self, y):
        out = tt.zeros(y.shape)
        out = tt.inc_subtensor(out[0], y[0])
        out = tt.inc_subtensor(out[1:], tt.exp(y[1:]))
        return tt.cumsum(out)

    def jacobian_det(self, y):
        # The Jacobian of `backward` is lower triangular with diagonal
        # (1, exp(y[1]), ..., exp(y[-1])), so the log-determinant is sum(y[1:]).
        return tt.sum(y[1:])
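Before using the transform in a model, it is worth verifying that backward inverts forward (a sanity check added here, not part of the original notebook):

# Compile a round trip through the transform; an increasing vector
# should come back unchanged.
x = tt.vector('x')
ordered = Ordered()
roundtrip = theano.function([x], ordered.backward(ordered.forward(x)))
print(roundtrip(np.asarray([-2.75, 2.75], dtype=theano.config.floatX)))  # [-2.75  2.75]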
with pm.Model() as model:
    theta = pm.Beta('theta', 5., 5.)
    mu = pm.Normal('mu', 0, 2, shape=2, transform=Ordered(),
                   testval=[0.1, 0.2])
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
    trace = pm.sample(1000, tune=1000, njobs=4)
pm.traceplot(trace);
plt.figure()
plt.scatter(trace['mu'][:,0], trace['mu'][:,1], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu1')
plt.ylabel('mu2');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu_ordered__, theta_logodds__]
100%|██████████| 2000/2000 [00:28<00:00, 69.65it/s]
with pm.Model() as model:
    theta = pm.Beta('theta', 5., 5.)
    mu = pm.Normal('mu', 0, 2, shape=2, testval=[0.1, 0.2])
    # Reject any point where the means are out of order by adding -inf
    # to the model log-probability.
    pm.Potential('ordered', tt.switch(mu[1]-mu[0] < 0, -np.inf, 0))
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
    trace = pm.sample(1000, tune=1000, njobs=4)
pm.traceplot(trace);
plt.figure()
plt.scatter(trace['mu'][:,0], trace['mu'][:,1], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu1')
plt.ylabel('mu2');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu, theta_logodds__]
100%|██████████| 2000/2000 [00:14<00:00, 141.27it/s]
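The Potential simply adds a term to the model log-probability, so any point with unordered means gets probability zero. This can be checked by evaluating the model logp directly (an illustrative check, not part of the original notebook; the dictionary keys are the transformed variable names reported by the sampler above):

# An ordered point has finite logp; swapping the means gives -inf.
print(model.logp({'theta_logodds__': 0., 'mu': np.array([-1., 1.]), 'sd_log__': np.zeros(2)}))
print(model.logp({'theta_logodds__': 0., 'mu': np.array([1., -1.]), 'sd_log__': np.zeros(2)}))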
# Regenerate the data with strongly overlapping components (means at
# -0.75 and 0.75), so the component labels are genuinely ambiguous.
mu0 = np.asarray([-0.75, 0.75])
sigma0 = np.asarray([1, 1])
lam = 0.4
N = 1000
z = np.random.binomial(1, lam, size=(N,))
y = np.random.normal(loc=mu0[z], scale=sigma0[z])
plt.hist(y, 100);
with pm.Model() as model:
    # Fix the mixture weight at its true value by marking theta as observed.
    theta = pm.Beta('theta', 5., 5., observed=lam)
    mu = pm.Normal('mu', 0, 2, shape=2)
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
    trace = pm.sample(1000, tune=1000, njobs=4)
pm.traceplot(trace);
plt.figure()
plt.scatter(trace['mu'][:,0], trace['mu'][:,1], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu1')
plt.ylabel('mu2');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu]
100%|██████████| 2000/2000 [00:24<00:00, 81.26it/s]
The acceptance probability does not match the target. It is 0.55775796702757, but should be close to 0.8. Try to increase the number of tuning steps.
The gelman-rubin statistic is larger than 1.05 for some parameters. This indicates slight problems during sampling.
The estimated number of effective samples is smaller than 200 for some parameters.
from sampled import sampled

@sampled
def gmix_model(theta):
    # The value of `theta` supplied at call time is treated as observed,
    # fixing the mixture weight instead of sampling it (as confirmed by
    # the NUTS variable list below, which does not include theta).
    theta = pm.Beta('theta', 5., 5., observed=theta)
    mu = pm.Normal('mu', 0, 2, shape=2)
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
plt.figure()
for lam in [.25, .5, .75]:
    with gmix_model(theta=lam):
        trace = pm.sample(1000, tune=1000, njobs=4)
    plt.scatter(trace['mu'][:,0], trace['mu'][:,1], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu1')
plt.ylabel('mu2');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu]
100%|██████████| 2000/2000 [00:22<00:00, 90.29it/s]
The acceptance probability does not match the target. It is 0.5261341844200678, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.957341203229737, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9679129485582305, but should be close to 0.8. Try to increase the number of tuning steps.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu]
100%|██████████| 2000/2000 [00:25<00:00, 78.75it/s]
The acceptance probability does not match the target. It is 0.6851992401737543, but should be close to 0.8. Try to increase the number of tuning steps.
There were 43 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.38992459534800944, but should be close to 0.8. Try to increase the number of tuning steps.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu]
100%|██████████| 2000/2000 [00:21<00:00, 91.23it/s]
The acceptance probability does not match the target. It is 0.8988304352714063, but should be close to 0.8. Try to increase the number of tuning steps.
There were 5 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.3255515538658155, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.6889139876567418, but should be close to 0.8. Try to increase the number of tuning steps.
The estimated number of effective samples is smaller than 200 for some parameters.
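The sampled decorator (from the third-party sampled package) turns the function body into a reusable model factory whose keyword arguments become available inside the body. For readers without the package, a minimal hand-rolled equivalent might look like this (an illustrative sketch, not part of the original notebook):

def gmix_model_manual(theta_obs):
    # Build and return a fresh model with the mixture weight fixed at theta_obs.
    with pm.Model() as model:
        theta = pm.Beta('theta', 5., 5., observed=theta_obs)
        mu = pm.Normal('mu', 0, 2, shape=2)
        sd = pm.HalfNormal('sd', 2, shape=2)
        w = tt.stack([theta, 1.-theta])
        pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
    return model

# Usage mirrors the loop above:
# with gmix_model_manual(.5):
#     trace = pm.sample(1000, tune=1000, njobs=4)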
# The naive exchangeable model again, now on the overlapping data.
with pm.Model() as model:
    theta = pm.Beta('theta', 5., 5.)
    mu = pm.Normal('mu', 0, 2, shape=2)
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
    trace = pm.sample(1000, tune=1000, njobs=4)
pm.traceplot(trace);
plt.figure()
plt.scatter(trace['mu'][:,0], trace['mu'][:,1], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu1')
plt.ylabel('mu2');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu, theta_logodds__]
100%|██████████| 2000/2000 [00:43<00:00, 46.10it/s]
The acceptance probability does not match the target. It is 0.622506209295936, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.887349592607102, but should be close to 0.8. Try to increase the number of tuning steps.
The estimated number of effective samples is smaller than 200 for some parameters.
with pm.Model() as model:
    theta = pm.Beta('theta', 5., 5.)
    mu = pm.Normal('mu', 0, 2, shape=2, transform=Ordered(),
                   testval=[0.1, 0.2])
    sd = pm.HalfNormal('sd', 2, shape=2)
    w = tt.stack([theta, 1.-theta])
    obs = pm.NormalMixture('x_obs', w, mu, sd=sd, observed=y)
    trace = pm.sample(1000, tune=1000, njobs=4)
pm.traceplot(trace);
plt.figure()
plt.scatter(trace['mu'][:,0], trace['mu'][:,1], alpha=.01)
plt.plot([-3, 3], [-3, 3])
plt.xlabel('mu1')
plt.ylabel('mu2');
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_log__, mu_ordered__, theta_logodds__]
100%|██████████| 2000/2000 [00:43<00:00, 46.04it/s]
The number of effective samples is smaller than 25% for some parameters.
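The remaining warning concerns effective sample size rather than outright non-convergence; the diagnostics can be inspected directly (an added check, not part of the original notebook):

# Gelman-Rubin statistics near 1 indicate the chains agree; the effective
# sample sizes show how much the posterior draws are autocorrelated.
print(pm.gelman_rubin(trace))
print(pm.effective_n(trace))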