from os import makedirs
makedirs('data', exist_ok=True)
from urllib.request import urlretrieve
urlretrieve('https://git.io/vXknD', 'data/challenger_data.csv')
('data/challenger_data.csv', <http.client.HTTPMessage at 0x14bdfc002b0>)
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize']=[12,8]
plt.rcParams['font.size']=24
challenger_data = np.genfromtxt('data/challenger_data.csv',
skip_header=1, usecols=[1,2],
missing_values='NA',
delimiter=',')
challenger_data = challenger_data[~np.isnan(challenger_data[:,1])]
challenger_data.shape
(23, 2)
plt.scatter(*challenger_data.T,s=75, color="k", alpha=.5)
plt.xlabel('Outside temerature (Fahrenheit)')
plt.ylabel('Damage incident?')
plt.yticks([0,1])
plt.title('Defects of the space shuttle O-rings versus temperature')
plt.show()
Whether the ring was broken is modelled as $D_i$.
$$ D_i \tilde \ Bern(D_i | p(t_i)) (i=1,2,...,N) $$import pymc as pm
temperature, D = challenger_data.T
beta = pm.Normal('beta', 0, 0.001, value=0)
alpha = pm.Normal('alpha', 0, 0.001, value=0)
@pm.deterministic
def p(t=temperature, alpha=alpha, beta=beta):
return 1.0 / (1. + np.exp(beta * t + alpha))
observed = pm.Bernoulli('bernoulli_obs', p, value=D, observed=True)
model = pm.Model([observed, beta, alpha])
map_ = pm.MAP(model)
map_.fit()
mcmc = pm.MCMC(model)
mcmc.sample(120000, 100000, 2)
[-----------------100%-----------------] 120000 of 120000 complete in 24.1 sec
alpha_samples = mcmc.trace('beta')[:].reshape(-1,)
beta_samples = mcmc.trace('alpha')[:].reshape(-1,)
plt.subplot(211)
plt.hist(beta_samples,histtype='stepfilled',
bins=35, alpha=.85, color='#7A68A6', density=True,
label=r'posterior of $ \beta $')
plt.title(r'Posterior distribution of the model parameters $\alpha$, $\beta$')
plt.legend()
plt.subplot(212)
plt.hist(alpha_samples,histtype='stepfilled',
bins=35, alpha=.85, color='#A60628', density=True,
label=r'posterior of $ \alpha $')
plt.xlabel('Value of parameter')
plt.ylabel('Density')
plt.legend()
plt.show()
t = np.linspace(temperature.min()-5,
temperature.max()+5, 50)[:,None]#.reshape(-1,)
def logistic(x, b, a):
return 1.0 / (1. + np.exp(np.dot(x, b) + a))
p_t = np.array([logistic(t.T, b, a).reshape(-1,)for a,b in zip(beta_samples, alpha_samples)])
mean_prob_t = p_t.mean(axis=0)
plt.plot(t, mean_prob_t, lw=3,
label="average posterior \n"
"probability of defect")
plt.plot(t, p_t[0, :], ls="--",
label="realization from posterior")
plt.plot(t, p_t[-2, :], ls='--',
label='realizatoin from posterior')
plt.scatter(temperature, D, color='k', s=50, alpha=.5)
plt.title('Posterior expected value of the probability of defect, '
'including two realizations')
plt.legend(loc='lower left')
plt.ylim(-.1, 1.1)
plt.xlim(t.min(),t.max())
plt.ylabel('Probability')
plt.xlabel('Temperature')
plt.show()
from scipy.stats.mstats import mquantiles
qs = mquantiles(p_t, [0.01, 0.99], axis=0)
plt.fill_between(t[:,0], *qs,alpha=.3)# , color="#7A68A6"
qs = mquantiles(p_t, [0.025, 0.975], axis=0)
plt.fill_between(t[:,0], *qs,alpha=.7, color="#7A68A6")
plt.plot(t[:,0], qs[0], label='95% CI', color='#7A68A6', alpha=.7)
plt.plot(t, mean_prob_t, lw=1, ls='--', color='k',
label='average posterior \n'
'probability of defect')
plt.xlim(t.min(), t.max())
plt.ylim(-.02, 1.02)
plt.legend(loc='lower left')
plt.scatter(temperature, D, color='k', s=50, alpha=.5)
plt.xlabel(r'Temperature, $t$')
plt.ylabel('Probability estimate')
plt.title(r'Posterior probability of estimats, '
'given temperature $t$')
plt.show()
simulated = pm.Bernoulli('bernoulli_sim', p)
N=10000
mcmc = pm.MCMC([simulated, alpha, beta, observed])
mcmc.sample(N)
[-----------------100%-----------------] 10000 of 10000 complete in 3.4 sec
simulations = mcmc.trace('bernoulli_sim')[:].astype(int)
print('Data shape: {}'.format(simulations.shape))
Data shape: (10000, 23)
plt.title('Simulated datasets using posterior parameters')
for i in range(4):
ax = plt.subplot(4, 1, i+1)
plt.scatter(temperature, simulations[1000*i,:],
color='k', s=50, alpha=.6)