%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp
from collections import OrderedDict
from theano import shared, tensor as tt
%config InlineBackend.figure_format = 'retina'
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])
trolley_df = pd.read_csv('Data/Trolley.csv', sep=';')
trolley_df.head()
| | case | response | order | id | age | male | edu | action | intention | contact | story | action2 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | cfaqu | 4 | 2 | 96;434 | 14 | 0 | Middle School | 0 | 0 | 1 | aqu | 1 |
| 1 | cfbur | 3 | 31 | 96;434 | 14 | 0 | Middle School | 0 | 0 | 1 | bur | 1 |
| 2 | cfrub | 4 | 16 | 96;434 | 14 | 0 | Middle School | 0 | 0 | 1 | rub | 1 |
| 3 | cibox | 3 | 32 | 96;434 | 14 | 0 | Middle School | 0 | 1 | 1 | box | 1 |
| 4 | cibur | 3 | 4 | 96;434 | 14 | 0 | Middle School | 0 | 1 | 1 | bur | 1 |
ax = (trolley_df.response
      .value_counts()
      .sort_index()
      .plot(kind='bar'))
ax.set_xlabel("response", fontsize=14);
ax.set_ylabel("Frequency", fontsize=14);
ax = (trolley_df.response
      .value_counts()
      .sort_index()
      .cumsum()
      .div(trolley_df.shape[0])
      .plot(marker='o'))
ax.set_xlim(0.9, 7.1);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("cumulative proportion", fontsize=14);
resp_lco = (trolley_df.response
            .value_counts()
            .sort_index()
            .cumsum()
            .iloc[:-1]
            .div(trolley_df.shape[0])
            .apply(lambda p: np.log(p / (1. - p))))
ax = resp_lco.plot(marker='o')
ax.set_xlim(0.9, 7);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("log-cumulative-odds", fontsize=14);
The following `Ordered` transform is taken from the PyMC Discourse forum.
class Ordered(pm.distributions.transforms.ElemwiseTransform):
    name = "ordered"

    def forward(self, x):
        # map an ordered vector to an unconstrained one:
        # keep the first element, log-transform the increments
        out = tt.zeros(x.shape)
        out = tt.inc_subtensor(out[0], x[0])
        out = tt.inc_subtensor(out[1:], tt.log(x[1:] - x[:-1]))
        return out

    def forward_val(self, x, point=None):
        x, = pm.distributions.distribution.draw_values([x], point=point)
        return self.forward(x)

    def backward(self, y):
        # invert: exponentiate the increments and accumulate
        out = tt.zeros(y.shape)
        out = tt.inc_subtensor(out[0], y[0])
        out = tt.inc_subtensor(out[1:], tt.exp(y[1:]))
        return tt.cumsum(out)

    def jacobian_det(self, y):
        # log determinant of the Jacobian of the backward transform
        return tt.sum(y[1:])
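Before using the transform in a model, it is worth checking that backward inverts forward; a minimal sketch on a small sorted vector:
# sketch: round-trip a sorted vector through the transform
ord_trans = Ordered()
x_check = np.array([-2., -1., 0., 1.])
y_check = ord_trans.forward(tt.as_tensor_variable(x_check)).eval()
np.allclose(ord_trans.backward(tt.as_tensor_variable(y_check)).eval(), x_check)  # expect True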
with pm.Model() as m11_1:
    # six ordered cutpoints on the log-cumulative-odds scale
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6, testval=np.arange(6) - 2.5)

    pa = pm.math.sigmoid(a)
    # convert cumulative probabilities to per-category probabilities
    p_cum = tt.concatenate([[0.], pa, [1.]])
    p = p_cum[1:] - p_cum[:-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1)
with m11_1:
    map_11_1 = pm.find_MAP()
logp = -18,941, ||grad|| = 0.45229: 100%|██████████| 14/14 [00:00<00:00, 23.21it/s]
map_11_1['a']
array([-1.9160707 , -1.26658298, -0.71862013, 0.24778795, 0.88986631, 1.76937289])
sp.special.expit(map_11_1['a'])
array([ 0.12830038, 0.21984275, 0.32769691, 0.56163196, 0.70886258, 0.85437967])
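Since m11_1 estimates only the cutpoints, their inverse-logit should essentially reproduce the empirical cumulative proportions computed earlier; a quick sketch:
# sketch: MAP-implied minus empirical cumulative proportions (expect ~0)
emp_cum = (trolley_df.response
           .value_counts()
           .sort_index()
           .cumsum()
           .iloc[:-1]
           .div(trolley_df.shape[0]))
np.round(sp.special.expit(map_11_1['a']) - emp_cum.values, 3)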
with m11_1:
    trace_11_1 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [00:16<00:00, 160.45it/s]
pm.df_summary(trace_11_1, varnames=['a'], alpha=.11).round(2)
| | mean | sd | mc_error | hpd_5.5 | hpd_94.5 | n_eff | Rhat |
|---|---|---|---|---|---|---|---|
| a__0 | -1.92 | 0.03 | 0.0 | -1.97 | -1.87 | 2000.0 | 1.0 |
| a__1 | -1.27 | 0.02 | 0.0 | -1.31 | -1.23 | 2000.0 | 1.0 |
| a__2 | -0.72 | 0.02 | 0.0 | -0.75 | -0.69 | 2000.0 | 1.0 |
| a__3 | 0.25 | 0.02 | 0.0 | 0.21 | 0.28 | 2000.0 | 1.0 |
| a__4 | 0.89 | 0.02 | 0.0 | 0.86 | 0.92 | 2000.0 | 1.0 |
| a__5 | 1.77 | 0.03 | 0.0 | 1.73 | 1.82 | 2000.0 | 1.0 |
def ordered_logistic_proba(a):
    # convert cutpoints to per-category probabilities
    pa = sp.special.expit(a)
    p_cum = np.concatenate(([0.], pa, [1.]))
    return p_cum[1:] - p_cum[:-1]
ordered_logistic_proba(trace_11_1['a'].mean(axis=0))
array([ 0.12753048, 0.09170783, 0.10820073, 0.2341595 , 0.14767958, 0.14570807, 0.14501382])
(ordered_logistic_proba(trace_11_1['a'].mean(axis=0)) \
* (1 + np.arange(7))).sum()
4.1999293593514384
ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5)
array([ 0.08143763, 0.06409094, 0.08244469, 0.20927244, 0.15948963, 0.18473514, 0.21852952])
(ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5) \
* (1 + np.arange(7))).sum()
4.7296090400791702
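Subtracting 0.5 from every cutpoint shifts probability mass toward larger responses, which is why the implied average response rises from about 4.2 to about 4.7.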
action = shared(trolley_df.action.values)
intention = shared(trolley_df.intention.values)
contact = shared(trolley_df.contact.values)
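The predictors are wrapped in theano shared variables so that, after sampling, counterfactual values can be swapped in with set_value for posterior predictive checks (as done below).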
with pm.Model() as m11_2:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )

    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    phi = bA * action + bI * intention + bC * contact

    # shift each cutpoint by the linear predictor phi
    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )
    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
with m11_2:
    map_11_2 = pm.find_MAP()
logp = -18,565, ||grad|| = 3.7472: 100%|██████████| 17/17 [00:00<00:00, 58.64it/s]
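With the MAP estimate in hand, we can read off the implied response probabilities for a particular kind of story by shifting the cutpoints by phi; a minimal sketch for an action story with neither intention nor contact, reusing ordered_logistic_proba from above:
# sketch: MAP-implied response probabilities under m11_2
# for action = 1, intention = 0, contact = 0
phi_map = map_11_2['bA'] * 1. + map_11_2['bI'] * 0. + map_11_2['bC'] * 0.
ordered_logistic_proba(map_11_2['a'] - phi_map)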
with pm.Model() as m11_3:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )

    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    bAI = pm.Normal('bAI', 0., 10.)
    bCI = pm.Normal('bCI', 0., 10.)
    phi = bA * action + bI * intention + bC * contact \
        + bAI * action * intention \
        + bCI * contact * intention

    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )
    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
with m11_3:
    map_11_3 = pm.find_MAP()
logp = -18,489, ||grad|| = 0.91902: 100%|██████████| 26/26 [00:00<00:00, 110.84it/s]
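With the interactions included, the slope of intention depends on the story type; at the MAP, its effect for contact stories is bI + bCI, versus bI alone for stories with neither action nor contact. A quick sketch:
# sketch: MAP effect of intention with and without contact
map_11_3['bI'] + map_11_3['bCI'], map_11_3['bI']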
def get_coefs(map_est):
    coefs = OrderedDict()
    for i, ai in enumerate(map_est['a']):
        coefs['a_{}'.format(i)] = ai

    coefs['bA'] = map_est.get('bA', np.nan)
    coefs['bI'] = map_est.get('bI', np.nan)
    coefs['bC'] = map_est.get('bC', np.nan)
    coefs['bAI'] = map_est.get('bAI', np.nan)
    coefs['bCI'] = map_est.get('bCI', np.nan)

    return coefs
(pd.DataFrame.from_dict(
    OrderedDict([
        ('m11_1', get_coefs(map_11_1)),
        ('m11_2', get_coefs(map_11_2)),
        ('m11_3', get_coefs(map_11_3))
    ]))
 .astype(np.float64)
 .round(2))
| | m11_1 | m11_2 | m11_3 |
|---|---|---|---|
| a_0 | -1.92 | -2.84 | -2.63 |
| a_1 | -1.27 | -2.16 | -1.94 |
| a_2 | -0.72 | -1.57 | -1.34 |
| a_3 | 0.25 | -0.55 | -0.31 |
| a_4 | 0.89 | 0.12 | 0.36 |
| a_5 | 1.77 | 1.02 | 1.27 |
| bA | NaN | -0.71 | -0.47 |
| bAI | NaN | NaN | -0.45 |
| bC | NaN | -0.96 | -0.33 |
| bCI | NaN | NaN | -1.27 |
| bI | NaN | -0.72 | -0.28 |
with m11_2:
    trace_11_2 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [03:19<00:00, 7.63it/s]
with m11_3:
    trace_11_3 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [04:16<00:00, 7.37it/s]
comp_df = pm.compare([trace_11_1, trace_11_2, trace_11_3],
                     [m11_1, m11_2, m11_3])
comp_df.loc[:,'model'] = pd.Series(['m11.1', 'm11.2', 'm11.3'])
comp_df = comp_df.set_index('model')
comp_df
| model | WAIC | pWAIC | dWAIC | weight | SE | dSE | warning |
|---|---|---|---|---|---|---|---|
| m11.3 | 36929 | 10.88 | 0 | 0.95 | 81.38 | 0 | 0 |
| m11.2 | 37090.3 | 9.16 | 161.28 | 0 | 76.42 | 25.88 | 0 |
| m11.1 | 37854.4 | 5.94 | 925.44 | 0.05 | 57.69 | 62.95 | 0 |
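The comparison can also be shown graphically; a minimal sketch, assuming pm.compareplot is available in this PyMC3 version:
# sketch: plot the WAIC comparison (assumes pm.compareplot exists here)
pm.compareplot(comp_df);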
pp_df = pd.DataFrame(
    np.array([[0, 0, 0],
              [0, 0, 1],
              [1, 0, 0],
              [1, 0, 1],
              [0, 1, 0],
              [0, 1, 1]]),
    columns=['action', 'contact', 'intention'])
pp_df
| | action | contact | intention |
|---|---|---|---|
| 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 1 |
| 2 | 1 | 0 | 0 |
| 3 | 1 | 0 | 1 |
| 4 | 0 | 1 | 0 |
| 5 | 0 | 1 | 1 |
action.set_value(pp_df.action.values)
contact.set_value(pp_df.contact.values)
intention.set_value(pp_df.intention.values)
with m11_3:
    pp_trace_11_3 = pm.sample_ppc(trace_11_3, samples=1500)
100%|██████████| 1500/1500 [00:17<00:00, 87.99it/s]
PP_COLS = ['pp_{}'.format(i) for i, _ in enumerate(pp_trace_11_3['resp_obs'])]
pp_df = pd.concat((pp_df,
                   pd.DataFrame(pp_trace_11_3['resp_obs'].T, columns=PP_COLS)),
                  axis=1)
pp_cum_df = (pd.melt(
        pp_df,
        id_vars=['action', 'contact', 'intention'],
        value_vars=PP_COLS, value_name='resp'
    )
    .groupby(['action', 'contact', 'intention', 'resp'])
    .size()
    .div(1500)
    .rename('proba')
    .reset_index()
    .pivot_table(
        index=['action', 'contact', 'intention'],
        values='proba',
        columns='resp'
    )
    .cumsum(axis=1)
    .iloc[:, :-1])
pp_cum_df
| action | contact | intention | resp 0 | resp 1 | resp 2 | resp 3 | resp 4 | resp 5 |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0.069333 | 0.128000 | 0.208000 | 0.418667 | 0.568667 | 0.757333 |
| 0 | 0 | 1 | 0.076667 | 0.145333 | 0.238667 | 0.470000 | 0.648000 | 0.818667 |
| 0 | 1 | 0 | 0.089333 | 0.171333 | 0.266667 | 0.514667 | 0.687333 | 0.838000 |
| 0 | 1 | 1 | 0.330000 | 0.484667 | 0.642000 | 0.834667 | 0.906667 | 0.956667 |
| 1 | 0 | 0 | 0.109333 | 0.191333 | 0.286667 | 0.544000 | 0.705333 | 0.861333 |
| 1 | 0 | 1 | 0.179333 | 0.308000 | 0.466667 | 0.697333 | 0.822000 | 0.916667 |
for (plot_action, plot_contact), plot_df in pp_cum_df.groupby(level=['action', 'contact']):
    fig, ax = plt.subplots(figsize=(8, 6))

    ax.plot([0, 1], plot_df, c='C0');
    ax.plot([0, 1], [0, 0], '--', c='C0');
    ax.plot([0, 1], [1, 1], '--', c='C0');

    ax.set_xlim(0, 1);
    ax.set_xlabel("intention");

    ax.set_ylim(-0.05, 1.05);
    ax.set_ylabel("probability");

    ax.set_title(
        "action = {action}, contact = {contact}".format(
            action=plot_action, contact=plot_contact
        )
    );
# define parameters
PROB_DRINK = 0.2 # 20% of days
RATE_WORK = 1. # average 1 manuscript per day
# sample one year of production
N = 365
drink = np.random.binomial(1, PROB_DRINK, size=N)
y = (1 - drink) * np.random.poisson(RATE_WORK, size=N)
drink_zeros = drink.sum()
work_zeros = (y == 0).sum() - drink_zeros
bins = np.arange(y.max() + 1) - 0.5
plt.hist(y, bins=bins);
plt.bar(0., drink_zeros, width=1., bottom=work_zeros, color='C1', alpha=.5);
plt.xticks(bins + 0.5);
plt.xlabel("manuscripts completed");
plt.ylabel("Frequency");
with pm.Model() as m11_4:
    ap = pm.Normal('ap', 0., 1.)
    p = pm.math.sigmoid(ap)  # probability of a drinking day

    al = pm.Normal('al', 0., 10.)
    lambda_ = tt.exp(al)  # manuscript rate on working days

    # psi (first argument) is the probability of the Poisson process, i.e. 1 - p
    y_obs = pm.ZeroInflatedPoisson('y_obs', 1. - p, lambda_, observed=y)
with m11_4:
    map_11_4 = pm.find_MAP()
logp = -462.79, ||grad|| = 0.00047532: 100%|██████████| 12/12 [00:00<00:00, 211.49it/s]
map_11_4
{'al': array(0.05019473453063987), 'ap': array(-1.426975323508402)}
sp.special.expit(map_11_4['ap']) # probability drink
0.19357040138820736
np.exp(map_11_4['al']) # rate finish manuscripts, when not drinking
1.0514758350937541
def dzip(x, p, lambda_, log=True):
    # mixture likelihood: zeros come from drinking (probability p)
    # or from a Poisson(lambda_) that happened to produce zero
    like = p * (x == 0) + (1 - p) * sp.stats.poisson.pmf(x, lambda_)
    return np.log(like) if log else like
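As a sanity check, evaluating dzip at the MAP estimates should give probabilities close to the simulated frequencies; a minimal sketch:
# sketch: ZIP probabilities at the MAP for counts 0..4
dzip(np.arange(5),
     sp.special.expit(map_11_4['ap']),
     np.exp(map_11_4['al']),
     log=False)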
PBAR = 0.5
THETA = 5.
a = PBAR * THETA
b = (1 - PBAR) * THETA
p = np.linspace(0, 1, 100)
plt.plot(p, sp.stats.beta.pdf(p, a, b));
plt.xlim(0, 1);
plt.xlabel("probability");
plt.ylabel("Density");
admit_df = pd.read_csv('Data/UCBadmit.csv', sep=';')
admit_df.head()
| | dept | applicant.gender | admit | reject | applications |
|---|---|---|---|---|---|
| 1 | A | male | 512 | 313 | 825 |
| 2 | A | female | 89 | 19 | 108 |
| 3 | B | male | 353 | 207 | 560 |
| 4 | B | female | 17 | 8 | 25 |
| 5 | C | male | 120 | 205 | 325 |
with pm.Model() as m11_5:
    a = pm.Normal('a', 0., 2.)
    pbar = pm.Deterministic('pbar', pm.math.sigmoid(a))
    theta = pm.Exponential('theta', 1.)

    admit_obs = pm.BetaBinomial(
        'admit_obs',
        pbar * theta, (1. - pbar) * theta,
        admit_df.applications.values,
        observed=admit_df.admit.values
    )
with m11_5:
    trace_11_5 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [00:02<00:00, 718.29it/s]
pm.summary(trace_11_5, alpha=.11).round(2)
| | mean | sd | mc_error | hpd_5.5 | hpd_94.5 | n_eff | Rhat |
|---|---|---|---|---|---|---|---|
| a | -0.37 | 0.31 | 0.01 | -0.86 | 0.12 | 1860.0 | 1.0 |
| pbar | 0.41 | 0.07 | 0.00 | 0.30 | 0.53 | 1863.0 | 1.0 |
| theta | 2.78 | 0.99 | 0.02 | 1.25 | 4.22 | 1641.0 | 1.0 |
np.percentile(trace_11_5['pbar'], [2.5, 50., 97.5])
array([ 0.28260569, 0.40670155, 0.56068546])
pbar_hat = trace_11_5['pbar'].mean()
theta_hat = trace_11_5['theta'].mean()
p_plot = np.linspace(0, 1, 100)
plt.plot(
p_plot,
sp.stats.beta.pdf(p_plot, pbar_hat * theta_hat, (1. - pbar_hat) * theta_hat)
);
plt.plot(
p_plot,
sp.stats.beta.pdf(
p_plot[:, np.newaxis],
trace_11_5['pbar'][:100] * trace_11_5['theta'][:100],
(1. - trace_11_5['pbar'][:100]) * trace_11_5['theta'][:100]
),
c='C0', alpha=0.1
);
plt.xlim(0., 1.);
plt.xlabel("probability admit");
plt.ylim(0., 3.);
plt.ylabel("Density");
with m11_5:
    pp_trace_11_5 = pm.sample_ppc(trace_11_5)
100%|██████████| 1000/1000 [00:02<00:00, 355.73it/s]
x_case = np.arange(admit_df.shape[0])
plt.scatter(
    x_case,
    pp_trace_11_5['admit_obs'].mean(axis=0) / admit_df.applications.values
);
plt.scatter(x_case, admit_df.admit / admit_df.applications);

high = np.percentile(pp_trace_11_5['admit_obs'], 95, axis=0) \
    / admit_df.applications.values
plt.scatter(x_case, high, marker='x', c='k');

low = np.percentile(pp_trace_11_5['admit_obs'], 5, axis=0) \
    / admit_df.applications.values
plt.scatter(x_case, low, marker='x', c='k');
mu = 3.
theta = 1.
x = np.linspace(0, 10, 100)
plt.plot(x, sp.stats.gamma.pdf(x, mu / theta, scale=theta));
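With shape mu / theta and scale theta, this gamma distribution has mean mu; a one-line check:
# sketch: the mean of this gamma parameterization is mu
sp.stats.gamma.mean(mu / theta, scale=theta)  # expect 3.0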
import sys, IPython, scipy, matplotlib, platform
print("This notebook was createad on a computer %s running %s and using:\nPython %s\nIPython %s\nPyMC3 %s\nNumPy %s\nPandas %s\nSciPy %s\nMatplotlib %s\n" % (platform.machine(), ' '.join(platform.linux_distribution()[:2]), sys.version[:5], IPython.__version__, pm.__version__, np.__version__, pd.__version__, scipy.__version__, matplotlib.__version__))
This notebook was createad on a computer x86_64 running debian stretch/sid and using: Python 3.6.2 IPython 6.1.0 PyMC3 3.2 NumPy 1.13.3 Pandas 0.20.3 SciPy 0.19.1 Matplotlib 2.1.0