In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp
from collections import OrderedDict
from theano import shared, tensor as tt

%config InlineBackend.figure_format = 'retina'
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])

Code 11.1

In [2]:
trolley_df = pd.read_csv('Data/Trolley.csv', sep=';')
trolley_df.head()
Out[2]:
    case  response  order      id  age  male            edu  action  intention  contact  story  action2
0  cfaqu         4      2  96;434   14     0  Middle School       0          0        1    aqu        1
1  cfbur         3     31  96;434   14     0  Middle School       0          0        1    bur        1
2  cfrub         4     16  96;434   14     0  Middle School       0          0        1    rub        1
3  cibox         3     32  96;434   14     0  Middle School       0          1        1    box        1
4  cibur         3      4  96;434   14     0  Middle School       0          1        1    bur        1

Code 11.2

In [3]:
ax = (trolley_df.response
                .value_counts()
                .sort_index()
                .plot(kind='bar'))

ax.set_xlabel("response", fontsize=14);
ax.set_ylabel("Frequency", fontsize=14);

Code 11.3

In [4]:
ax = (trolley_df.response
                .value_counts()
                .sort_index()
                .cumsum()
                .div(trolley_df.shape[0])
                .plot(marker='o'))

ax.set_xlim(0.9, 7.1);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("cumulative proportion", fontsize=14);

Code 11.4

In [5]:
resp_lco = (trolley_df.response
                      .value_counts()
                      .sort_index()
                      .cumsum()
                      .iloc[:-1]
                      .div(trolley_df.shape[0])
                      .apply(lambda p: np.log(p / (1. - p))))
In [6]:
ax = resp_lco.plot(marker='o')

ax.set_xlim(0.9, 7);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("log-cumulative-odds", fontsize=14);

Code 11.5

The following Ordered transform is taken from the PyMC Discourse forum; it maps an unconstrained vector to a strictly increasing one, which keeps the cutpoints ordered during sampling.

In [7]:
class Ordered(pm.distributions.transforms.ElemwiseTransform):
    name = "ordered"

    def forward(self, x):
        # increasing vector -> unconstrained: keep the first element,
        # take the log of the successive differences
        out = tt.zeros(x.shape)
        out = tt.inc_subtensor(out[0], x[0])
        out = tt.inc_subtensor(out[1:], tt.log(x[1:] - x[:-1]))
        return out

    def forward_val(self, x, point=None):
        x, = pm.distributions.distribution.draw_values([x], point=point)
        return self.forward(x)

    def backward(self, y):
        # unconstrained -> increasing: exponentiate the differences and
        # cumulatively sum, which guarantees an increasing result
        out = tt.zeros(y.shape)
        out = tt.inc_subtensor(out[0], y[0])
        out = tt.inc_subtensor(out[1:], tt.exp(y[1:]))
        return tt.cumsum(out)

    def jacobian_det(self, y):
        # log|det J| of backward: the sum of the log-differences
        return tt.sum(y[1:])
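
A short NumPy sketch (illustrative only, with made-up values) mirrors backward above and shows why the result is always ordered: the first element passes through, and each later element adds a strictly positive exponential term:

# mirrors Ordered.backward(): cumulative sum of [y0, exp(y1), exp(y2), ...]
y = np.array([-2., 0.5, 0.1, 1.0, 0.3, 0.9])  # unconstrained values
x = np.cumsum(np.concatenate(([y[0]], np.exp(y[1:]))))
assert np.all(np.diff(x) > 0)  # strictly increasing cutpoints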
In [8]:
with pm.Model() as m11_1:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6, testval=np.arange(6) - 2.5)

    # cumulative probabilities at each cutpoint
    pa = pm.math.sigmoid(a)

    p_cum = tt.concatenate([[0.], pa, [1.]])
    p = p_cum[1:] - p_cum[:-1]
    
    # responses are recoded from 1-7 to 0-6 for pm.Categorical
    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1)
In [9]:
with m11_1:
    map_11_1 = pm.find_MAP()
logp = -18,941, ||grad|| = 0.45229: 100%|██████████| 14/14 [00:00<00:00, 23.21it/s]

Code 11.6

In [10]:
map_11_1['a']
Out[10]:
array([-1.9160707 , -1.26658298, -0.71862013,  0.24778795,  0.88986631,
        1.76937289])

Code 11.7

In [11]:
sp.special.expit(map_11_1['a'])
Out[11]:
array([ 0.12830038,  0.21984275,  0.32769691,  0.56163196,  0.70886258,
        0.85437967])

Code 11.8

In [12]:
with m11_1:
    trace_11_1 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:16<00:00, 160.45it/s]
In [13]:
pm.df_summary(trace_11_1, varnames=['a'], alpha=.11).round(2)
Out[13]:
       mean    sd  mc_error  hpd_5.5  hpd_94.5   n_eff  Rhat
a__0  -1.92  0.03       0.0    -1.97     -1.87  2000.0   1.0
a__1  -1.27  0.02       0.0    -1.31     -1.23  2000.0   1.0
a__2  -0.72  0.02       0.0    -0.75     -0.69  2000.0   1.0
a__3   0.25  0.02       0.0     0.21      0.28  2000.0   1.0
a__4   0.89  0.02       0.0     0.86      0.92  2000.0   1.0
a__5   1.77  0.03       0.0     1.73      1.82  2000.0   1.0

Code 11.9

In [14]:
def ordered_logistic_proba(a):
    pa = sp.special.expit(a)
    p_cum = np.concatenate(([0.], pa, [1.]))
    
    return p_cum[1:] - p_cum[:-1]
In [15]:
ordered_logistic_proba(trace_11_1['a'].mean(axis=0))
Out[15]:
array([ 0.12753048,  0.09170783,  0.10820073,  0.2341595 ,  0.14767958,
        0.14570807,  0.14501382])
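
These seven category probabilities sum to one by construction, since they are differences of a cumulative sequence running from 0 to 1:

ordered_logistic_proba(trace_11_1['a'].mean(axis=0)).sum()  # ~1.0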

Code 11.10

In [16]:
(ordered_logistic_proba(trace_11_1['a'].mean(axis=0)) \
     * (1 + np.arange(7))).sum()
Out[16]:
4.1999293593514384

Code 11.11

In [17]:
ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5)
Out[17]:
array([ 0.08143763,  0.06409094,  0.08244469,  0.20927244,  0.15948963,
        0.18473514,  0.21852952])

Code 11.12

In [18]:
(ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5) \
     * (1 + np.arange(7))).sum()
Out[18]:
4.7296090400791702

Code 11.13

In [19]:
action = shared(trolley_df.action.values)
intention = shared(trolley_df.intention.values)
contact = shared(trolley_df.contact.values)

with pm.Model() as m11_2:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )
    
    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    phi = bA * action + bI * intention + bC * contact

    # broadcast the (6,) cutpoints against the (N,) linear predictor phi
    # to get an (N, 6) matrix of cumulative probabilities
    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )
    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]
    
    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
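
The shape_padleft/shape_padright calls are just broadcasting; a tiny NumPy sketch (with made-up numbers) shows the shapes involved: the (6,) cutpoints and the per-case linear predictor combine into an (N, 6) matrix of shifted cutpoints:

a_demo = np.linspace(-2., 2., 6)[np.newaxis, :]   # cutpoints, shape (1, 6)
phi_demo = np.array([0., .5, 1.])[:, np.newaxis]  # linear predictor, shape (3, 1)
(a_demo - phi_demo).shape                         # (3, 6)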
In [20]:
with m11_2:
    map_11_2 = pm.find_MAP()
logp = -18,565, ||grad|| = 3.7472: 100%|██████████| 17/17 [00:00<00:00, 58.64it/s]  

Code 11.14

In [21]:
with pm.Model() as m11_3:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )
    
    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    bAI = pm.Normal('bAI', 0., 10.)
    bCI = pm.Normal('bCI', 0., 10.)
    phi = bA * action + bI * intention + bC * contact \
            + bAI * action * intention \
            + bCI * contact * intention

    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )
    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]
    
    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
In [22]:
with m11_3:
    map_11_3 = pm.find_MAP()
logp = -18,489, ||grad|| = 0.91902: 100%|██████████| 26/26 [00:00<00:00, 110.84it/s]  

Code 11.15

In [23]:
def get_coefs(map_est):
    coefs = OrderedDict()
    
    for i, ai in enumerate(map_est['a']):
        coefs['a_{}'.format(i)] = ai
        
    coefs['bA'] = map_est.get('bA', np.nan)
    coefs['bI'] = map_est.get('bI', np.nan)
    coefs['bC'] = map_est.get('bC', np.nan)
    coefs['bAI'] = map_est.get('bAI', np.nan)
    coefs['bCI'] = map_est.get('bCI', np.nan)
        
    return coefs
In [24]:
(pd.DataFrame.from_dict(
    OrderedDict([
        ('m11_1', get_coefs(map_11_1)),
        ('m11_2', get_coefs(map_11_2)),
        ('m11_3', get_coefs(map_11_3))
    ]))
   .astype(np.float64)
   .round(2))
Out[24]:
     m11_1  m11_2  m11_3
a_0  -1.92  -2.84  -2.63
a_1  -1.27  -2.16  -1.94
a_2  -0.72  -1.57  -1.34
a_3   0.25  -0.55  -0.31
a_4   0.89   0.12   0.36
a_5   1.77   1.02   1.27
bA     NaN  -0.71  -0.47
bAI    NaN    NaN  -0.45
bC     NaN  -0.96  -0.33
bCI    NaN    NaN  -1.27
bI     NaN  -0.72  -0.28

Code 11.16

In [25]:
with m11_2:
    trace_11_2 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [03:19<00:00,  7.63it/s]
In [26]:
with m11_3:
    trace_11_3 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [04:16<00:00,  7.37it/s]
In [27]:
comp_df = pm.compare([trace_11_1, trace_11_2, trace_11_3],
                     [m11_1, m11_2, m11_3])

comp_df.loc[:,'model'] = pd.Series(['m11.1', 'm11.2', 'm11.3'])
comp_df = comp_df.set_index('model')
comp_df
Out[27]:
          WAIC  pWAIC   dWAIC  weight     SE    dSE  warning
model
m11.3    36929  10.88       0    0.95  81.38      0        0
m11.2  37090.3   9.16  161.28       0  76.42  25.88        0
m11.1  37854.4   5.94  925.44    0.05  57.69  62.95        0
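
The weight column is based on the Akaike-weight idea, with w_i proportional to exp(-dWAIC_i / 2); a naive sketch using the dWAIC values above (pm.compare's own weighting may differ, e.g. through bootstrap averaging, so this need not reproduce the table exactly):

dwaic = np.array([0., 161.28, 925.44])
raw = np.exp(-0.5 * dwaic)
raw / raw.sum()  # naive weights; m11_3 takes essentially all the weight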

Code 11.17-19

In [28]:
pp_df = pd.DataFrame(
    np.array([[0, 0, 0],
              [0, 0, 1],
              [1, 0, 0],
              [1, 0, 1],
              [0, 1, 0],
              [0, 1, 1]]),
    columns=['action', 'contact', 'intention'])
In [29]:
pp_df
Out[29]:
   action  contact  intention
0       0        0          0
1       0        0          1
2       1        0          0
3       1        0          1
4       0        1          0
5       0        1          1
In [30]:
action.set_value(pp_df.action.values)
contact.set_value(pp_df.contact.values)
intention.set_value(pp_df.intention.values)

with m11_3:
    pp_trace_11_3 = pm.sample_ppc(trace_11_3, samples=1500)
100%|██████████| 1500/1500 [00:17<00:00, 87.99it/s] 
In [31]:
PP_COLS = ['pp_{}'.format(i) for i, _ in enumerate(pp_trace_11_3['resp_obs'])]

pp_df = pd.concat((pp_df,
                   pd.DataFrame(pp_trace_11_3['resp_obs'].T, columns=PP_COLS)),
                  axis=1)
In [32]:
pp_cum_df = (pd.melt(
                    pp_df,
                    id_vars=['action', 'contact', 'intention'],
                    value_vars=PP_COLS, value_name='resp'
                )
               .groupby(['action', 'contact', 'intention', 'resp'])
               .size()
               .div(1500)
               .rename('proba')
               .reset_index()
               .pivot_table(
                   index=['action', 'contact', 'intention'],
                   values='proba',
                   columns='resp'
                )
               .cumsum(axis=1)
               .iloc[:, :-1])
In [33]:
pp_cum_df
Out[33]:
resp                             0         1         2         3         4         5
action contact intention
0      0       0          0.069333  0.128000  0.208000  0.418667  0.568667  0.757333
               1          0.076667  0.145333  0.238667  0.470000  0.648000  0.818667
       1       0          0.089333  0.171333  0.266667  0.514667  0.687333  0.838000
               1          0.330000  0.484667  0.642000  0.834667  0.906667  0.956667
1      0       0          0.109333  0.191333  0.286667  0.544000  0.705333  0.861333
               1          0.179333  0.308000  0.466667  0.697333  0.822000  0.916667
In [34]:
for (plot_action, plot_contact), plot_df in pp_cum_df.groupby(level=['action', 'contact']):
    fig, ax = plt.subplots(figsize=(8, 6))
    
    ax.plot([0, 1], plot_df, c='C0');
    ax.plot([0, 1], [0, 0], '--', c='C0');
    ax.plot([0, 1], [1, 1], '--', c='C0');
    
    ax.set_xlim(0, 1);
    ax.set_xlabel("intention");
    
    ax.set_ylim(-0.05, 1.05);
    ax.set_ylabel("probability");
    
    ax.set_title(
        "action = {action}, contact = {contact}".format(
            action=plot_action, contact=plot_contact
        )
    );

Code 11.20

In [35]:
# define parameters
PROB_DRINK = 0.2 # 20% of days
RATE_WORK = 1. # average 1 manuscript per day

# sample one year of production
N = 365
In [36]:
drink = np.random.binomial(1, PROB_DRINK, size=N)
y = (1 - drink) * np.random.poisson(RATE_WORK, size=N)
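
Under this mixture the probability of a zero is P(0) = p + (1 - p) * exp(-lambda): a zero comes either from drinking or from a working day that produced nothing. A quick check against the simulation (values vary with the random seed):

p_zero = PROB_DRINK + (1 - PROB_DRINK) * np.exp(-RATE_WORK)
(y == 0).mean(), p_zero  # simulated vs. theoretical zero rate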

Code 11.21

In [37]:
drink_zeros = drink.sum()
work_zeros = (y == 0).sum() - drink_zeros
In [38]:
bins = np.arange(y.max() + 1) - 0.5

plt.hist(y, bins=bins);
plt.bar(0., drink_zeros, width=1., bottom=work_zeros, color='C1', alpha=.5);

plt.xticks(bins + 0.5);
plt.xlabel("manuscripts completed");

plt.ylabel("Frequency");

Code 11.22

In [39]:
with pm.Model() as m11_4:
    ap = pm.Normal('ap', 0., 1.)
    p = pm.math.sigmoid(ap)  # probability of drinking

    al = pm.Normal('al', 0., 10.)
    lambda_ = tt.exp(al)  # manuscript rate on working days

    # psi = 1 - p is the probability of the (working) Poisson process
    y_obs = pm.ZeroInflatedPoisson('y_obs', 1. - p, lambda_, observed=y)
In [40]:
with m11_4:
    map_11_4 = pm.find_MAP()
logp = -462.79, ||grad|| = 0.00047532: 100%|██████████| 12/12 [00:00<00:00, 211.49it/s]
In [41]:
map_11_4
Out[41]:
{'al': array(0.05019473453063987), 'ap': array(-1.426975323508402)}

Code 11.23

In [42]:
sp.special.expit(map_11_4['ap']) # probability drink
Out[42]:
0.19357040138820736
In [43]:
np.exp(map_11_4['al']) # rate finish manuscripts, when not drinking
Out[43]:
1.0514758350937541

Code 11.24

In [44]:
def dzip(x, p, lambda_, log=True):
    # zeros arise either from drinking (probability p) or from a working
    # day that happened to produce nothing
    like = p * (x == 0) + (1 - p) * sp.stats.poisson.pmf(x, lambda_)

    return np.log(like) if log else like
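
A quick check of dzip (illustrative): with p = 0.2 and lambda = 1, the probabilities sum to one over the support, and the zero probability matches p + (1 - p) * exp(-1), about 0.494:

probs = dzip(np.arange(50), 0.2, 1., log=False)
probs.sum(), dzip(0, 0.2, 1., log=False)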

Code 11.25

In [45]:
PBAR = 0.5
THETA = 5.
In [46]:
a = PBAR * THETA
b = (1 - PBAR) * THETA
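
This is the mean-and-concentration parameterization of the beta distribution: PBAR is the mean, since a / (a + b) = PBAR, and THETA = a + b is the concentration:

a / (a + b), a + b  # (0.5, 5.0)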
In [47]:
p = np.linspace(0, 1, 100)

plt.plot(p, sp.stats.beta.pdf(p, a, b));

plt.xlim(0, 1);
plt.xlabel("probability");

plt.ylabel("Density");

Code 11.26

In [48]:
admit_df = pd.read_csv('Data/UCBadmit.csv', sep=';')
admit_df.head()
Out[48]:
  dept applicant.gender  admit  reject  applications
1    A             male    512     313           825
2    A           female     89      19           108
3    B             male    353     207           560
4    B           female     17       8            25
5    C             male    120     205           325
In [49]:
with pm.Model() as m11_5:
    a = pm.Normal('a', 0., 2.)
    pbar = pm.Deterministic('pbar', pm.math.sigmoid(a))

    theta = pm.Exponential('theta', 1.)
    
    admit_obs = pm.BetaBinomial(
        'admit_obs',
        pbar * theta, (1. - pbar) * theta,
        admit_df.applications.values,
        observed=admit_df.admit.values
    )
In [50]:
with m11_5:
    trace_11_5 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:02<00:00, 718.29it/s]

Code 11.27

In [51]:
pm.summary(trace_11_5, alpha=.11).round(2)
Out[51]:
        mean    sd  mc_error  hpd_5.5  hpd_94.5   n_eff  Rhat
a      -0.37  0.31      0.01    -0.86      0.12  1860.0   1.0
pbar    0.41  0.07      0.00     0.30      0.53  1863.0   1.0
theta   2.78  0.99      0.02     1.25      4.22  1641.0   1.0

Code 11.28

In [52]:
np.percentile(trace_11_5['pbar'], [2.5, 50., 97.5])
Out[52]:
array([ 0.28260569,  0.40670155,  0.56068546])

Code 11.29

In [53]:
pbar_hat = trace_11_5['pbar'].mean()
theta_hat = trace_11_5['theta'].mean()

p_plot = np.linspace(0, 1, 100)

plt.plot(
    p_plot,
    sp.stats.beta.pdf(p_plot, pbar_hat * theta_hat, (1. - pbar_hat) * theta_hat)
);
plt.plot(
    p_plot,
    sp.stats.beta.pdf(
        p_plot[:, np.newaxis],
        trace_11_5['pbar'][:100] * trace_11_5['theta'][:100],
        (1. - trace_11_5['pbar'][:100]) * trace_11_5['theta'][:100]
    ),
    c='C0', alpha=0.1
);

plt.xlim(0., 1.);
plt.xlabel("probability admit");

plt.ylim(0., 3.);
plt.ylabel("Density");

Code 11.30

In [54]:
with m11_5:
    pp_trace_11_5 = pm.sample_ppc(trace_11_5)
100%|██████████| 1000/1000 [00:02<00:00, 355.73it/s]
In [55]:
x_case = np.arange(admit_df.shape[0])

plt.scatter(
    x_case,
    pp_trace_11_5['admit_obs'].mean(axis=0) \
        / admit_df.applications.values
);
plt.scatter(x_case, admit_df.admit / admit_df.applications);

high = np.percentile(pp_trace_11_5['admit_obs'], 95, axis=0) \
        / admit_df.applications.values
plt.scatter(x_case, high, marker='x', c='k');

low = np.percentile(pp_trace_11_5['admit_obs'], 5, axis=0) \
        / admit_df.applications.values
plt.scatter(x_case, low, marker='x', c='k');

Code 11.31

In [56]:
mu = 3.
theta = 1.

x = np.linspace(0, 10, 100)
plt.plot(x, sp.stats.gamma.pdf(x, mu / theta, scale=theta));
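
With shape mu / theta and scale theta, the gamma distribution has mean shape * scale = mu, a common mean-and-scale parameterization; a one-line check:

sp.stats.gamma.mean(mu / theta, scale=theta)  # 3.0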
In [57]:
import sys, IPython, scipy, matplotlib, platform
print("This notebook was createad on a computer %s running %s and using:\nPython %s\nIPython %s\nPyMC3 %s\nNumPy %s\nPandas %s\nSciPy %s\nMatplotlib %s\n" % (platform.machine(), ' '.join(platform.linux_distribution()[:2]), sys.version[:5], IPython.__version__, pm.__version__, np.__version__, pd.__version__, scipy.__version__, matplotlib.__version__))
This notebook was createad on a computer x86_64 running debian stretch/sid and using:
Python 3.6.2
IPython 6.1.0
PyMC3 3.2
NumPy 1.13.3
Pandas 0.20.3
SciPy 0.19.1
Matplotlib 2.1.0