import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import seaborn as sns
#import warnings
#warnings.filterwarnings("ignore", category=FutureWarning)
from IPython.display import Image
from matplotlib import gridspec
plt.style.use('seaborn-white')
color = '#87ceeb'
%load_ext watermark
%watermark -p arviz,matplotlib,numpy,pandas,pymc,seaborn
arviz     : 0.14.0
matplotlib: 3.5.1
numpy     : 1.23.1
pandas    : 1.4.3
pymc      : 5.0.0
seaborn   : 0.12.2
# data from coin #1
n_heads1 = 3
n_tails1 = 12
n_flips1 = n_heads1 + n_tails1
data1 = np.repeat([1, 0], [n_heads1, n_tails1])
# coin #2
n_heads2 = 4
n_tails2 = 1
n_flips2 = n_heads2 + n_tails2
data2 = np.repeat([1, 0], [n_heads2, n_tails2])
# now pack all those outcomes into a single vector
outcomes = np.hstack((data1, data2))
# and create a vector of 'coin IDs' so that we can connect each flip
# with the corresponding coin's value of theta
coin = np.hstack((np.zeros_like(data1), np.ones_like(data2)))
outcomes
array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0])
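For reference, the matching `coin` vector (fully determined by the code above: 15 zeros for coin #1, then 5 ones for coin #2) indexes each flip to its coin:

coin

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])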
# build the model
with pm.Model() as oneminttwocoins_model:
    omega = pm.Beta("omega", 2.0, 2.0)

    # Kruschke uses these parameters in the chapter, but they yield very poor sampling
    # kappa_minus2 = pm.Gamma('kappa_minus2', 0.01, 0.01)
    kappa_minus2 = pm.Gamma("kappa_minus2", 0.5, 0.5)
    kappa = pm.Deterministic("kappa", kappa_minus2 + 2)

    theta = pm.Beta(
        "theta",
        alpha=omega * (kappa - 2) + 1,
        beta=(1 - omega) * (kappa - 2) + 1,
        shape=2,
    )

    y = pm.Bernoulli("y", theta[coin], observed=outcomes)
pm.model_to_graphviz(oneminttwocoins_model)
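A note on the parameterization of `theta`: PyMC's Beta takes `alpha` and `beta`, while Kruschke places priors on the mode $\omega$ and concentration $\kappa$. The expressions above follow from the Beta mode formula, $\omega = (\alpha - 1)/(\alpha + \beta - 2)$, which rearranges to $\alpha = \omega(\kappa - 2) + 1$ and $\beta = (1 - \omega)(\kappa - 2) + 1$. A minimal sanity check (the helper name here is ours, not part of the original notebook):

# hypothetical helper: Kruschke's mode/concentration -> PyMC's alpha/beta
def mode_conc_to_ab(omega, kappa):
    return omega * (kappa - 2) + 1, (1 - omega) * (kappa - 2) + 1

a, b = mode_conc_to_ab(0.25, 6.0)  # gives a=2.0, b=4.0
(a - 1) / (a + b - 2)              # recovers the mode: 0.25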
# sample!
with oneminttwocoins_model:
    idata_1m2c = pm.sample(5000, target_accept=0.9)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [omega, kappa_minus2, theta]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 6 seconds.
az.plot_trace(idata_1m2c, var_names=["~kappa_minus2"])
plt.tight_layout();
az.summary(idata_1m2c)
|              | mean  | sd    | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|--------------|-------|-------|--------|---------|-----------|---------|----------|----------|-------|
| omega        | 0.489 | 0.213 | 0.105  | 0.874   | 0.002     | 0.001   | 14235.0  | 10575.0  | 1.0   |
| kappa_minus2 | 0.875 | 1.204 | 0.000  | 3.003   | 0.010     | 0.007   | 8910.0   | 6626.0   | 1.0   |
| theta[0]     | 0.245 | 0.101 | 0.065  | 0.431   | 0.001     | 0.001   | 13963.0  | 9605.0   | 1.0   |
| theta[1]     | 0.694 | 0.159 | 0.413  | 0.975   | 0.001     | 0.001   | 13944.0  | 9962.0   | 1.0   |
| kappa        | 2.875 | 1.204 | 2.000  | 5.003   | 0.010     | 0.007   | 8910.0   | 6626.0   | 1.0   |
idata_1m2c.posterior['omega'].sel(draw=0)
<xarray.DataArray 'omega' (chain: 4)>
array([0.94458231, 0.59855322, 0.7121015 , 0.60482133])
Coordinates:
  * chain    (chain) int64 0 1 2 3
    draw     int64 0
Given that this is our first legitimate MCMC trace object, let's see how one can interrogate the raw trace to extract information we might want.
idata_1m2c
posterior:
<xarray.Dataset>
Dimensions:       (chain: 4, draw: 5000, theta_dim_0: 2)
Coordinates:
  * chain         (chain) int64 0 1 2 3
  * draw          (draw) int64 0 1 2 3 4 5 6 ... 4994 4995 4996 4997 4998 4999
  * theta_dim_0   (theta_dim_0) int64 0 1
Data variables:
    omega         (chain, draw) float64 0.9446 0.03773 0.782 ... 0.8624 0.8764
    kappa_minus2  (chain, draw) float64 0.1657 0.3463 0.4307 ... 1.194 0.1706
    theta         (chain, draw, theta_dim_0) float64 0.08418 0.7549 ... 0.7013
    kappa         (chain, draw) float64 2.166 2.346 2.431 ... 3.145 3.194 2.171
Attributes:
    created_at:                 2023-03-04T16:14:45.659138
    arviz_version:              0.14.0
    inference_library:          pymc
    inference_library_version:  5.0.0
    sampling_time:              6.138545513153076
    tuning_steps:               1000

sample_stats:
<xarray.Dataset>
Dimensions:              (chain: 4, draw: 5000)
Coordinates:
  * chain                (chain) int64 0 1 2 3
  * draw                 (draw) int64 0 1 2 3 4 5 ... 4995 4996 4997 4998 4999
Data variables: (12/17)
    perf_counter_diff    (chain, draw) float64 0.0003155 ... 0.0007621
    process_time_diff    (chain, draw) float64 0.0003156 ... 0.0007625
    index_in_trajectory  (chain, draw) int64 2 -4 6 6 4 0 ... 10 -5 9 -3 -2 2
    tree_depth           (chain, draw) int64 2 3 3 3 3 2 3 3 ... 3 4 4 4 4 4 3
    lp                   (chain, draw) float64 -21.4 -20.69 ... -17.56 -18.2
    diverging            (chain, draw) bool False False False ... False False
    ...                   ...
    max_energy_error     (chain, draw) float64 -0.1459 -0.2902 ... 0.293
    energy               (chain, draw) float64 23.61 24.48 ... 19.03 19.11
    acceptance_rate      (chain, draw) float64 1.0 0.9896 ... 0.9346 0.8853
    largest_eigval       (chain, draw) float64 nan nan nan nan ... nan nan nan
    n_steps              (chain, draw) float64 3.0 7.0 7.0 ... 15.0 15.0 7.0
    step_size            (chain, draw) float64 0.5166 0.5166 ... 0.3903 0.3903
Attributes:
    created_at:                 2023-03-04T16:14:45.675453
    arviz_version:              0.14.0
    inference_library:          pymc
    inference_library_version:  5.0.0
    sampling_time:              6.138545513153076
    tuning_steps:               1000

observed_data:
<xarray.Dataset>
Dimensions:  (y_dim_0: 20)
Coordinates:
  * y_dim_0  (y_dim_0) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
Data variables:
    y        (y_dim_0) int64 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0
Attributes:
    created_at:                 2023-03-04T16:14:45.677605
    arviz_version:              0.14.0
    inference_library:          pymc
    inference_library_version:  5.0.0
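The sample_stats group shown above holds per-draw sampler diagnostics that go beyond what az.summary reports. For example, we can count divergent transitions directly; since the sampler log raised no divergence warnings, this should come back as zero (a quick check, not in the original notebook):

int(idata_1m2c.sample_stats["diverging"].sum())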
# let's look at the first MCMC sample
idata_1m2c.posterior.sel(draw=0, chain=0)
<xarray.Dataset>
Dimensions:       (theta_dim_0: 2)
Coordinates:
    chain         int64 0
    draw          int64 0
  * theta_dim_0   (theta_dim_0) int64 0 1
Data variables:
    omega         float64 0.9446
    kappa_minus2  float64 0.1657
    theta         (theta_dim_0) float64 0.08418 0.7549
    kappa         float64 2.166
Attributes:
    created_at:                 2023-03-04T16:14:45.659138
    arviz_version:              0.14.0
    inference_library:          pymc
    inference_library_version:  5.0.0
    sampling_time:              6.138545513153076
    tuning_steps:               1000
Note that there are two values of theta in this first step (one for each coin). They are stored together because we asked for an array of theta parameters. Let's calculate the mean value of the $\theta$ parameter associated with the first coin.
idata_1m2c.posterior['theta'].mean(dim=['draw','chain'])[0]
<xarray.DataArray 'theta' ()>
array(0.24490743)
Coordinates:
    theta_dim_0  int64 0
And for the second coin.
idata_1m2c.posterior['theta'].mean(dim=['draw','chain'])[1]
<xarray.DataArray 'theta' ()>
array(0.69440541)
Coordinates:
    theta_dim_0  int64 1
What if we have 'reference' values against which we wish to compare our parameter estimates? We can, among other things, ask what proportion of samples took on a value greater or less than that reference value. In the case of coins, we might wish to compare $\theta$ to a value of 0.5 (i.e., to ask how likely it is that each coin is 'fair').
# coin 1
(idata_1m2c.posterior['theta']>.5).mean(dim=['draw','chain'])[0]
<xarray.DataArray 'theta' ()>
array(0.0132)
Coordinates:
    theta_dim_0  int64 0
# coin 2
(idata_1m2c.posterior['theta']>.5).mean(dim=['draw','chain'])[1]
<xarray.DataArray 'theta' ()>
array(0.8729)
Coordinates:
    theta_dim_0  int64 1
Now let's ask about our mint parameter, $\omega$. Here too we calculate the mean posterior estimate.
idata_1m2c.posterior['omega'].mean(dim=['draw','chain'])
<xarray.DataArray 'omega' ()>
array(0.48896697)
We can also again compare $\omega$ against our 'reference' value of 0.5, though the interpretation of $\omega$ differs somewhat from that of $\theta$: asking about $p(\omega > 0.5)$ tells us how likely it is that the mint tends to generate heads-biased (rather than tails-biased) coins.
(idata_1m2c.posterior['omega']>.5).mean(dim=['draw','chain'])
<xarray.DataArray 'omega' ()>
array(0.482)
We can also inspect $\kappa$, which tells us how consistently the mint generates coins with values of $\theta$ near $\omega$.
idata_1m2c.posterior['kappa'].mean(dim=['draw','chain'])
<xarray.DataArray 'kappa' ()>
array(2.8750322)
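Plugging the posterior means of $\omega$ and $\kappa$ back into the mode/concentration conversion gives a point-estimate picture of the distribution over coin biases this mint produces. This is only a sketch: it ignores the posterior uncertainty in $\omega$ and $\kappa$, and scipy is an extra import beyond the original notebook's.

from scipy import stats

omega_hat = float(idata_1m2c.posterior["omega"].mean())
kappa_hat = float(idata_1m2c.posterior["kappa"].mean())
# same conversion used in the model block above
a = omega_hat * (kappa_hat - 2) + 1
b = (1 - omega_hat) * (kappa_hat - 2) + 1
grid = np.linspace(0, 1, 200)
plt.plot(grid, stats.beta(a, b).pdf(grid), color=color)
plt.xlabel("theta of a new coin from this mint");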
df = pd.read_csv("data/TherapeuticTouchData.csv", dtype={"s": "category"})
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   y       280 non-null    int64
 1   s       280 non-null    category
dtypes: category(1), int64(1)
memory usage: 2.8 KB
df.head()
|   | y | s   |
|---|---|-----|
| 0 | 1 | S01 |
| 1 | 0 | S01 |
| 2 | 0 | S01 |
| 3 | 0 | S01 |
| 4 | 0 | S01 |
df_proportions = df.groupby("s")["y"].apply(lambda x: x.sum() / len(x))
# displot is figure-level and returns a FacetGrid rather than an Axes
g = sns.displot(df_proportions, bins=8, color="gray")
g.set(xlabel="Proportion Correct", ylabel="# Practitioners");
Image('images/fig9_7.png', width=200)
# create a factor for practitioner ID
practitioner_idx, practitioner_codes = pd.factorize(df["s"])
n_practitioners = df["s"].nunique()
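`pd.factorize` pairs each row with an integer code and returns the unique labels alongside, so `practitioner_codes[practitioner_idx]` recovers the original IDs. A quick look at the first few rows (all belonging to practitioner S01, as seen in `df.head()` above, hence code 0):

practitioner_idx[:5]

array([0, 0, 0, 0, 0])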
with pm.Model() as therapeutictouch_model:
    omega = pm.Beta("omega", 1.0, 1.0)
    kappa_minus2 = pm.Gamma("kappa_minus2", 0.05, 0.05)
    kappa = pm.Deterministic("kappa", kappa_minus2 + 2)

    theta = pm.Beta(
        "theta",
        alpha=omega * (kappa - 2) + 1,
        beta=(1 - omega) * (kappa - 2) + 1,
        shape=n_practitioners,
    )

    y = pm.Bernoulli("y", theta[practitioner_idx], observed=df["y"])
pm.model_to_graphviz(therapeutictouch_model)
with therapeutictouch_model:
    idata_tt = pm.sample(5000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [omega, kappa_minus2, theta]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 7 seconds.
az.plot_trace(idata_tt, var_names=['omega','kappa', 'theta'])
plt.tight_layout();
az.summary(idata_tt)
|              | mean   | sd     | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|--------------|--------|--------|--------|---------|-----------|---------|----------|----------|-------|
| omega        | 0.432  | 0.043  | 0.350  | 0.511   | 0.000     | 0.000   | 9305.0   | 12376.0  | 1.0   |
| kappa_minus2 | 21.880 | 15.419 | 2.229  | 49.612  | 0.289     | 0.213   | 3241.0   | 4697.0   | 1.0   |
| theta[0]     | 0.323  | 0.096  | 0.137  | 0.495   | 0.001     | 0.001   | 9949.0   | 11267.0  | 1.0   |
| theta[1]     | 0.358  | 0.094  | 0.178  | 0.530   | 0.001     | 0.001   | 14217.0  | 12034.0  | 1.0   |
| theta[2]     | 0.392  | 0.093  | 0.213  | 0.562   | 0.001     | 0.000   | 17563.0  | 12283.0  | 1.0   |
| theta[3]     | 0.393  | 0.093  | 0.219  | 0.568   | 0.001     | 0.001   | 16857.0  | 12282.0  | 1.0   |
| theta[4]     | 0.392  | 0.094  | 0.219  | 0.569   | 0.001     | 0.000   | 19531.0  | 13241.0  | 1.0   |
| theta[5]     | 0.393  | 0.094  | 0.214  | 0.567   | 0.001     | 0.000   | 19346.0  | 12920.0  | 1.0   |
| theta[6]     | 0.392  | 0.093  | 0.217  | 0.567   | 0.001     | 0.000   | 18621.0  | 12593.0  | 1.0   |
| theta[7]     | 0.392  | 0.093  | 0.213  | 0.562   | 0.001     | 0.001   | 16700.0  | 12687.0  | 1.0   |
| theta[8]     | 0.392  | 0.094  | 0.213  | 0.566   | 0.001     | 0.000   | 18461.0  | 13237.0  | 1.0   |
| theta[9]     | 0.392  | 0.094  | 0.216  | 0.568   | 0.001     | 0.000   | 17684.0  | 12168.0  | 1.0   |
| theta[10]    | 0.426  | 0.092  | 0.252  | 0.602   | 0.001     | 0.000   | 18931.0  | 12931.0  | 1.0   |
| theta[11]    | 0.426  | 0.093  | 0.250  | 0.602   | 0.001     | 0.000   | 21075.0  | 13544.0  | 1.0   |
| theta[12]    | 0.426  | 0.093  | 0.252  | 0.604   | 0.001     | 0.000   | 21353.0  | 14090.0  | 1.0   |
| theta[13]    | 0.426  | 0.093  | 0.245  | 0.595   | 0.001     | 0.000   | 19969.0  | 13935.0  | 1.0   |
| theta[14]    | 0.426  | 0.092  | 0.249  | 0.596   | 0.001     | 0.000   | 23769.0  | 12982.0  | 1.0   |
| theta[15]    | 0.462  | 0.094  | 0.282  | 0.634   | 0.001     | 0.000   | 18895.0  | 14055.0  | 1.0   |
| theta[16]    | 0.461  | 0.093  | 0.289  | 0.642   | 0.001     | 0.000   | 19901.0  | 13972.0  | 1.0   |
| theta[17]    | 0.462  | 0.094  | 0.281  | 0.636   | 0.001     | 0.000   | 21700.0  | 14377.0  | 1.0   |
| theta[18]    | 0.460  | 0.094  | 0.282  | 0.635   | 0.001     | 0.000   | 21224.0  | 14588.0  | 1.0   |
| theta[19]    | 0.461  | 0.094  | 0.286  | 0.645   | 0.001     | 0.000   | 20032.0  | 13546.0  | 1.0   |
| theta[20]    | 0.461  | 0.095  | 0.278  | 0.633   | 0.001     | 0.000   | 22139.0  | 13607.0  | 1.0   |
| theta[21]    | 0.461  | 0.094  | 0.289  | 0.644   | 0.001     | 0.000   | 21210.0  | 14212.0  | 1.0   |
| theta[22]    | 0.495  | 0.096  | 0.318  | 0.677   | 0.001     | 0.001   | 17342.0  | 13390.0  | 1.0   |
| theta[23]    | 0.495  | 0.096  | 0.319  | 0.679   | 0.001     | 0.001   | 18370.0  | 13357.0  | 1.0   |
| theta[24]    | 0.530  | 0.099  | 0.347  | 0.717   | 0.001     | 0.001   | 15174.0  | 13050.0  | 1.0   |
| theta[25]    | 0.530  | 0.099  | 0.342  | 0.711   | 0.001     | 0.001   | 13885.0  | 12395.0  | 1.0   |
| theta[26]    | 0.530  | 0.098  | 0.350  | 0.717   | 0.001     | 0.001   | 13383.0  | 11788.0  | 1.0   |
| theta[27]    | 0.564  | 0.103  | 0.372  | 0.759   | 0.001     | 0.001   | 11089.0  | 11820.0  | 1.0   |
| kappa        | 23.880 | 15.419 | 4.229  | 51.612  | 0.289     | 0.213   | 3241.0   | 4697.0   | 1.0   |
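With 28 practitioner-level thetas, the summary table gets long; a compact programmatic convergence check (a small sketch, not in the original notebook) is to scan the r_hat column of the summary DataFrame directly, which should come back at 1.0, matching the table:

az.summary(idata_tt)["r_hat"].max()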
idata_tt.posterior["theta"].mean(dim=["chain", "draw"]).sel(theta_dim_0=0)
<xarray.DataArray 'theta' ()>
array(0.32320841)
Coordinates:
    theta_dim_0  int64 0
plt.figure(figsize=(10, 12))
# Define gridspec
gs = gridspec.GridSpec(4, 6)
ax1 = plt.subplot(gs[0, :3])
ax2 = plt.subplot(gs[0, 3:])
ax3 = plt.subplot(gs[1, :2])
ax4 = plt.subplot(gs[1, 2:4])
ax5 = plt.subplot(gs[1, 4:6])
ax6 = plt.subplot(gs[2, :2])
ax7 = plt.subplot(gs[2, 2:4])
ax8 = plt.subplot(gs[2, 4:6])
ax9 = plt.subplot(gs[3, :2])
ax10 = plt.subplot(gs[3, 2:4])
ax11 = plt.subplot(gs[3, 4:6])
# thetas and theta pairs to plot
thetas = (0, 13, 27)
theta_pairs = ((0, 13), (0, 27), (13, 27))
font_d = {"size": 14}
# kappa & omega posterior plots
for var, ax in zip(["kappa", "omega"], [ax1, ax2]):
    az.plot_posterior(
        idata_tt.posterior[var], point_estimate="mode", ax=ax, color=color, round_to=2
    )
    ax.set_xlabel(r"$\{}$".format(var), fontdict={"size": 20, "weight": "bold"})
ax1.set(xlim=(0, 500))
# theta posterior plots
for var, ax in zip(thetas, [ax3, ax7, ax11]):
    az.plot_posterior(
        idata_tt.posterior["theta"].sel(theta_dim_0=var),
        point_estimate="mode",
        ax=ax,
        color=color,
    )
    ax.set_xlabel("theta[{}]".format(var), fontdict=font_d)
# theta scatter plots
for var, ax in zip(theta_pairs, [ax6, ax9, ax10]):
    ax.scatter(
        idata_tt.posterior["theta"].sel(theta_dim_0=var[0]),
        idata_tt.posterior["theta"].sel(theta_dim_0=var[1]),
        alpha=0.02,
        color=color,
        facecolor="none",
    )
    ax.plot([0, 1], [0, 1], ":k", transform=ax.transAxes, alpha=0.5)
    ax.set_xlabel("theta[{}]".format(var[0]), fontdict=font_d)
    ax.set_ylabel("theta[{}]".format(var[1]), fontdict=font_d)
    ax.set(xlim=(0, 1), ylim=(0, 1), aspect="equal")
# theta posterior differences plots
for var, ax in zip(theta_pairs, [ax4, ax5, ax8]):
    az.plot_posterior(
        idata_tt.posterior["theta"].sel(theta_dim_0=var[0])
        - idata_tt.posterior["theta"].sel(theta_dim_0=var[1]),
        point_estimate="mode",
        ax=ax,
        color=color,
    )
    ax.set_xlabel("theta[{}] - theta[{}]".format(*var), fontdict=font_d)
plt.tight_layout()
Let's create a model that estimates a separate theta for each practitioner, without the influence of a higher-level distribution. We can then compare its theta values with those from the hierarchical model above.
with pm.Model() as unpooled_model:
    theta = pm.Beta("theta", 1, 1, shape=n_practitioners)
    y = pm.Bernoulli("y", theta[practitioner_idx], observed=df["y"])
pm.model_to_graphviz(unpooled_model)
with unpooled_model:
    unpooled_idata = pm.sample(5000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [theta]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 3 seconds.
Here we concatenate the posterior summaries (the theta means) from both models into a DataFrame, then melt it into the long format that Seaborn's pointplot expects.
df_shrinkage = pd.concat(
    [az.summary(unpooled_idata).iloc[:, 0], az.summary(idata_tt).iloc[2:-1, 0]], axis=1
).reset_index()
df_shrinkage.columns = ["theta", "unpooled", "hierarchical"]
df_shrinkage = pd.melt(
    df_shrinkage, "theta", ["unpooled", "hierarchical"], var_name="Model"
)
df_shrinkage.head()
|   | theta    | Model    | value |
|---|----------|----------|-------|
| 0 | theta[0] | unpooled | 0.166 |
| 1 | theta[1] | unpooled | 0.250 |
| 2 | theta[2] | unpooled | 0.333 |
| 3 | theta[3] | unpooled | 0.333 |
| 4 | theta[4] | unpooled | 0.333 |
The plot below shows that the practitioner-level theta estimates are pulled toward the group mean in the hierarchical model.
plt.figure(figsize=(10, 9))
plt.scatter(
    1,
    az.summary(idata_tt).iloc[0, 0],
    s=100,
    c="r",
    marker="x",
    zorder=999,
    label="Group mean",
)
sns.pointplot(x="Model", y="value", hue="theta", data=df_shrinkage)
<AxesSubplot:xlabel='Model', ylabel='value'>
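To put numbers on the shrinkage visible in the plot, we can pivot `df_shrinkage` back to wide format and summarize the absolute gap between the unpooled and hierarchical estimates (a small sketch reusing the frame built above):

(
    df_shrinkage.pivot(index="theta", columns="Model", values="value")
    .pipe(lambda d: (d["unpooled"] - d["hierarchical"]).abs())
    .describe()
)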
df2 = pd.read_csv(
    "data/BattingAverage.csv", usecols=[0, 1, 2, 3], dtype={"PriPos": "category"}
)
df2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 948 entries, 0 to 947
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Player  948 non-null    object
 1   PriPos  948 non-null    category
 2   Hits    948 non-null    int64
 3   AtBats  948 non-null    int64
dtypes: category(1), int64(2), object(1)
memory usage: 23.6+ KB
The DataFrame contains records for 948 players in the 2012 regular season of Major League Baseball.
df2["BatAv"] = df2.Hits.divide(df2.AtBats)
df2.head(10)
|   | Player         | PriPos     | Hits | AtBats | BatAv    |
|---|----------------|------------|------|--------|----------|
| 0 | Fernando Abad  | Pitcher    | 1    | 7      | 0.142857 |
| 1 | Bobby Abreu    | Left Field | 53   | 219    | 0.242009 |
| 2 | Tony Abreu     | 2nd Base   | 18   | 70     | 0.257143 |
| 3 | Dustin Ackley  | 2nd Base   | 137  | 607    | 0.225700 |
| 4 | Matt Adams     | 1st Base   | 21   | 86     | 0.244186 |
| 5 | Nathan Adcock  | Pitcher    | 0    | 1      | 0.000000 |
| 6 | Jeremy Affeldt | Pitcher    | 0    | 1      | 0.000000 |
| 7 | Brandon Allen  | 1st Base   | 2    | 20     | 0.100000 |
| 8 | Yonder Alonso  | 1st Base   | 150  | 549    | 0.273224 |
| 9 | Jose Altuve    | 2nd Base   | 167  | 576    | 0.289931 |
# batting average by primary field positions calculated from the data
df2.groupby("PriPos")[["Hits", "AtBats"]].sum().pipe(lambda x: x.Hits / x.AtBats)
PriPos
1st Base        0.258851
2nd Base        0.255676
3rd Base        0.265036
Catcher         0.247404
Center Field    0.263513
Left Field      0.259077
Pitcher         0.129148
Right Field     0.263555
Shortstop       0.255186
dtype: float64
Image('images/fig9_13.png', width=300)
pripos_idx, pripos_codes = pd.factorize(df2["PriPos"])
n_pripos = df2["PriPos"].nunique()
n_players = df2["Player"].nunique()
with pm.Model() as hierarchical_model2:
    # hyper parameters
    omega = pm.Beta("omega", 1, 1)
    kappa_minus2 = pm.Gamma("kappa_minus2", 0.1, 0.1)
    kappa = pm.Deterministic("kappa", kappa_minus2 + 2)

    # parameters for categories (Primary field positions)
    omega_c = pm.Beta(
        "omega_c",
        omega * (kappa - 2) + 1,
        (1 - omega) * (kappa - 2) + 1,
        shape=n_pripos,
    )
    kappa_c_minus2 = pm.Gamma("kappa_c_minus2", 0.01, 0.01, shape=n_pripos)
    kappa_c = pm.Deterministic("kappa_c", kappa_c_minus2 + 2)

    # parameter for individual players
    theta = pm.Beta(
        "theta",
        omega_c[pripos_idx] * (kappa_c[pripos_idx] - 2) + 1,
        (1 - omega_c[pripos_idx]) * (kappa_c[pripos_idx] - 2) + 1,
        shape=n_players,
    )

    y2 = pm.Binomial("y2", n=df2.AtBats.values, p=theta, observed=df2["Hits"])
pm.model_to_graphviz(hierarchical_model2)
with hierarchical_model2:
    idata2 = pm.sample(5000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [omega, kappa_minus2, omega_c, kappa_c_minus2, theta]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 27 seconds.
az.plot_trace(idata2, var_names=["omega", "kappa", "omega_c", "kappa_c"])
plt.tight_layout()
az.plot_posterior(idata2.posterior["omega"], point_estimate="mode", color=color)
plt.title("Overall", fontdict={"fontsize": 16, "fontweight": "bold"})
plt.xlabel("omega", fontdict={"fontsize": 14});
fig, axes = plt.subplots(3, 3, figsize=(14, 8))
for i, ax in enumerate(axes.T.flatten()):
    az.plot_posterior(
        idata2.posterior["omega_c"].sel(omega_c_dim_0=i),
        ax=ax,
        point_estimate="mode",
        color=color,
    )
    ax.set_title(pripos_codes[i], fontdict={"fontsize": 16, "fontweight": "bold"})
    ax.set_xlabel("omega_c__{}".format(i), fontdict={"fontsize": 14})
    ax.set_xlim(0.10, 0.30)
plt.tight_layout(h_pad=3);
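Finally, as with the two coins, position-level questions reduce to posterior contrasts. For example, here is a sketch comparing catchers with pitchers via the difference of their $\omega_c$ values; the integer codes are looked up from pripos_codes rather than hard-coded, since they depend on the order of appearance in the data.

# look up the integer codes for the two positions of interest
pitcher = list(pripos_codes).index("Pitcher")
catcher = list(pripos_codes).index("Catcher")
az.plot_posterior(
    idata2.posterior["omega_c"].sel(omega_c_dim_0=catcher)
    - idata2.posterior["omega_c"].sel(omega_c_dim_0=pitcher),
    point_estimate="mode",
    ref_val=0,
    color=color,
)
plt.xlabel("omega_c[Catcher] - omega_c[Pitcher]");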