#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


from collections import Counter
from tqdm import trange


# In[3]:


from matplotlib.ticker import StrMethodFormatter
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats


# In[4]:


sns.set()
pct_formatter = StrMethodFormatter('{x:.1%}')


# We'll use [approximate Bayesian computation](https://en.wikipedia.org/wiki/Approximate_Bayesian_computation) (ABC) to solve the [Game of Ur](https://www.allendowney.com/blog/2018/10/21/the-game-of-ur-problem/) problem that Allen Downey [posed on Twitter](https://twitter.com/AllenDowney/status/1054053993566081026) recently.
# 
# Our observation is that the piece is on the thirteenth square, and we want to infer the posterior distribution of the number of turns it took to get there.

# In[5]:


OBS = 13


# As with all Bayesian inference, ABC requires both a prior distribution on the number of rolls and a data generation process (likelihood).  Since it must take at least four rolls to reach spot 13, we place a $U(4, 40)$ prior on the number of rolls.  Note that while the number of rolls needed to reach spot 13 is theoretically unbounded, it would be extremely unlucky not to pass spot 13 within 40 rolls, so we use this truncated uniform prior for computational simplicity.

# In[6]:


min_rolls = int(np.ceil(OBS / 4))
prior = stats.randint(min_rolls, 10 * min_rolls)


# Each turn results in moving a number of spaces drawn from a $\textrm{Bin}\left(4, \frac{1}{2}\right)$ distribution.

# In[7]:


dice = stats.binom(4, 0.5)

def simulate_spot(rolls, dice):
    return dice.rvs(size=rolls).sum()


# The simplest version of ABC generates posterior samples by the following algorithm.
# 
# 1. Generate a sample from the prior.
# 2. Simulate the data generating process given the prior sample.
# 3. If the simulated data matches the observed data, save the sample.  If not, repeat the above process.
# 
# The following function implements this algorithm.

# In[8]:


def generate_abc_sample(obs, prior, dice):
    while True:
        rolls = prior.rvs()
        spot = simulate_spot(rolls, dice)
        
        if spot == obs:
            return rolls


# We now generate 5,000 samples from the posterior using ABC.

# In[9]:


N_SAMPLES = 5000


# In[10]:


post_sample_counts = Counter(generate_abc_sample(OBS, prior, dice) for _ in trange(N_SAMPLES))


# In[11]:


post_df = (pd.DataFrame.from_dict(post_sample_counts, orient='index')
             .rename(columns={0: 'samples'})
             .sort_index())


# In[12]:


post_df.head()


# Below we visualize the posterior distribution of these samples.  The fact that the largest observed sample is much smaller than the point at which we truncated our prior (40) is circumstantial evidence that truncation has not had too large an effect.

# In[13]:


ax = (post_df.div(N_SAMPLES)
             .plot(kind='bar', legend=False, rot=0))

ax.set_xlabel("Number of rolls");

ax.yaxis.set_major_formatter(pct_formatter);
ax.set_ylabel("Posterior probability");