#!/usr/bin/env python
# coding: utf-8
# # Bayesian Zig Zag
#
# Developing probabilistic models using grid methods and MCMC.
#
# Thanks to Chris Fonnesbeck for his help with this notebook, and to Colin Carroll, who added features to pymc3 to support some of these examples.
#
# Copyright 2018 Allen Downey
#
# MIT License: https://opensource.org/licenses/MIT
# In[1]:
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InteractiveShell.ast_node_interactivity='last_expr_or_assign'")
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
# ## Simulating hockey
#
# I'll model hockey as a Poisson process, where each team has some long-term average scoring rate, `lambda`, in goals per game.
#
# For the first example, I'll assume that `lambda` is somehow known to be 2.7. Since regulation play (as opposed to overtime) is 60 minutes, we can compute the goal scoring rate per minute.
# In[2]:
lam_per_game = 2.7
min_per_game = 60
lam_per_min = lam_per_game / min_per_game
lam_per_min, lam_per_min**2
# If we assume that a goal is equally likely during any minute of the game, and we ignore the possibility of scoring more than one goal in the same minute, we can simulate a game by generating one random value each minute. (The second value above, `lam_per_min**2`, approximates the probability of two goals in the same minute, which is small enough to ignore.)
# In[3]:
def simulate_game(p, n=60):
    goals = np.random.choice([0, 1], n, p=[1-p, p])
    return np.sum(goals)
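# Equivalently (a sketch, not in the original), since the sum of `n` independent
# Bernoulli trials is binomial, a whole game can be simulated with one binomial draw.
# In[ ]:
def simulate_game_binomial(p, n=60):
    """Simulate a game as a single binomial draw."""
    return np.random.binomial(n, p)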
# And simulate 10 games.
# In[4]:
size = 10
sample = [simulate_game(lam_per_min) for i in range(size)]
# If we simulate 1000 games, we can see what the distribution looks like. The average of this sample should be close to `lam_per_game`.
# In[5]:
size = 1000
sample_sim = [simulate_game(lam_per_min) for i in range(size)]
np.mean(sample_sim), lam_per_game
# ## PMFs
#
# To visualize distributions, I'll start with a probability mass function (PMF), which I'll implement using a `Counter`.
#
#
# In[6]:
from collections import Counter
class Pmf(Counter):

    def normalize(self):
        """Normalizes the PMF so the probabilities add to 1."""
        total = sum(self.values())
        for key in self:
            self[key] /= total

    def sorted_items(self):
        """Returns the outcomes and their probabilities."""
        return zip(*sorted(self.items()))
# Here are some functions for plotting PMFs.
# In[7]:
plot_options = dict(linewidth=3, alpha=0.6)

def underride(options):
    """Add key-value pairs from plot_options to options,
    only if the key is not already there.

    options: dictionary
    """
    for key, val in plot_options.items():
        options.setdefault(key, val)
    return options

def plot(xs, ys, **options):
    """Line plot with plot_options."""
    plt.plot(xs, ys, **underride(options))

def bar(xs, ys, **options):
    """Bar plot with plot_options."""
    plt.bar(xs, ys, **underride(options))

def plot_pmf(sample, **options):
    """Compute and plot a PMF."""
    pmf = Pmf(sample)
    pmf.normalize()
    xs, ps = pmf.sorted_items()
    bar(xs, ps, **options)

def decorate_pmf_goals():
    """Decorate the axes."""
    plt.xlabel('Number of goals')
    plt.ylabel('PMF')
    plt.title('Distribution of goals scored')
    legend()

def legend(**options):
    """Draw a legend only if there are labeled items."""
    ax = plt.gca()
    handles, labels = ax.get_legend_handles_labels()
    if len(labels):
        plt.legend(**options)
# Here's what the results from the simulation look like.
# In[8]:
plot_pmf(sample_sim, label='simulation')
decorate_pmf_goals()
plt.savefig('zigzag1.png', dpi=150)
# ## Poisson process
#
# For large values of `n` and small values of `p`, the binomial process we just simulated converges to the Poisson distribution with parameter `mu = n * p`, which is also `mu = lam_per_game`.
#
# We can use NumPy to generate a sample from a Poisson distribution.
# In[9]:
n = 60
p = lam_per_min
mu = n * p
sample_poisson = np.random.poisson(mu, size)
np.mean(sample_poisson)
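# As an aside (not in the original), we can check the convergence analytically:
# SciPy's binomial PMF for `n` trials with probability `p` should be close to
# the Poisson PMF with `mu = n * p`.
# In[ ]:
from scipy.stats import binom, poisson

ks = np.arange(11)
# Largest absolute difference between the two PMFs; should be small.
np.max(np.abs(binom.pmf(ks, n, p) - poisson.pmf(ks, mu)))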
# And confirm that the results are similar to what we got from the model.
# In[10]:
plot_pmf(sample_sim, label='simulation')
plot_pmf(sample_poisson, label='Poisson')
decorate_pmf_goals()
plt.savefig('zigzag2.png', dpi=150)
# But comparing PMFs by overlaying bar plots is a bad way to see differences between distributions. It's better to use the cumulative distribution function (CDF).
# In[11]:
def plot_cdf(sample, **options):
    """Compute and plot the CDF of a sample."""
    pmf = Pmf(sample)
    xs, freqs = pmf.sorted_items()
    ps = np.cumsum(freqs, dtype=float)
    ps /= ps[-1]
    plot(xs, ps, **options)

def decorate_cdf_rates():
    """Decorate the axes."""
    plt.xlabel('Goal scoring rate (mu)')
    plt.ylabel('CDF')
    plt.title('Distribution of goal scoring rate')
    legend()

def decorate_cdf_goals():
    """Decorate the axes."""
    plt.xlabel('Number of goals')
    plt.ylabel('CDF')
    plt.title('Distribution of goals scored')
    legend()

def plot_cdfs(*sample_seq, **options):
    """Plot multiple CDFs."""
    for sample in sample_seq:
        plot_cdf(sample, **options)
    decorate_cdf_goals()
# Comparing CDFs makes it clearer that the results from the simulation are consistent with the Poisson model.
# In[12]:
plot_cdf(sample_sim, label='simulation')
plot_cdf(sample_poisson, label='Poisson')
decorate_cdf_goals()
plt.savefig('zigzag3.png', dpi=150)
# ## Warming up PyMC
#
# Soon we will want to use `pymc3` to do inference, which is really what it's for. But just to get warmed up, I will use it to generate a sample from a Poisson distribution.
# In[13]:
model = pm.Model()
with model:
    goals = pm.Poisson('goals', mu)
    trace = pm.sample_prior_predictive(1000)
# In[14]:
len(trace['goals'])
# In[15]:
sample_pm = trace['goals']
np.mean(sample_pm)
# This example is like using a cannon to kill a fly. But it helps us learn to use the cannon.
# In[16]:
plot_cdf(sample_sim, label='simulation')
plot_cdf(sample_pm, label='PyMC')
decorate_cdf_goals()
# ## Evaluating the Poisson distribution
#
# One of the nice things about the Poisson distribution is that we can compute its PMF analytically, and SciPy provides an implementation.
# In[17]:
from scipy.stats import poisson
xs = np.arange(11)
ps = poisson.pmf(xs, mu)
bar(xs, ps, label='Poisson PMF')
decorate_pmf_goals()
# And here's a function that computes the probability of scoring a given number of goals in a game, for a known value of `mu`.
# In[18]:
def poisson_likelihood(goals, mu):
    """Probability of goals given scoring rate.

    goals: observed number of goals (scalar or sequence)
    mu: hypothetical goals per game

    returns: probability
    """
    return np.prod(poisson.pmf(goals, mu))
# Here's the probability of scoring 6 goals in a game if the long-term rate is 2.7 goals per game.
# In[19]:
poisson_likelihood(goals=6, mu=2.7)
# Here's the probability of scoring 3 goals.
# In[20]:
poisson_likelihood(goals=3, mu=2.7)
# This function also works with a sequence of goals, so we can compute the probability of scoring 6 goals in the first game and 3 in the second.
# In[21]:
poisson_likelihood(goals=[6, 3], mu=2.7)
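# As a quick check (not in the original): because the games are independent,
# this equals the product of the two single-game probabilities computed above.
# In[ ]:
poisson_likelihood(goals=6, mu=2.7) * poisson_likelihood(goals=3, mu=2.7)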
# ## Bayesian inference with grid approximation
#
# Ok, it's finally time to do some inference! The function we just wrote computes the likelihood of the data, given a hypothetical value of `mu`:
#
# $\mathrm{Prob}~(x ~|~ \mu)$
#
# But what we really want is the distribution of `mu`, given the data:
#
# $\mathrm{Prob}~(\mu ~|~ x)$
#
# If only there were some theorem that relates these probabilities!
#
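# Bayes's theorem relates them:
#
# $\mathrm{Prob}~(\mu ~|~ x) = \mathrm{Prob}~(x ~|~ \mu)~\mathrm{Prob}~(\mu) ~/~ \mathrm{Prob}~(x)$
#
# For each hypothetical value of `mu`, we multiply its prior probability by the likelihood of the data, then normalize so the posterior probabilities add up to 1.
#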
# The following class implements Bayes's theorem.
# In[22]:
class Suite(Pmf):
    """Represents a set of hypotheses and their probabilities."""

    def bayes_update(self, data, like_func):
        """Perform a Bayesian update.

        data: some representation of observed data
        like_func: likelihood function that takes (data, hypo), where
                   hypo is the hypothetical value of some parameter,
                   and returns P(data | hypo)
        """
        for hypo in self:
            self[hypo] *= like_func(data, hypo)
        self.normalize()

    def plot(self, **options):
        """Plot the hypotheses and their probabilities."""
        xs, ps = self.sorted_items()
        plot(xs, ps, **options)

def decorate_pdf_rate():
    """Decorate the axes."""
    plt.xlabel('Goals per game (mu)')
    plt.ylabel('PDF')
    plt.title('Distribution of goal scoring rate')
    legend()
# I'll start with a uniform prior just to keep things simple. We'll choose a better prior later.
# In[23]:
hypo_mu = np.linspace(0, 15, num=51)
hypo_mu
# Initially `uniform_prior` represents the prior distribution of `mu`.
# In[24]:
uniform_prior = Suite(hypo_mu)
uniform_prior.normalize()
uniform_prior.plot(label='prior')
decorate_pdf_rate()
plt.savefig('zigzag4.png', dpi=150)
# Now we can update it with the data and plot the posterior.
# In[25]:
uniform_prior.bayes_update(data=3, like_func=poisson_likelihood)
uniform_prior.plot(label='posterior')
decorate_pdf_rate()
plt.savefig('zigzag5.png', dpi=150)
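# As a quick check (not in the original), we can read off the MAP, the value of
# `mu` with the highest posterior probability. After `bayes_update`, `uniform_prior`
# holds the posterior probabilities.
# In[ ]:
max(uniform_prior, key=uniform_prior.get)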
# With a uniform prior, the posterior is just the normalized likelihood function, so the MAP is the value of `mu` that maximizes the likelihood, which is the observed number of goals, 3.
#
# This result is probably not reasonable, because the prior was not reasonable: it gives as much weight to implausible rates like 15 goals per game as to typical ones.
# ## A better prior
#
# To construct a better prior, I'll use a Gamma distribution with parameters chosen to be consistent with previous playoff games.
# We can use `make_gamma_suite` to construct a prior suite with the given parameters.
# In[26]:
from scipy.stats import gamma
def make_gamma_suite(xs, alpha, beta):
    """Makes a suite based on a gamma distribution.

    xs: places to evaluate the PDF
    alpha, beta: parameters of the distribution

    returns: Suite
    """
    ps = gamma(a=alpha, scale=1/beta).pdf(xs)
    prior = Suite(dict(zip(xs, ps)))
    prior.normalize()
    return prior
# Here's what it looks like.
# In[27]:
alpha = 9
beta = 3
hypo_mu = np.linspace(0, 15, num=101)
gamma_prior = make_gamma_suite(hypo_mu, alpha, beta)
gamma_prior.plot(label='gamma prior')
decorate_pdf_rate()
plt.savefig('zigzag6.png', dpi=150)
# And we can update this prior using the observed data. If we only score 1 goal in a game, our estimate for the goal scoring rate shifts left.
# In[28]:
posterior = gamma_prior.copy()
posterior.bayes_update(data=1, like_func=poisson_likelihood)
gamma_prior.plot(label='prior')
posterior.plot(label='posterior')
decorate_pdf_rate()
plt.savefig('zigzag7.png', dpi=150)
# If we go back to the original prior, and score 6 goals in a game, the estimate shifts to the right.
# In[29]:
posterior = gamma_prior.copy()
posterior.bayes_update(data=6, like_func=poisson_likelihood)
gamma_prior.plot(label='prior')
posterior.plot(label='posterior')
decorate_pdf_rate()
plt.savefig('zigzag8.png', dpi=150)
# If we score 3 goals in a game, the estimate doesn't shift much, but it gets pointier, which means we are more certain about the goal scoring rate.
# In[30]:
posterior = gamma_prior.copy()
posterior.bayes_update(data=3, like_func=poisson_likelihood)
gamma_prior.plot(label='prior')
posterior.plot(label='posterior')
decorate_pdf_rate()
plt.savefig('zigzag8b.png', dpi=150)  # distinct filename so it doesn't overwrite the previous figure
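# Because the gamma distribution is the conjugate prior of the Poisson, we can
# check this update analytically (a check not in the original): after observing
# one game with 3 goals, the posterior is `Gamma(alpha + 3, beta + 1)`.
# In[ ]:
# Posterior mean and SD from the grid approximation.
xs, ps = np.transpose(list(posterior.items()))
grid_mean = np.sum(xs * ps)
grid_sd = np.sqrt(np.sum(ps * (xs - grid_mean)**2))

# Mean and SD of the analytic conjugate posterior; they should be close.
conj = gamma(a=alpha+3, scale=1/(beta+1))
(grid_mean, grid_sd), (conj.mean(), conj.std())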
# ## Posterior predictive distribution
#
# Now, let's get to what is usually the point of this whole exercise, making predictions.
#
# The posterior represents what we believe about `mu` based on the data (and our prior beliefs).
#
# Each value of `mu` is a possible goal scoring rate.
#
# For a given value of `mu`, we can generate a distribution of goals scored in a particular game, which is Poisson.
#
# But we don't have a single value of `mu`; we have a whole distribution of possible values, each with its own probability.
#
# So the posterior predictive distribution is a mixture of Poissons with different parameters.
#
# The simplest way to generate the posterior predictive distribution is to
#
# 1. Draw a random `mu` from the posterior distribution.
#
# 2. Draw a random number of goals from `Poisson(mu)`.
#
# 3. Repeat.
#
# Here's a function that draws a sample from a posterior `Suite`:
# In[31]:
def sample_suite(suite, size):
    """Draw a random sample from a Suite.

    suite: Suite object
    size: sample size
    """
    xs, ps = np.transpose(list(suite.items()))
    return np.random.choice(xs, size, replace=True, p=ps)
# Here's a sample of `mu` drawn from the posterior distribution (after one game).
# In[32]:
size = 10000
sample_post = sample_suite(posterior, size)
np.mean(sample_post)
# Now, for each value of `mu` in the posterior sample, we draw one sample from `Poisson(mu)`.
# In[33]:
sample_post_pred = np.random.poisson(sample_post)
np.mean(sample_post_pred)
# The result is a sample from the posterior predictive distribution. Here's what it looks like.
# In[34]:
plot_pmf(sample_post_pred, label='posterior predictive sample')
decorate_pmf_goals()
plt.savefig('zigzag9.png', dpi=150)
# The prior and posterior are distributions of mu, which is a continuous value.
#
# The prior predictive and posterior predictive are distributions of goals, which are discrete.
#
# To help keep them straight, I will generally plot distributions of `mu` as CDFs; for distributions of goals, I'll use PMFs to show their shapes and CDFs to compare them.
#
# ## Back to PyMC
#
# Previously we used PyMC to draw a sample from a Poisson distribution with known `mu`.
#
# Now we'll use it to draw a sample from a gamma distribution of `mu`, with known `alpha` and `beta`.
#
# And then use that value of `mu` to draw a sample from a Poisson distribution.
#
# Here are the values of the parameters:
# In[35]:
print(alpha, beta)
# Here's what the model looks like:
# In[36]:
model = pm.Model()
with model:
    mu = pm.Gamma('mu', alpha, beta)
    goals = pm.Poisson('goals', mu)
    trace = pm.sample_prior_predictive(1000)
# The distribution of `mu` from this model is the prior. Let's see what it looks like:
# In[37]:
sample_prior_pm = trace['mu']
np.mean(sample_prior_pm)
# And compare it to a sample from the gamma prior.
# In[38]:
sample_prior = sample_suite(gamma_prior, 1000)
np.mean(sample_prior)
# In[39]:
plot_cdf(sample_prior, label='prior')
plot_cdf(sample_prior_pm, label='prior pymc')
decorate_cdf_rates()
plt.savefig('zigzag10.png', dpi=150)
# It looks pretty good.
# The distribution of `goals` from this model is the prior predictive.
# In[40]:
sample_prior_pred_pm = trace['goals']
np.mean(sample_prior_pred_pm)
# And let's compare it to a prior predictive distribution estimated by sampling.
# In[41]:
sample_prior_pred = np.random.poisson(sample_prior)
np.mean(sample_prior_pred)
# In[42]:
plot_cdf(sample_prior_pred, label='prior pred')
plot_cdf(sample_prior_pred_pm, label='prior pred pymc')
decorate_cdf_goals()
plt.savefig('zigzag11.png', dpi=150)
# Looks good.
# ## Now with PyMC
#
# Finally, we are ready to use PyMC for actual inference. We just have to make one small change.
#
# Instead of generating `goals`, we'll mark goals as `observed` and provide the observed data, `3`.
#
# And instead of calling `sample_prior_predictive`, we'll call `sample`, which is understood to sample from the posterior distribution of `mu`.
# In[43]:
model = pm.Model()
with model:
    mu = pm.Gamma('mu', alpha, beta)
    goals = pm.Poisson('goals', mu, observed=3)
    trace = pm.sample(1000, tune=2000)
# With `goals` fixed, the only unknown is `mu`, so `trace` contains a sample drawn from the posterior distribution of `mu`. We can plot the posterior using a function provided by PyMC:
# In[44]:
pm.plot_posterior(trace)
decorate_pdf_rate()
# And we can extract a sample from the posterior of `mu`:
# In[45]:
sample_post_pm = trace['mu']
np.mean(sample_post_pm)
# And compare it to the sample we drew from the grid approximation:
# In[46]:
plot_cdf(sample_post, label='posterior grid')
plot_cdf(sample_post_pm, label='posterior pymc')
decorate_cdf_rates()
plt.savefig('zigzag12.png', dpi=150)
# Again, it looks pretty good.
#
# To sample from the posterior predictive distribution, we can use `sample_posterior_predictive`:
# In[47]:
with model:
    post_pred = pm.sample_posterior_predictive(trace, samples=1000)
# Here's what it looks like:
# In[48]:
sample_post_pred_pm = post_pred['goals']
sample_post_pred_pm.shape
# In[49]:
sample_post_pred_pm = post_pred['goals']
np.mean(sample_post_pred_pm)
# In[50]:
plot_cdf(sample_post_pred, label='posterior pred grid')
plot_cdf(sample_post_pred_pm, label='posterior pred pm')
decorate_cdf_goals()
plt.savefig('zigzag13.png', dpi=150)
# Looks pretty good!
# ## Two teams
#
# We can extend the model to estimate different values of `mu` for the two teams.
# In[51]:
model = pm.Model()
with model:
    mu_BOS = pm.Gamma('mu_BOS', alpha, beta)
    mu_ANA = pm.Gamma('mu_ANA', alpha, beta)
    goals_BOS = pm.Poisson('goals_BOS', mu_BOS, observed=3)
    goals_ANA = pm.Poisson('goals_ANA', mu_ANA, observed=1)
    trace = pm.sample(1000, tune=2000)
# We can use `traceplot` to review the results and do some visual diagnostics.
# In[52]:
pm.traceplot(trace);
# Here are the posterior distributions for `mu_BOS` and `mu_ANA`.
# In[53]:
mu_BOS = trace['mu_BOS']
plot_cdf(mu_BOS, label='mu_BOS posterior')
mu_ANA = trace['mu_ANA']
plot_cdf(mu_ANA, label='mu_ANA posterior')
decorate_cdf_rates()
np.mean(mu_BOS), np.mean(mu_ANA)
plt.savefig('zigzag14.png', dpi=150)
# On the basis of one game, here's the probability that Boston is the better team, that is, the fraction of posterior samples in which `mu_BOS` exceeds `mu_ANA`.
# In[54]:
np.mean(mu_BOS > mu_ANA)
# And the probability that the two rates are exactly equal, which is effectively zero because the sampled values are continuous.
# In[55]:
np.mean(mu_BOS == mu_ANA)
# ## Predictions
#
# Even if Boston is the better team, that doesn't mean they'll win the next game.
#
# We can use `sample_posterior_predictive` to generate predictions.
# In[56]:
with model:
    post_pred = pm.sample_posterior_predictive(trace, samples=1000)
# Here are the posterior predictive distributions of goals scored.
# In[57]:
goals_BOS = post_pred['goals_BOS'].flatten()
goals_ANA = post_pred['goals_ANA'].flatten()
plot_cdf(goals_BOS, label='BOS')
plot_cdf(goals_ANA, label='ANA')
decorate_cdf_goals()
plt.savefig('zigzag15.png', dpi=150)
# Here's the chance that Boston wins the next game.
# In[58]:
win = np.mean(goals_BOS > goals_ANA)
win
# The chance that they lose.
# In[59]:
lose = np.mean(goals_ANA > goals_BOS)
lose
# And the chance of a tie.
# In[60]:
tie = np.mean(goals_BOS == goals_ANA)
tie
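# As a sanity check (not in the original), the three outcomes are mutually
# exclusive and exhaustive, so the probabilities should add up to 1.
# In[ ]:
win + lose + tie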
# ## Overtime!
#
# In the event of a tie, the teams play a five-minute overtime period during which the team that scores first wins.
#
# In a Poisson process with rate parameter `mu`, the time until the next event follows an exponential distribution with rate `mu`, so its mean is `1/mu` (the `scale` argument that NumPy's exponential generator expects).
#
# So we can take a sample from the posterior distributions of `mu`:
# In[61]:
mu_ANA = trace['mu_ANA']
mu_BOS = trace['mu_BOS']
# And generate the time to score, `tts`, for each team:
# In[62]:
tts_ANA = np.random.exponential(1/mu_ANA)
np.mean(tts_ANA)
# In[63]:
tts_BOS = np.random.exponential(1/mu_BOS)
np.mean(tts_BOS)
# Here's the chance that Boston wins in overtime.
# In[64]:
win_ot = np.mean(tts_BOS < tts_ANA)
win_ot
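# As a consistency check (not in the original): given the rates, the probability
# that Boston scores first is `mu_BOS / (mu_BOS + mu_ANA)` for competing
# exponentials. Averaging over the posterior samples should be close to `win_ot`.
# In[ ]:
np.mean(mu_BOS / (mu_BOS + mu_ANA))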
# Since `tts` is continuous, ties in overtime are vanishingly unlikely. So the total probability that Boston wins is the chance they win in regulation, plus the chance of a tie in regulation followed by a win in overtime.
# In[65]:
total_win = win + tie * win_ot
total_win