#!/usr/bin/env python
# coding: utf-8

# Bayesian Statistics Made Simple
# ===
# 
# Code and exercises from my workshop on Bayesian statistics in Python.
# 
# Copyright 2016 Allen Downey
# 
# MIT License: https://opensource.org/licenses/MIT

# In[1]:


# If we're running on Colab, install empiricaldist
# https://pypi.org/project/empiricaldist/

import sys

# Treat the presence of the `google.colab` package among the loaded
# modules as the signal that this notebook is running on Colab.
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    # Run the install through the IPython shell that hosts the notebook.
    shell = get_ipython()
    shell.system('pip install empiricaldist')


# In[2]:


import numpy as np
import pandas as pd

import seaborn as sns
sns.set_style('white')
sns.set_context('talk')

import matplotlib.pyplot as plt

from empiricaldist import Pmf


# ### Working with Pmfs
# 
# Create a Pmf object to represent a six-sided die.

# In[3]:


d6 = Pmf()


# A Pmf maps each possible outcome to its probability.  Here every face of the die gets the same weight of 1.

# In[4]:


for x in range(1, 7):
    d6[x] = 1


# Initially the probabilities don't add up to 1.

# In[5]:


d6


# `normalize` adds up the probabilities and divides through.  The return value is the total probability before normalizing.

# In[6]:


d6.normalize()


# Now the Pmf is normalized.

# In[7]:


d6


# And we can compute its mean (which only works if it's normalized).

# In[8]:


d6.mean()


# `choice` chooses random values from the Pmf (here, `size=10` of them).

# In[9]:


d6.choice(size=10)


# `bar` plots the Pmf as a bar chart.

# In[10]:


def decorate_dice(title):
    """Annotate the current plot of a dice distribution.

    Sets the x-axis label to 'Outcome', the y-axis label to 'PMF',
    and puts `title` at the top of the current axes.

    title: string
    """
    axes = plt.gca()
    axes.set_title(title)
    axes.set_xlabel('Outcome')
    axes.set_ylabel('PMF')


# In[11]:


# Draw the single-die Pmf, then label the resulting plot.
d6.bar()
decorate_dice('One die')


# `d6.add_dist(d6)` creates a new `Pmf` that represents the sum of two six-sided dice.

# In[12]:


twice = d6.add_dist(d6)
# Bare expression: in a notebook this displays the contents of `twice`.
twice


# **Exercise 1:**  Plot `twice` and compute its mean.

# In[13]:


# Solution

# Plot first; the mean comes last so that it is the value the cell displays.
twice.bar()
decorate_dice('Two dice')
twice.mean()


# **Exercise 2:** Suppose I roll two dice and tell you the result is greater than 3.
# 
# Plot the `Pmf` of the remaining possible outcomes and compute its mean.

# In[20]:


# Solution

# Start from the sum of two dice, rule out the outcomes that contradict
# "greater than 3" by giving them zero probability, then renormalize.
twice_gt3 = d6.add_dist(d6)
for excluded in (2, 3):
    twice_gt3[excluded] = 0
twice_gt3.normalize()

twice_gt3.bar()
decorate_dice('Two dice, greater than 3')
twice_gt3.mean()


# **Bonus exercise:** In Dungeons and Dragons, the amount of damage a [goblin](https://www.dndbeyond.com/monsters/goblin) can withstand is the sum of two six-sided dice.  The amount of damage you inflict with a [short sword](https://www.dndbeyond.com/equipment/shortsword) is determined by rolling one six-sided die.
# 
# Suppose you are fighting a goblin and you have already inflicted 3 points of damage.  What is your probability of defeating the goblin with your next successful attack?
# 
# Hint: `Pmf` provides comparator functions like `gt_dist` and `le_dist`, which compare two distributions and return a probability.

# In[23]:


# Solution

# Total damage after the next hit: one six-sided die added to the
# 3 points already inflicted.
damage = d6.add_dist(3)
damage.bar()
decorate_dice('Total Damage')


# In[24]:


# Solution

# The goblin's hit points are the sum of two six-sided dice.
hit_points = d6.add_dist(d6)
# NOTE(review): presumably `ge_dist` returns the probability that
# `damage` is greater than or equal to `hit_points`, like the
# `gt_dist`/`le_dist` comparators named in the exercise hint -- confirm.
damage.ge_dist(hit_points)


# ### The cookie problem
# 
# `Pmf.from_seq` makes a `Pmf` object from a sequence of values.
# 
# Here's how we can use it to create a `Pmf` with two equally likely hypotheses.

# In[25]:


cookie = Pmf.from_seq(['Bowl 1', 'Bowl 2'])
cookie


# Now we can update each hypothesis by multiplying it by the likelihood of the data (a vanilla cookie), then renormalizing.

# In[26]:


# Likelihood of drawing a vanilla cookie under each hypothesis.
vanilla_likelihood = {'Bowl 1': 0.75, 'Bowl 2': 0.5}
for bowl, likelihood in vanilla_likelihood.items():
    cookie[bowl] *= likelihood
cookie.normalize()


# And display the posterior probabilities.

# In[27]:


cookie


# **Exercise 3:** Suppose we put the first cookie back, stir, choose again from the same bowl, and get a chocolate cookie.  
# 
# What are the posterior probabilities after the second cookie?
# 
# Hint: The posterior (after the first cookie) becomes the prior (before the second cookie).

# In[28]:


# Solution

# Likelihood of drawing a chocolate cookie under each hypothesis; the
# current contents of `cookie` act as the prior for this update.
chocolate_likelihood = {'Bowl 1': 0.25, 'Bowl 2': 0.5}
for bowl, likelihood in chocolate_likelihood.items():
    cookie[bowl] *= likelihood
cookie.normalize()
cookie


# **Exercise 4:** Instead of doing two updates, what if we collapse the two pieces of data into one update?
# 
# Re-initialize the `Pmf` with two equally likely hypotheses and perform one update based on two pieces of data, a vanilla cookie and a chocolate cookie.
# 
# The result should be the same regardless of how many updates you do (or the order of updates).

# In[29]:


# Solution

# Start over from the uniform prior and apply both observations at once:
# the joint likelihood is the product (vanilla likelihood) * (chocolate likelihood).
cookie = Pmf.from_seq(['Bowl 1', 'Bowl 2'])
joint_likelihood = {'Bowl 1': 0.75 * 0.25, 'Bowl 2': 0.5 * 0.5}
for bowl, likelihood in joint_likelihood.items():
    cookie[bowl] *= likelihood
cookie.normalize()
cookie


# In[ ]: