#!/usr/bin/env python
# coding: utf-8

# Bayesian Statistics Made Simple
# ===
#
# Code and exercises from my workshop on Bayesian statistics in Python.
#
# Copyright 2016 Allen Downey
#
# MIT License: https://opensource.org/licenses/MIT

# In[1]:

# When running on Colab, install empiricaldist first:
# https://pypi.org/project/empiricaldist/

import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    get_ipython().system('pip install empiricaldist')


# In[2]:

import numpy as np
import pandas as pd

import seaborn as sns
sns.set_style('white')
sns.set_context('talk')

import matplotlib.pyplot as plt

from empiricaldist import Pmf


# ### Working with Pmfs
#
# Create a Pmf object to represent a six-sided die.

# In[3]:

d6 = Pmf()


# A Pmf is a map from possible outcomes to their probabilities.

# In[4]:

# Give each face of the die the same (unnormalized) weight.
for x in range(1, 7):
    d6[x] = 1


# Initially the probabilities don't add up to 1.

# In[5]:

d6


# `normalize` adds up the probabilities and divides through.
# The return value is the total probability before normalizing.

# In[6]:

d6.normalize()


# Now the Pmf is normalized.

# In[7]:

d6


# And we can compute its mean (which only works if it's normalized).

# In[8]:

d6.mean()


# `choice` chooses random values from the Pmf.

# In[9]:

d6.choice(size=10)


# `bar` plots the Pmf as a bar chart.

# In[10]:

def decorate_dice(title):
    """Label the x-axis, the y-axis, and set the plot title.

    title: string used as the plot title
    """
    plt.xlabel('Outcome')
    plt.ylabel('PMF')
    plt.title(title)


# In[11]:

d6.bar()
decorate_dice('One die')


# `d6.add_dist(d6)` creates a new `Pmf` that represents the sum of
# two six-sided dice.

# In[12]:

twice = d6.add_dist(d6)
twice


# **Exercise 1:** Plot `twice` and compute its mean.

# In[13]:

# Solution

twice.bar()
decorate_dice('Two dice')
twice.mean()


# **Exercise 2:** Suppose I roll two dice and tell you the result is
# greater than 3.
#
# Plot the `Pmf` of the remaining possible outcomes and compute its mean.

# In[20]:

# Solution

# Start from the two-dice distribution, rule out the outcomes that are
# not greater than 3, and renormalize what is left.
twice_gt3 = d6.add_dist(d6)
twice_gt3[2] = 0
twice_gt3[3] = 0
twice_gt3.normalize()

twice_gt3.bar()
decorate_dice('Two dice, greater than 3')
twice_gt3.mean()


# **Bonus exercise:** In Dungeons and Dragons, the amount of damage a
# [goblin](https://www.dndbeyond.com/monsters/goblin) can withstand is the
# sum of two six-sided dice. The amount of damage you inflict with a
# [short sword](https://www.dndbeyond.com/equipment/shortsword) is
# determined by rolling one six-sided die.
#
# Suppose you are fighting a goblin and you have already inflicted 3 points
# of damage. What is your probability of defeating the goblin with your
# next successful attack?
#
# Hint: `Pmf` provides comparator functions like `gt_dist` and `le_dist`,
# which compare two distributions and return a probability.

# In[23]:

# Solution

# Total damage = the 3 points already inflicted plus one more die roll.
damage = d6.add_dist(3)
damage.bar()
decorate_dice('Total Damage')


# In[24]:

# Solution

# The goblin is defeated when total damage is at least its hit points.
hit_points = d6.add_dist(d6)
damage.ge_dist(hit_points)


# ### The cookie problem
#
# `Pmf.from_seq` makes a `Pmf` object from a sequence of values.
#
# Here's how we can use it to create a `Pmf` with two equally likely
# hypotheses.

# In[25]:

cookie = Pmf.from_seq(['Bowl 1', 'Bowl 2'])
cookie


# Now we can update each hypothesis with the likelihood of the data
# (a vanilla cookie).

# In[26]:

cookie['Bowl 1'] *= 0.75
cookie['Bowl 2'] *= 0.5
cookie.normalize()


# And display the posterior probabilities.

# In[27]:

cookie


# **Exercise 3:** Suppose we put the first cookie back, stir, choose again
# from the same bowl, and get a chocolate cookie.
#
# What are the posterior probabilities after the second cookie?
#
# Hint: The posterior (after the first cookie) becomes the prior (before
# the second cookie).

# In[28]:

# Solution

# Multiply by the likelihood of a chocolate cookie under each hypothesis.
cookie['Bowl 1'] *= 0.25
cookie['Bowl 2'] *= 0.5
cookie.normalize()
cookie


# **Exercise 4:** Instead of doing two updates, what if we collapse the two
# pieces of data into one update?
#
# Re-initialize `Pmf` with two equally likely hypotheses and perform one
# update based on two pieces of data, a vanilla cookie and a chocolate
# cookie.
#
# The result should be the same regardless of how many updates you do
# (or the order of updates).

# In[29]:

# Solution

# One update using the joint likelihood: vanilla, then chocolate.
cookie = Pmf.from_seq(['Bowl 1', 'Bowl 2'])
cookie['Bowl 1'] *= 0.75 * 0.25
cookie['Bowl 2'] *= 0.5 * 0.5
cookie.normalize()
cookie


# In[ ]: