#!/usr/bin/env python
# coding: utf-8

# ### Reminder: use Shift-ENTER to run each cell; feel free to edit, etc, before and after running!

# ## The Monty Hall problem
# 
# You, lucky contestant, are on a game show!  You stand a chance to win a car (with a trade-in value of $50k)!
# 
# The host, one Monty Hall, describes the game to you as follows:
# 
#    1. Here before you, you see three doors -- A, B, and C!  Behind one of them is the car.
# 
#    2. I will start by having you pick one door, but I **do not open it**.
# 
#    3. Then, I will reveal what is behind one of the two doors you did not choose (needless to say, it will not be the car!)
# 
#    4. At that point, you may either keep your first choice, OR you may switch your choice to another door.
# 
# If you can identify which door has the car behind it, you get to keep the car (or the money).
# 
# The question for you is this: as a winning strategy, should you:
# 
# (a) always stay with your original door choice
# 
# (b) always switch
# 
# or
# 
# (c) does it not matter?

# ### Monty Hall Monte Carlo
# 
# You, being an accomplished computational scientist, decide that rather than thinking hard about this, you're just going to simulate it using the Monte Carlo method ([http://en.wikipedia.org/wiki/Monte_Carlo_method](http://en.wikipedia.org/wiki/Monte_Carlo_method)). This lets you explore the more complex emergent properties of this system by only worrying about the simple rules defining the system. ([http://en.wikipedia.org/wiki/Agent-based_model](http://en.wikipedia.org/wiki/Agent-based_model))
# 
# Let's start by setting up the problem.

# In[34]:


# grab the pseudo-random # generator module in Python: http://blog.doughellmann.com/2010/10/pymotw-random-pseudorandom-number.html
import random

# now, build a function to create 3 doors, with one car behind them.
# this function will return a dictionary with keys A, B, and C, containing associated values 'empty' or 'car'.
def create_doors():
    doors = {}
    doors['A'] = 'empty'
    doors['B'] = 'empty'
    doors['C'] = 'empty'
    
    # randomly choose *one* of the three doors
    keylist = list(doors.keys())
    car_is_behind = random.choice(keylist)
    doors[car_is_behind] = 'car'
    
    return doors


# Let's take a look at the output of this function:

# In[35]:


print(create_doors())
print(create_doors())
print(create_doors())


# ## Simulating some choices with two functions
# 
# Next, here are two functions, one which simulates choice (a) -- staying with your original door -- and one which simulates choice (b) -- switch.  If the function returns true, you've won! If the function returns false, you've lost.

# In[36]:


def always_switch():
    # set up the problem
    doors_d = create_doors()
    
    # pick a door
    doors = ['A', 'B', 'C']
    my_choice = random.choice(doors)
    
    # remove it from Monty's consideration -- he will never choose this one
    doors.remove(my_choice)
    assert len(doors) == 2, "you should only have two doors left..."
    
    # now Monty Hall picks a door:
    while 1:
        monty_choice = random.choice(doors)
        if doors_d[monty_choice] != 'car': # he'll never reveal the car!
            break
    
    doors.remove(monty_choice)
    
    # now, because you always switch, you're left with monty's non-choice:
    assert len(doors) == 1, "you should only have one door left..."
    
    my_choice = doors[0]
    
    if doors_d[my_choice] == 'car':
        return True                    # you win!
    return False                       # you lose :(


# In[37]:


def never_switch():
    # set up the problem
    doors_d = create_doors()
    
    # pick a door
    doors = ['A', 'B', 'C']
    my_choice = random.choice(doors)
    doors.remove(my_choice)
    assert len(doors) == 2, "you should only have two doors left..."
    
    # now Monty Hall picks a door:
    while 1:
        monty_choice = random.choice(doors)
        if doors_d[monty_choice] != 'car': # he'll never reveal the car!
            break
    
    doors.remove(monty_choice)
    
    # now, because you never switch, you're left with your original choice:
    assert len(doors) == 1, "you should only have one door left..."
    
    # you stick with your original choice:
    if doors_d[my_choice] == 'car':
        return True                    # you win!
    return False                       # you lose :(


# In[38]:


# ok, let's try this out!
print(always_switch())
print(always_switch())
print(always_switch())

print(never_switch())
print(never_switch())
print(never_switch())


# OK, a couple of things:
# 
# Question: *Is* this a true implementation of the Monty Hall scenario? Why or hwy not?
# 
# Second, note that every time you run this, you will get a different set of answers -- because we're using a random number generator.

# ## The First Question: is it better to always switch? Or should you never switch? Or is it the same?
# 
# To answer this question, let's run 'always_switch()' a bunch of times, and calculate how often we win.

# In[39]:


won_count = 0
N = 1000
for i in range(N):
    if always_switch():
        won_count += 1
        
print('With always_switch(), I won', won_count, 'of', N, 'tries')


# We can do the same with never switching:

# In[40]:


won_count = 0
N = 1000
for i in range(N):
    if never_switch():
        won_count += 1
        
print('With never_switch(), I won', won_count, 'of', N, 'tries')


# OK, so according to our simulation, we should always switch.  ...can we get this answer some other way?

# ## Another question: what happens if you increase the number of doors, and Monty opens n-1 doors?
# 
# Since all our doors are virtual, we can change their number quite easily.  (Well, it involves a little more programming...)

# In[41]:


# write a function to create M doors, not just 3.
def create_multi_doors(M):
    doors = {}
    for i in range(M):
        doors[i] = 'empty'
        
    # randomly choose *one* of the three doors
    keylist = list(doors.keys())
    car_is_behind = random.choice(keylist)
    doors[car_is_behind] = 'car'
    
    return doors


# We'll also need to update our always_switch/never_switch functions to use create_multi_doors():

# In[42]:


def always_switch2(M):
    # set up the problem
    doors_d = create_multi_doors(M)
    
    # pick a door
    doors = list(doors_d.keys())
    my_choice = random.choice(doors)
    
    doors.remove(my_choice)
    
    while len(doors) > 1:
        door = random.choice(doors)
        if doors_d[door] != 'car':
            doors.remove(door)
    
    assert len(doors) == 1, doors
    
    # now pick the one that's left:
    my_choice = doors[0]
    
    if doors_d[my_choice] == 'car':
        return True                    # you win!
    return False                       # you lose :(

def never_switch2(M):
    # set up the problem
    doors_d = create_multi_doors(M)
    
    # pick a door
    doors = list(doors_d.keys())
    my_choice = random.choice(doors)
    
    doors.remove(my_choice)
    
    while len(doors) > 1:
        door = random.choice(doors)
        if doors_d[door] != 'car':
            doors.remove(door)
    
    assert len(doors) == 1, doors
    
    # ...but we're sticking with our original choice.    
    if doors_d[my_choice] == 'car':
        return True                    # you win!
    return False                       # you lose :(


# In[43]:


print(always_switch2(3))


# In[44]:


print(always_switch2(100))
print(always_switch2(100))
print(always_switch2(100))

print(never_switch2(100))
print(never_switch2(100))
print(never_switch2(100))


# OK, the functions don't obviously break... what happens if we run them a bunch?

# In[45]:


won_count = 0
N = 1000 # num trials
M = 3 # doors

for i in range(N):
    if always_switch2(M):
        won_count += 1
        
print('With always_switch2() and', M, 'doors, I won', won_count, 'of', N, 'tries')

won_count = 0

for i in range(N):
    if never_switch2(M):
        won_count += 1
        
print('With never_switch2() and', M, 'doors, I won', won_count, 'of', N, 'tries')


# ## A few questions to explore --
# 
# How do we know always_switch2() and never_switch2() work properly?
# 
# What's the actual mathematical formula underlying the results?

# # Visualization!

# In[46]:


import math

# calculate the avg & standard deviation
def calc_stddev(series):
    avg = sum(series) / float(len(series))
    devs = [ i - avg for i in series ]
    devs = [ i**2 for i in devs ]
    stddev = math.sqrt(sum(devs) / float(len(series)))
    return avg, stddev


# run the given function with parameter M (# doors), N times; return average
def try_N_times(fn, M, N):
    won = 0
    for i in range(N):
        if fn(M):
            won += 1
    return won / float(N)


# ### Explore the simulation
# 
# What does `N` represent and what happens as you change `N` in different ways?

# _(Answer/Notes here. Double-click to edit.)_

# What does `n_trials` represent and what happens as you change `n_trials` in different ways?

# _(Answer/Notes here. Double-click to edit.)_

# What does `M` represent and what happens as you change `M` in different ways?

# _(Answer/Notes here. Double-click to edit.)_

# In[47]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.mlab as mlab
import pylab

# Parameters we've made easy to change
#################
n_trials = 500  # How many times to gather statics from groups of N samples
M = 3           # How many doors to use (originally 3)
N = 100         # How many samples from which to gather statistics (like mean of N coin flips)
#################

# Try M doors N times; then run n_trials times, and track.
always_list = [ try_N_times(always_switch2, M, N) for _ in range(n_trials) ]
never_list = [ try_N_times(never_switch2, M, N) for _ in range(n_trials) ]

# get the actual and fitted distribution for the 'always switch' decision
n1, bins1, patches1 = pylab.hist(always_list, facecolor='green', alpha=0.5, range=(0, 1), bins=50, normed=1, label='always')
always_avg, always_stddev = calc_stddev(always_list)
y1 = mlab.normpdf(bins1, always_avg, always_stddev)
pylab.plot(bins1, y1, 'g--')

# get the actual and fitted distribution for the 'never switch' decision
n2, bins2, patches2 = pylab.hist(never_list, facecolor='red', alpha=0.5, range=(0, 1), bins=50, normed=1, label='never')
never_avg, never_stddev = calc_stddev(never_list)
y2 = mlab.normpdf(bins2, never_avg, never_stddev) # fit
pylab.plot(bins2, y2, 'r--')

# label the plot
pylab.xlabel('probability of winning')
pylab.ylabel('distribution of p for multiple trials')
pylab.title('Monty Hall problem (%d doors)' % M)
pylab.legend(loc='upper center')


# In[48]:


pylab.plot(bins1, y1, 'g', label='never switch')
pylab.plot(bins2, y2, 'r', label='always switch')
pylab.axis(ymax=12)
pylab.legend()
pylab.xlabel('probability of winning')
pylab.ylabel('distribution of p for multiple trials')
pylab.title('Monty Hall problem (%d doors)' % M)
print('average win rate, always switching:', always_avg)
print('average win rate, never switch:', never_avg)


# **Bonus Question**: What are the assumptions we're making in this simulation compared to the real game show?

# In[ ]:


# In[ ]: