#!/usr/bin/env python
# coding: utf-8

# ## Problem setup:
# 
# 1. 13,066,047 voters arrive at the polls.
# 2. $p_{yes}\%$ of them intend to vote "Yes", $(1-p_{yes})\%$ of them intend to vote "No."
# 3. Each voter casts an invalid (unmarked or void) ballot with probability $p_{invalid}\%$.
# 4. Of the valid ballots, the poll workers misclassify the vote with probability $p_{misclassification}\%$.
# 5. Majority vote wins.

# In[29]:


# Enable inline matplotlib rendering when running under IPython/Jupyter.
# `get_ipython` is only injected into the namespace by IPython, so guard the
# lookup to keep this file runnable as a plain Python script.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None
if _ipython_shell is not None:
    _ipython_shell.run_line_magic('matplotlib', 'inline')


# In[30]:


import numpy as np


# In[37]:


# Ballot counts by category; together they make up the simulated electorate.
# NOTE(review): presumably the officially reported tallies -- confirm the source.
YES_BALLOTS, NO_BALLOTS = 6377482, 6431376
UNMARKED_BALLOTS, NULL_BALLOTS = 86243, 170946


# In[38]:


# Electorate size: every ballot cast, whether valid or not.
TOTAL_VOTES = sum((YES_BALLOTS, NO_BALLOTS, UNMARKED_BALLOTS, NULL_BALLOTS))
# Probability that a single voter casts an invalid (unmarked or void) ballot.
P_INVALID = 0.02
# Probability that poll workers misclassify a valid ballot.
P_MISCLASSIFICATION = 0.01
# Number of simulated elections per scenario.
N_TRIALS = 10 ** 5


# In[39]:


def simulate_vote(probability_yes, *, total_votes=None, p_invalid=None,
                  p_misclassification=None, n_trials=None, rng=None):
    """Simulate the tallied "Yes" share across many noisy elections.

    The electorate is split deterministically into "Yes" and "No" voters.
    Each voter then independently casts an invalid ballot with probability
    ``p_invalid``, and each remaining valid ballot is independently counted
    for the wrong side with probability ``p_misclassification``.

    Args:
        probability_yes: Fraction of voters intending to vote "Yes"; must lie
            in [0, 1].
        total_votes: Number of voters. Defaults to the module-level
            ``TOTAL_VOTES``.
        p_invalid: Per-voter probability of an invalid ballot. Defaults to
            the module-level ``P_INVALID``.
        p_misclassification: Per-ballot probability that a valid ballot is
            tallied for the opposite side. Defaults to the module-level
            ``P_MISCLASSIFICATION``.
        n_trials: Number of independent elections to simulate. Defaults to
            the module-level ``N_TRIALS``.
        rng: Object exposing a NumPy-style ``binomial(n, p, size)`` method
            (e.g. ``np.random.RandomState``). Defaults to the global
            ``np.random`` state, so ``np.random.seed`` still applies.

    Returns:
        A 1-D array of length ``n_trials`` holding, for each trial, the
        fraction of tallied (valid) ballots that were counted as "Yes".

    Raises:
        ValueError: If ``probability_yes`` is outside [0, 1] (or NaN).
    """
    # Resolve defaults at call time so later changes to the module-level
    # constants are honoured.
    if total_votes is None:
        total_votes = TOTAL_VOTES
    if p_invalid is None:
        p_invalid = P_INVALID
    if p_misclassification is None:
        p_misclassification = P_MISCLASSIFICATION
    if n_trials is None:
        n_trials = N_TRIALS
    if rng is None:
        rng = np.random

    # Fail early with a clear message instead of letting a negative voter
    # count reach the binomial sampler.
    if not 0 <= probability_yes <= 1:
        raise ValueError(
            "probability_yes must be within [0, 1], got {!r}".format(probability_yes)
        )

    # int() truncates, so any fractional voter is assigned to the "No" side.
    yes_votes = int(total_votes * probability_yes)
    no_votes = total_votes - yes_votes

    # Draw one invalid-ballot count per trial; `size` avoids materialising a
    # Python list of n_trials identical counts just to broadcast a scalar.
    invalid_ballots_yes = rng.binomial(n=yes_votes, p=p_invalid, size=n_trials)
    invalid_ballots_no = rng.binomial(n=no_votes, p=p_invalid, size=n_trials)

    valid_yes_votes = yes_votes - invalid_ballots_yes
    valid_no_votes = no_votes - invalid_ballots_no

    # Correctly classified ballots are sampled; the remainder are the
    # misclassified ones credited to the opposite side.
    yes_votes_from_yes_voters = rng.binomial(n=valid_yes_votes, p=1 - p_misclassification)
    no_votes_from_yes_voters = valid_yes_votes - yes_votes_from_yes_voters

    no_votes_from_no_voters = rng.binomial(n=valid_no_votes, p=1 - p_misclassification)
    yes_votes_from_no_voters = valid_no_votes - no_votes_from_no_voters

    tallied_yes_votes = yes_votes_from_yes_voters + yes_votes_from_no_voters
    tallied_no_votes = no_votes_from_no_voters + no_votes_from_yes_voters

    return tallied_yes_votes / (tallied_yes_votes + tallied_no_votes)


# In[40]:


# Sweep the true "Yes" share from 60% down to just above 50% and report how
# often the simulated tally nevertheless comes out in favour of "No".
results = "p_yes: {:1.6f}% | no_win_percentage: {:1.3f}%"
for epsilon in (1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7):
    probability_yes = .5 + epsilon
    # One tallied "Yes" fraction per simulated election (a fraction in [0, 1],
    # despite the variable's name).
    percentage_of_tallied_votes_that_were_yes = simulate_vote(probability_yes)
    # A trial is a "No" win only when "Yes" falls strictly below one half.
    no_won = percentage_of_tallied_votes_that_were_yes < .5
    proportion_of_trials_won_by_no = no_won.mean()

    print(results.format(100*probability_yes, 100*proportion_of_trials_won_by_no))