Problem setup:

  1. 13,066,047 voters arrive at the polls.
  2. $p_{yes}\%$ of them intend to vote "Yes"; the remaining $(100 - p_{yes})\%$ intend to vote "No".
  3. Each voter casts an invalid (unmarked or void) ballot with probability $p_{invalid}\%$.
  4. Each valid ballot is misclassified by the poll workers (a "Yes" tallied as "No", or vice versa) with probability $p_{misclassification}\%$.
  5. Majority vote wins.
In [29]:
%matplotlib inline
In [30]:
import numpy as np
In [37]:
# Ballot counts by category. Together they add up to the 13,066,047 voters
# quoted in the problem setup.
# NOTE(review): presumably the officially reported tallies -- confirm the source.
YES_BALLOTS = 6377482
NO_BALLOTS = 6431376
# NOTE(review): these two look like the "unmarked or void" invalid ballots
# from the problem setup -- confirm that "null" means "void" here.
UNMARKED_BALLOTS = 86243
NULL_BALLOTS = 170946
In [38]:
# Every ballot cast, valid or not: the electorate size used by the simulation.
TOTAL_VOTES = sum((YES_BALLOTS, NO_BALLOTS, UNMARKED_BALLOTS, NULL_BALLOTS))

# Chance that any single voter's ballot ends up invalid.
# NOTE(review): close to the share of unmarked + null ballots in the counts
# above ((86243 + 170946) / 13066047 ~ 1.97%) -- presumably chosen to match.
P_INVALID = 0.02

# Chance that a valid ballot is tallied for the opposite option.
P_MISCLASSIFICATION = 0.01

# Number of independent simulated elections per scenario.
N_TRIALS = 100000
In [39]:
def simulate_vote(probability_yes, *, total_votes=None, p_invalid=None,
                  p_misclassification=None, n_trials=None):
    """Simulate the tallied "Yes" share of a two-option vote over many trials.

    The electorate is split deterministically into intended "Yes" and "No"
    voters. In every trial each voter's ballot is independently invalidated
    with probability ``p_invalid``, and each remaining valid ballot is
    independently tallied for the opposite option with probability
    ``p_misclassification``.

    Parameters
    ----------
    probability_yes : float
        Fraction of voters, in [0, 1], who intend to vote "Yes".
    total_votes : int, optional
        Number of voters. Defaults to the module-level ``TOTAL_VOTES``.
    p_invalid : float, optional
        Per-ballot probability of being invalid. Defaults to ``P_INVALID``.
    p_misclassification : float, optional
        Per-valid-ballot probability of being tallied for the other option.
        Defaults to ``P_MISCLASSIFICATION``.
    n_trials : int, optional
        Number of independent simulated elections. Defaults to ``N_TRIALS``.

    Returns
    -------
    numpy.ndarray
        One value per trial: tallied "Yes" votes divided by all tallied
        votes. A trial with no tallied votes at all yields ``nan``.

    Raises
    ------
    ValueError
        If ``probability_yes`` is outside [0, 1] (or is NaN).
    """
    # Fall back to the notebook-wide settings for anything not overridden.
    if total_votes is None:
        total_votes = TOTAL_VOTES
    if p_invalid is None:
        p_invalid = P_INVALID
    if p_misclassification is None:
        p_misclassification = P_MISCLASSIFICATION
    if n_trials is None:
        n_trials = N_TRIALS

    # Fail early with a clear message; otherwise NumPy would reject the
    # resulting negative ballot count with a much less helpful error.
    if not 0 <= probability_yes <= 1:
        raise ValueError(
            "probability_yes must be between 0 and 1, got {!r}".format(probability_yes)
        )

    # Truncate so the two groups are whole numbers that sum to total_votes.
    yes_votes = int(total_votes * probability_yes)
    no_votes = total_votes - yes_votes

    # One draw per trial; `size` avoids materialising n_trials-long lists
    # just to broadcast a scalar ballot count.
    invalid_ballots_yes = np.random.binomial(n=yes_votes, p=p_invalid, size=n_trials)
    invalid_ballots_no = np.random.binomial(n=no_votes, p=p_invalid, size=n_trials)

    valid_yes_votes = yes_votes - invalid_ballots_yes
    valid_no_votes = no_votes - invalid_ballots_no

    # Sample the correctly tallied ballots; the remainder were flipped.
    yes_votes_from_yes_voters = np.random.binomial(n=valid_yes_votes, p=1 - p_misclassification)
    no_votes_from_yes_voters = valid_yes_votes - yes_votes_from_yes_voters

    no_votes_from_no_voters = np.random.binomial(n=valid_no_votes, p=1 - p_misclassification)
    yes_votes_from_no_voters = valid_no_votes - no_votes_from_no_voters

    tallied_yes_votes = yes_votes_from_yes_voters + yes_votes_from_no_voters
    tallied_no_votes = no_votes_from_no_voters + no_votes_from_yes_voters

    return tallied_yes_votes / (tallied_yes_votes + tallied_no_votes)
In [40]:
# Report template; it never changes between iterations, so build it once.
results = "p_yes: {:1.6f}% | no_win_percentage: {:1.3f}%"

# Shrink the "Yes" edge over an even split by a factor of ten at each step
# and report how often "No" still comes out ahead in the tally.
for epsilon in (1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7):
    probability_yes = 0.5 + epsilon
    percentage_of_tallied_votes_that_were_yes = simulate_vote(probability_yes)

    # "No" wins a trial when "Yes" holds strictly less than half of the
    # tallied votes; exact ties are not counted as "No" wins.
    no_won = percentage_of_tallied_votes_that_were_yes < 0.5
    proportion_of_trials_won_by_no = np.mean(no_won)

    print(results.format(probability_yes * 100, proportion_of_trials_won_by_no * 100))
p_yes: 60.000000% | no_win_percentage: 0.000%
p_yes: 51.000000% | no_win_percentage: 0.000%
p_yes: 50.100000% | no_win_percentage: 0.000%
p_yes: 50.010000% | no_win_percentage: 0.191%
p_yes: 50.001000% | no_win_percentage: 38.688%
p_yes: 50.000100% | no_win_percentage: 48.791%
p_yes: 50.000010% | no_win_percentage: 50.063%