#!/usr/bin/env python
# coding: utf-8
"""Randomized-response survey simulation.

Each simulated respondent flips a fair coin.  On one outcome they answer
honestly whether they have the disease; on the other they give a random
answer that is True with probability 0.5.  The true disease frequency is
then recovered from the aggregate share of True answers.
"""

import collections

import numpy as np
import numpy.random as rnd

# True share of the simulated population that has the disease.
REAL_DISEASE_FREQ = 0.1
# Number of simulated survey answers.
N_SAMPLES = 100000


def simulate_single_answer():
    """Simulate one randomized-response answer.

    Draws from the global ``numpy.random`` state: one draw for the coin and
    one draw for the answer itself.

    Returns:
        With probability 0.5 the honest answer (True with probability
        ``REAL_DISEASE_FREQ``), otherwise a fake answer that is True with
        probability 0.5.
    """
    # Honest answer: True if has disease, False otherwise.
    if rnd.random() < 0.5:
        return rnd.random() < REAL_DISEASE_FREQ
    # Fake answer: random choice with probability 0.5.
    return rnd.random() < 0.5


# ## Frequentist estimation of _p_
#
# Probability of answering True:
#     P(HEADS) * p + P(TAILS) * 0.5 = #True / #Total
#
# With P(HEADS) = P(TAILS) = 0.5 this gives
#     p = 2 * (#True / #Total - 0.25)
def estimate_p(db):
    """Estimate the disease frequency from randomized-response answers.

    Args:
        db: Sized collection of boolean answers.

    Returns:
        ``2 * (share_of_True - 0.25)``.  The value is not clamped, so it can
        fall outside ``[0, 1]`` when the sample is small.

    Raises:
        ZeroDivisionError: If ``db`` is empty.
    """
    counter = collections.Counter(db)
    return 2 * (counter[True] / len(db) - 0.25)


def estimate_convergence(db, step=100):
    """Return ``estimate_p`` of the prefixes ``db[:1]``, ``db[:1 + step]``, ...

    Produces the same values as
    ``[estimate_p(db[:i]) for i in range(1, len(db), step)]`` but derives them
    from a single cumulative count instead of re-slicing and re-counting
    every prefix.

    Args:
        db: Sequence of boolean answers.
        step: Distance between consecutive prefix lengths.

    Returns:
        List of float estimates, one per prefix length; empty if ``db`` has
        fewer than two answers.
    """
    prefix_sizes = np.arange(1, len(db), step)
    # running_true[k] is the number of True answers among db[:k + 1].
    running_true = np.cumsum(np.asarray(db, dtype=bool), dtype=np.int64)
    estimates = 2 * (running_true[prefix_sizes - 1] / prefix_sizes - 0.25)
    return estimates.tolist()


if __name__ == "__main__":
    # The notebook magic exists only under IPython/Jupyter; when run with a
    # plain interpreter get_ipython is undefined, so fall back to an explicit
    # plt.show() at the end.
    try:
        get_ipython().run_line_magic('matplotlib', 'notebook')
    except NameError:
        running_in_ipython = False
    else:
        running_in_ipython = True

    # Imported here so the functions above can be imported without the
    # plotting dependency.
    import matplotlib.pyplot as plt

    # Fixed seed so the simulated database is reproducible.
    rnd.seed(0)
    database = [simulate_single_answer() for _ in range(N_SAMPLES)]

    print('Estimated p:', estimate_p(database))

    # ## Convergence plot
    estimations = estimate_convergence(database, step=100)
    plt.plot(estimations)
    if not running_in_ipython:
        plt.show()