In [21]:
%matplotlib notebook
import collections

import numpy as np
import numpy.random as rnd

import matplotlib.pyplot as plt

# Seed NumPy's global random generator so the simulated survey answers are
# identical on every fresh run of the notebook.
rnd.seed(0)
In [4]:
# Probability that an honest simulated answer is True, i.e. the disease
# frequency p that the estimation below tries to recover.
REAL_DISEASE_FREQ = 0.1

# Number of simulated answers stored in the database.
N_SAMPLES = 100000
In [5]:
def simulate_single_answer():
    """Simulate one respondent's reported answer.

    A first random draw decides, with probability 0.5 each, whether the
    respondent answers honestly or gives a fake answer.  An honest answer
    is True with probability ``REAL_DISEASE_FREQ``; a fake answer is True
    with probability 0.5.

    Returns:
        bool: the answer the respondent reports.
    """
    answers_honestly = rnd.random() < 0.5
    # Chance of reporting True: the real disease frequency for an honest
    # answer, an even coin flip for a fake one.
    true_probability = REAL_DISEASE_FREQ if answers_honestly else 0.5
    return rnd.random() < true_probability
In [6]:
# One simulated answer per respondent; the loop index itself is not needed.
database = [simulate_single_answer() for _ in range(N_SAMPLES)]

Frequentist estimation of p

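Each respondent flips a fair coin: on heads they answer honestly (True with probability $p$), on tails they answer True with probability $0.5$. The probability of answering True is therefore

$P(\text{True}) = P(\text{HEADS})\,p + P(\text{TAILS})\cdot 0.5 = 0.5\,p + 0.25 \approx \#\text{True}/\#\text{Total}$

$\Rightarrow p \approx 2\,(\#\text{True}/\#\text{Total} - 0.25)$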

In [16]:
def estimate_p(db):
    """Estimate the disease frequency p from a collection of answers.

    Inverts ``P(True) = 0.5 * p + 0.25`` using the observed fraction of
    True answers in ``db``.

    Args:
        db: sized collection of boolean answers.

    Returns:
        float: the estimate ``2 * (#True / #Total - 0.25)``.

    Raises:
        ZeroDivisionError: if ``db`` is empty.
    """
    true_count = collections.Counter(db)[True]
    observed_true_rate = true_count / len(db)
    return 2 * (observed_true_rate - 0.25)

# Point estimate of p computed from the whole simulated database.
print('Estimated p:', estimate_p(db=database))
Estimated p: 0.09919999999999995

Convergence plot

In [22]:
# Prefix sizes at which the estimate is recomputed (every 100th answer).
sample_sizes = range(1, N_SAMPLES, 100)
estimations = [estimate_p(database[:n]) for n in sample_sizes]

# Plot against the number of answers used, so the x axis reads as a sample
# size rather than as a position in `estimations`.
fig, ax = plt.subplots()
ax.set_title('Convergence of the estimate of p')
ax.set_xlabel('Number of answers used')
ax.set_ylabel('Estimated p')
# Dashed reference line at the frequency the data was generated with.
ax.axhline(REAL_DISEASE_FREQ, color='gray', linestyle='--')
ax.plot(sample_sizes, estimations)
Out[22]:
[<matplotlib.lines.Line2D at 0x105e8c828>]