#!/usr/bin/env python
# coding: utf-8

# In[21]:


# Select matplotlib's 'notebook' backend through the IPython line magic
# (the script form of `%matplotlib notebook`).
get_ipython().run_line_magic('matplotlib', 'notebook')
import collections

import numpy as np
import numpy.random as rnd

import matplotlib.pyplot as plt

# Seed NumPy's global random generator so repeated runs draw the same numbers.
rnd.seed(0)


# In[4]:


# Simulation parameters.
# Ground-truth probability that an honest answer is True (respondent has the disease).
REAL_DISEASE_FREQ = 0.1
# Number of simulated survey answers to generate.
N_SAMPLES = 10 ** 5


# In[5]:


def simulate_single_answer():
    """Simulate one randomized-response survey answer.

    A first random draw plays the role of a fair coin flip. On one outcome
    the respondent answers honestly, i.e. True with probability
    REAL_DISEASE_FREQ; on the other outcome the answer is a fake one that is
    True with probability 0.5.

    Returns:
        bool: the (possibly fake) answer given by the respondent.
    """
    answers_honestly = rnd.random() < 0.5
    # Probability of replying True depends on which kind of answer is given.
    true_probability = REAL_DISEASE_FREQ if answers_honestly else 0.5
    return rnd.random() < true_probability


# In[6]:


# Simulated survey: one randomized-response answer per respondent.
database = [simulate_single_answer() for _ in range(N_SAMPLES)]


# ## Frequentist estimation of _p_
# 
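# Each respondent flips a fair coin: on heads they answer honestly (True with probability _p_), on tails they answer True or False at random with probability 0.5 each. The probability of answering True is therefore
# 
# $$P(\text{True}) = P(\text{heads}) \cdot p + P(\text{tails}) \cdot 0.5 = 0.5\,p + 0.25 \approx \frac{\#\text{True}}{\#\text{Total}}$$
# 
# $$\Rightarrow \quad p \approx 2 \left( \frac{\#\text{True}}{\#\text{Total}} - 0.25 \right)$$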

# In[16]:


def estimate_p(db):
    """Estimate the real frequency p from a collection of survey answers.

    The observed fraction of True answers is modelled as 0.5 * p + 0.25
    (half of the answers are honest, the other half are True with
    probability 0.5), which is inverted to recover p.

    Args:
        db: sized collection of boolean answers (must support ``len``).

    Returns:
        float: the estimate of p. Because it is computed from a noisy
        observed frequency, it can fall outside [0, 1], ranging from -0.5
        (no True answers) to 1.5 (only True answers).

    Raises:
        ValueError: if ``db`` is empty, since no frequency can be computed.
    """
    n_answers = len(db)
    if n_answers == 0:
        raise ValueError('cannot estimate p from an empty database')
    true_fraction = collections.Counter(db)[True] / n_answers
    # Invert  true_fraction = 0.5 * p + 0.25  for p.
    return 2 * (true_fraction - 0.25)

# Report the estimate obtained from the full simulated database.
print('Estimated p:', estimate_p(db=database))


# ## Convergence plot

# In[22]:


# Estimate p on growing prefixes of the database (1, 101, 201, ... answers)
# to see how the estimate settles as more answers become available.
sample_sizes = list(range(1, len(database), 100))
estimations = [estimate_p(database[:n]) for n in sample_sizes]

fig, ax = plt.subplots()
# Plot against the number of answers actually used, not the list index,
# so the x-axis can be read directly as a sample size.
ax.plot(sample_sizes, estimations, label='Estimated p')
# Reference line at the value used to generate the simulated answers.
ax.axhline(REAL_DISEASE_FREQ, color='gray', linestyle='--', label='Real p')
ax.set_xlabel('Number of answers')
ax.set_ylabel('Estimated p')
ax.set_title('Convergence of the estimate of p')
ax.legend()
plt.show()