%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as stats
import random

random.seed(12345)
np.random.seed(5345436)

def two_samples(difference, N=6500, delta_variance=0.):
    As = np.random.normal(6., size=N)
    Bs = np.random.normal(6. + difference, scale=1+delta_variance, size=N)
    return As, Bs

def one_sided_ttest(A, B, equal_var=True):
    t,p = stats.ttest_ind(A, B, equal_var=equal_var)
    # the t-test implemented in scipy is two sided, but we are interested
    # in the one sided p-value, hence this if statement and the divide by two.
    if t < 0:
        p /= 2.
    else:
        p = 1- p/2.

    return p

# There is no difference between the two samples
As, Bs = two_samples(0., N=100)
p_vals = []
for n in xrange(len(As)-1):
    n += 2
    p_vals.append(one_sided_ttest(As[:n], Bs[:n]))

a = plt.axes()
a.plot(np.arange(len(As)-1)+2, p_vals)
a.set_ylabel("Observed p-value")
a.set_xlabel("Number of observations")
a.set_ylim([0., 1.])
a.hlines(0.05, 0, 100)

def repeat_experiment(repeats=10000, diff=0.):
    p_values = []
    for i in xrange(repeats):
        A,B = two_samples(diff, N=100)
        p = one_sided_ttest(A,B)
        p_values.append(p)
    
    plt.hist(p_values, range=(0,1.), bins=20)
    plt.axvspan(0., 0.1, facecolor="red", alpha=0.5)
    plt.xlabel("Observed p-value")
    plt.ylabel("Count")
    
repeat_experiment()

def repeat_early_stop_experiment(repeats=1000, diff=0.):
    p_values = []
    for i in xrange(repeats):
        A,B = two_samples(diff, N=100)

        for n in xrange(len(A)-1):
            n += 2
            p = one_sided_ttest(A[:n], B[:n])
            if p < 0.05:
                break
            
        p_values.append(p)
    
    plt.hist(p_values, range=(0,1.), bins=20)
    plt.axvspan(0., 0.05, facecolor="red", alpha=0.5)
    plt.xlabel("Observed p-value")
    plt.ylabel("Count")
    
repeat_early_stop_experiment()

def keep_or_not(improvement, threshold=0.05, N=100, repeats=1000, early_stop=False):
    keep = 0
    for i in xrange(repeats):
        A,B = two_samples(improvement, N=N)

        if early_stop:
            for n in xrange(len(A)-1):
                n += 2
                p = one_sided_ttest(A[:n], B[:n])
                if p < 0.05:
                    break

        else:
            p = one_sided_ttest(A, B)
            
        if p <= threshold:
            keep += 1
            
    return float(keep)/repeats

def power_plot(improvements, normal_keeps, early_keeps):
    plt.plot(improvements, normal_keeps, "bo", label="normal")
    plt.plot(improvements, early_keeps, "r^", label="early")
    
    plt.legend(loc="best")
    plt.ylim((0, 100))
    plt.xlim((0, improvements[-1]*1.1))
    plt.grid()
    plt.xlabel("Size of the improvement")
    plt.ylabel("% decided to change")
    plt.axhline(5)

improvements = np.linspace(1., 40, 9)

keeps = []
early_keeps = []
for improvement in improvements:
    keeps.append(keep_or_not(improvement/100.)*100)
    early_keeps.append(keep_or_not(improvement/100., early_stop=True)*100)

power_plot(improvements, keeps, early_keeps)

def false_positives(repeats=1000, early_stop=False, threshold=0.05):
    switches = 0
    for i in xrange(repeats):
        A,B = two_samples(0., N=100)

        if early_stop:
            for n in xrange(len(A)-1):
                n += 2
                p = one_sided_ttest(A[:n], B[:n])
                if p < threshold:
                    break
                    
        else:
            p = one_sided_ttest(A, B)
        
        if p < threshold:
            switches += 1

    return float(switches)/repeats

print "Normal stopping strategy:", false_positives()
print "Early stopping strategy:", false_positives(early_stop=True)

thresholds = (0.0025, 0.005, 0.01, 0.02, 0.03)
fp_rates = [false_positives(threshold=p, early_stop=True) for p in thresholds]

plt.plot(thresholds, fp_rates, "bo")
plt.xlabel("p-value threshold")
plt.ylabel("False positive rate")
plt.grid()

improvements = np.linspace(1., 40, 9)

keeps = []
early_keeps = []
for improvement in improvements:
    keeps.append(keep_or_not(improvement/100.)*100)
    early_keeps.append(keep_or_not(improvement/100.,
                                   early_stop=True,
                                   threshold=0.005)*100)

power_plot(improvements, keeps, early_keeps)