%matplotlib inline import numpy as np import matplotlib.pyplot as plt import scipy as sp import scipy.stats as stats import random random.seed(12345) np.random.seed(5345436) def two_samples(difference, N=6500, delta_variance=0.): As = np.random.normal(6., size=N) Bs = np.random.normal(6. + difference, scale=1+delta_variance, size=N) return As, Bs def one_sided_ttest(A, B, equal_var=True): t,p = stats.ttest_ind(A, B, equal_var=equal_var) # the t-test implemented in scipy is two sided, but we are interested # in the one sided p-value, hence this if statement and the divide by two. if t < 0: p /= 2. else: p = 1- p/2. return p # There is no difference between the two samples As, Bs = two_samples(0., N=100) p_vals = [] for n in xrange(len(As)-1): n += 2 p_vals.append(one_sided_ttest(As[:n], Bs[:n])) a = plt.axes() a.plot(np.arange(len(As)-1)+2, p_vals) a.set_ylabel("Observed p-value") a.set_xlabel("Number of observations") a.set_ylim([0., 1.]) a.hlines(0.05, 0, 100) def repeat_experiment(repeats=10000, diff=0.): p_values = [] for i in xrange(repeats): A,B = two_samples(diff, N=100) p = one_sided_ttest(A,B) p_values.append(p) plt.hist(p_values, range=(0,1.), bins=20) plt.axvspan(0., 0.1, facecolor="red", alpha=0.5) plt.xlabel("Observed p-value") plt.ylabel("Count") repeat_experiment() def repeat_early_stop_experiment(repeats=1000, diff=0.): p_values = [] for i in xrange(repeats): A,B = two_samples(diff, N=100) for n in xrange(len(A)-1): n += 2 p = one_sided_ttest(A[:n], B[:n]) if p < 0.05: break p_values.append(p) plt.hist(p_values, range=(0,1.), bins=20) plt.axvspan(0., 0.05, facecolor="red", alpha=0.5) plt.xlabel("Observed p-value") plt.ylabel("Count") repeat_early_stop_experiment() def keep_or_not(improvement, threshold=0.05, N=100, repeats=1000, early_stop=False): keep = 0 for i in xrange(repeats): A,B = two_samples(improvement, N=N) if early_stop: for n in xrange(len(A)-1): n += 2 p = one_sided_ttest(A[:n], B[:n]) if p < 0.05: break else: p = one_sided_ttest(A, B) if p <= threshold: keep += 1 return float(keep)/repeats def power_plot(improvements, normal_keeps, early_keeps): plt.plot(improvements, normal_keeps, "bo", label="normal") plt.plot(improvements, early_keeps, "r^", label="early") plt.legend(loc="best") plt.ylim((0, 100)) plt.xlim((0, improvements[-1]*1.1)) plt.grid() plt.xlabel("Size of the improvement") plt.ylabel("% decided to change") plt.axhline(5) improvements = np.linspace(1., 40, 9) keeps = [] early_keeps = [] for improvement in improvements: keeps.append(keep_or_not(improvement/100.)*100) early_keeps.append(keep_or_not(improvement/100., early_stop=True)*100) power_plot(improvements, keeps, early_keeps) def false_positives(repeats=1000, early_stop=False, threshold=0.05): switches = 0 for i in xrange(repeats): A,B = two_samples(0., N=100) if early_stop: for n in xrange(len(A)-1): n += 2 p = one_sided_ttest(A[:n], B[:n]) if p < threshold: break else: p = one_sided_ttest(A, B) if p < threshold: switches += 1 return float(switches)/repeats print "Normal stopping strategy:", false_positives() print "Early stopping strategy:", false_positives(early_stop=True) thresholds = (0.0025, 0.005, 0.01, 0.02, 0.03) fp_rates = [false_positives(threshold=p, early_stop=True) for p in thresholds] plt.plot(thresholds, fp_rates, "bo") plt.xlabel("p-value threshold") plt.ylabel("False positive rate") plt.grid() improvements = np.linspace(1., 40, 9) keeps = [] early_keeps = [] for improvement in improvements: keeps.append(keep_or_not(improvement/100.)*100) early_keeps.append(keep_or_not(improvement/100., early_stop=True, threshold=0.005)*100) power_plot(improvements, keeps, early_keeps)