Purpose of this notebook: to check, by simulation, whether the statistical analysis in the paper "We look like our names" holds up.
class HypothesisTest(object):
    """Base class for simulation-based hypothesis tests.

    Subclasses supply ``TestStatistic`` and ``RunModel`` (and may override
    ``MakeModel``).  ``PValue`` compares simulated test statistics against
    ``self.actual``; this class does not set that attribute, so it must be
    assigned on the instance before ``PValue`` is called.
    """

    def __init__(self, prob):
        """Store ``prob`` on the instance and call ``MakeModel``.

        prob: value kept as ``self.prob`` for subclasses to use.
        """
        self.prob = prob
        self.MakeModel()
        # NOTE(review): the observed statistic is intentionally not computed
        # here (there is no observed data argument); set ``self.actual``
        # explicitly before calling PValue.

    def PValue(self, iters=1000):
        """Return the fraction of simulated statistics that are >= ``self.actual``.

        iters: number of simulated runs; must be at least 1.

        The simulated statistics are kept in ``self.test_stats``.

        Raises:
            ValueError: if ``iters`` is smaller than 1.
            AttributeError: if ``self.actual`` has not been set.
        """
        if iters < 1:
            raise ValueError("iters must be at least 1")
        if not hasattr(self, 'actual'):
            # Fail before running the simulations, with a message that says
            # what is missing.
            raise AttributeError(
                "set 'actual' (the observed test statistic) before calling PValue"
            )
        self.test_stats = [
            self.TestStatistic(self.RunModel()) for _ in range(iters)
        ]
        count = sum(1 for x in self.test_stats if x >= self.actual)
        return count / iters

    def TestStatistic(self, data):
        """Compute the test statistic for ``data``; subclasses must override."""
        raise NotImplementedError("subclasses must implement TestStatistic")

    def MakeModel(self):
        """Hook called from ``__init__``; does nothing by default."""
        pass

    def RunModel(self):
        """Return one simulated data set; subclasses must override."""
        raise NotImplementedError("subclasses must implement RunModel")
from collections import Counter
import random
class FaceNameTest1A(HypothesisTest):
    """Simulates a single participant answering 20 trials by guessing.

    Each trial is a hit with probability ``self.prob`` (the value passed to
    the constructor).
    """

    def TestStatistic(self, data):
        """Return the proportion of hits in ``data``.

        data: a ``(hits, misses)`` pair of counts, as returned by ``RunModel``.

        NOTE(review): the previous version took no ``data`` argument and
        returned an undefined name, so the intended statistic is not known
        for certain; the proportion of hits matches the "proportion score"
        used by the other simulations in this notebook -- confirm.
        """
        hits, misses = data
        total = hits + misses
        if total == 0:
            return 0.0
        return hits / total

    def RunModel(self):
        """Simulate one participant and return a ``(hits, misses)`` pair."""
        n = 20
        # A trial is a hit when a uniform draw falls below the configured
        # success probability.
        hits = sum(1 for _ in range(n) if random.random() < self.prob)
        return hits, n - hits
# Build the test with a success probability of 0.2 and draw one simulated
# sample. RunModel returns a pair of counts; the draw is random, so the pair
# echoed below is just one example.
test1A = FaceNameTest1A(0.2)
test1A.RunModel()
(2, 18)
Important data:
Let's write a function for the outcome of an individual experiment.
def one_person_experiment(n_trials=20, n_choices=5):
    """Simulate one participant guessing at random; return the proportion score.

    Each trial picks uniformly among ``n_choices`` options, exactly one of
    which counts as correct.

    n_trials: number of trials for the participant (default 20).
    n_choices: number of options per trial (default 5, i.e. a 1-in-5 chance).

    Returns the number of correct trials divided by ``n_trials``.

    Raises:
        ValueError: if ``n_trials`` or ``n_choices`` is smaller than 1.
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")
    if n_choices < 1:
        raise ValueError("n_choices must be at least 1")
    # One 'T' (correct) among n_choices equally likely options.
    options = 'T' + 'F' * (n_choices - 1)
    sample = [random.choice(options) for _ in range(n_trials)]
    trues = Counter(sample)['T']
    return trues / n_trials
# Run one simulated participant as a quick check; the result is random, so
# the value echoed below is a single draw.
one_person_experiment()
0.35
Now, we can define a group experiment.
def group_experiment(n_people=121):
    """Simulate a group of guessing participants; return the mean proportion score.

    n_people: number of simulated participants (default 121).

    Returns the average of ``one_person_experiment()`` over ``n_people`` runs.

    Raises:
        ValueError: if ``n_people`` is smaller than 1.
    """
    if n_people < 1:
        raise ValueError("n_people must be at least 1")
    samples = [one_person_experiment() for _ in range(n_people)]
    return sum(samples) / len(samples)
# Run one simulated group as a quick check; the mean echoed below is a
# single random draw.
group_experiment()
0.1975206611570246
Let's now do this 10000 times and plot a histogram of the outcomes, followed by a cumulative histogram.
# Repeat the group experiment 10000 times to collect the simulated
# distribution of the group mean under random guessing.
outcomes = [group_experiment() for _ in range(10000)]
import matplotlib.pyplot as plt
%matplotlib inline
# Select the 'bmh' style sheet for the plots that follow.
plt.style.use('bmh')
# Histogram of the simulated group means in 50 bins. The trailing semicolon
# is presumably there to keep the notebook from echoing hist's return value.
plt.hist(outcomes, bins=50);
plt.xlabel('mean proportion')
<matplotlib.text.Text at 0x833e588>
# Cumulative, normalised histogram of the simulated group means, restricted
# to the interval [0.1, 0.3].
# `density=True` replaces the former `normed=True` keyword, which has been
# removed from Matplotlib's hist (requires Matplotlib >= 2.1).
plt.hist(outcomes, bins=50, range=(0.1, 0.3), cumulative=True, density=True, label='sampled distribution\nof test statistic');
plt.xlabel('mean proportion')
# Vertical line at 0.28, labelled 'measured', for comparison with the
# simulated values.
plt.vlines(0.28, 0, 1, label='measured')
plt.legend()
<matplotlib.legend.Legend at 0x9937cc0>
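Ruling out popularity: we repeat the simulation with a 1-in-4 chance of a correct guess, 25 trials per person, and 64 people.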
def one_person_experiment(n_trials=25, n_choices=4):
    """Simulate one participant guessing at random; return the proportion score.

    Each trial picks uniformly among ``n_choices`` options, exactly one of
    which counts as correct.

    n_trials: number of trials for the participant (default 25).
    n_choices: number of options per trial (default 4, i.e. a 1-in-4 chance).

    Returns the number of correct trials divided by ``n_trials``.

    Raises:
        ValueError: if ``n_trials`` or ``n_choices`` is smaller than 1.
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")
    if n_choices < 1:
        raise ValueError("n_choices must be at least 1")
    # One 'T' (correct) among n_choices equally likely options.
    options = 'T' + 'F' * (n_choices - 1)
    sample = [random.choice(options) for _ in range(n_trials)]
    trues = Counter(sample)['T']
    return trues / n_trials
# Run one simulated participant with the redefined experiment; the value
# echoed below is a single random draw.
one_person_experiment()
0.28
def group_experiment(n_people=64):
    """Simulate a group of guessing participants; return the mean proportion score.

    n_people: number of simulated participants (default 64).

    Returns the average of ``one_person_experiment()`` over ``n_people`` runs.

    Raises:
        ValueError: if ``n_people`` is smaller than 1.
    """
    if n_people < 1:
        raise ValueError("n_people must be at least 1")
    samples = [one_person_experiment() for _ in range(n_people)]
    return sum(samples) / len(samples)
# Run one simulated group with the redefined experiment; the mean echoed
# below is a single random draw.
group_experiment()
0.25437499999999996
# Repeat the redefined group experiment 10000 times to collect the simulated
# distribution of the group mean under random guessing.
outcomes = [group_experiment() for _ in range(10000)]
# Cumulative, normalised histogram of the simulated group means, restricted
# to the interval [0.1, 0.3].
# `density=True` replaces the former `normed=True` keyword, which has been
# removed from Matplotlib's hist (requires Matplotlib >= 2.1).
plt.hist(outcomes, bins=50, range=(0.1, 0.3), cumulative=True, density=True, label='sampled distribution\nof test statistic');
plt.xlabel('mean proportion')
# Vertical line at 0.2991, labelled 'measured', for comparison with the
# simulated values.
plt.vlines(0.2991, 0, 1, label='measured')
plt.legend()
<matplotlib.legend.Legend at 0x9508d68>