# For numerical arrays and random sampling.
import numpy as np
# For handling data.
import pandas as pd
# For plotting.
import matplotlib.pyplot as plt
# For t-tests and ANOVA.
import scipy.stats as stats
# Make the plots bigger.
plt.rcParams['figure.figsize'] = (20.0, 10.0)
# Set the parameters (mean m, standard deviation s) for two normal populations.
popA = {'m': 1.6, 's': 0.1}
popB = {'m': 1.8, 's': 0.1}
# Create two samples, one from each population.
sampA = np.random.normal(popA['m'], popA['s'], 100)
sampB = np.random.normal(popB['m'], popB['s'], 100)
# x values for plotting.
x = np.linspace(1.25, 2.25, 1000)
# The probability density functions (PDFs) for the two populations.
pdfA = stats.norm.pdf(x, popA['m'], popA['s'])
pdfB = stats.norm.pdf(x, popB['m'], popB['s'])
# Plot the population PDFs as shaded regions.
plt.fill_between(x, pdfA, color='g', alpha=0.25, label="Population A")
plt.fill_between(x, pdfB, color='b', alpha=0.25, label="Population B")
# Plot histograms of the two samples.
plt.hist(sampA, density=True, color='g', alpha=0.25, label="Sample A")
plt.hist(sampB, density=True, color='b', alpha=0.25, label="Sample B")
# Display a legend.
plt.legend()
plt.show()
# Calculate the independent samples t-statistic for the two samples.
# We also get the probability (p-value) of seeing samples at least as different as these, given that the population means are equal.
stats.ttest_ind(sampA, sampB)
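For reference, here is a minimal sketch of unpacking the test result into its two parts; the variable names are illustrative only.
# Unpack the t-statistic and the p-value.
t_stat, p_val = stats.ttest_ind(sampA, sampB)
print(f"t-statistic: {t_stat:8.4f}")
print(f"p-value: {p_val:.3g}")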
# Load the iris data set from a URL.
df = pd.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')
# Have a look at the data.
df
# Split the dataframe into the three species.
s = df[df['species'] == 'setosa']
r = df[df['species'] == 'versicolor']
a = df[df['species'] == 'virginica']
# Compare each pair of species on petal length.
print(stats.ttest_ind(s['petal_length'], r['petal_length']))
print(stats.ttest_ind(s['petal_length'], a['petal_length']))
print(stats.ttest_ind(r['petal_length'], a['petal_length']))
# Petal width.
print(stats.ttest_ind(s['petal_width'], r['petal_width']))
print(stats.ttest_ind(s['petal_width'], a['petal_width']))
print(stats.ttest_ind(r['petal_width'], a['petal_width']))
# Sepal length.
print(stats.ttest_ind(s['sepal_length'], r['sepal_length']))
print(stats.ttest_ind(s['sepal_length'], a['sepal_length']))
print(stats.ttest_ind(r['sepal_length'], a['sepal_length']))
# Sepal width.
print(stats.ttest_ind(s['sepal_width'], r['sepal_width']))
print(stats.ttest_ind(s['sepal_width'], a['sepal_width']))
print(stats.ttest_ind(r['sepal_width'], a['sepal_width']))
Some links about the main problem we encounter when running multiple t-tests.
Website: Multiple t tests and Type I error
http://grants.hhp.coe.uh.edu/doconnor/PEP6305/Multiple%20t%20tests.htm
A webpage about multiple t-tests and Type I errors.
Wikipedia: Multiple comparisons problem
https://en.wikipedia.org/wiki/Multiple_comparisons_problem
The Wikipedia page about the multiple comparisons problem.
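One simple, if conservative, remedy is the Bonferroni correction: divide the significance level by the number of tests performed. Below is a minimal sketch, assuming we re-run the twelve tests above and collect their p-values; the variable names are illustrative only.
# Collect the p-values from all twelve pairwise tests.
pairs = [(s, r), (s, a), (r, a)]
cols = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']
pvals = [stats.ttest_ind(x[c], y[c])[1] for c in cols for x, y in pairs]
# Bonferroni: compare each p-value to (0.05 / 12) instead of 0.05.
alpha = 0.05 / len(pvals)
print(sum(p <= alpha for p in pvals), 'of', len(pvals), 'tests significant at the corrected level')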
# Plot histograms of the versicolor and virginica sepal length samples.
plt.hist(r['sepal_length'], label='Versicolor Sepal Length')
plt.hist(a['sepal_length'], label='Virginica Sepal Length')
plt.legend()
plt.show()
# The probability of at least one Type I error across twelve tests at the 0.05 level.
1 - ((0.95)**12)
# One-way ANOVA compares all three petal length means in a single test.
stats.f_oneway(s['petal_length'], r['petal_length'], a['petal_length'])
# Plot histograms of the three petal length samples.
plt.hist(s['petal_length'], label='Setosa Petal Length')
plt.hist(r['petal_length'], label='Versicolor Petal Length')
plt.hist(a['petal_length'], label='Virginica Petal Length')
plt.legend()
plt.show()
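To connect the ANOVA result to the underlying calculation, here is a minimal sketch of the one-way F-statistic computed by hand for the three petal length samples; it should agree with the statistic returned by stats.f_oneway above. The variable names are illustrative only.
# The three samples being compared.
groups = [s['petal_length'], r['petal_length'], a['petal_length']]
# The grand mean over all observations.
allvals = np.concatenate(groups)
grand = allvals.mean()
# Between-group sum of squares, with (number of groups - 1) degrees of freedom.
ssb = sum(len(g) * (g.mean() - grand)**2 for g in groups)
dfb = len(groups) - 1
# Within-group sum of squares, with (observations - groups) degrees of freedom.
ssw = sum(((g - g.mean())**2).sum() for g in groups)
dfw = len(allvals) - len(groups)
# F is the ratio of the between-group to within-group mean squares.
(ssb / dfb) / (ssw / dfw)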
When performing an independent samples t-test we assume that there is a given difference between the means of the two populations, usually a difference of zero.
We then look at the samples to investigate how different they are, calculating their t-statistic.
We then ask: given the hypothesised difference (usually zero), what was the probability of seeing a t-statistic at least this extreme?
If it is too extreme (say, less than a 5% chance of seeing it) then we say our hypothesis about the difference must be wrong.
Of course, we might, by random chance, see a t-statistic that extreme.
We might reject the hypothesis incorrectly - the populations might have the hypothesised difference and the samples just randomly happened to be as different as they are.
We call that a Type I error.
We might also fail to reject the hypothesis when it is actually false - that is a Type II error.
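As a rough illustration of a Type II error, here is a minimal simulation sketch: the two populations below genuinely differ, so every failure to reject is a miss. The means, sample size and number of trials are illustrative choices, not values taken from the analysis above.
# Two populations whose means really do differ.
trials = 1000
fails = 0
for i in range(trials):
    x1 = np.random.normal(10.0, 1.0, 30)
    x2 = np.random.normal(10.3, 1.0, 30)
    # A p-value above 0.05 means we fail to reject a false null hypothesis.
    if stats.ttest_ind(x1, x2)[1] > 0.05:
        fails = fails + 1
print(f"Estimated Type II error rate: {fails / trials:.3f}")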
From the Wikipedia pages for Student's t-test and Variance.
Note that we are using the calculations for two samples with equal variances and possibly different sample sizes.
$$ t = \frac{\bar{X}_1 - \bar{X}_2}{s_p \cdot \sqrt{\frac{1}{n_1} + \frac{1}{n_2}}} $$

$$ s_p = \sqrt{\frac{\left(n_1 - 1\right) s_{X_1}^2 + \left(n_2 - 1\right) s_{X_2}^2}{n_1 + n_2 - 2}} $$

$$ s^2 = \frac{1}{n - 1} \sum_{i=1}^{n} \left(Y_i - \bar{Y}\right)^2 $$

# Count the samples.
nA = float(len(sampA))
nB = float(len(sampB))
# Calculate the means.
mA = sampA.sum() / nA
mB = sampB.sum() / nB
# Sample variances.
varA = ((sampA - mA)**2).sum() / (nA - 1.0)
varB = ((sampB - mB)**2).sum() / (nB - 1.0)
# Pooled standard deviation.
sp = np.sqrt(((nA - 1.0) * varA + (nB - 1.0) * varB) / (nA + nB - 2.0))
# t-statistic
t = (mA - mB) / (sp * np.sqrt((1.0 / nA) + (1.0 / nB)))
print(f"Mean of sample A: {mA:8.4f}")
print(f"Mean of sample B: {mB:8.4f}")
print(f"Size of sample A: {nA:8.4f}")
print(f"Size of sample B: {nB:8.4f}")
print(f"Variance of sample A: {varA:8.4f}")
print(f"Variance of sample B: {varB:8.4f}")
print(f"Pooled std dev: {sp:8.4f}")
print(f"t-statistic: {t:8.4f}")
For a two-tail test (e.g. $H_0$: the means are equal) we reject the null hypothesis $H_0$ if the value of the t-statistic from the samples is further away from zero than the t-statistic at the ($0.05 / 2.0 =$) $0.025$ level; the comparison itself is sketched after the plot below.
# x values for plotting.
x = np.linspace(-5.0, 5.0, 1000)
# The probability density functions (PDFs) for the t distribution.
# The number of degrees of freedom is (nA + nB - 2).
pdf = stats.t.pdf(x, (nA + nB - 2.0))
# Create a dataframe from x and pdf (a new name, so we don't overwrite the iris dataframe).
tdf = pd.DataFrame({'x': x, 'y': pdf})
# Plot the overall distribution.
plt.fill_between(tdf['x'], tdf['y'], color='g', alpha=0.25)
# Shade the critical regions: values more extreme than the critical t value at the 0.025 level in each tail.
crit = np.abs(stats.t.ppf(0.975, nA + nB - 2.0))
tail1 = tdf[tdf['x'] >= crit]
tail2 = tdf[tdf['x'] <= -crit]
plt.fill_between(tail1['x'], tail1['y'], color='b', alpha=0.25)
plt.fill_between(tail2['x'], tail2['y'], color='b', alpha=0.25)
print(crit)
plt.show()
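With the t-statistic and the critical value both in hand, the decision itself is a single comparison. A short sketch using the values computed above; the two-tailed p-value is an equivalent way to express the same test.
# Reject the null hypothesis if |t| is further from zero than the critical value.
if np.abs(t) > crit:
    print("Reject the null hypothesis.")
else:
    print("Fail to reject the null hypothesis.")
# Equivalently, the two-tailed p-value for our t-statistic.
print(2.0 * stats.t.sf(np.abs(t), nA + nB - 2.0))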
At a level of 0.05, there's a one in twenty chance that we incorrectly reject the null hypothesis when it is actually true.
# Simulate many t-tests where the null hypothesis is actually true.
# Population mean and standard deviation.
m = 10.0
s = 1.0
# Number of trials and significance level.
t = 1000
a = 0.05
# Count the trials that (incorrectly) reject the null hypothesis.
# We expect roughly a * t = 50 such Type I errors.
sum([1 if stats.ttest_ind(np.random.normal(m, s, 100), np.random.normal(m, s, 100))[1] <= a else 0 for i in range(t)])