from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
# Load the midterm data; the table has a 'Section' column and a 'Midterm'
# (score) column, one row per score.
scores = Table.read_table('scores_by_section.csv')
scores
Section | Midterm |
---|---|
1 | 22 |
2 | 12 |
2 | 23 |
2 | 14 |
1 | 20 |
3 | 25 |
4 | 19 |
1 | 24 |
5 | 8 |
6 | 14 |
... (349 rows omitted)
# Count how many rows (scores) belong to each section.
scores.group('Section')
Section | count |
---|---|
1 | 32 |
2 | 32 |
3 | 27 |
4 | 30 |
5 | 33 |
6 | 32 |
7 | 24 |
8 | 29 |
9 | 30 |
10 | 34 |
... (2 rows omitted)
# Average midterm score within each section; .show() displays every row
# instead of truncating the output.
scores.group('Section', np.average).show()
Section | Midterm average |
---|---|
1 | 15.5938 |
2 | 15.125 |
3 | 13.6667 |
4 | 14.7667 |
5 | 17.4545 |
6 | 15.0312 |
7 | 16.625 |
8 | 16.3103 |
9 | 14.5667 |
10 | 15.2353 |
11 | 15.8077 |
12 | 15.7333 |
# The observed test statistic is the average midterm score of Section 3.
# Compute it from the data rather than hard-coding the rounded display value
# (13.6667), so no precision is lost when it is compared with the simulated
# averages.
observed_average = np.average(scores.where('Section', 3).column('Midterm'))

# Draw one random sample of 27 rows (the number of rows in Section 3) from the
# whole class, without replacement.
random_sample = scores.sample(27, with_replacement=False)
random_sample
Section | Midterm |
---|---|
4 | 14 |
8 | 15 |
1 | 22 |
9 | 17 |
8 | 7 |
6 | 16 |
9 | 4 |
9 | 21 |
3 | 16 |
5 | 13 |
... (17 rows omitted)
# Average midterm score of the random sample drawn above.
np.average(random_sample.column('Midterm'))
14.814814814814815
# Simulate one value of the test statistic
# under the hypothesis that the section is like a random sample from the class
def random_sample_midterm_avg(sample_size=27):
    """Return the average midterm score of one random sample of the class.

    Draws ``sample_size`` rows from the module-level ``scores`` table without
    replacement and averages their 'Midterm' column.

    Args:
        sample_size: Number of rows to draw. Defaults to 27, the sample size
            used throughout this notebook.

    Returns:
        The mean of the sampled 'Midterm' values.
    """
    sample = scores.sample(sample_size, with_replacement=False)
    return np.average(sample.column('Midterm'))
# Simulate 50,000 copies of the test statistic.
# The values are collected in a list and converted to an array once at the
# end: calling np.append inside the loop would copy the whole array on every
# iteration, which is quadratic work for 50,000 repetitions.
sample_averages = np.array(
    [random_sample_midterm_avg() for _ in range(50000)]
)
# Compare the simulated distribution of the statistic
# and the actual observed statistic
averages_tbl = Table().with_column('Random Sample Average', sample_averages)
averages_tbl.hist(bins = 20)
# Mark the observed average with a red dot on the horizontal axis (y = 0).
plots.scatter(observed_average, 0, color = 'red', s=40);
# Proportion of simulated averages that are at most the observed average.
# np.count_nonzero counts the True entries in one vectorized pass, and
# dividing by the actual number of simulated values keeps the proportion
# correct if the number of repetitions ever changes.
np.count_nonzero(sample_averages <= observed_average) / len(sample_averages)
0.05682
# Find the value below which about 5% of the simulated averages fall.
# The index is derived from the number of simulated values (5% is 1/20 of the
# rows, i.e. 2500 for 50,000 simulations) instead of being hard-coded.
five_percent_index = averages_tbl.num_rows // 20
five_percent_point = averages_tbl.sort(0).column(0).item(five_percent_index)
five_percent_point
13.592592592592593
# Redraw the histogram of simulated averages and mark the 5% point on it.
averages_tbl.hist(bins = 20)
# Vertical gold segment at x = five_percent_point, from y = 0 to y = 0.35.
plots.plot([five_percent_point, five_percent_point], [0, 0.35], color='gold', lw=2)
plots.title('Area to the left of the gold line: 5%');