from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
# Load the midterm data: one row per score, with the Section it belongs to
# and the Midterm score itself (columns shown in the preview below).
scores = Table.read_table('scores_by_section.csv')
# Bare expression so the notebook displays a preview of the table.
scores
Section | Midterm |
---|---|
1 | 22 |
2 | 12 |
2 | 23 |
2 | 14 |
1 | 20 |
3 | 25 |
4 | 19 |
1 | 24 |
5 | 8 |
6 | 14 |
... (349 rows omitted)
# Count how many rows (scores) fall in each Section.
scores.group('Section')
Section | count |
---|---|
1 | 32 |
2 | 32 |
3 | 27 |
4 | 30 |
5 | 33 |
6 | 32 |
7 | 24 |
8 | 29 |
9 | 30 |
10 | 34 |
... (2 rows omitted)
# Average Midterm score within each Section; .show() prints every row rather
# than the truncated preview.
scores.group('Section', np.average).show()
Section | Midterm average |
---|---|
1 | 15.5938 |
2 | 15.125 |
3 | 13.6667 |
4 | 14.7667 |
5 | 17.4545 |
6 | 15.0312 |
7 | 16.625 |
8 | 16.3103 |
9 | 14.5667 |
10 | 15.2353 |
11 | 15.8077 |
12 | 15.7333 |
# Draw 27 rows at random, without replacement, from all sections combined.
# 27 is the number of rows Section 3 has in the group-count table above, so
# this sample is the same size as that section.
random_sample = scores.sample(27, with_replacement = False)
# Bare expression so the notebook displays a preview of the sample.
random_sample
Section | Midterm |
---|---|
9 | 18 |
1 | 18 |
4 | 19 |
4 | 21 |
4 | 13 |
5 | 15 |
10 | 20 |
5 | 16 |
5 | 11 |
9 | 19 |
... (17 rows omitted)
# Mean Midterm score of the random sample drawn above.
np.average(random_sample.column('Midterm'))
16.185185185185187
# Draw a fresh random sample of 27 rows and average it again; because the
# sample is random, the result changes from one run to the next.
random_sample = scores.sample(27, with_replacement = False)
np.average(random_sample.column('Midterm'))
13.37037037037037
# Simulate the distribution of the average Midterm score of a random group of
# 27 rows: repeatedly sample 27 rows without replacement from all of `scores`
# and record each sample's mean.
repetitions = 50000

# Preallocate the result array instead of growing it with np.append, which
# copies the whole array on every iteration (quadratic work over the loop).
averages = np.empty(repetitions)
for i in np.arange(repetitions):
    random_sample = scores.sample(27, with_replacement=False)
    new_average = np.average(random_sample.column('Midterm'))
    averages[i] = new_average
# Section 3's average Midterm score, copied (rounded to four decimals) from
# the per-section averages table printed earlier.
observed_average = 13.6667
# Histogram of the simulated random-sample averages.
Table().with_column('Random Sample Average', averages).hist(bins = 25)
# Mark the observed value with a red dot on the horizontal axis (y = 0).
# The trailing semicolon keeps the notebook from echoing the return value.
plots.scatter(observed_average, 0, color = 'red', s=40);
#################
# Proportion of simulated sample averages that are at or below the observed
# average. Divide by the actual number of simulated values rather than a
# hard-coded 50000 so the proportion stays correct if the repetition count
# of the simulation is ever changed.
np.count_nonzero(averages <= observed_average) / len(averages)
0.0594
# Proportion of simulated sample averages at or below 13.60, the cutoff that
# the final plot draws as a gold vertical line. Divide by the actual number
# of simulated values rather than a hard-coded 50000 so the proportion stays
# correct if the repetition count of the simulation is ever changed.
np.count_nonzero(averages <= 13.60) / len(averages)
0.05234
# Redraw the histogram of simulated sample averages.
Table().with_column('Random Sample Average', averages).hist(bins = 25)
# Red dot on the horizontal axis at the observed average.
plots.scatter(observed_average, 0, color='red', s=30)
# Gold vertical line at 13.6, drawn from y = 0 up to y = 0.35; this is the
# same cutoff used in the proportion computed just above.
plots.plot([13.6, 13.6], [0, 0.35], color='gold', lw=2);