from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
# Use the 'fivethirtyeight' matplotlib style for every plot drawn below.
plots.style.use('fivethirtyeight')

# Load the data set from CSV; the code below relies on its 'Section' and
# 'Midterm' columns.
scores = Table.read_table('scores_by_section.csv')
scores

# Number of rows in each section.
scores.group('Section')
# Per-section average of the remaining columns; .show() prints every row
# instead of a truncated preview.
scores.group('Section', np.average).show()

# Draw 27 rows at random without replacement and look at their average
# 'Midterm' score.
# NOTE(review): 27 is presumably the size of the section under study — confirm.
random_sample = scores.sample(27, with_replacement = False)
random_sample
np.average(random_sample.column('Midterm'))

# Repeat the draw: a fresh random sample generally gives a different average.
random_sample = scores.sample(27, with_replacement = False)
np.average(random_sample.column('Midterm'))
# Simulate the distribution of the average 'Midterm' score of a randomly chosen
# group of rows: repeatedly draw `sample_size` rows without replacement and
# record the mean of each draw in `averages`.
num_simulations = 50000
# NOTE(review): 27 is presumably the size of the section under study — confirm.
sample_size = 27

# Pre-allocate the result array. Growing it with np.append inside the loop
# copies the whole array on every iteration, which is quadratic overall.
averages = np.empty(num_simulations)
for i in np.arange(num_simulations):
    random_sample = scores.sample(sample_size, with_replacement=False)
    new_average = np.average(random_sample.column('Midterm'))
    averages[i] = new_average
# Average actually observed for the group under study (hard-coded here).
observed_average = 13.6667
# Second threshold compared against the simulated averages below.
# NOTE(review): presumably an approximate significance cutoff — confirm.
comparison_cutoff = 13.6

# Histogram of the simulated averages, with the observed average marked as a
# red dot on the horizontal axis.
Table().with_column('Random Sample Average', averages).hist(bins=25)
plots.scatter(observed_average, 0, color='red', s=40);
#################
# Left-tail proportions: the fraction of simulated averages at or below each
# threshold. Dividing by len(averages) rather than a literal 50000 keeps the
# proportion correct if the number of simulations is ever changed.
np.count_nonzero(averages <= observed_average) / len(averages)
np.count_nonzero(averages <= comparison_cutoff) / len(averages)

# Same histogram again, now also showing the comparison cutoff as a vertical
# gold line next to the observed average.
Table().with_column('Random Sample Average', averages).hist(bins=25)
plots.scatter(observed_average, 0, color='red', s=30)
plots.plot([comparison_cutoff, comparison_cutoff], [0, 0.35], color='gold', lw=2);