from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
# Load the ball measurements and display the raw table.
football = Table.read_table('deflategate.csv')
football.show()

# Replace the 'Blakeman' and 'Prioleau' columns with their row-wise average.
combined = (football.column('Blakeman') + football.column('Prioleau')) / 2
football = football.drop('Blakeman', 'Prioleau').with_column(
    'Combined', combined)
football.show()

# Demonstration of np.ones, which is used below to build the starting values.
np.ones(5)

# Starting pressure per row: 12.5 for the first 11 rows, 13 for the last 4.
# NOTE(review): assumes the CSV holds exactly 15 rows in that order -- confirm
# against deflategate.csv.
initial_pressure = np.append(12.5 * np.ones(11), 13 * np.ones(4))
initial_pressure

# Drop = starting pressure minus the averaged measurement.
# The column is looked up by its label instead of by position so the result
# does not depend on how many columns the CSV happens to contain.
drop_values = initial_pressure - football.column('Combined')
football = football.drop('Combined').with_column('Drop', drop_values)
football.show()

# Average of every non-'Team' column for each team.
means = football.group('Team', np.average)
means

# Observed statistic: first group's average minus the second group's average,
# read from the column at index 1 of the grouped table.
observed_difference = means.column(1).item(0) - means.column(1).item(1)
observed_difference
def diff_between_means(tbl):
    """Return the difference between the first two per-team averages.

    `tbl` is grouped by its 'Team' column with `np.average` as the
    aggregator; the column at index 1 of the grouped table supplies the
    averages. The result is the first group's value minus the second
    group's value, in the order `group` returns them.
    """
    team_averages = tbl.group('Team', np.average).column(1)
    first_avg = team_averages.item(0)
    second_avg = team_averages.item(1)
    return first_avg - second_avg
# --- One shuffle, step by step ---------------------------------------------
# Sampling every row without replacement yields a random permutation of the
# 'Drop' values.
drops = football.select('Drop')
shuffled_drops = drops.sample(with_replacement=False).column(0)
shuffled_drops

# Attach the permuted drops to the original rows and compute the statistic.
simulated_football = football.with_column('Drop', shuffled_drops)
simulated_football.show(3)
diff_between_means(simulated_football)

# --- Repeat the shuffle 5000 times -----------------------------------------
# Results are gathered in a list and converted to an array once at the end:
# np.append returns a fresh copy on every call, which makes appending inside
# the loop quadratic in the number of repetitions.
simulated_differences = []
for i in np.arange(5000):
    shuffled_drops = drops.sample(with_replacement=False).column(0)
    simulated_football = football.with_column('Drop', shuffled_drops)
    new_diff = diff_between_means(simulated_football)
    simulated_differences.append(new_diff)
differences = np.array(simulated_differences)

# Histogram of the simulated statistics, with the observed value marked in red
# on the horizontal axis.
Table().with_column('Difference Between Means', differences).hist()
plots.scatter(observed_difference, 0, color='red', s=40);

# Proportion of simulated differences that are at most the observed one.
np.average(differences <= observed_difference)
# Botulinum Toxin A (bta) as a treatment for chronic back pain
# 15 in the treatment group
# 16 in the control group (normal saline)
# trials were run double-blind (neither doctors nor patients knew which group each patient was in)
# Result = 1 indicates pain relief
# Load the trial data and display it.
bta = Table.read_table('bta.csv')
bta.show()

# Per-group totals and per-group averages of the remaining columns.
bta.group('Group', sum)
bta.group('Group', np.average)

observed_outcomes = Table.read_table('observed_outcomes.csv')
observed_outcomes.show()

# Per-group averages as a plain array (column at index 1 of the grouped table).
group_proportions = bta.group('Group', np.average).column(1)
group_proportions

# Absolute distance between the two group values, derived from the data
# rather than typed in by hand, so it stays correct if the CSV changes.
abs(group_proportions.item(1) - group_proportions.item(0))
def distance_between_group_proportions(tbl):
    """Return the absolute gap between the first two per-group averages.

    `tbl` is grouped by its 'Group' column with `np.average` as the
    aggregator; the column at index 1 of the grouped table supplies the
    values. The result is |second group's value - first group's value|.
    """
    group_values = tbl.group('Group', np.average).column(1)
    gap = group_values.item(1) - group_values.item(0)
    return abs(gap)
# Observed statistic on the real data.
observed_distance = distance_between_group_proportions(bta)
observed_distance

# Keep the group labels fixed; only the results get shuffled.
labels = bta.select('Group')
results = bta.select('Result')

# Repeat the shuffle 2000 times.
# Results are gathered in a list and converted to an array once at the end:
# np.append returns a fresh copy on every call, which makes appending inside
# the loop quadratic in the number of repetitions.
simulated_distances = []
for i in np.arange(2000):
    # Sampling every row without replacement yields a random permutation.
    shuffled_results = results.sample(with_replacement=False).column(0)
    simulated = labels.with_column('Shuffled results', shuffled_results)
    distance = distance_between_group_proportions(simulated)
    simulated_distances.append(distance)
distances = np.array(simulated_distances)
distances

# Histogram of the simulated statistics, with the observed value marked in red
# on the horizontal axis.
Table().with_column('Distance', distances).hist(bins=np.arange(0, 1, 0.1))
plots.scatter(observed_distance, 0, color='red', s=40);

# Proportion of simulated distances that are at least the observed one.
np.average(distances >= observed_distance)