In [ ]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

Lecture 20

Deflategate

In [ ]:
# Read deflategate.csv into a Table and display it.
football = Table.read_table('deflategate.csv')
football.show()
In [ ]:
# Replace the 'Blakeman' and 'Prioleau' columns with their row-wise average.
# NOTE: this cell rebinds `football`, so running it a second time fails --
# the two source columns have already been dropped by then.
combined = (football.column('Blakeman') + football.column('Prioleau')) / 2
football = (
    football
    .drop('Blakeman', 'Prioleau')
    .with_column('Combined', combined)
)
football.show()
In [ ]:
# Demo: np.ones(n) builds an array of n ones (used below to build constant arrays).
np.ones(5)
In [ ]:
 
In [ ]:
# Assumed starting pressure for each of the 15 rows: 12.5 for the first 11
# rows and 13 for the last 4.
# NOTE(review): this relies on the row order of `football` -- confirm it.
initial_pressure = np.repeat([12.5, 13.0], [11, 4])
initial_pressure
In [ ]:
# Pressure drop per row = assumed starting pressure minus the 'Combined' value.
# The column is selected by label rather than by position: once `football` is
# rebound with 'Drop' as its second column, a positional lookup would silently
# subtract the wrong column if this cell were re-run, whereas the label lookup
# raises an error instead.
drop_values = initial_pressure - football.column('Combined')
In [ ]:
# Swap the 'Combined' column for the computed 'Drop' values.
# NOTE: rebinds `football`; re-running fails because 'Combined' is gone.
football = (
    football
    .drop('Combined')
    .with_column('Drop', drop_values)
)
In [ ]:
# Display the current state of the `football` table.
football.show()
In [ ]:
# Group the rows by 'Team' and average the remaining column(s) within each team.
means = football.group('Team', np.average)
means
In [ ]:
# Observed test statistic: the averaged value (column index 1) in row 0 of
# `means` minus the one in row 1.
observed_difference = means.column(1).item(0) - means.column(1).item(1)
observed_difference
In [ ]:
def diff_between_means(tbl):
    """Return the difference between the first two per-team averages.

    `tbl` is grouped by its 'Team' column with `np.average`; the column at
    index 1 of the grouped table is then read, and the value in its second
    row is subtracted from the value in its first row.
    """
    team_averages = tbl.group('Team', np.average)
    averaged_column = team_averages.column(1)
    first_mean = averaged_column.item(0)
    second_mean = averaged_column.item(1)
    return first_mean - second_mean
In [ ]:
# One-column table holding only the 'Drop' values; this is what gets shuffled below.
drops = football.select('Drop')
In [ ]:
# Sample the rows of `drops` without replacement and keep the resulting
# column as an array -- i.e. the 'Drop' values in a random order.
shuffled_drops = drops.sample(with_replacement = False).column(0)
shuffled_drops
In [ ]:
# Copy of `football` whose 'Drop' column holds the shuffled values;
# `football` itself is not rebound here.
simulated_football = football.with_column('Drop', shuffled_drops)
simulated_football.show(3)
In [ ]:
# Test statistic for this single shuffled table.
diff_between_means(simulated_football)
In [ ]:
# Simulate the test statistic many times by shuffling the 'Drop' values
# across the rows of `football` and recomputing the difference between means.
# The result array is allocated once and filled in place: np.append returns a
# new copy of the whole array on every call, so growing an array with it
# inside a loop does work that is quadratic in the number of repetitions.
repetitions = 5000
differences = np.zeros(repetitions)

for i in np.arange(repetitions):
    shuffled_drops = drops.sample(with_replacement = False).column(0)
    simulated_football = football.with_column('Drop', shuffled_drops)
    new_diff = diff_between_means(simulated_football)
    differences[i] = new_diff
In [ ]:
# Histogram of the simulated differences; the red dot on the horizontal axis
# marks the observed difference.
Table().with_column('Difference Between Means', differences).hist()
plots.scatter(observed_difference, 0, color='red', s=40);
In [ ]:
# Empirical P-value: the fraction of simulated differences that are less than
# or equal to the observed difference.
np.average(differences <= observed_difference)

Analyzing RCTs

In [ ]:
# Botulinum Toxin A (BTA) as a treatment for chronic back pain
# 15 patients in the treatment group
# 16 patients in the control group (normal saline)
# Trials were run double-blind (neither the doctors nor the patients knew
# which group each patient was in)
# Result = 1 indicates pain relief
bta = Table.read_table('bta.csv')
bta.show()
In [ ]:
# Total of each remaining column within each 'Group' value.
bta.group('Group', sum)
In [ ]:
# Average of each remaining column within each 'Group' value; for a column of
# 0s and 1s the average is the proportion of 1s.
bta.group('Group', np.average)
In [ ]:
# Read observed_outcomes.csv into a Table and display it.
observed_outcomes = Table.read_table('observed_outcomes.csv')
observed_outcomes.show()
In [ ]:
# Only the array of per-group averages (column index 1 of the grouped table).
bta.group('Group', np.average).column(1)
In [ ]:
# Absolute difference between two hand-typed proportions.
# NOTE(review): presumably these are the two group averages displayed by the
# previous cell -- confirm against its output.
abs(0.125 - 0.6)
In [ ]:
def distance_between_group_proportions(tbl):
    """Return the absolute difference between the first two group averages.

    `tbl` is grouped by its 'Group' column with `np.average`; the column at
    index 1 of the grouped table is then read, and the absolute difference
    between the values in its first two rows is returned.
    """
    group_averages = tbl.group('Group', np.average).column(1)
    first_group = group_averages.item(0)
    second_group = group_averages.item(1)
    return abs(second_group - first_group)
In [ ]:
# Observed test statistic computed from the original `bta` table.
observed_distance = distance_between_group_proportions(bta)
observed_distance
In [ ]:
# Split `bta` into two one-column tables so that the 'Result' values can be
# shuffled independently of the 'Group' labels.
labels = bta.select('Group')
results = bta.select('Result')
In [ ]:
# Repeat the shuffle many times: randomly reorder the 'Result' values, attach
# them to the unchanged 'Group' labels, and record the distance between the
# group proportions.
# The result array is allocated once and filled in place: np.append returns a
# new copy of the whole array on every call, so growing an array with it
# inside a loop does work that is quadratic in the number of repetitions.
repetitions = 2000
distances = np.zeros(repetitions)
for i in np.arange(repetitions):
    shuffled_results = results.sample(with_replacement=False).column(0)
    simulated = labels.with_column('Shuffled results', shuffled_results)
    distance = distance_between_group_proportions(simulated)
    distances[i] = distance

distances
In [ ]:
# Histogram of the simulated distances (bin edges 0, 0.1, ..., 0.9); the red
# dot on the horizontal axis marks the observed distance.
Table().with_column('Distance', distances).hist(bins = np.arange(0, 1, 0.1))
plots.scatter(observed_distance, 0, color='red', s=40);
In [ ]:
# Empirical P-value: the fraction of simulated distances that are at least as
# large as the observed distance.
np.average(distances >= observed_distance)
In [ ]:
 
In [ ]: