In [ ]:

from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

Lecture 20

Deflategate

In [ ]:

# Load the Deflategate data from 'deflategate.csv' (path is relative to the
# notebook's working directory) and display the table.
football = Table.read_table('deflategate.csv')
football.show()

In [ ]:

# Average the two measurement columns row by row, then replace both of them
# with the single averaged 'Combined' column.
# NOTE: this cell rebinds `football`, so it can only be run once per load of
# the CSV; a second run looks up 'Blakeman' on a table that no longer has it.
blakeman_readings = football.column('Blakeman')
prioleau_readings = football.column('Prioleau')
combined = (blakeman_readings + prioleau_readings) / 2

football = football.drop('Blakeman', 'Prioleau').with_column('Combined', combined)
football.show()

In [ ]:

# Demo: np.ones(5) builds an array of five 1.0 values; the next cell scales
# arrays like this one to build constant-valued arrays.
np.ones(5)

In [ ]:

# Starting pressure assigned to each row, in row order: 12.5 for the first
# 11 rows followed by 13 for the last 4 rows.
# NOTE(review): the counts 11 and 4 are hard-coded; presumably they match the
# group sizes and row order of `football` -- confirm against the table.
first_group_start = np.full(11, 12.5)
second_group_start = np.full(4, 13.0)
initial_pressure = np.concatenate([first_group_start, second_group_start])
initial_pressure

In [ ]:

# Pressure drop per row: starting pressure minus the 'Combined' column (the
# average of the two measurement columns).
# The column is selected by label rather than by position because `football`
# is rebound by a later cell; a positional index would silently pick up a
# different column if this cell were re-run afterwards, whereas a label
# lookup fails loudly.
drop_values = initial_pressure - football.column('Combined')

In [ ]:

# Swap the 'Combined' column for the computed 'Drop' column.
# NOTE: this rebinds `football`, so the cell is not idempotent -- on a second
# run 'Combined' has already been removed (presumably raising an error).
football = football.drop('Combined').with_column('Drop', drop_values)

In [ ]:

# Display the table now that 'Combined' has been replaced by 'Drop'.
football.show()

In [ ]:

# One row per distinct 'Team' value, with np.average applied to the other
# column(s) within each team.
means = football.group('Team', np.average)
means

In [ ]:

# Observed test statistic: the average in the first row of `means` minus the
# average in the second row (column 1 holds the per-team averages).
team_averages = means.column(1)
first_team_average = team_averages.item(0)
second_team_average = team_averages.item(1)
observed_difference = first_team_average - second_team_average
observed_difference

In [ ]:

def diff_between_means(tbl):
    """Return the difference between the first two per-team averages.

    The table is grouped by its 'Team' column with ``np.average`` as the
    collector; column 1 of the grouped table is then read as the array of
    per-team averages.

    Parameters
    ----------
    tbl : table with a 'Team' column, supporting ``group(label, collect)``
        and ``column(index)`` on the grouped result.

    Returns
    -------
    The average in the first grouped row minus the average in the second.
    """
    team_averages = tbl.group('Team', np.average).column(1)
    first_average = team_averages.item(0)
    second_average = team_averages.item(1)
    return first_average - second_average

In [ ]:

# Single-column table holding only the 'Drop' values; this is the column that
# gets shuffled in the cells below.
drops = football.select('Drop')

In [ ]:

# One random shuffle of the drops: sampling the rows without replacement and
# taking column 0 as an array.
shuffled_drops = drops.sample(with_replacement = False).column(0)
shuffled_drops

In [ ]:

# Pair the shuffled drops with the table's existing rows under the 'Drop'
# label, then preview the first 3 rows.
simulated_football = football.with_column('Drop', shuffled_drops)
simulated_football.show(3)

In [ ]:

# Test statistic computed on this single shuffled table.
diff_between_means(simulated_football)

In [ ]:

# Simulate the test statistic under random shuffling of the 'Drop' values.
# Each repetition shuffles the drops, pairs them with the rows of `football`,
# and records the difference between the group means.
# Results are collected in a Python list and converted to an array once at
# the end: np.append copies the whole array on every call, so growing the
# array inside the loop does quadratic work.
deflategate_repetitions = 5000  # number of shuffles to simulate

simulated_differences = []
for i in np.arange(deflategate_repetitions):
    shuffled_drops = drops.sample(with_replacement = False).column(0)
    simulated_football = football.with_column('Drop', shuffled_drops)
    new_diff = diff_between_means(simulated_football)
    simulated_differences.append(new_diff)

differences = np.array(simulated_differences)

In [ ]:

# Histogram of the simulated differences, with the observed difference marked
# as a red dot on the horizontal axis (y = 0).
Table().with_column('Difference Between Means', differences).hist()
plots.scatter(observed_difference, 0, color='red', s=40);  # trailing ';' suppresses the text repr

In [ ]:

# Empirical p-value: the fraction of simulated differences that are at or
# below the observed difference (the comparison yields booleans, whose
# average is that fraction).
np.average(differences <= observed_difference)

Analyzing RCTs

In [ ]:

# Botulinum Toxin A (BTA) as a treatment for chronic back pain.
#   - 15 patients in the treatment group
#   - 16 patients in the control group (normal saline)
# The trial was run double-blind: neither doctors nor patients knew which
# group a patient was in.
# Result = 1 indicates pain relief.
bta = Table.read_table('bta.csv')
bta.show()

In [ ]:

# Sum of the other column(s) within each 'Group' value; if Result is coded
# 0/1 (presumably -- confirm), this counts the Result = 1 rows per group.
bta.group('Group', sum)

In [ ]:

# Average of the other column(s) within each 'Group' value; if Result is
# coded 0/1 (presumably -- confirm), this is the proportion with Result = 1.
bta.group('Group', np.average)

In [ ]:

# Load and display a second table from 'observed_outcomes.csv'.
# NOTE(review): its schema is not evident from this notebook, and no later
# cell visible here references `observed_outcomes`.
observed_outcomes = Table.read_table('observed_outcomes.csv')
observed_outcomes.show()

In [ ]:

# The per-group averages on their own, as an array (column 1 of the grouped table).
bta.group('Group', np.average).column(1)

In [ ]:

# Absolute difference between two hand-typed values.
# NOTE(review): 0.125 and 0.6 are presumably the two group proportions shown
# by the previous cell -- confirm; they are not recomputed from `bta` here.
abs(0.125 - 0.6)

In [ ]:

def distance_between_group_proportions(tbl):
    """Return the absolute gap between the first two per-group averages.

    The table is grouped by its 'Group' column with ``np.average`` as the
    collector; column 1 of the grouped table is read as the array of
    per-group averages.

    Parameters
    ----------
    tbl : table with a 'Group' column, supporting ``group(label, collect)``
        and ``column(index)`` on the grouped result.

    Returns
    -------
    ``abs(second - first)`` for the averages in the first two grouped rows.
    """
    group_averages = tbl.group('Group', np.average).column(1)
    gap = group_averages.item(1) - group_averages.item(0)
    return abs(gap)

In [ ]:

# Observed test statistic: the distance between group proportions in the
# actual (unshuffled) data.
observed_distance = distance_between_group_proportions(bta)
observed_distance

In [ ]:

# Split `bta` into two single-column tables so the results can be shuffled
# independently of the group labels.
labels = bta.select('Group')
results = bta.select('Result')

In [ ]:

# Simulate the test statistic under random shuffling of the results.
# Each repetition shuffles the 'Result' values, pairs them with the original
# group labels, and records the distance between the group proportions.
# Results are collected in a Python list and converted to an array once at
# the end: np.append copies the whole array on every call, so growing the
# array inside the loop does quadratic work.
bta_repetitions = 2000  # number of shuffles to simulate

simulated_distances = []
for i in np.arange(bta_repetitions):
    shuffled_results = results.sample(with_replacement=False).column(0)
    simulated = labels.with_column('Shuffled results', shuffled_results)
    distance = distance_between_group_proportions(simulated)
    simulated_distances.append(distance)

distances = np.array(simulated_distances)
distances

In [ ]:

# Histogram of the simulated distances using bin edges 0, 0.1, ..., 0.9,
# with the observed distance marked as a red dot on the horizontal axis (y = 0).
Table().with_column('Distance', distances).hist(bins = np.arange(0, 1, 0.1))
plots.scatter(observed_distance, 0, color='red', s=40);  # trailing ';' suppresses the text repr

In [ ]:

# Empirical p-value: the fraction of simulated distances that are at least as
# large as the observed distance.
np.average(distances >= observed_distance)

In [ ]: