"""Exploratory click-through-rate (CTR) analysis of ``nyt1.csv``.

Notebook-style script: loads the CSV, keeps rows with at least one
impression, derives ``CTR`` and ``AgeGroup`` columns, plots total clicks per
(age group, signed-in) pair, and runs pairwise t-tests on the CTR of
signed-in users between age groups.

Bare expressions such as ``data.head()`` are kept from the original notebook
cells; they only display output when evaluated interactively.
"""
import pandas as pd
from scipy.stats import ttest_ind

# NOTE(review): the original left AgeGroup as an exercise placeholder. These
# bracket edges are a placeholder choice, not taken from the data or an
# assignment spec -- TODO confirm before relying on the results.
AGE_BINS = [float("-inf"), 0, 18, 24, 34, 44, 54, 64, float("inf")]

data = pd.read_csv("nyt1.csv")
data.describe()
print(data.shape)
data.head()

# Originally the IPython magic ``%pylab inline``, which is not valid Python
# syntax in a plain .py file. Run it only when an IPython shell is present.
try:
    get_ipython().run_line_magic("pylab", "inline")
except NameError:
    pass

data.hist(figsize=(10, 8))

##YOUR ANSWER HERE

# .copy() gives an independent frame, so the column assignments below do not
# write into a filtered view of ``data`` (SettingWithCopy).
data1 = data[data.Impressions > 0].copy()

##YOUR ANSWERS HERE
# Impressions > 0 is guaranteed by the filter above, so no division by zero.
data1['CTR'] = data1['Clicks'] / data1['Impressions']
# NOTE(review): assumes the CSV has an ``Age`` column -- TODO confirm.
data1['AgeGroup'] = pd.cut(data1['Age'], bins=AGE_BINS)

data1.head()
print(data1.shape)
data1.describe()

# Despite its name, this aggregates the *Clicks* column (total clicks per
# age group / signed-in combination). The name is kept for compatibility.
impressionsByAgeSignIn = data1.groupby(['AgeGroup', 'Signed_In'])['Clicks'].sum()
impressionsByAgeSignIn.plot(kind='bar')

# CTR values of signed-in users with a non-zero CTR, grouped by age bracket.
loggedInCTRsByAgeGroup = data1[(data1.CTR > 0) & (data1.Signed_In > 0)].groupby('AgeGroup').CTR
loggedInCTRsByAgeGroup.describe()

# Materialise the (key, values) pairs once; run_pairwise_tests iterates the
# collection repeatedly.
groups = list(loggedInCTRsByAgeGroup)


def run_pairwise_tests(groups):
    """Yield a t-test p-value for every distinct pair of groups.

    Args:
        groups: Re-iterable collection of ``(key, values)`` pairs, e.g. a
            list built from iterating a pandas GroupBy. Keys must support
            ``<`` comparison with each other.

    Yields:
        ``(key_a, key_b, p_value)`` tuples with ``key_a < key_b``, where
        ``p_value`` is the second element of the ``ttest_ind`` result for the
        two groups' values.
    """
    for group_a in groups:
        for group_b in groups:
            # The strict key ordering reports each pair once and skips
            # comparing a group with itself.
            if group_a[0] < group_b[0]:
                yield group_a[0], group_b[0], ttest_ind(group_a[1], group_b[1])[1]


testResults = pd.DataFrame(run_pairwise_tests(groups))

## YOUR ANSWERS HERE