#!/usr/bin/env python
# coding: utf-8

# # Analysis of 5K race results from races run by both Jesse and Hugh
# This notebook analyzes 5K races run by Jesse Bloom and Hugh Haddox.
#
# ### Import modules

# In[1]:


# The inline-plotting magic only exists when running under IPython/Jupyter;
# skip it so the exported script also runs with a plain Python interpreter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import scipy.stats
import pandas
import matplotlib
import matplotlib.pyplot as plt
import seaborn

seaborn.set_context('notebook', font_scale=2)


# ### Read in race results
# First, we read in a table of race results manually compiled from the
# internet. The code below uses the columns `year`, `Hugh`, and `Jesse`.

# In[2]:


results = pandas.read_csv('race_results.csv')
print(results.to_string(index=False))


# ### Plot race results over time
# Next, we look at how the race results have changed over time for each
# runner, drawing a dashed line at the critical threshold of 20 minutes.

# In[3]:


# Reshape from one column per runner to one row per (year, runner) pair so
# that seaborn can color the points by runner.
results_melt = pandas.melt(
    results,
    id_vars=['year'],
    value_vars=['Hugh', 'Jesse'],
    var_name='runner',
    value_name='time',
)

# NOTE: `fit_reg` is a regplot/lmplot option, not a pointplot one, so it is
# intentionally not passed here (recent seaborn versions reject it).
seaborn.pointplot(x='year', y='time', data=results_melt, hue='runner')
plt.ylabel('time (minutes)')
plt.axhline(y=20, color='black', linestyle='--', linewidth=1)
plt.show()


# ### Examine if there is a significant difference between runners
# We perform statistical tests to determine if the runners have
# significantly different times.
#
# First, we plot the distributions for the two runners.

# In[4]:


seaborn.stripplot(data=results_melt, x='runner', y='time', size=10)
plt.ylabel('time (minutes)')
plt.show()


# Then we test if they are significantly different using the non-parametric
# [Mann-Whitney test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test).
# The alternative hypothesis is stated explicitly (Hugh's times are greater
# than Jesse's) so that the reported P-value does not depend on the default
# of the installed SciPy version, and the result is printed so that it is
# also visible when this file is run as a script.

# In[5]:


test_result = scipy.stats.mannwhitneyu(
    results['Hugh'],
    results['Jesse'],
    alternative='greater',
)
print(test_result)


# Therefore, we can conclude with *P < 0.05* that Jesse's times are
# significantly faster than Hugh's.

# In[ ]: