#!/usr/bin/env python
# coding: utf-8
"""Notebook export: working with the drives and game-info tables.

The script pulls the ``drives``, ``team_game_logs`` and ``matchups`` tables
through ``sdd_api``, derives per-game drive features, builds a matchup id for
every team game log and plots scoring against temperature.

It is an IPython/Jupyter export: ``get_ipython()`` and ``display()`` are only
available when it runs inside IPython, and the bare expressions (for example
``drives.sample(4)``) only render output in a notebook.
"""

# In[1]:

import matplotlib.pyplot as plt
import numpy as np  # was `np = pd.np`; that alias no longer exists in pandas >= 2.0
import pandas as pd
import seaborn as sns
from sdd_api.api import Api

from credentials import client_id, client_secret, password, username

get_ipython().run_line_magic('matplotlib', 'inline')
api = Api(
    username=username,
    password=password,
    client_id=client_id,
    client_secret=client_secret,
)

# # Overview
# For now, this notebook just covers how to use our new drives and game info
# tables. In the future we'll add more advanced commands and datasets.

# ## Drives

# In[68]:

drives = api.get_dataframe('drives', season_start=2016)
drives.sample(4)

# In[69]:

# Calculate time of possession, number of plays and net yards for each team
# in each game. The three measures are selected *before* summing so that
# unrelated (possibly non-numeric) columns of `drives` are never aggregated.
drive_features = (
    drives.groupby(["matchup_id", "team_name"], as_index=False)[
        ['drive_time', 'num_plays', 'net_yds']
    ]
    .sum()[['team_name', 'matchup_id', 'drive_time', 'num_plays', 'net_yds']]
)
# drive_time is divided by 60 to get the "(m)" column -- presumably
# seconds -> minutes; confirm the unit against the API.
drive_features['drive_time(m)'] = drive_features['drive_time'] / 60
drive_features.sample(10)

# In[70]:

drive_features['yards_per_play'] = (
    drive_features['net_yds'] / drive_features['num_plays']
)
# Per-team summary statistics. Only the two reported columns are aggregated;
# running the reducers over every column (including the string matchup_id)
# fails on pandas versions that no longer drop such columns silently.
drive_features.groupby("team_name")[['yards_per_play', 'drive_time(m)']].agg(
    [np.min, np.max, np.median, np.average]
)

# ### Taking Drive Features and Creating In Season Stats

# In[71]:

team_game_logs = api.get_dataframe("team_game_logs", season_start=2016)

# In[72]:

# Keep 2016 regular-season games only. `.copy()` makes the result an
# independent frame so the column assignments below do not write into a
# slice of the original.
team_game_logs = team_game_logs[
    (team_game_logs['game_type'] == 'Regular')
    & (team_game_logs['season'] == 2016)
].copy()

# In[73]:

# We're working to add matchup id and ensure the api knows the proper format.
# For now use this.
def matchup_id(row):
    """Build the matchup id string for one team game-log row.

    The id is ``<season><MMDD><away>@<home>``.

    Args:
        row: mapping-like row (e.g. a ``pandas.Series``) providing
            ``game_location``, ``team_name``, ``opp_name``, ``season`` and a
            ``game_datetime`` value that supports ``strftime``.

    Returns:
        The matchup id as a ``str``.
    """
    # A missing game_location means the row's own team is the home side.
    # pd.isna covers both None and NaN; the previous `is None` test sent
    # NaN rows down the away branch.
    # NOTE(review): presumably a non-missing value (e.g. "@") marks an away
    # game -- confirm against the API.
    if pd.isna(row['game_location']):
        home_name = row['team_name']
        away_name = row['opp_name']
    else:
        home_name = row['opp_name']
        away_name = row['team_name']
    return (
        str(row['season'])
        + row['game_datetime'].strftime("%m%d")
        + "%s@%s" % (away_name, home_name)
    )


# The raw values are scaled by 1e6 before conversion -- presumably epoch
# milliseconds turned into the nanoseconds pd.to_datetime assumes for
# integers; confirm against the API.
team_game_logs['game_datetime'] = pd.to_datetime(
    team_game_logs['game_datetime'] * 1000000
)
team_game_logs['matchup_id'] = team_game_logs.apply(matchup_id, axis=1)

# In[78]:

team_features = []
# We'll create team level stats based on current season performance.
#
# NOTE(review): the export is corrupted from here. Everything between
# `team_games['week_num']` and `(weather_rankings[col]['mean'].max()-5)` is
# missing (it looks as if the text between a `<` and a `>` was stripped).
# The lost part covered the rest of this loop and, judging by the names used
# further down, the definitions of `game_info`, `weather_rankings`,
# `bottom_5`, `top_5` and the `def showRankings(col):` header. It has to be
# restored from the original notebook; nothing has been guessed here.
# Surviving text, verbatim:
#
#   for team_name, team_games in team_game_logs.groupby(["team_name"]):
#   for week_num in team_games['week_num'].unique():
#   in_season_games=team_games[team_games['week_num'](weather_rankings[col]['mean'].max()-5)][col]
#   combined=pd.concat([bottom_5, top_5])
#   display(combined[['mean']].sort_values(by="mean").rename(columns={"mean": col}))

# NOTE(review): `showRankings` was defined in the lost region above; this
# loop raises NameError until that definition is restored.
for col in ['humidity', 'temperature', 'wind']:
    showRankings(col)

# Surprisingly, Chicago is 3rd and Kansas City is ranked 2nd for coldest.
# NOTE, this uses all games played, not just home games.

# # Scoring vs Weather
#

# In[86]:

matchups = api.get_dataframe("matchups")
matchups['total_pts'] = matchups['home_pts'] + matchups['away_pts']
# NOTE(review): `game_info` was also defined in the lost region above.
weather_matchups = matchups.merge(game_info)

# In[87]:

sns.jointplot(data=weather_matchups, x="temperature", y="over_under")
plt.title("Over/Under vs Temperature")

# In[88]:

# Drop rows lacking either plotted value before drawing.
sns.jointplot(
    data=weather_matchups.dropna(subset=["temperature", "total_pts"]),
    x="temperature",
    y="total_pts",
)

# There looks to be a small correlation but it's fuzzy at best. It would be
# better to look at "hardiness" of teams and see if weather makes them perform
# better or worse than their typical games without adjusting for weather.
# We'll leave the model building to you for now but may release a more
# comprehensive example soon!

# In[ ]: