#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
# pandas no longer re-exports NumPy as ``pd.np`` (deprecated in 1.0, removed in 2.0),
# so import NumPy directly under the same ``np`` name the rest of the notebook uses.
import numpy as np
from sdd_api.api import Api
from credentials import *
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
# Client used for every table request below. The four credential values are not
# defined in this file; presumably they come from the star-import of ``credentials``.
api = Api(
    username=username,
    password=password,
    client_id=client_id,
    client_secret=client_secret,
)


# # Overview  
# For now, this notebook just covers how to use our new drives and game info tables. In the future, we'll add more advanced commands and datasets.

# ## Drives

# In[68]:


# Load the drives table (season_start=2016 presumably limits it to 2016 onward --
# confirm against the API docs) and preview four random rows.
drives = api.get_dataframe('drives', season_start=2016)
drives.sample(n=4)


# In[69]:


# Calculate time of possession, total plays and net yards per team per game.
# Only the three numeric stat columns are summed: aggregating the whole frame would
# also "sum" every other column (e.g. concatenating strings), which is wasteful and
# can raise on dtypes that do not support addition.
drive_features = (
    drives
    .groupby(["matchup_id", "team_name"], as_index=False)[['drive_time', 'num_plays', 'net_yds']]
    .sum()
)
# Keep the original column order (team first, then matchup, then the stats).
drive_features = drive_features[['team_name', 'matchup_id', 'drive_time', 'num_plays', 'net_yds']]
# presumably drive_time is in seconds, hence /60 for minutes -- confirm against the API docs
drive_features['drive_time(m)'] = drive_features['drive_time'] / 60
drive_features.sample(10)


# In[70]:


# Offensive efficiency per game: net yards gained per play run.
drive_features['yards_per_play'] = drive_features['net_yds'] / drive_features['num_plays']
# Per-team spread of the two per-game metrics. The metric columns are selected
# *before* aggregating so identifier columns such as matchup_id are never fed to
# median/mean, and string aggregator names are used instead of NumPy callables
# (which newer pandas deprecates inside ``agg``). The summary columns are therefore
# labelled 'min', 'max', 'median' and 'mean'.
drive_features.groupby("team_name")[['yards_per_play', 'drive_time(m)']].agg(['min', 'max', 'median', 'mean'])


# ### Taking Drive Features and Creating In Season Stats

# In[71]:


# Fetch the per-team game logs from the API.
team_game_logs = api.get_dataframe("team_game_logs", season_start=2016)


# In[72]:


# Keep only 2016 regular-season rows. ``.copy()`` makes the result an independent
# frame: new columns are assigned to it later in the notebook, and writing into a
# filtered slice would trigger pandas' SettingWithCopyWarning.
is_regular = team_game_logs['game_type'] == 'Regular'
is_2016 = team_game_logs['season'] == 2016
team_game_logs = team_game_logs[is_regular & is_2016].copy()


# In[73]:


# We're working on adding matchup_id to this table and making sure the API returns it in the proper format.
# For now, build it with this helper.
def matchup_id(row):
    """Build the matchup identifier string for one game-log row.

    The result has the form ``<season><MMDD><away>@<home>``.

    Args:
        row: Mapping-like object (for example a DataFrame row) providing
            'game_location', 'team_name', 'opp_name', 'season' and
            'game_datetime'. 'game_datetime' must support ``strftime``.

    Returns:
        The matchup id as a string.
    """
    # A missing location marks the row's own team as the home side. pandas usually
    # stores missing values as NaN rather than None, so test with pd.isna, which
    # covers both; a plain ``is None`` check would treat NaN rows as away games.
    if pd.isna(row['game_location']):
        home_name, away_name = row['team_name'], row['opp_name']
    else:
        home_name, away_name = row['opp_name'], row['team_name']
    game_day = row['game_datetime'].strftime("%m%d")
    return "%s%s%s@%s" % (row['season'], game_day, away_name, home_name)
# Scale the raw timestamps by 1e6 before parsing -- presumably they arrive as epoch
# milliseconds and this converts them to the nanoseconds pd.to_datetime assumes for
# plain numbers (confirm against the API docs).
team_game_logs['game_datetime'] = pd.to_datetime(team_game_logs['game_datetime'].mul(1000000))
# Row-wise apply: each game-log row gets its matchup key from matchup_id().
team_game_logs['matchup_id'] = team_game_logs.apply(matchup_id, axis='columns')


# In[78]:


team_features = []
# We'll create team-level stats based on current-season performance: for every
# game a team plays, summarise its drives from the games it played in earlier weeks.
# Group by the bare column name -- a one-element list key makes newer pandas yield
# 1-tuples such as ('Team',), which would break the team_name comparison below.
for team_name, team_games in team_game_logs.groupby("team_name"):
    for week_num in team_games['week_num'].unique():
        in_season_games = team_games[team_games['week_num'] < week_num]
        if in_season_games.empty:
            continue  # no earlier games this season (could fall back to last season's stats)
        matchup_ids = in_season_games['matchup_id'].unique()
        # Only the three numeric stat columns are needed; restricting to them keeps
        # text/id columns out of the sum and mean.
        team_drives = drives.loc[
            drives['matchup_id'].isin(matchup_ids) & (drives['team_name'] == team_name),
            ['drive_time', 'num_plays', 'net_yds'],
        ]
        totals = team_drives.sum()
        this_week = team_games['week_num'] == week_num
        team_features.append({
            'average_drive_time': team_drives['drive_time'].mean(),
            # (sic) key spelling kept so the resulting column name does not change
            'time_of_posession': totals['drive_time'],
            'yards_per_play': totals['net_yds'] / totals['num_plays'],
            'matchup_id': team_games.loc[this_week, 'matchup_id'].iloc[0],
            'team_name': team_name,
        })
# Rows with undefined stats (e.g. no matching drives -> NaN mean / 0-by-0 ratio) are dropped.
team_features_df = pd.DataFrame(team_features).dropna()
team_features_df.sample(10)


# You can take "team_features_df" and merge with the matchups table to make predictions. You can also add home/away information by using the team_game_logs table or the matchups table

# ### Game Info  
# We've added a game info table. For now it just contains weather and grass information.

# In[80]:


# Load the game info table (no filter arguments are passed) and preview ten random rows.
game_info = api.get_dataframe("game_info")
game_info.sample(n=10)


# We're going to show how you can use this data to see which team plays in the coldest weather.

# In[81]:


# No join keys are given, so pandas inner-joins on every column name the two
# frames have in common.
team_games_with_weather = pd.merge(team_game_logs, game_info)
team_games_with_weather


# In[82]:


# Mean / min / max of each weather metric per team. String aggregator names are
# used instead of NumPy callables: pandas deprecates callables such as np.mean
# inside ``agg``, and their column labels depend on the NumPy version ('amin' vs
# 'min'). The 'mean' label that the ranking cells rely on is unchanged.
weather_stats = (
    team_games_with_weather
    .groupby("team_name")[['humidity', 'temperature', 'wind']]
    .agg(['mean', 'min', 'max'])
)
weather_stats


# In[83]:


# DataFrame.rank() ranks the values within each column; with these (default)
# settings the smallest value gets rank 1 and tied values share their average rank.
weather_rankings = weather_stats.rank(axis=0, method='average', ascending=True)
weather_rankings


# In[84]:


# Show only the wind block (its mean/min/max rank columns) of the two-level column index.
weather_rankings.loc[:, 'wind']


# In[85]:


from IPython.display import display
def showRankings(col):
    """Display the teams at both extremes of the mean ranking for one metric.

    Reads the module-level ``weather_rankings`` frame, keeps the rows whose
    mean rank for ``col`` is at most 5 or within 5 of the highest mean rank,
    and displays them sorted by that rank in a single column named ``col``.

    Args:
        col: Top-level column of ``weather_rankings`` to show.
    """
    metric_ranks = weather_rankings[col]
    mean_rank = metric_ranks['mean']
    upper_cutoff = mean_rank.max() - 5
    lowest = metric_ranks[mean_rank <= 5]
    highest = metric_ranks[mean_rank > upper_cutoff]
    table = pd.concat([lowest, highest])[['mean']]
    table = table.sort_values(by="mean")
    display(table.rename(columns={"mean": col}))
# Render one extremes table per weather metric.
for col in ('humidity', 'temperature', 'wind'):
    showRankings(col)


# Surprisingly, Kansas City ranks 2nd and Chicago 3rd for coldest. Note that this uses all games played, not just home games.

# # Scoring vs Weather  
# 

# In[86]:


# Load the matchups table and add the combined points of both teams.
matchups = api.get_dataframe("matchups")
matchups['total_pts'] = matchups['home_pts'].add(matchups['away_pts'])
# No join keys are given, so this inner-joins on all column names shared with game_info.
weather_matchups = pd.merge(matchups, game_info)


# In[87]:


import seaborn as sns
# jointplot draws a multi-axes figure (joint plot plus two marginals), so
# plt.title() would title a single axes and collide with the top marginal plot.
# Put the title on the figure instead and leave headroom for it.
joint_grid = sns.jointplot(data=weather_matchups, x="temperature", y="over_under")
joint_grid.fig.suptitle("Over/Under vs Temperature")
joint_grid.fig.subplots_adjust(top=0.93)


# In[88]:


# Plot only games that have both a temperature and a total score recorded.
complete_games = weather_matchups.dropna(subset=["temperature", "total_pts"])
sns.jointplot(data=complete_games, x="temperature", y="total_pts")


# There appears to be a small correlation, but it's fuzzy at best. It would be better to look at the "hardiness" of teams and see whether weather makes them perform better or worse than in their typical games, which are not adjusted for weather. We'll leave the model building to you for now, but we may release a more comprehensive example soon!

# In[ ]: