In [37]:
import pandas as pd
np=pd.np
from sdd_api.api import Api
from credentials import *
import matplotlib.pyplot as plt
%matplotlib inline
pd.options.display.max_columns=None
api = Api(username=username, password=password, client_id=client_id, client_secret=client_secret)
In [38]:
matchups=api.get_dataframe("matchups",season_start=2016)
matchups.sort_values(by="game_datetime")
Out[38]:
season home_name away_name full_game_type away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime game_location game_outcome game_time game_type had_overtime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards teams week_num away_conference conference_play home_line over_under handicap_difference home_conference line_moves matchup_id
76 2016 DEN CAR Week 1 21.0 176.0 20.0 157.0 1.0 333.0 1473367200000 None W 8:40PM ET Regular False 21.0 159.0 21.0 148.0 3.0 307.0 CAR,DEN 1.0 NFC Inter 3.0 40.5 3.0 AFC 19 [email protected]
119 2016 JAC GB Week 1 18.0 199.0 27.0 95.0 NaN 294.0 1473599040000 None L 1:04PM ET Regular False 20.0 300.0 23.0 48.0 1.0 348.0 GB,JAC 1.0 NFC Inter 3.5 47.0 0.5 AFC 24 [email protected]
18 2016 BAL BUF Week 1 11.0 95.0 7.0 65.0 NaN 160.0 1473599040000 None W 1:04PM ET Regular False 18.0 225.0 13.0 83.0 1.0 308.0 BAL,BUF 1.0 AFC AFC -3.0 44.5 0.5 AFC 4 [email protected]
17 2016 ATL TB Week 1 20.0 281.0 31.0 90.0 1.0 371.0 1473599100000 None L 1:05PM ET Regular False 19.0 322.0 24.0 52.0 NaN 374.0 ATL,TB 1.0 NFC NFC -2.5 47.0 0.5 NFC 18 [email protected]
183 2016 NO OAK Week 1 25.0 319.0 35.0 167.0 NaN 486.0 1473599100000 None L 1:05PM ET Regular False 27.0 419.0 34.0 88.0 1.0 507.0 NO,OAK 1.0 AFC Inter -3.0 50.5 2.0 NFC 25 [email protected]
211 2016 PHI CLE Week 1 14.0 168.0 10.0 120.0 1.0 288.0 1473599100000 None W 1:05PM ET Regular False 23.0 270.0 29.0 133.0 NaN 403.0 CLE,PHI 1.0 AFC Inter -4.0 41.5 3.0 NFC 36 [email protected]
127 2016 KC LAC Week 1 25.0 233.0 27.0 155.0 NaN 388.0 1473599100000 None W 1:05PM ET Regular True 26.0 330.0 33.0 83.0 1.0 413.0 KC,LAC 1.0 AFC AFC -6.5 46.0 0.5 AFC 12 [email protected]
196 2016 NYJ CIN Week 1 18.0 324.0 23.0 57.0 1.0 381.0 1473599100000 None L 1:05PM ET Regular False 22.0 188.0 22.0 152.0 1.0 340.0 CIN,NYJ 1.0 AFC AFC 1.0 42.0 0.0 AFC 30 [email protected]
258 2016 TEN MIN Week 1 15.0 236.0 25.0 65.0 NaN 301.0 1473599100000 None L 1:05PM ET Regular False 19.0 252.0 16.0 64.0 3.0 316.0 MIN,TEN 1.0 NFC Inter 2.5 40.0 0.5 AFC 32 [email protected]
100 2016 HOU CHI Week 1 14.0 185.0 14.0 73.0 1.0 258.0 1473599100000 None W 1:05PM ET Regular False 17.0 215.0 23.0 129.0 1.0 344.0 CHI,HOU 1.0 NFC Inter -5.5 42.5 0.0 AFC 27 [email protected]
233 2016 SEA MIA Week 1 11.0 150.0 10.0 64.0 NaN 214.0 1473610080000 None W 4:08PM ET Regular False 21.0 240.0 12.0 112.0 2.0 352.0 MIA,SEA 1.0 AFC Inter -10.5 44.0 3.0 NFC 40 [email protected]
71 2016 DAL NYG Week 1 18.0 203.0 20.0 113.0 1.0 316.0 1473611220000 None L 4:27PM ET Regular False 24.0 227.0 19.0 101.0 NaN 328.0 DAL,NYG 1.0 NFC NFC 1.0 48.0 4.5 NFC 33 [email protected]
110 2016 IND DET Week 1 28.0 332.0 39.0 116.0 NaN 448.0 1473611220000 None L 4:27PM ET Regular False 25.0 368.0 35.0 82.0 NaN 450.0 DET,IND 1.0 NFC Inter -2.5 51.0 3.0 AFC 31 [email protected]
1 2016 ARI NE Week 1 19.0 257.0 23.0 106.0 2.0 363.0 1473622200000 None L 7:30PM ET Regular False 21.0 252.0 21.0 92.0 NaN 344.0 ARI,NE 1.0 AFC Inter -9.0 44.5 9.0 NFC 19 [email protected]
267 2016 WAS PIT Week 1 23.0 290.0 38.0 147.0 1.0 437.0 1473707460000 None L 7:11PM ET Regular False 18.0 329.0 16.0 55.0 2.0 384.0 PIT,WAS 1.0 AFC Inter 2.5 49.0 0.5 NFC 13 [email protected]
238 2016 SF LAR Week 1 10.0 120.0 0.0 65.0 2.0 185.0 1473718800000 None W 10:20PM ET Regular False 28.0 170.0 28.0 150.0 1.0 320.0 LAR,SF 1.0 NFC NFC 2.5 43.0 0.5 NFC 9 [email protected]
31 2016 BUF NYJ Week 2 28.0 370.0 37.0 123.0 1.0 493.0 1473971160000 None L 8:26PM ET Regular False 16.0 307.0 31.0 86.0 1.0 393.0 BUF,NYJ 2.0 AFC AFC -1.0 40.5 1.5 AFC 29 [email protected]
105 2016 HOU KC Week 2 14.0 172.0 12.0 119.0 3.0 291.0 1474203720000 None W 1:02PM ET Regular False 15.0 254.0 19.0 97.0 2.0 351.0 HOU,KC 2.0 AFC AFC -1.0 42.0 0.5 AFC 28 [email protected]
89 2016 DET TEN Week 2 22.0 224.0 16.0 139.0 1.0 363.0 1474203720000 None L 1:02PM ET Regular False 22.0 238.0 15.0 137.0 1.0 375.0 DET,TEN 2.0 AFC Inter -6.0 48.0 0.5 NFC 17 [email protected]
58 2016 CLE BAL Week 2 22.0 302.0 25.0 80.0 2.0 382.0 1474203720000 None L 1:02PM ET Regular False 17.0 242.0 20.0 145.0 2.0 387.0 BAL,CLE 2.0 AFC AFC 4.0 42.0 1.0 AFC 26 [email protected]
173 2016 NE MIA Week 2 23.0 387.0 24.0 70.0 4.0 457.0 1474203720000 None W 1:02PM ET Regular False 29.0 302.0 31.0 161.0 1.0 463.0 MIA,NE 2.0 AFC AFC -5.5 42.5 1.0 AFC 15 [email protected]
219 2016 PIT CIN Week 2 21.0 366.0 16.0 46.0 2.0 412.0 1474203720000 None W 1:02PM ET Regular False 19.0 250.0 24.0 124.0 2.0 374.0 CIN,PIT 2.0 AFC AFC -3.0 48.5 0.5 AFC 10 [email protected]
191 2016 NYG NO Week 2 16.0 247.0 13.0 41.0 NaN 288.0 1474203780000 None W 1:03PM ET Regular False 22.0 353.0 16.0 64.0 3.0 417.0 NO,NYG 2.0 NFC NFC -3.5 54.0 0.5 NFC 16 [email protected]
262 2016 WAS DAL Week 2 24.0 278.0 27.0 102.0 1.0 380.0 1474203780000 None L 1:03PM ET Regular False 24.0 350.0 23.0 82.0 1.0 432.0 DAL,WAS 2.0 NFC NFC -3.5 47.0 0.0 NFC 20 [email protected]
40 2016 CAR SF Week 2 16.0 237.0 27.0 65.0 3.0 302.0 1474203780000 None W 1:03PM ET Regular False 26.0 353.0 46.0 176.0 4.0 529.0 CAR,SF 2.0 NFC NFC -12.0 44.5 0.5 NFC 25 [email protected]
6 2016 ARI TB Week 2 21.0 221.0 7.0 85.0 5.0 306.0 1474203900000 None W 1:05PM ET Regular False 20.0 311.0 40.0 105.0 NaN 416.0 ARI,TB 2.0 NFC NFC -7.0 49.5 0.5 NFC 15 [email protected]
148 2016 LAR SEA Week 2 17.0 239.0 3.0 67.0 1.0 306.0 1474214880000 None W 4:08PM ET Regular False 17.0 219.0 9.0 64.0 NaN 283.0 LAR,SEA 2.0 NFC NFC 5.5 38.0 2.0 NFC 40 [email protected]
135 2016 LAC JAC Week 2 20.0 319.0 14.0 69.0 3.0 388.0 1474215900000 None W 4:25PM ET Regular False 25.0 207.0 38.0 150.0 1.0 357.0 JAC,LAC 2.0 AFC AFC -3.0 47.5 0.5 AFC 13 [email protected]
78 2016 DEN IND Week 2 19.0 170.0 20.0 83.0 2.0 253.0 1474215900000 None W 4:25PM ET Regular False 24.0 266.0 34.0 134.0 1.0 400.0 DEN,IND 2.0 AFC AFC -6.0 47.0 2.0 AFC 14 [email protected]
202 2016 OAK ATL Week 2 27.0 389.0 35.0 139.0 1.0 528.0 1474215900000 None L 4:25PM ET Regular False 29.0 299.0 28.0 155.0 NaN 454.0 ATL,OAK 2.0 NFC Inter -4.0 47.5 2.5 AFC 14 [email protected]
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
123 2017 KC MIA Week 16 15.0 286.0 13.0 59.0 2.0 345.0 1514120400000 W 1:00PM ET Regular False 23.0 301.0 29.0 103.0 NaN 404.0 KC,MIA 16.0 AFC AFC -11.0 43.5 1.0 AFC None [email protected]
250 2017 WAS DEN Week 16 18.0 171.0 11.0 159.0 3.0 330.0 1514120400000 W 1:00PM ET Regular False 19.0 299.0 27.0 87.0 2.0 386.0 DEN,WAS 16.0 AFC Inter -3.0 39.5 1.0 NFC None [email protected]
39 2017 CAR TB Week 16 20.0 326.0 19.0 66.0 3.0 392.0 1514120400000 W 1:00PM ET Regular False 20.0 140.0 22.0 115.0 1.0 255.0 CAR,TB 16.0 NFC NFC -10.0 47.0 1.0 NFC None [email protected]
245 2017 TEN LAR Week 16 20.0 286.0 27.0 116.0 1.0 402.0 1514120400000 L 1:00PM ET Regular False 19.0 269.0 23.0 97.0 1.0 366.0 LAR,TEN 16.0 NFC Inter 5.5 46.5 1.0 AFC None [email protected]
42 2017 CHI CLE Week 16 12.0 178.0 3.0 75.0 3.0 253.0 1514120400000 W 1:00PM ET Regular False 16.0 161.0 20.0 97.0 NaN 258.0 CHI,CLE 16.0 AFC Inter -6.0 37.0 1.0 NFC None [email protected]
161 2017 NE BUF Week 16 17.0 242.0 16.0 84.0 NaN 326.0 1514120400000 W 1:00PM ET Regular False 28.0 218.0 37.0 193.0 1.0 411.0 BUF,NE 16.0 AFC AFC -11.0 47.5 0.5 AFC None [email protected]
168 2017 NO ATL Week 16 15.0 264.0 13.0 67.0 2.0 331.0 1514120400000 W 1:00PM ET Regular False 15.0 229.0 23.0 86.0 1.0 315.0 ATL,NO 16.0 NFC NFC -5.5 52.0 0.5 NFC None [email protected]
189 2017 NYJ LAC Week 16 21.0 290.0 14.0 89.0 NaN 379.0 1514120400000 L 1:00PM ET Regular False 13.0 98.0 7.0 197.0 3.0 295.0 LAC,NYJ 16.0 AFC AFC 7.0 42.5 0.5 AFC None [email protected]
52 2017 CIN DET Week 16 15.0 189.0 17.0 87.0 1.0 276.0 1514120400000 W 1:00PM ET Regular False 28.0 222.0 26.0 142.0 1.0 364.0 CIN,DET 16.0 NFC Inter 3.0 43.5 0.0 AFC None [email protected]
227 2017 SF JAC Week 16 32.0 380.0 33.0 92.0 3.0 472.0 1514131500000 W 4:05PM ET Regular False 26.0 238.0 44.0 131.0 1.0 369.0 JAC,SF 16.0 AFC Inter 4.0 43.0 1.5 NFC None [email protected]
70 2017 DAL SEA Week 16 15.0 60.0 21.0 76.0 NaN 136.0 1514132700000 L 4:25PM ET Regular False 21.0 155.0 12.0 128.0 3.0 283.0 DAL,SEA 16.0 NFC NFC -4.5 47.5 1.5 NFC None [email protected]
3 2017 ARI NYG Week 16 12.0 250.0 0.0 43.0 3.0 293.0 1514132700000 W 4:25PM ET Regular False 19.0 215.0 23.0 74.0 2.0 289.0 ARI,NYG 16.0 NFC NFC -3.0 39.0 2.0 NFC None [email protected]
101 2017 HOU PIT Week 16 20.0 226.0 34.0 104.0 NaN 330.0 1514219400000 L 4:30PM ET Regular False 13.0 51.0 6.0 176.0 2.0 227.0 HOU,PIT 16.0 AFC AFC 9.0 45.5 1.0 AFC None [email protected]
205 2017 PHI OAK Week 16 13.0 137.0 10.0 137.0 5.0 274.0 1514233800000 W 8:30PM ET Regular False 12.0 138.0 19.0 78.0 2.0 216.0 OAK,PHI 16.0 AFC Inter -10.0 46.0 2.0 NFC None [email protected]
202 2017 PHI DAL Week 17 NaN NaN NaN NaN NaN NaN 1514725200000 1:00PM ET Regular False NaN NaN NaN NaN NaN NaN DAL,PHI 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
210 2017 PIT CLE Week 17 NaN NaN NaN NaN NaN NaN 1514725200000 1:00PM ET Regular False NaN NaN NaN NaN NaN NaN CLE,PIT 17.0 AFC AFC NaN NaN NaN AFC None [email protected]
167 2017 NE NYJ Week 17 NaN NaN NaN NaN NaN NaN 1514725200000 1:00PM ET Regular False NaN NaN NaN NaN NaN NaN NE,NYJ 17.0 AFC AFC NaN NaN NaN AFC None [email protected]
153 2017 MIN CHI Week 17 NaN NaN NaN NaN NaN NaN 1514725200000 1:00PM ET Regular False NaN NaN NaN NaN NaN NaN CHI,MIN 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
85 2017 DET GB Week 17 NaN NaN NaN NaN NaN NaN 1514725200000 1:00PM ET Regular False NaN NaN NaN NaN NaN NaN DET,GB 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
183 2017 NYG WAS Week 17 NaN NaN NaN NaN NaN NaN 1514725200000 1:00PM ET Regular False NaN NaN NaN NaN NaN NaN NYG,WAS 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
107 2017 IND HOU Week 17 NaN NaN NaN NaN NaN NaN 1514725200000 1:00PM ET Regular False NaN NaN NaN NaN NaN NaN HOU,IND 17.0 AFC AFC NaN NaN NaN AFC None [email protected]
74 2017 DEN KC Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN DEN,KC 17.0 AFC AFC NaN NaN NaN AFC None [email protected]
142 2017 LAR SF Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN LAR,SF 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
9 2017 ATL CAR Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN ATL,CAR 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
17 2017 BAL CIN Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN BAL,CIN 17.0 AFC AFC NaN NaN NaN AFC None [email protected]
133 2017 LAC OAK Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN LAC,OAK 17.0 AFC AFC NaN NaN NaN AFC None [email protected]
237 2017 TB NO Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN NO,TB 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
216 2017 SEA ARI Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN ARI,SEA 17.0 NFC NFC NaN NaN NaN NFC None [email protected]
244 2017 TEN JAC Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN JAC,TEN 17.0 AFC AFC NaN NaN NaN AFC None [email protected]
144 2017 MIA BUF Week 17 NaN NaN NaN NaN NaN NaN 1514737500000 4:25PM ET Regular False NaN NaN NaN NaN NaN NaN BUF,MIA 17.0 AFC AFC NaN NaN NaN AFC None [email protected]

524 rows × 32 columns

In [39]:
matchups.sort_values(by="game_datetime").groupby("season").agg(len)
Out[39]:
home_name away_name full_game_type away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime game_location game_outcome game_time game_type had_overtime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards teams week_num away_conference conference_play home_line over_under handicap_difference home_conference line_moves matchup_id
season
2016 268 268 268 268.0 268.0 268.0 268.0 268.0 268.0 268 268 268 268 268 268 268.0 268.0 268.0 268.0 268.0 268.0 268 268.0 268 268 268.0 268.0 268.0 268 268 268
2017 256 256 256 256.0 256.0 256.0 256.0 256.0 256.0 256 256 256 256 256 256 256.0 256.0 256.0 256.0 256.0 256.0 256 256.0 256 256 256.0 256.0 256.0 256 256 256

Lets Predict the Game Winner

We'll use data from 2011 season and on since we have line information for these games

In [41]:
matchups=matchups[(matchups['game_type'].isin(['Regular','Playoffs']))]
#drop any unscored/unplayed games
matchups=matchups.dropna(subset=["home_pts","away_pts"])
matchups=matchups.sort_values(by=["game_datetime"])
matchups.tail(20)
Out[41]:
season home_name away_name full_game_type away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime game_location game_outcome game_time game_type had_overtime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards teams week_num away_conference conference_play home_line over_under handicap_difference home_conference line_moves matchup_id
214 2017 PIT NE Week 15 21.0 283.0 27.0 77.0 1.0 360.0 1513527900000 L 4:25PM ET Regular False 21.0 270.0 24.0 143.0 1.0 413.0 NE,PIT 15.0 AFC AFC 2.5 52.5 1.5 AFC None [email protected]
231 2017 SF TEN Week 15 23.0 238.0 23.0 90.0 1.0 328.0 1513527900000 W 4:25PM ET Regular False 23.0 363.0 25.0 51.0 NaN 414.0 SF,TEN 15.0 AFC Inter -2.5 44.5 2.5 NFC None [email protected]
193 2017 OAK DAL Week 15 21.0 204.0 20.0 126.0 2.0 330.0 1513542600000 L 8:30PM ET Regular False 19.0 171.0 17.0 122.0 1.0 293.0 DAL,OAK 15.0 NFC Inter 3.0 46.5 1.5 AFC None [email protected]
232 2017 TB ATL Week 15 24.0 209.0 24.0 201.0 NaN 410.0 1513629000000 L 8:30PM ET Regular False 22.0 289.0 21.0 84.0 1.0 373.0 ATL,TB 15.0 NFC NFC 7.0 49.5 3.0 NFC None [email protected]
21 2017 BAL IND Week 16 17.0 203.0 16.0 93.0 NaN 296.0 1514046600000 W 4:30PM ET Regular False 23.0 220.0 23.0 103.0 NaN 323.0 BAL,IND 16.0 AFC AFC -13.5 41.0 1.0 AFC None [email protected]
92 2017 GB MIN Week 16 15.0 124.0 16.0 112.0 NaN 236.0 1514061000000 L 8:30PM ET Regular False 12.0 126.0 0.0 113.0 2.0 239.0 GB,MIN 16.0 NFC NFC 9.0 41.0 0.0 NFC None [email protected]
39 2017 CAR TB Week 16 20.0 326.0 19.0 66.0 3.0 392.0 1514120400000 W 1:00PM ET Regular False 20.0 140.0 22.0 115.0 1.0 255.0 CAR,TB 16.0 NFC NFC -10.0 47.0 1.0 NFC None [email protected]
123 2017 KC MIA Week 16 15.0 286.0 13.0 59.0 2.0 345.0 1514120400000 W 1:00PM ET Regular False 23.0 301.0 29.0 103.0 NaN 404.0 KC,MIA 16.0 AFC AFC -11.0 43.5 1.0 AFC None [email protected]
161 2017 NE BUF Week 16 17.0 242.0 16.0 84.0 NaN 326.0 1514120400000 W 1:00PM ET Regular False 28.0 218.0 37.0 193.0 1.0 411.0 BUF,NE 16.0 AFC AFC -11.0 47.5 0.5 AFC None [email protected]
245 2017 TEN LAR Week 16 20.0 286.0 27.0 116.0 1.0 402.0 1514120400000 L 1:00PM ET Regular False 19.0 269.0 23.0 97.0 1.0 366.0 LAR,TEN 16.0 NFC Inter 5.5 46.5 1.0 AFC None [email protected]
189 2017 NYJ LAC Week 16 21.0 290.0 14.0 89.0 NaN 379.0 1514120400000 L 1:00PM ET Regular False 13.0 98.0 7.0 197.0 3.0 295.0 LAC,NYJ 16.0 AFC AFC 7.0 42.5 0.5 AFC None [email protected]
42 2017 CHI CLE Week 16 12.0 178.0 3.0 75.0 3.0 253.0 1514120400000 W 1:00PM ET Regular False 16.0 161.0 20.0 97.0 NaN 258.0 CHI,CLE 16.0 AFC Inter -6.0 37.0 1.0 NFC None [email protected]
168 2017 NO ATL Week 16 15.0 264.0 13.0 67.0 2.0 331.0 1514120400000 W 1:00PM ET Regular False 15.0 229.0 23.0 86.0 1.0 315.0 ATL,NO 16.0 NFC NFC -5.5 52.0 0.5 NFC None [email protected]
52 2017 CIN DET Week 16 15.0 189.0 17.0 87.0 1.0 276.0 1514120400000 W 1:00PM ET Regular False 28.0 222.0 26.0 142.0 1.0 364.0 CIN,DET 16.0 NFC Inter 3.0 43.5 0.0 AFC None [email protected]
250 2017 WAS DEN Week 16 18.0 171.0 11.0 159.0 3.0 330.0 1514120400000 W 1:00PM ET Regular False 19.0 299.0 27.0 87.0 2.0 386.0 DEN,WAS 16.0 AFC Inter -3.0 39.5 1.0 NFC None [email protected]
227 2017 SF JAC Week 16 32.0 380.0 33.0 92.0 3.0 472.0 1514131500000 W 4:05PM ET Regular False 26.0 238.0 44.0 131.0 1.0 369.0 JAC,SF 16.0 AFC Inter 4.0 43.0 1.5 NFC None [email protected]
70 2017 DAL SEA Week 16 15.0 60.0 21.0 76.0 NaN 136.0 1514132700000 L 4:25PM ET Regular False 21.0 155.0 12.0 128.0 3.0 283.0 DAL,SEA 16.0 NFC NFC -4.5 47.5 1.5 NFC None [email protected]
3 2017 ARI NYG Week 16 12.0 250.0 0.0 43.0 3.0 293.0 1514132700000 W 4:25PM ET Regular False 19.0 215.0 23.0 74.0 2.0 289.0 ARI,NYG 16.0 NFC NFC -3.0 39.0 2.0 NFC None [email protected]
101 2017 HOU PIT Week 16 20.0 226.0 34.0 104.0 NaN 330.0 1514219400000 L 4:30PM ET Regular False 13.0 51.0 6.0 176.0 2.0 227.0 HOU,PIT 16.0 AFC AFC 9.0 45.5 1.0 AFC None [email protected]
205 2017 PHI OAK Week 16 13.0 137.0 10.0 137.0 5.0 274.0 1514233800000 W 8:30PM ET Regular False 12.0 138.0 19.0 78.0 2.0 216.0 OAK,PHI 16.0 AFC Inter -10.0 46.0 2.0 NFC None [email protected]
In [42]:
matchups.describe()
Out[42]:
season away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards week_num home_line over_under handicap_difference
count 507.000000 507.000000 507.000000 507.000000 507.000000 370.000000 507.000000 5.070000e+02 507.000000 507.000000 507.000000 507.000000 359.000000 507.000000 507.000000 507.000000 507.000000 507.000000
mean 2016.473373 19.136095 229.378698 21.043393 104.074951 1.943243 333.453649 1.493335e+12 20.558185 239.579882 23.737673 114.165680 1.874652 353.745562 8.658777 -2.074951 45.075937 1.345168
std 0.499784 4.901321 73.878857 9.448743 49.056053 1.089253 82.230752 1.574378e+10 4.751749 72.933853 10.041624 48.685043 1.021226 79.238797 4.928735 5.483023 4.045035 1.361233
min 2016.000000 6.000000 6.000000 0.000000 6.000000 1.000000 100.000000 1.473367e+12 5.000000 51.000000 0.000000 14.000000 1.000000 123.000000 1.000000 -17.000000 36.500000 0.000000
25% 2016.000000 16.000000 178.000000 14.000000 67.000000 1.000000 279.000000 1.478443e+12 17.000000 189.000000 17.000000 80.000000 1.000000 295.500000 4.000000 -6.000000 42.000000 0.500000
50% 2016.000000 19.000000 228.000000 20.000000 95.000000 2.000000 333.000000 1.483288e+12 20.000000 238.000000 24.000000 106.000000 2.000000 349.000000 9.000000 -3.000000 44.500000 1.000000
75% 2017.000000 22.000000 281.500000 27.000000 134.000000 2.000000 387.000000 1.509282e+12 24.000000 287.000000 30.000000 142.000000 2.000000 408.000000 13.000000 2.500000 47.500000 2.000000
max 2017.000000 33.000000 498.000000 51.000000 298.000000 8.000000 626.000000 1.514234e+12 37.000000 486.000000 57.000000 313.000000 5.000000 589.000000 17.000000 11.500000 60.000000 9.000000

We will use 538's Elo Algorithm

The features for this model are:

  • game location (home, away, neutral)
  • team score
  • opponent score
In [43]:
matchups['is_neutral']=matchups['game_location'].apply(lambda x: True if x=="N" else False)
In [44]:
from collections import defaultdict
def silverK(MOV, elo_diff):
    K_0=20
    multiplier=np.log(abs(MOV)+1)*(2.2/((elo_diff)*.001+2.2))
    return K_0*multiplier,K_0*multiplier

def silverS(home_score, away_score):
    S_home,S_away=0,0
    if home_score>away_score:
        S_home=1
    elif away_score>home_score:
        S_away=1
    else:
        S_home,S_away=.5,.5
    return S_home,S_away

def silver_elo_update(home_score, away_score, home_rating, away_rating, isNeutral):
    HOME_AD=65.
    if not isNeutral:
        pass#home_rating+=HOME_AD
    E_home = elo_prediction(home_rating,away_rating)
    E_away=1-E_home
    elo_diff=home_rating-away_rating
    MOV=home_score-away_score
    
    S_home,S_away = silverS(home_score,away_score)
    if home_score>=away_score:
        elo_winner=home_rating
        elo_loser=away_rating
    else:
        elo_loser=home_rating
        elo_winner=away_rating

    K_home,K_away =  silverK(MOV,elo_winner-elo_loser)
        
    return K_home*(S_home-E_home),K_away*(S_away-E_away)

def elo_prediction(home_rating,away_rating):
    E_home = 1./(1 + 10 ** ((away_rating - home_rating) / (400.)))
    return E_home

def score_prediction(home_rating,away_rating):
    return (home_rating-away_rating)/25.
class HeadToHeadModel(object):
    def __init__(self, events, update_function, prediction_function=None):
        self.update_function=update_function
        self.events=events
        self.ratings=defaultdict(lambda: 1505)
        self.prediction_function = prediction_function
        self.predictions = []
        self.curr_season=defaultdict(lambda: self.events[0][1]['season'])

    def compute_elo_ratings(self):
        for idx, event in self.events:
            new_year=event['season']
            label_i=event['home_name']
            label_j=event['away_name']
            
            if self.curr_season[label_i]!=new_year:
                self.curr_season[label_i]=new_year
                self.ratings[label_i]=self.ratings[label_i]*1/3+1505.*.75
            elif self.curr_season[label_j]!=new_year:
                self.curr_season[label_j]=new_year
                self.ratings[label_j]=self.ratings[label_j]*.75+1505.*.25
            #todo change below to just use event
            update=self.update_function(event['home_pts'],event['away_pts'], self.ratings[label_i], self.ratings[label_j], event['is_neutral'])
            self.ratings[label_i]+=update[0]
            self.ratings[label_j]+=update[1]
            

    def power_rankings(self):
        from operator import itemgetter
        #sort dictionary by value to get ascending list of teams
        power_rankings = sorted(self.ratings.items(), key=itemgetter(1), reverse=True)
        power = []
        #Make the 0-th team 1st
        for i, x in enumerate(power_rankings):
            power.append((i + 1, x))
        return power
m=HeadToHeadModel(list(matchups.iterrows()), silver_elo_update, elo_prediction)
m.compute_elo_ratings()
m.power_rankings()
Out[44]:
[(1, ('NE', 1806.2242573188869)),
 (2, ('PHI', 1792.6452135564753)),
 (3, ('NO', 1787.9023382585831)),
 (4, ('MIN', 1776.1899411706868)),
 (5, ('PIT', 1764.1860335994625)),
 (6, ('LAR', 1752.3472338578288)),
 (7, ('CAR', 1721.6010986323781)),
 (8, ('ATL', 1712.6057392284827)),
 (9, ('BAL', 1708.0830352371711)),
 (10, ('KC', 1701.5410154038477)),
 (11, ('SEA', 1700.7222410483578)),
 (12, ('JAC', 1686.923804818282)),
 (13, ('LAC', 1677.9601149524478)),
 (14, ('DAL', 1652.7571320137151)),
 (15, ('DET', 1619.2894998012846)),
 (16, ('BUF', 1614.5297022791335)),
 (17, ('WAS', 1606.3205507025741)),
 (18, ('TEN', 1605.7120729931769)),
 (19, ('GB', 1591.2914627291802)),
 (20, ('ARI', 1585.8295026335654)),
 (21, ('CHI', 1563.7927380700064)),
 (22, ('MIA', 1560.0262753096611)),
 (23, ('SF', 1559.138656280099)),
 (24, ('OAK', 1559.0911884068314)),
 (25, ('CIN', 1558.1544925397311)),
 (26, ('NYJ', 1542.1580233534039)),
 (27, ('TB', 1535.4824356996987)),
 (28, ('DEN', 1530.5459957956239)),
 (29, ('HOU', 1496.8524064385265)),
 (30, ('IND', 1473.2985494125467)),
 (31, ('NYG', 1461.1059097051573)),
 (32, ('CLE', 1372.3462344241079))]

What Good are Rankings without Testing?

Elo is a robust algorithm for strength of schedule based ratings. But what good are rankings without testing them?!?. Let's test our rankings at picking the winners and against the spread.

In [45]:
#Let's redo our elo model with the addition of elo ranking output
class HeadToHeadModel(object):
    def __init__(self, events, update_function, prediction_function=None):
        self.update_function=update_function
        self.events=events
        self.ratings=defaultdict(lambda: 1505)
        self.prediction_function = prediction_function
        self.predictions = []
        self.curr_season=defaultdict(lambda: self.events[0][1]['season'])
        self.elos=[]
    def compute_elo_ratings(self):
        for idx, event in self.events:
            new_year=event['season']
            label_i=event['home_name']
            label_j=event['away_name']
            
            if self.curr_season[label_i]!=new_year:
                self.curr_season[label_i]=new_year
                self.ratings[label_i]=self.ratings[label_i]*1/3+1505.*2/3
            elif self.curr_season[label_j]!=new_year:
                self.curr_season[label_j]=new_year
                self.ratings[label_j]=self.ratings[label_j]*.75+1505.*.25
            #todo change below to just use event
            update=self.update_function(event['home_pts'],event['away_pts'], self.ratings[label_i], self.ratings[label_j], event['is_neutral'])
            self.elos.append({
                "home_elo":self.ratings[label_i],
                "away_elo":self.ratings[label_j],
                "index": idx,
                
                             })
            self.ratings[label_i]+=update[0]
            self.ratings[label_j]+=update[1]
            

            

    def power_rankings(self):
        from operator import itemgetter
        #sort dictionary by value to get ascending list of teams
        power_rankings = sorted(self.ratings.items(), key=itemgetter(1), reverse=True)
        power = []
        #Make the 0-th team 1st
        for i, x in enumerate(power_rankings):
            power.append((i + 1, x))
        return power
m=HeadToHeadModel(list(matchups.iterrows()), silver_elo_update, elo_prediction)
m.compute_elo_ratings()
m.power_rankings()
Out[45]:
[(1, ('NE', 1686.8247048770047)),
 (2, ('PHI', 1667.1115127144103)),
 (3, ('NO', 1663.7663170125279)),
 (4, ('MIN', 1657.0373492036902)),
 (5, ('PIT', 1641.2494007990233)),
 (6, ('LAR', 1633.3470749369756)),
 (7, ('CAR', 1596.3033075113656)),
 (8, ('ATL', 1588.8871296442694)),
 (9, ('BAL', 1583.8110481968999)),
 (10, ('KC', 1580.2516595946611)),
 (11, ('SEA', 1577.1386861899484)),
 (12, ('JAC', 1563.3842300893075)),
 (13, ('LAC', 1556.7095643009777)),
 (14, ('DAL', 1533.220824137682)),
 (15, ('DET', 1499.5159388812194)),
 (16, ('BUF', 1493.8212705903575)),
 (17, ('WAS', 1487.5994547778589)),
 (18, ('TEN', 1486.224416917114)),
 (19, ('GB', 1471.0810101436889)),
 (20, ('ARI', 1461.9538668485861)),
 (21, ('CHI', 1443.037416783995)),
 (22, ('SF', 1441.2024552207763)),
 (23, ('CIN', 1440.433702226368)),
 (24, ('OAK', 1438.2056708463833)),
 (25, ('MIA', 1434.7143210093607)),
 (26, ('NYJ', 1420.883821694438)),
 (27, ('TB', 1411.6113469222712)),
 (28, ('DEN', 1409.050633257574)),
 (29, ('HOU', 1378.6692624072318)),
 (30, ('IND', 1350.2651435422003)),
 (31, ('NYG', 1337.5988691604612)),
 (32, ('CLE', 1252.143123892517))]
In [46]:
elo=pd.DataFrame(m.elos).set_index("index").join(matchups)
elo.head(2)
Out[46]:
away_elo home_elo season home_name away_name full_game_type away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime game_location game_outcome game_time game_type had_overtime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards teams week_num away_conference conference_play home_line over_under handicap_difference home_conference line_moves matchup_id is_neutral
0 1511.230702 1499.093278 2016 ARI LAR Week 4 12.0 247.0 17.0 41.0 1.0 288.0 1475421900000 None L 3:25PM ET Regular False 26.0 302.0 13.0 118.0 5.0 420.0 ARI,LAR 4.0 NFC NFC -10.0 43.5 1.5 NFC 29 [email protected] False
0 1511.230702 1499.093278 2017 ARI DAL Week 3 15.0 174.0 28.0 99.0 NaN 273.0 1506371400000 L 8:30PM ET Regular False 22.0 283.0 17.0 49.0 NaN 332.0 ARI,DAL 3.0 NFC NFC 3.0 46.5 0.5 NFC None [email protected] False
In [47]:
elo.describe()
Out[47]:
away_elo home_elo season away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards week_num home_line over_under handicap_difference
count 985.000000 985.000000 985.000000 985.000000 985.000000 985.000000 985.000000 721.000000 985.000000 9.850000e+02 985.000000 985.000000 985.000000 985.000000 696.000000 985.000000 985.000000 985.000000 985.000000 985.000000
mean 1510.049548 1504.588524 2016.486294 19.100508 228.398985 20.906599 104.049746 1.951456 332.448731 1.493761e+12 20.556345 238.825381 23.730964 114.300508 1.876437 353.125888 8.697462 -2.094924 45.047716 1.345685
std 70.928885 70.719279 0.500066 4.888082 73.511587 9.423710 49.070678 1.093733 82.067378 1.573130e+10 4.745679 72.307271 10.043488 48.424351 1.024431 78.569957 4.903525 5.508989 4.052157 1.364118
min 1267.471058 1275.488362 2016.000000 6.000000 6.000000 0.000000 6.000000 1.000000 100.000000 1.473367e+12 5.000000 51.000000 0.000000 14.000000 1.000000 123.000000 1.000000 -17.000000 36.500000 0.000000
25% 1475.308375 1467.448007 2016.000000 16.000000 178.000000 14.000000 67.000000 1.000000 278.000000 1.478464e+12 17.000000 189.000000 17.000000 80.000000 1.000000 295.000000 4.000000 -6.000000 42.000000 0.500000
50% 1508.145683 1505.000000 2016.000000 19.000000 227.000000 20.000000 95.000000 2.000000 331.000000 1.484412e+12 20.000000 237.000000 24.000000 108.000000 2.000000 349.000000 9.000000 -3.000000 44.500000 1.000000
75% 1553.839191 1546.633178 2017.000000 22.000000 279.000000 27.000000 134.000000 2.000000 386.000000 1.509294e+12 24.000000 286.000000 30.000000 142.000000 2.000000 408.000000 13.000000 2.500000 47.500000 2.000000
max 1695.787498 1731.340632 2017.000000 33.000000 498.000000 51.000000 298.000000 8.000000 626.000000 1.514234e+12 37.000000 486.000000 57.000000 313.000000 5.000000 589.000000 17.000000 11.500000 60.000000 9.000000

Prediction and Testing

Since we are seeding our rankings starting with the 2011 season it would be unfair to our model to expect to it be right in the beginning. For our purposes, we'll look at performance during the whole season and during weeks 4-15 to allow rating to settle and not lose when a team rests their players in weeks 16 and 17

In [48]:
def predict_home_margin(row):
    #how many points the home team is expected to win bye
    return score_prediction(row['home_elo']+65, row['away_elo'])
elo['predicted_home_margin']=elo.apply(predict_home_margin,axis=1)
In [49]:
elo.head(1)
Out[49]:
away_elo home_elo season home_name away_name full_game_type away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime game_location game_outcome game_time game_type had_overtime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards teams week_num away_conference conference_play home_line over_under handicap_difference home_conference line_moves matchup_id is_neutral predicted_home_margin
0 1511.230702 1499.093278 2016 ARI LAR Week 4 12.0 247.0 17.0 41.0 1.0 288.0 1475421900000 None L 3:25PM ET Regular False 26.0 302.0 13.0 118.0 5.0 420.0 ARI,LAR 4.0 NFC NFC -10.0 43.5 1.5 NFC 29 [email protected] False 2.114503
In [50]:
#1 is home winning
elo['predicted_winner']=elo.apply(lambda row: 1 if row['predicted_home_margin']>=0 else 0, axis=1)
In [51]:
elo['vegas_predicted_winner']=elo.apply(lambda row: 0 if row['home_line']>0 else 1, axis=1)#line assigns negative value
elo['winner']=elo.apply(lambda row: 1 if row['home_pts']>row['away_pts'] else 0, axis=1)#does not account for ties

Since this is a binary decision problem, either win or lose our bet, we will use confusion matrices to judge the performance of our model

In [52]:
from sklearn.metrics import confusion_matrix
def print_confusion_matrix(y_true, y_pred):
    conf_matrix=confusion_matrix(y_true, y_pred)
    success_rate=np.trace(conf_matrix)/np.sum(conf_matrix)
    print(success_rate)
    print(conf_matrix)
test_period=elo[(elo['week_num'].between(4,15,inclusive=True))&(elo['game_type']=='Regular')&(elo['season'].between(2012,2016))]
In [53]:
y_true='winner'
y_pred='predicted_winner'
print_confusion_matrix(test_period['winner'],test_period['predicted_winner'])
0.607142857143
[[ 48  98]
 [ 34 156]]
In [54]:
y_true='winner'
y_pred='vegas_predicted_winner'
print_confusion_matrix(test_period['winner'],test_period[y_pred])
0.639880952381
[[ 67  79]
 [ 42 148]]

Vegas is pretty good huh

We were good at predicting winners but vegas is slightly better... The highest we've gotten with a pure elo approach using more years of training is 67%. Let's look at our against the spread performance and see if we can find an edge.

In [55]:
elo.head(1)
Out[55]:
away_elo home_elo season home_name away_name full_game_type away_first_downs away_pass_yds away_pts away_rush_yds away_to away_yards game_datetime game_location game_outcome game_time game_type had_overtime home_first_downs home_pass_yds home_pts home_rush_yds home_to home_yards teams week_num away_conference conference_play home_line over_under handicap_difference home_conference line_moves matchup_id is_neutral predicted_home_margin predicted_winner vegas_predicted_winner winner
0 1511.230702 1499.093278 2016 ARI LAR Week 4 12.0 247.0 17.0 41.0 1.0 288.0 1475421900000 None L 3:25PM ET Regular False 26.0 302.0 13.0 118.0 5.0 420.0 ARI,LAR 4.0 NFC NFC -10.0 43.5 1.5 NFC 29 [email protected] False 2.114503 1 1 0
In [56]:
elo['home_margin']=elo.apply(lambda x: x['home_pts']-x['away_pts'], axis=1)
elo['home_bet']=elo.apply(lambda x: (x['predicted_home_margin']+x['home_line'])<0,axis=1)
elo['home_covers']=elo.apply(lambda x: (x['home_margin']+x['home_line'])>0,axis=1)
test_period=elo[(elo['week_num'].between(4,15,inclusive=True))&(elo['game_type']=='Regular')&(elo['season'].between(2012,2016))]
y_true='home_covers'
y_pred='home_bet'
print_confusion_matrix(test_period[y_true],test_period[y_pred])
0.491071428571
[[95 86]
 [85 70]]

That won't win for us. Under standard wagering strategies you need to be better than 52.4%

Performance each season during test period

In [57]:
elo['predicted_winner_right']=elo.apply(lambda x: 1 if x['winner']==x['predicted_winner'] else 0, axis=1)
elo['vegas_favored_wins']=elo.apply(lambda x: 1 if x['winner']==x['vegas_predicted_winner'] else 0, axis=1)
elo['ats_right']=elo.apply(lambda x: 1 if x['home_covers']==x['home_bet'] else 0, axis=1)
test_period=elo[(elo['week_num'].between(4,15,inclusive=True))&(elo['game_type']=='Regular')&(elo['season'].between(2012,2016))]
In [58]:
test_period[['predicted_winner_right','vegas_favored_wins','ats_right','season']].groupby("season").agg([np.mean, np.var])
Out[58]:
predicted_winner_right vegas_favored_wins ats_right
mean var mean var mean var
season
2016 0.607143 0.239232 0.639881 0.231121 0.491071 0.250666

Results

ATS is chancey but performance against vegas for winners is pretty close.

In [59]:
# for all years in our sample we do about just as good as vegas but no better
test_period[['predicted_winner_right','vegas_favored_wins','ats_right']].groupby(lambda x: 0).agg([np.mean, np.var])
Out[59]:
predicted_winner_right vegas_favored_wins ats_right
mean var mean var mean var
0 0.607143 0.239232 0.639881 0.231121 0.491071 0.250666

Next Steps

So now you've seen how to build a strength of schedule (SOS) model. You might think you can make it better, and you probably can, but it would be a better use of your team to create orthogonal features not based on SOS. Something like how a team plays using:

  • offensive scheme: e.g. vertical "Air Coryell" offense
  • base_defense: e.g. 3-4
  • running yards (and if they are a great running team)

Check out our team_season_log table and our team_game_logs to get the data you need to build it.

In [60]:
team_season_log = api.get_dataframe("team_season_log")
team_season_log.head()
Out[60]:
coaches league_name losses playoff_result points points_diff points_opp rank_def_pts rank_def_yds rank_off_pts rank_off_yds rank_points_diff rank_takeaway_giveaway rank_yds_diff historical_team_name team_name teams_in_league ties wins season defensive_coordinator offensive_coordinator offensive_scheme base_defense stadium
0 Arians NFL 8 269 -68 337 17 6 25 22 25 22 14 Arizona Cardinals ARI 32 0 7 2017 James Bettcher Harold Goodwin Air Coryell 3-4 University of Phoenix
1 Quinn NFL 6 331 26 305 11 10 15 9 13 24 6 Atlanta Falcons ATL 32 0 9 2017 Marquand Manuel Steve Sarkisian West Coast 4-3
2 Harbaugh NFL 6 368 96 272 4 9 9 26 7 1 20 Baltimore Ravens BAL 32 0 9 2017 Dean Pees Marty Mornhinweg West Coast 3-4 M&T; Bank
3 McDermott NFL 7 280 -63 343 18 25 24 29 22 6 29 Buffalo Bills BUF 32 0 8 2017 Leslie Frazier Rick Dennison West Coast 4-3 New Era Field
4 Rivera NFL 4 353 48 305 11 7 11 17 11 14 11 Carolina Panthers CAR 32 0 11 2017 Steve Wilks Mike Shula Erhardt-Perkins 4-3 Bank of America
In [61]:
team_game_logs = api.get_dataframe("team_game_logs")
team_game_logs.head()
Out[61]:
season team_name full_game_type opp_first_downs team_first_downs game_location game_outcome game_time opp_pass_yds team_pass_yds opp_pts team_pts opp_rush_yds team_rush_yds opp_to team_to opp_yards team_yards game_type week_num opp_name had_overtime game_datetime
0 2017 ARI Week 1 19 24 @ L 1:00PM ET 285 263 35 23 82 45 1 4 367 308 Regular 1 DET False 1505048400000
1 2017 ATL Week 1 20 18 @ W 1:00PM ET 176 308 17 23 125 64 301 372 Regular 1 CHI False 1505048400000
2 2017 BAL Week 1 14 17 @ W 1:00PM ET 144 111 0 20 77 157 5 1 221 268 Regular 1 CIN False 1505048400000
3 2017 BUF Week 1 11 23 W 1:00PM ET 176 218 12 21 38 190 2 1 214 408 Regular 1 NYJ False 1505048400000
4 2017 CAR Week 1 13 20 @ W 4:25PM ET 166 171 3 23 51 116 2 2 217 287 Regular 1 SF False 1505060700000