#!/usr/bin/env python
# coding: utf-8

# Repeating the calculations explained
# [here](https://www.baseball-reference.com/about/parkadjust.shtml)
# in Python using `pybaseball`.

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


from pathlib import Path

import numpy as np
import pandas as pd
import pybaseball


# In[3]:


# the test season from Baseball Reference/Total Baseball
SEASON = 1982
TEAM = "ATL"

# the 1982 Phillies
#SEASON = 1982
#TEAM = "PHI"

# the 2018 Phillies
#SEASON = 2018
#TEAM = "PHI"

DATA_PATH = f'./data/{TEAM}_{SEASON}.csv'
# The Total Baseball reference values checked below only apply to this
# season/team combination.
VERIFY = (SEASON == 1982) and (TEAM == "ATL")


# In[4]:


# don't re-download the schedule data every time the notebook is rerun
# (if possible)
# see https://github.com/jldbc/pybaseball/issues/29
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    df = pybaseball.schedule_and_record(season=SEASON, team=TEAM)
    # Make sure the cache directory exists, otherwise to_csv fails on a
    # fresh checkout after the download has already been done.
    Path(DATA_PATH).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(DATA_PATH, index=False)

df = df.rename(columns=str.lower)


# In[5]:


df.head()


# In[6]:


df['home_away'].value_counts()


# In[7]:


df['w/l'].value_counts()


# In[8]:


df['is_home'] = df['home_away'] == "Home"
df['is_win'] = df['w/l'].str.startswith("W")


# In[9]:


# Select the columns with a list: tuple-style selection on a GroupBy
# (``grouped['a', 'b']``) is rejected by current pandas.
home_away_df = (df.groupby('is_home')
                  [['is_win', 'r', 'ra']]
                  .sum()
                  .rename(columns={'is_win': 'wins'}))
# total runs (scored + allowed) in home and in away games
home_away_df['rtot'] = home_away_df[['r', 'ra']].sum(axis=1)
# games played; value_counts() is aligned on the False/True index
home_away_df['gp'] = df['is_home'].value_counts()


# In[10]:


home_away_df


# In[11]:


home_away_pct = home_away_df['wins'] / home_away_df['gp']


# In[12]:


home_away_pct


# In[13]:


# home winning percentage in the numerator, road losing percentage
# (1 - road winning percentage) in the denominator
IPC = ((18.5 - home_away_pct.loc[True])
       / (18.5 - (1 - home_away_pct.loc[False])))


# In[14]:


def verify_agreement_with_total_baseball(val, true_val, name, atol=1e-3):
    """Check a computed value against its published reference value.

    Parameters
    ----------
    val : float
        The value computed in this notebook.
    true_val : float
        The reference value to compare against.
    name : str
        Label used in the printed confirmation and the failure message.
    atol : float, optional
        Absolute tolerance forwarded to ``np.allclose`` (its default
        relative tolerance is left unchanged).

    Raises
    ------
    AssertionError
        If ``val`` and ``true_val`` are not close within the tolerance.
    """
    assert np.allclose(val, true_val, atol=atol), (
        f"{name} = {val} does not agree with the Total Baseball value "
        f"{true_val} (atol={atol})"
    )
    print(f"{name} agrees with Total Baseball")


# In[15]:


if VERIFY:
    verify_agreement_with_total_baseball(IPC, 0.995, "IPC")


# In[16]:


rpg = home_away_df['rtot'] / home_away_df['gp']


# In[17]:


rpg


# In[18]:


# ratio of total runs per game at home to total runs per game away,
# divided by IPC
RFT = (rpg.loc[True] / rpg.loc[False]) / IPC


# In[19]:


if VERIFY:
    verify_agreement_with_total_baseball(RFT, 1.170, "RFT")


# In[20]:


# NOTE(review): this counts every distinct opponent in the schedule plus
# the team itself; for seasons with interleague play that is presumably
# larger than the league size -- confirm before using other seasons.
NT = df['opp'].nunique() + 1


# In[21]:


OPC = NT / (NT - 1 + RFT)


# In[22]:


if VERIFY:
    verify_agreement_with_total_baseball(OPC, 0.986, "OPC")


# In[23]:


SF = RFT * OPC


# In[24]:


if VERIFY:
    verify_agreement_with_total_baseball(SF, 1.154, "SF")


# In[25]:


SF1 = 1 - (SF - 1) / (NT - 1)


# In[26]:


if VERIFY:
    verify_agreement_with_total_baseball(SF1, 0.986, "SF1")


# In[27]:


home_away_df.head()


# In[28]:


# NOTE(review): hard-coded constant; presumably league runs / league games
# for the 1982 reference season -- it does not follow SEASON/TEAM and must
# be updated by hand when those are changed.
RAL = 7947 / 972


# In[29]:


# Season totals (not per-game values) of runs scored / allowed, away and
# home.  Looked up by label so the result does not depend on row order.
# They are not referenced again in this section; the functions below
# recompute per-game values themselves.
RAT, RHT = home_away_df.loc[False, 'r'], home_away_df.loc[True, 'r']
OAT, OHT = home_away_df.loc[False, 'ra'], home_away_df.loc[True, 'ra']


# In[30]:


def iterate_team_ratings(TPR, RAL, NT, home_away_df):
    """Run one step of the batting/pitching rating iteration.

    Reads the module-level ``SF`` and ``SF1`` values computed above.

    Parameters
    ----------
    TPR : float
        Current value of the rating derived from runs allowed.
    RAL : float
        Run-rate constant both ratings are divided by.
    NT : int
        Number of teams.
    home_away_df : pandas.DataFrame
        Frame indexed by ``is_home`` (``False``/``True``) with columns
        ``'r'``, ``'ra'`` and ``'gp'``.

    Returns
    -------
    tuple of float
        ``(TBR, TPR_)``: the rating computed from runs scored per game
        (using the incoming ``TPR``) and the updated rating computed from
        runs allowed per game (using the new ``TBR``).

    Raises
    ------
    KeyError
        If ``home_away_df`` has no ``False`` or ``True`` row.
    """
    scored_per_game = home_away_df['r'] / home_away_df['gp']
    allowed_per_game = home_away_df['ra'] / home_away_df['gp']

    # Label-based lookups: away is the ``False`` row, home the ``True``
    # row, independent of the order of the rows in the frame.
    RAT, RHT = scored_per_game.loc[False], scored_per_game.loc[True]
    OAT, OHT = allowed_per_game.loc[False], allowed_per_game.loc[True]

    TBR = (RAT / SF1 + RHT / SF) * (1 + (TPR - 1) / (NT - 1)) / RAL
    TPR_ = (OAT / SF1 + OHT / SF) * (1 + (TBR - 1) / (NT - 1)) / RAL
    return TBR, TPR_


def calculate_team_ratings(RAL, NT, home_away_df, TPR=1, iter=4):
    """Apply ``iterate_team_ratings`` ``iter`` times and return the result.

    Parameters
    ----------
    RAL, NT, home_away_df
        Forwarded unchanged to ``iterate_team_ratings``.
    TPR : float, optional
        Starting value fed into the first iteration.
    iter : int, optional
        Number of iterations; must be at least 1.  (The name shadows the
        builtin but is kept because callers pass it by keyword.)

    Returns
    -------
    tuple of float
        ``(TBR, TPR)`` after the final iteration.

    Raises
    ------
    ValueError
        If ``iter`` is smaller than 1, since no rating would be computed.
    """
    if iter < 1:
        raise ValueError(f"iter must be at least 1, got {iter}")
    for _ in range(iter):
        TBR, TPR = iterate_team_ratings(TPR, RAL, NT, home_away_df)
    return TBR, TPR


# In[31]:


TBR_1iter, TPR_1iter = calculate_team_ratings(RAL, NT, home_away_df, iter=1)


# In[32]:


if VERIFY:
    verify_agreement_with_total_baseball(TBR_1iter, 1.044, "TBR", atol=1e-2)
    verify_agreement_with_total_baseball(TPR_1iter, 0.993, "TPR", atol=1e-2)


# In[33]:


TBR, TPR = calculate_team_ratings(RAL, NT, home_away_df)


# In[34]:


BPF = (SF + SF1) / (2 * (1 + (TPR - 1) / (NT - 1)))
PPF = (SF + SF1) / (2 * (1 + (TBR - 1) / (NT - 1)))


# In[35]:


if VERIFY:
    verify_agreement_with_total_baseball(BPF, 1.07, "BPF")