Repeating the calculations explained here in Python using pybaseball.

In [1]:
%matplotlib inline
In [2]:
import os

import numpy as np
import pandas as pd
import pybaseball
In [3]:
# the test season from Baseball Reference/Total Baseball
# (swap in one of the commented alternatives below to analyze another team/season)
SEASON = 1982
TEAM = "ATL"

# the 1982 Phillies
#SEASON = 1982
#TEAM = "PHI"

# the 2018 Phillies
#SEASON = 2018
#TEAM = "PHI"

# local CSV cache of the downloaded schedule, one file per team/season
DATA_PATH = f'./data/{TEAM}_{SEASON}.csv'

# the reference-value checks further down only run for the 1982 ATL test case
VERIFY = (SEASON == 1982) and (TEAM == "ATL")
In [4]:
# Cache the team's schedule/results locally so it isn't downloaded again
# every time the notebook is rerun (if possible)
# see https://github.com/jldbc/pybaseball/issues/29

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    df = pybaseball.schedule_and_record(season=SEASON, team=TEAM)
    # a fresh checkout may not have the cache directory yet; without this
    # the to_csv call below fails and nothing is cached
    os.makedirs(os.path.dirname(DATA_PATH), exist_ok=True)
    df.to_csv(DATA_PATH, index=False)

# lower-case the column names so later cells can refer to them uniformly
df = df.rename(columns=str.lower)
In [5]:
# preview the first few games to see which columns are available
df.head()
Out[5]:
date tm home_away opp w/l r ra inn w-l rank gb win loss save time d/n attendance streak orig. scheduled
0 Tuesday, Apr 6 ATL @ SDP W 1.0 0.0 9.0 1-0 1.0 Tied Mahler Eichelberger None 2:30 N 30188.0 1 NaN
1 Wednesday, Apr 7 ATL @ SDP W 6.0 4.0 9.0 2-0 1.0 Tied Walk Montefusco Garber 2:40 D 16684.0 2 NaN
2 Friday, Apr 9 ATL Home HOU W 6.0 2.0 9.0 3-0 1.0 up 1.0 Boggs Sutton Hrabosky 2:43 N 33133.0 3 NaN
3 Saturday, Apr 10 ATL Home HOU W 8.0 6.0 9.0 4-0 1.0 up 1.0 McWilliams Ruhle Camp 2:51 N 10885.0 4 NaN
4 Sunday, Apr 11 ATL Home HOU W 5.0 0.0 9.0 5-0 1.0 up 1.5 Mahler Ryan None 2:14 D 11322.0 5 NaN
In [6]:
# sanity check: how many games fall under each home/away label
df['home_away'].value_counts()
Out[6]:
Home    81
@       81
Name: home_away, dtype: int64
In [7]:
# list the distinct result codes; besides plain "W"/"L" there are suffixed variants such as "W-wo"
df['w/l'].value_counts()
Out[7]:
W       78
L       64
W-wo    11
L-wo     9
Name: w/l, dtype: int64
In [8]:
# boolean flag: True for games labelled "Home", False otherwise
df['is_home'] = df['home_away'].eq("Home")
# any result code beginning with "W" (including suffixed codes like "W-wo") counts as a win
df['is_win'] = df['w/l'].str.startswith("W")
In [9]:
# Total wins, runs scored (r) and runs allowed (ra), split into road (False)
# and home (True) games.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of column names is deprecated and raises in newer pandas releases.
home_away_df = (df.groupby('is_home')
                  [['is_win', 'r', 'ra']]
                  .sum()
                  .rename(columns={'is_win': 'wins'}))
# combined runs by both teams in those games
home_away_df['rtot'] = home_away_df[['r', 'ra']].sum(axis=1)
# games played per split; value_counts is aligned to the frame on the is_home index
home_away_df['gp'] = df['is_home'].value_counts()
In [10]:
# show the home/away totals (index: is_home)
home_away_df
Out[10]:
wins r ra rtot gp
is_home
False 47.0 351.0 315.0 666.0 81
True 42.0 388.0 387.0 775.0 81
In [11]:
# winning percentage for road (False) and home (True) games
home_away_pct = home_away_df['wins'] / home_away_df['gp']
In [12]:
# show the two winning percentages
home_away_pct
Out[12]:
is_home
False    0.580247
True     0.518519
dtype: float64
In [13]:
# IPC = (18.5 - home win pct) / (18.5 - road loss pct)
# NOTE(review): 18.5 is taken as-is from the reference formula; presumably it is the
# number of half-innings per game when the home team always bats in the ninth — confirm.
IPC = (18.5 - home_away_pct[True]) / (18.5 - (1 - home_away_pct[False]))
In [14]:
def verify_agreement_with_total_baseball(val, true_val, name, atol=1e-3):
    """Assert that a computed value matches a reference value, then report it.

    Parameters
    ----------
    val : computed value to check.
    true_val : reference value to compare against.
    name : label used in the printed confirmation and in the failure message.
    atol : absolute tolerance passed to ``np.allclose`` (its default relative
        tolerance still applies on top of this).

    Raises
    ------
    AssertionError
        If ``val`` and ``true_val`` are not close; the message states both
        values so a failing check can be diagnosed from the traceback alone.
    """
    assert np.allclose(val, true_val, atol=atol), (
        f"{name} = {val} does not agree with Total Baseball "
        f"(expected {true_val}, atol={atol})"
    )
    print(f"{name} agrees with Total Baseball")
In [15]:
# compare against the reference IPC; skipped unless the 1982 ATL test case is selected
if VERIFY:
    verify_agreement_with_total_baseball(IPC, 0.995, "IPC")
IPC agrees with Total Baseball
In [16]:
# combined runs (scored + allowed) per game, for road (False) and home (True) games
rpg = home_away_df['rtot'] / home_away_df['gp']
In [17]:
# show runs per game for each split
rpg
Out[17]:
is_home
False    8.222222
True     9.567901
dtype: float64
In [18]:
# ratio of home to road runs per game, divided by the IPC correction computed above
RFT = (rpg[True] / rpg[False]) / IPC
In [19]:
# compare against the reference RFT; skipped unless the 1982 ATL test case is selected
if VERIFY:
    verify_agreement_with_total_baseball(RFT, 1.170, "RFT")
RFT agrees with Total Baseball
In [20]:
# number of teams: every distinct opponent in the schedule, plus this team itself
# NOTE(review): if NT is meant to be the size of the team's league, opponents from
# outside the league (e.g. interleague games in later seasons) would inflate it — confirm.
NT = df['opp'].nunique() + 1
In [21]:
# OPC = NT / (NT - 1 + RFT); it equals 1 when RFT is 1
OPC = NT / (NT - 1 + RFT)
In [22]:
# compare against the reference OPC; skipped unless the 1982 ATL test case is selected
if VERIFY:
    verify_agreement_with_total_baseball(OPC, 0.986, "OPC")
OPC agrees with Total Baseball
In [23]:
# SF: the run factor RFT scaled by the OPC correction
SF = RFT * OPC
In [24]:
# compare against the reference SF; skipped unless the 1982 ATL test case is selected
if VERIFY:
    verify_agreement_with_total_baseball(SF, 1.154, "SF")
SF agrees with Total Baseball
In [25]:
# SF1 offsets SF's deviation from 1, spread evenly over the other NT - 1 teams
SF1 = 1 - (SF - 1) / (NT - 1)
In [26]:
# compare against the reference SF1; skipped unless the 1982 ATL test case is selected
if VERIFY:
    verify_agreement_with_total_baseball(SF1, 0.986, "SF1")
SF1 agrees with Total Baseball
In [27]:
# shown again for reference: the team ratings below are built from these home/away totals
home_away_df.head()
Out[27]:
wins r ra rtot gp
is_home
False 47.0 351.0 315.0 666.0 81
True 42.0 388.0 387.0 775.0 81
In [28]:
# Divisor used to normalize the team ratings computed below.
# NOTE(review): hard-coded, so it does not follow SEASON/TEAM. Presumably this is
# league total runs / league games for the 1982 test season — confirm, and
# update it before running the notebook for another team or season.
RAL = 7947 / 972
In [29]:
# Season run totals split by venue; the unpacking relies on the index order
# False (away) then True (home).
# NOTE(review): iterate_team_ratings rebinds these same names locally to per-game
# values, so these module-level totals do not appear to be used afterwards.
RAT, RHT = home_away_df['r']
OAT, OHT = home_away_df['ra']
In [30]:
def iterate_team_ratings(TPR, RAL, NT, home_away_df):
    """Perform one refinement pass of the team batting and pitching ratings.

    The batting rating is computed from the current pitching rating ``TPR``;
    the new pitching rating is then computed from that fresh batting rating.
    Reads the module-level ``SF`` and ``SF1`` factors.

    Parameters
    ----------
    TPR : current pitching rating estimate.
    RAL : divisor applied to both ratings.
    NT : number of teams.
    home_away_df : frame with ``r``, ``ra`` and ``gp`` columns whose two rows
        unpack in (away, home) order.

    Returns
    -------
    tuple ``(TBR, TPR_)``: the batting rating and the updated pitching rating.
    """
    # the tuple unpacking below depends on row order, so insist on a sorted index
    assert home_away_df.index.is_monotonic_increasing

    games = home_away_df['gp']
    away_scored, home_scored = home_away_df['r'] / games
    away_allowed, home_allowed = home_away_df['ra'] / games

    # runs scored per game, each venue adjusted by its factor
    adjusted_scored = away_scored / SF1 + home_scored / SF
    batting_rating = adjusted_scored * (1 + (TPR - 1) / (NT - 1)) / RAL

    # same adjustment for runs allowed, using the batting rating just computed
    adjusted_allowed = away_allowed / SF1 + home_allowed / SF
    pitching_rating = adjusted_allowed * (1 + (batting_rating - 1) / (NT - 1)) / RAL

    return batting_rating, pitching_rating

def calculate_team_ratings(RAL, NT, home_away_df, TPR=1, iter=4):
    """Iterate the team batting/pitching ratings a fixed number of times.

    Parameters
    ----------
    RAL, NT, home_away_df : passed through unchanged to ``iterate_team_ratings``.
    TPR : starting pitching rating for the first pass (default 1).
    iter : number of passes to run; must be at least 1.

    Returns
    -------
    tuple ``(TBR, TPR)`` from the final pass.

    Raises
    ------
    ValueError
        If ``iter`` is less than 1. With no pass there is no batting rating to
        return (previously this surfaced as an UnboundLocalError).
    """
    if iter < 1:
        raise ValueError(f"iter must be at least 1, got {iter}")

    for _ in range(iter):
        # each pass feeds the latest pitching rating back in
        TBR, TPR = iterate_team_ratings(TPR, RAL, NT, home_away_df)

    return TBR, TPR
In [31]:
# ratings after a single pass, kept separately for the comparison with the reference values
TBR_1iter, TPR_1iter = calculate_team_ratings(RAL, NT, home_away_df, iter=1)
In [32]:
# the single-pass ratings are checked with a looser tolerance (1e-2) than the default 1e-3
if VERIFY:
    verify_agreement_with_total_baseball(TBR_1iter, 1.044, "TBR", atol=1e-2)
    verify_agreement_with_total_baseball(TPR_1iter, 0.993, "TPR", atol=1e-2)
TBR agrees with Total Baseball
TPR agrees with Total Baseball
In [33]:
# final ratings using the default number of passes (iter=4)
TBR, TPR = calculate_team_ratings(RAL, NT, home_away_df)
In [34]:
# Both factors start from the mean of SF and SF1; note the cross-over:
# BPF is adjusted by the pitching rating (TPR) and PPF by the batting rating (TBR).
# (presumably BPF/PPF are the batting and pitching park factors — confirm naming)
BPF = (SF + SF1) / (2 * (1 + (TPR - 1) / (NT - 1)))
PPF = (SF + SF1) / (2 * (1 + (TBR - 1) / (NT - 1)))
In [35]:
# compare against the reference BPF; skipped unless the 1982 ATL test case is selected
# (PPF is computed above but has no corresponding check here)
if VERIFY:
    verify_agreement_with_total_baseball(BPF, 1.07, "BPF")
BPF agrees with Total Baseball