Copyright 2021-2023 Patrick Hall (jphall@gwu.edu)
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
DISCLAIMER: This notebook is not legal or compliance advice.
import os # for directory and file manipulation
import numpy as np # for basic array manipulation
import pandas as pd # for dataframe manipulation
import datetime # for timestamp
# for model eval
from sklearn.metrics import accuracy_score, f1_score, log_loss, mean_squared_error, roc_auc_score
# global constants
ROUND = 3      # generally, insane precision is not needed
SEED = 12345   # seed for better reproducibility

# set global random seed for better reproducibility
np.random.seed(SEED)

y_name = 'high_priced'      # known target column in key.csv
scores_dir = 'data/scores'  # directory holding key.csv plus one score CSV per model

# init score frame with known test y values
# (os.path.join is portable and safer than manual concatenation with os.sep)
scores_frame = pd.read_csv(os.path.join(scores_dir, 'key.csv'), index_col='Unnamed: 0')

# create random folds in reproducible way
np.random.seed(SEED)
scores_frame['fold'] = np.random.choice(5, scores_frame.shape[0])

# read in each score file in the directory as a new column
# column name is the file name without its '.csv' extension
for file in sorted(os.listdir(scores_dir)):
    if file != 'key.csv' and file.endswith('.csv'):
        scores_frame[file[:-4]] = pd.read_csv(os.path.join(scores_dir, file))['phat']

# sanity check
scores_frame
high_priced | fold | group1_rem_ebm | group2_rem_ebm | group2_rem_ebm2 | group3_rem_piml_EBM | group3_rem_piml_EBM2 | group5_rem_xgb2 | group8_rem_ebm | group9_rem_xgb | ph_rem_ebm | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 2 | 0.118787 | 0.080557 | 0.080557 | 0.920389 | 0.136749 | 0.078326 | 0.223846 | 0.081792 | 0.219429 |
1 | 0.0 | 1 | 0.084506 | 0.026001 | 0.026001 | 0.969301 | 0.053751 | 0.035825 | 0.053926 | 0.110702 | 0.053929 |
2 | 1.0 | 4 | 0.210389 | 0.194961 | 0.194961 | 0.814272 | 0.182311 | 0.195332 | 0.143522 | 0.204048 | 0.133863 |
3 | 0.0 | 1 | 0.008529 | 0.028556 | 0.028556 | 0.974559 | 0.004065 | 0.022765 | 0.009371 | 0.024038 | 0.014419 |
4 | 1.0 | 2 | 0.189933 | 0.208263 | 0.208263 | 0.802908 | 0.211120 | 0.193035 | 0.151100 | 0.170243 | 0.156047 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19826 | 0.0 | 3 | 0.163697 | 0.228342 | 0.228342 | 0.792251 | 0.209322 | 0.235192 | 0.216720 | 0.181403 | 0.184214 |
19827 | 0.0 | 1 | 0.114999 | 0.253998 | 0.253998 | 0.762946 | 0.206744 | 0.235832 | 0.161401 | 0.159468 | 0.141663 |
19828 | 1.0 | 3 | 0.141307 | 0.213364 | 0.213364 | 0.747401 | 0.246610 | 0.208723 | 0.242814 | 0.138141 | 0.233266 |
19829 | 0.0 | 1 | 0.007766 | 0.002176 | 0.002176 | 0.996455 | 0.000268 | 0.018702 | 0.005657 | 0.034570 | 0.009914 |
19830 | 0.0 | 0 | 0.163946 | 0.185484 | 0.185484 | 0.811429 | 0.177857 | 0.215085 | 0.167812 | 0.177785 | 0.155447 |
19831 rows × 11 columns
def max_acc(y, phat, res=0.01):
    """ Utility function for finding max. accuracy at some cutoff.

    :param y: Known y values (binary 0/1 labels; Series or 1-D array).
    :param phat: Model scores (probabilities; Series or 1-D array).
    :param res: Resolution over which to search for max. accuracy, default 0.01.
    :return: Max. accuracy for model scores across cutoffs in [0, 1].
    """
    # work on plain arrays to avoid index-alignment surprises and frame copies
    y_arr = np.asarray(y)
    phat_arr = np.asarray(phat)

    # track a running best instead of appending rows to a DataFrame:
    # DataFrame.append was removed in pandas 2.0 and was O(n^2) anyway
    best = 0.0
    for cut in np.arange(0, 1 + res, res):
        # binary decision at this cutoff
        decision = np.where(phat_arr > cut, 1, 0)
        # accuracy == fraction of matching labels (same as sklearn accuracy_score)
        best = max(best, float(np.mean(decision == y_arr)))

    return best
def max_f1(y, phat, res=0.01):
    """ Utility function for finding max. F1 at some cutoff.

    :param y: Known y values (binary 0/1 labels; Series or 1-D array).
    :param phat: Model scores (probabilities; Series or 1-D array).
    :param res: Resolution over which to search for max. F1, default 0.01.
    :return: Max. F1 for model scores across cutoffs in [0, 1].
    """
    # work on plain arrays to avoid index-alignment surprises and frame copies
    y_arr = np.asarray(y)
    phat_arr = np.asarray(phat)

    # track a running best instead of appending rows to a DataFrame:
    # DataFrame.append was removed in pandas 2.0 and was O(n^2) anyway
    best = 0.0
    for cut in np.arange(0, 1 + res, res):
        decision = np.where(phat_arr > cut, 1, 0)
        # F1 = 2*TP / (2*TP + FP + FN); treated as 0 when the denominator is 0,
        # matching sklearn.metrics.f1_score's default zero_division behavior
        tp = np.sum((decision == 1) & (y_arr == 1))
        fp = np.sum((decision == 1) & (y_arr == 0))
        fn = np.sum((decision == 0) & (y_arr == 1))
        denom = 2 * tp + fp + fn
        if denom > 0:
            best = max(best, float(2 * tp / denom))

    return best
metric_list = ['acc', 'auc', 'f1', 'logloss', 'mse']  # metrics to use for evaluation

# build eval rows as plain dicts and construct the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call
eval_rows = []
for fold in sorted(scores_frame['fold'].unique()):  # loop through folds
    # cache known y values for the fold once (invariant across metrics)
    fold_y = scores_frame.loc[scores_frame['fold'] == fold, y_name]
    for metric_name in metric_list:  # loop through metrics
        # one row per (fold, metric) pair
        row_dict = {'fold': fold, 'metric': metric_name}
        for col_name in scores_frame.columns[2:]:
            # cache fold scores for this model's score column
            fold_scores = scores_frame.loc[scores_frame['fold'] == fold, col_name]
            # calculate evaluation metric for fold
            if metric_name == 'acc':
                value = max_acc(fold_y, fold_scores)
            elif metric_name == 'auc':
                value = roc_auc_score(fold_y, fold_scores)
            elif metric_name == 'f1':
                value = max_f1(fold_y, fold_scores)
            elif metric_name == 'logloss':
                value = log_loss(fold_y, fold_scores)
            else:  # 'mse'
                value = mean_squared_error(fold_y, fold_scores)
            # store with reasonable precision
            row_dict[col_name] = np.round(value, ROUND)
        eval_rows.append(row_dict)

eval_frame = pd.DataFrame(eval_rows)
# legacy append() upcast 'fold' to float; keep that dtype for identical output
eval_frame['fold'] = eval_frame['fold'].astype(float)

# set columns to necessary order: fold, metric, then model columns sorted by name
model_names = [name for name in sorted(eval_frame.columns) if name not in ['fold', 'metric']]
eval_frame = eval_frame[['fold', 'metric'] + model_names]

# determine score ranks row-by-row:
# lower-is-better metrics (logloss, mse) rank ascending, the rest descending
rank_names = [name + '_rank' for name in model_names]
rank_rows = []
for i in range(eval_frame.shape[0]):
    ascending = eval_frame.loc[i, 'metric'] in ['logloss', 'mse']
    rank_rows.append(eval_frame.iloc[i, 2:].rank(ascending=ascending).values)
rank_frame = pd.DataFrame(rank_rows, columns=rank_names)

# merge ranks onto eval_frame
eval_frame = pd.concat([eval_frame, rank_frame], axis=1)

# house keeping
del rank_frame

# sanity check
eval_frame
fold | metric | group1_rem_ebm | group2_rem_ebm | group2_rem_ebm2 | group3_rem_piml_EBM | group3_rem_piml_EBM2 | group5_rem_xgb2 | group8_rem_ebm | group9_rem_xgb | ph_rem_ebm | group1_rem_ebm_rank | group2_rem_ebm_rank | group2_rem_ebm2_rank | group3_rem_piml_EBM_rank | group3_rem_piml_EBM2_rank | group5_rem_xgb2_rank | group8_rem_ebm_rank | group9_rem_xgb_rank | ph_rem_ebm_rank | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | acc | 0.900 | 0.901 | 0.901 | 0.900 | 0.901 | 0.901 | 0.901 | 0.900 | 0.901 | 8.0 | 3.5 | 3.5 | 8.0 | 3.5 | 3.5 | 3.5 | 8.0 | 3.5 |
1 | 0.0 | auc | 0.781 | 0.840 | 0.840 | 0.163 | 0.821 | 0.836 | 0.793 | 0.797 | 0.791 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 6.0 | 5.0 | 7.0 |
2 | 0.0 | f1 | 0.347 | 0.405 | 0.405 | 0.182 | 0.381 | 0.392 | 0.342 | 0.357 | 0.347 | 6.5 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 8.0 | 5.0 | 6.5 |
3 | 0.0 | logloss | 0.280 | 0.251 | 0.251 | 3.257 | 0.262 | 0.254 | 0.274 | 0.277 | 0.275 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 5.0 | 7.0 | 6.0 |
4 | 0.0 | mse | 0.082 | 0.077 | 0.077 | 0.773 | 0.078 | 0.077 | 0.081 | 0.081 | 0.081 | 8.0 | 2.0 | 2.0 | 9.0 | 4.0 | 2.0 | 6.0 | 6.0 | 6.0 |
5 | 1.0 | acc | 0.906 | 0.906 | 0.906 | 0.906 | 0.906 | 0.906 | 0.906 | 0.906 | 0.906 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 |
6 | 1.0 | auc | 0.767 | 0.828 | 0.828 | 0.172 | 0.810 | 0.822 | 0.774 | 0.779 | 0.772 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 6.0 | 5.0 | 7.0 |
7 | 1.0 | f1 | 0.312 | 0.368 | 0.368 | 0.172 | 0.348 | 0.360 | 0.319 | 0.329 | 0.321 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 7.0 | 5.0 | 6.0 |
8 | 1.0 | logloss | 0.272 | 0.246 | 0.246 | 3.253 | 0.258 | 0.250 | 0.270 | 0.271 | 0.272 | 7.5 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 5.0 | 6.0 | 7.5 |
9 | 1.0 | mse | 0.079 | 0.074 | 0.074 | 0.778 | 0.077 | 0.075 | 0.079 | 0.078 | 0.079 | 7.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 7.0 | 5.0 | 7.0 |
10 | 2.0 | acc | 0.908 | 0.908 | 0.908 | 0.908 | 0.908 | 0.910 | 0.908 | 0.908 | 0.909 | 6.0 | 6.0 | 6.0 | 6.0 | 6.0 | 1.0 | 6.0 | 6.0 | 2.0 |
11 | 2.0 | auc | 0.759 | 0.825 | 0.825 | 0.175 | 0.815 | 0.826 | 0.781 | 0.772 | 0.780 | 8.0 | 2.5 | 2.5 | 9.0 | 4.0 | 1.0 | 5.0 | 7.0 | 6.0 |
12 | 2.0 | f1 | 0.304 | 0.372 | 0.372 | 0.169 | 0.354 | 0.371 | 0.315 | 0.320 | 0.323 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 7.0 | 6.0 | 5.0 |
13 | 2.0 | logloss | 0.271 | 0.246 | 0.246 | 3.284 | 0.251 | 0.245 | 0.264 | 0.271 | 0.264 | 7.5 | 2.5 | 2.5 | 9.0 | 4.0 | 1.0 | 5.5 | 7.5 | 5.5 |
14 | 2.0 | mse | 0.078 | 0.073 | 0.073 | 0.781 | 0.074 | 0.073 | 0.076 | 0.077 | 0.076 | 8.0 | 2.0 | 2.0 | 9.0 | 4.0 | 2.0 | 5.5 | 7.0 | 5.5 |
15 | 3.0 | acc | 0.903 | 0.903 | 0.903 | 0.903 | 0.903 | 0.903 | 0.903 | 0.903 | 0.903 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 |
16 | 3.0 | auc | 0.772 | 0.826 | 0.826 | 0.174 | 0.809 | 0.823 | 0.775 | 0.786 | 0.772 | 7.5 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 6.0 | 5.0 | 7.5 |
17 | 3.0 | f1 | 0.317 | 0.371 | 0.371 | 0.177 | 0.361 | 0.365 | 0.328 | 0.343 | 0.323 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 6.0 | 5.0 | 7.0 |
18 | 3.0 | logloss | 0.276 | 0.252 | 0.252 | 3.254 | 0.262 | 0.253 | 0.275 | 0.275 | 0.276 | 7.5 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 5.5 | 5.5 | 7.5 |
19 | 3.0 | mse | 0.081 | 0.077 | 0.077 | 0.775 | 0.079 | 0.077 | 0.080 | 0.080 | 0.080 | 8.0 | 2.0 | 2.0 | 9.0 | 4.0 | 2.0 | 6.0 | 6.0 | 6.0 |
20 | 4.0 | acc | 0.895 | 0.897 | 0.897 | 0.895 | 0.895 | 0.898 | 0.895 | 0.896 | 0.895 | 7.0 | 2.5 | 2.5 | 7.0 | 7.0 | 1.0 | 7.0 | 4.0 | 7.0 |
21 | 4.0 | auc | 0.754 | 0.831 | 0.831 | 0.170 | 0.818 | 0.828 | 0.785 | 0.779 | 0.782 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 5.0 | 7.0 | 6.0 |
22 | 4.0 | f1 | 0.323 | 0.401 | 0.401 | 0.190 | 0.404 | 0.397 | 0.364 | 0.354 | 0.362 | 8.0 | 2.5 | 2.5 | 9.0 | 1.0 | 4.0 | 5.0 | 7.0 | 6.0 |
23 | 4.0 | logloss | 0.296 | 0.263 | 0.263 | 3.200 | 0.273 | 0.266 | 0.286 | 0.291 | 0.287 | 8.0 | 1.5 | 1.5 | 9.0 | 4.0 | 3.0 | 5.0 | 7.0 | 6.0 |
24 | 4.0 | mse | 0.087 | 0.080 | 0.080 | 0.771 | 0.082 | 0.080 | 0.084 | 0.086 | 0.084 | 8.0 | 2.0 | 2.0 | 9.0 | 4.0 | 2.0 | 5.5 | 7.0 | 5.5 |
# sanity check
eval_frame

# save eval_frame as CSV, timestamped so repeated runs don't overwrite each other
# (markdown residue previously fused into this line made it a syntax error)
timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
eval_frame.to_csv('model_eval_' + timestamp + '.csv', index=False)

# overall model ranking: mean rank across all folds and metrics (lower is better)
eval_frame[[name for name in eval_frame.columns if name.endswith('rank')]].mean().sort_values()
group2_rem_ebm_rank 2.28 group2_rem_ebm2_rank 2.28 group5_rem_xgb2_rank 2.74 group3_rem_piml_EBM2_rank 4.14 group8_rem_ebm_rank 5.74 group9_rem_xgb_rank 5.96 ph_rem_ebm_rank 5.96 group1_rem_ebm_rank 7.46 group3_rem_piml_EBM_rank 8.44 dtype: float64