from sklearn import datasets
import pandas as pd
pd.set_option("display.max_columns", 100)
from comparison.model_comparison import ModelComparison, ModelName
from comparison.comparison_datasets import TaskName
from comparison.tuned_model_comparison import TunedModelComparison
import plotly.express as px
import plotly.graph_objects as go
import json
# Load cross-validated scores for models run with tuned hyper-parameters and
# with library defaults, then combine both into a single dataframe
# (rows = datasets, columns = model/parameter-set combinations).
with open("tuned_perf_comparison.json", "r") as input_stream:
    tuned_perfs = json.load(input_stream)
with open("perf_comparison.json", "r") as input_stream:
    default_perfs = json.load(input_stream)

tuned_scores = {}
for dataset_name, models in tuned_perfs.items():
    tuned_scores[dataset_name] = {
        model_name + "_with_tuned_parameters": model_result["model_score"]
        for model_name, model_result in models.items()
    }

untuned_perfs = {}
for dataset_name, models in default_perfs.items():
    untuned_perfs[dataset_name] = {
        model_name + "_with_default_parameters": model_result["model_score"]
        for model_name, model_result in models.items()
    }

# NOTE(review): 0.773 looks like a hand-picked placeholder for missing scores
# -- confirm this fill value is intentional.
perfs_tuned_vs_untuned = (
    pd.concat([pd.DataFrame(untuned_perfs), pd.DataFrame(tuned_scores)])
    .transpose()
    .fillna(0.773)
)
perfs_tuned_vs_untuned
catboost_with_default_parameters | lightgbm_with_default_parameters | lightgbm_with_catboost_encoder_with_default_parameters | xgboost_with_catboost_encoder_with_default_parameters | xgboost_with_default_parameters | catboost_with_tuned_parameters | lightgbm_with_tuned_parameters | lightgbm_with_catboost_encoder_with_tuned_parameters | xgboost_with_catboost_encoder_with_tuned_parameters | xgboost_with_tuned_parameters | |
---|---|---|---|---|---|---|---|---|---|---|
california | 0.849406 | 0.835564 | 0.835564 | 0.831576 | 0.831576 | 0.859586 | 0.854280 | 0.854029 | 0.850704 | 0.849581 |
adult | 0.859957 | 0.858053 | 0.856251 | 0.853773 | 0.856517 | 0.860857 | 0.860653 | 0.859322 | 0.858830 | 0.859506 |
ukair | 0.822140 | 0.804285 | 0.801945 | 0.829402 | 0.836355 | 0.855069 | 0.835399 | 0.829692 | 0.840542 | 0.865997 |
diabetes | 0.753759 | 0.733049 | 0.733049 | 0.725222 | 0.725222 | 0.774761 | 0.774744 | 0.772198 | 0.776094 | 0.772198 |
bank | 0.909823 | 0.909358 | 0.907014 | 0.901285 | 0.905687 | 0.911084 | 0.910420 | 0.908606 | 0.908407 | 0.909093 |
dating | 0.867390 | 0.870016 | 0.868226 | 0.865719 | 0.871210 | 0.872760 | 0.872168 | 0.868585 | 0.866554 | 0.873596 |
valley | 0.535493 | 0.586621 | 0.586621 | 0.559428 | 0.559428 | 0.594807 | 0.622114 | 0.615529 | 0.673307 | 0.674151 |
cars | 0.535673 | 0.521252 | 0.518111 | 0.503700 | 0.481019 | 0.541005 | 0.529365 | 0.530834 | 0.530311 | 0.531778 |
# Per-dataset metadata, read from the catboost entry (these fields are
# dataset properties, so any model's entry would give the same values):
# row count, total number of category levels, and the share of features
# that are categorical.
dataset_lengths = {}
num_categories = {}
prop_categorical = {}
for dataset_name, models in default_perfs.items():
    dataset_stats = models["catboost"]
    dataset_lengths[dataset_name] = dataset_stats["dataset_length"]
    num_categories[dataset_name] = dataset_stats["num_categories"]
    prop_categorical[dataset_name] = (
        dataset_stats["num_categorical_features"] / float(dataset_stats["num_features"])
    )

# Rescale every score column relative to the xgboost-with-defaults baseline
# (baseline column itself becomes 1.0), then append the metadata columns.
baseline = perfs_tuned_vs_untuned["xgboost_with_default_parameters"]
perfs_tuned_vs_untuned_scaled = (
    perfs_tuned_vs_untuned
    .assign(**{column: perfs_tuned_vs_untuned[column] / baseline
               for column in perfs_tuned_vs_untuned.columns})
    .assign(length=pd.Series(dataset_lengths),
            categorical_features_proportion=pd.Series(prop_categorical),
            num_categories=pd.Series(num_categories))
)
perfs_tuned_vs_untuned_scaled
catboost_with_default_parameters | lightgbm_with_default_parameters | lightgbm_with_catboost_encoder_with_default_parameters | xgboost_with_catboost_encoder_with_default_parameters | xgboost_with_default_parameters | catboost_with_tuned_parameters | lightgbm_with_tuned_parameters | lightgbm_with_catboost_encoder_with_tuned_parameters | xgboost_with_catboost_encoder_with_tuned_parameters | xgboost_with_tuned_parameters | length | categorical_features_proportion | num_categories | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
california | 1.021441 | 1.004796 | 1.004796 | 1.000000 | 1.0 | 1.033683 | 1.027302 | 1.027000 | 1.023002 | 1.021651 | 20640 | 0.000000 | 0 |
adult | 1.004016 | 1.001793 | 0.999689 | 0.996797 | 1.0 | 1.005068 | 1.004829 | 1.003275 | 1.002701 | 1.003490 | 48842 | 0.857143 | 122 |
ukair | 0.983004 | 0.961656 | 0.958858 | 0.991687 | 1.0 | 1.022377 | 0.998858 | 0.992034 | 1.005006 | 1.035443 | 394299 | 0.555556 | 106 |
diabetes | 1.039350 | 1.010792 | 1.010792 | 1.000000 | 1.0 | 1.068308 | 1.068285 | 1.064774 | 1.070146 | 1.064774 | 768 | 0.000000 | 0 |
bank | 1.004567 | 1.004054 | 1.001465 | 0.995140 | 1.0 | 1.005959 | 1.005226 | 1.003224 | 1.003004 | 1.003761 | 45211 | 0.562500 | 44 |
dating | 0.995615 | 0.998630 | 0.996575 | 0.993697 | 1.0 | 1.001779 | 1.001099 | 0.996987 | 0.994656 | 1.002739 | 8378 | 0.508333 | 444 |
valley | 0.957214 | 1.048608 | 1.048608 | 1.000000 | 1.0 | 1.063240 | 1.112052 | 1.100282 | 1.203563 | 1.205072 | 1212 | 0.000000 | 0 |
cars | 1.113619 | 1.083640 | 1.077111 | 1.047150 | 1.0 | 1.124704 | 1.100506 | 1.103561 | 1.102473 | 1.105523 | 38531 | 0.793103 | 1246 |
# Scatter of default-parameter scores (relative to the xgboost-defaults
# baseline) against dataset size, one trace per model.
_default_traces = [
    ("xgboost_with_default_parameters", "#189FDD", "xgboost"),
    ("lightgbm_with_default_parameters", "#76B644", "lightgbm"),
    ("catboost_with_default_parameters", "#FFCC00", "catboost"),
]
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled[column],
                                 mode='markers',
                                 marker_color=color,
                                 name=label)
                      for column, color, label in _default_traces])
# Log scale: dataset sizes span three orders of magnitude (768 to ~400k rows).
fig.update_xaxes(type="log", title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with default parameters, given dataset length")
fig.show()
# Display the datasets ordered by their total number of category levels.
perfs_tuned_vs_untuned_scaled.sort_values("num_categories")
catboost_with_default_parameters | lightgbm_with_default_parameters | lightgbm_with_catboost_encoder_with_default_parameters | xgboost_with_catboost_encoder_with_default_parameters | xgboost_with_default_parameters | catboost_with_tuned_parameters | lightgbm_with_tuned_parameters | lightgbm_with_catboost_encoder_with_tuned_parameters | xgboost_with_catboost_encoder_with_tuned_parameters | xgboost_with_tuned_parameters | length | categorical_features_proportion | num_categories | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
california | 1.021441 | 1.004796 | 1.004796 | 1.000000 | 1.0 | 1.033683 | 1.027302 | 1.027000 | 1.023002 | 1.021651 | 20640 | 0.000000 | 0 |
diabetes | 1.039350 | 1.010792 | 1.010792 | 1.000000 | 1.0 | 1.068308 | 1.068285 | 1.064774 | 1.070146 | 1.064774 | 768 | 0.000000 | 0 |
valley | 0.957214 | 1.048608 | 1.048608 | 1.000000 | 1.0 | 1.063240 | 1.112052 | 1.100282 | 1.203563 | 1.205072 | 1212 | 0.000000 | 0 |
bank | 1.004567 | 1.004054 | 1.001465 | 0.995140 | 1.0 | 1.005959 | 1.005226 | 1.003224 | 1.003004 | 1.003761 | 45211 | 0.562500 | 44 |
ukair | 0.983004 | 0.961656 | 0.958858 | 0.991687 | 1.0 | 1.022377 | 0.998858 | 0.992034 | 1.005006 | 1.035443 | 394299 | 0.555556 | 106 |
adult | 1.004016 | 1.001793 | 0.999689 | 0.996797 | 1.0 | 1.005068 | 1.004829 | 1.003275 | 1.002701 | 1.003490 | 48842 | 0.857143 | 122 |
dating | 0.995615 | 0.998630 | 0.996575 | 0.993697 | 1.0 | 1.001779 | 1.001099 | 0.996987 | 0.994656 | 1.002739 | 8378 | 0.508333 | 444 |
cars | 1.113619 | 1.083640 | 1.077111 | 1.047150 | 1.0 | 1.124704 | 1.100506 | 1.103561 | 1.102473 | 1.105523 | 38531 | 0.793103 | 1246 |
# Scatter of TUNED-parameter scores (relative to the xgboost-defaults
# baseline) against dataset size, one trace per model.
# FIX: this figure's title says "tuned parameters" but the original plotted
# the *_with_default_parameters columns (copy-paste of the previous figure);
# switched the y-columns to the tuned scores to match the title.
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled["xgboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#189FDD",
                                 name="xgboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled["lightgbm_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#76B644",
                                 name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled["catboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#FFCC00",
                                 name="catboost")
                      ])
fig.update_xaxes(type="log", title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with tuned parameters, given dataset length")
fig.show()
# Scatter of default-parameter scores (relative to the xgboost-defaults
# baseline) against the proportion of categorical features, one trace per model.
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["catboost_with_default_parameters"],
                                 mode='markers',
                                 marker_color="#FFCC00",
                                 name="catboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["lightgbm_with_default_parameters"],
                                 mode='markers',
                                 marker_color="#76B644",
                                 name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["xgboost_with_default_parameters"],
                                 mode='markers',
                                 marker_color="#189FDD",
                                 name="xgboost")
                      ])
# FIX: x-axis was mislabelled "Dataset length" (copy-paste from the previous
# figures); the x values are the categorical-feature proportion.
fig.update_xaxes(title="Proportion of categorical features")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with default parameters, given proportion of categorical features")
fig.show()
# Scatter of tuned-parameter scores (relative to the xgboost-defaults
# baseline) against the proportion of categorical features, one trace per model.
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["catboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#FFCC00",
                                 name="catboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["lightgbm_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#76B644",
                                 name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["xgboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#189FDD",
                                 name="xgboost")
                      ])
# FIX: x-axis was mislabelled "Dataset length"; the x values are the
# categorical-feature proportion.
fig.update_xaxes(title="Proportion of categorical features")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with tuned parameters, given proportion of categorical features")
fig.show()
# Grouped bars of raw (unscaled) scores per dataset, default vs tuned.
# Colors pair each model's default shade with a related tuned shade.
_score_columns = ["xgboost_with_default_parameters", "lightgbm_with_default_parameters",
                  "catboost_with_default_parameters", "xgboost_with_tuned_parameters",
                  "lightgbm_with_tuned_parameters", "catboost_with_tuned_parameters"]
px.bar(perfs_tuned_vs_untuned[_score_columns],
       labels={"index": "Dataset", "value": "Score", "variable": "Model"},
       barmode="group",
       template='xgridoff',
       color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                                "dodgerblue", "olivedrab", "orange"])
# Same grouped bar chart, but on the scores scaled to the
# xgboost-with-defaults baseline (baseline column is 1.0 everywhere).
_scaled_columns = ["xgboost_with_default_parameters", "lightgbm_with_default_parameters",
                   "catboost_with_default_parameters", "xgboost_with_tuned_parameters",
                   "lightgbm_with_tuned_parameters", "catboost_with_tuned_parameters"]
px.bar(perfs_tuned_vs_untuned_scaled[_scaled_columns],
       labels={"index": "Dataset", "value": "Score", "variable": "Model"},
       barmode="group",
       template='xgridoff',
       color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                                "dodgerblue", "olivedrab", "orange"])
# Average each model's baseline-relative score across datasets
# (metadata columns dropped first so only score columns are averaged).
print("Mean score of each model, comparing to xgboost")
_metadata_columns = ["categorical_features_proportion", "num_categories", "length"]
perfs_tuned_vs_untuned_scaled.drop(columns=_metadata_columns).dropna().mean(axis=0)
Mean score of each model, comparing to xgboost
catboost_with_default_parameters 1.014853 lightgbm_with_default_parameters 1.014246 lightgbm_with_catboost_encoder_with_default_parameters 1.012237 xgboost_with_catboost_encoder_with_default_parameters 1.003059 xgboost_with_default_parameters 1.000000 catboost_with_tuned_parameters 1.040640 lightgbm_with_tuned_parameters 1.039770 lightgbm_with_catboost_encoder_with_tuned_parameters 1.036392 xgboost_with_catboost_encoder_with_tuned_parameters 1.050569 xgboost_with_tuned_parameters 1.055307 dtype: float64
# Median of each model's baseline-relative score across datasets
# (more robust than the mean to the hill-valley outlier).
print("Median score of each model, comparing to xgboost")
_metadata_columns = ["categorical_features_proportion", "num_categories", "length"]
perfs_tuned_vs_untuned_scaled.drop(columns=_metadata_columns).dropna().median(axis=0)
Median score of each model, comparing to xgboost
catboost_with_default_parameters 1.004291 lightgbm_with_default_parameters 1.004425 lightgbm_with_catboost_encoder_with_default_parameters 1.003131 xgboost_with_catboost_encoder_with_default_parameters 0.998398 xgboost_with_default_parameters 1.000000 catboost_with_tuned_parameters 1.028030 lightgbm_with_tuned_parameters 1.016264 lightgbm_with_catboost_encoder_with_tuned_parameters 1.015137 xgboost_with_catboost_encoder_with_tuned_parameters 1.014004 xgboost_with_tuned_parameters 1.028547 dtype: float64
print("Mean rank of each default model")
# x.argsort().argsort() turns each row of scores into 0-based ranks
# (0 = lowest score, 2 = highest); averaging over rows gives each model's
# mean rank across datasets.
perfs_tuned_vs_untuned_scaled[["catboost_with_default_parameters", "lightgbm_with_default_parameters",
"xgboost_with_default_parameters"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()
Mean rank of each default model
catboost_with_default_parameters 1.375 lightgbm_with_default_parameters 1.000 xgboost_with_default_parameters 0.625 dtype: float64
print("Mean rank of each tuned model")
# Same double-argsort ranking as for the default models, applied to the
# tuned-parameter score columns (0 = lowest score, 2 = highest).
perfs_tuned_vs_untuned_scaled[["catboost_with_tuned_parameters", "lightgbm_with_tuned_parameters",
"xgboost_with_tuned_parameters"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()
Mean rank of each tuned model
catboost_with_tuned_parameters 1.500 lightgbm_with_tuned_parameters 0.625 xgboost_with_tuned_parameters 0.875 dtype: float64
# Training and prediction times (per dataset x model) from the
# default-parameter runs, shown side by side.
def _times_frame(metric_name):
    # One row per dataset, one column per model, for the given timing field.
    return pd.DataFrame({dataset_name: {model_name: model_result[metric_name]
                                        for model_name, model_result in models.items()}
                         for dataset_name, models in default_perfs.items()}).transpose()

training_times = _times_frame("training_time")
prediction_times = _times_frame("prediction_time")
times_df = pd.concat([training_times, prediction_times], axis=1)
times_df
catboost | lightgbm | lightgbm_with_catboost_encoder | xgboost_with_catboost_encoder | xgboost | catboost | lightgbm | lightgbm_with_catboost_encoder | xgboost_with_catboost_encoder | xgboost | |
---|---|---|---|---|---|---|---|---|---|---|
california | 24.689261 | 0.178142 | 0.171883 | 119.791898 | 118.781466 | 0.014021 | 0.031725 | 0.031709 | 0.048880 | 0.042857 |
adult | 149.724603 | 0.811872 | 1.547185 | 129.368296 | 112.526765 | 0.129134 | 0.100814 | 0.222690 | 0.194931 | 0.149172 |
ukair | 144.291018 | 3.167551 | 3.082787 | 65.907621 | 33.585652 | 0.725683 | 1.181819 | 1.214606 | 0.544002 | 0.187727 |
diabetes | 9.472824 | 0.070993 | 0.180932 | 151.992842 | 153.275499 | 0.011634 | 0.004420 | 0.010192 | 0.143612 | 0.157037 |
bank | 119.778596 | 0.450672 | 1.307890 | 128.355890 | 117.680134 | 0.064928 | 0.087015 | 0.091556 | 0.206689 | 0.127684 |
dating | 218.227686 | 2.734924 | 2.686051 | 240.914340 | 224.078994 | 0.189192 | 0.205373 | 0.168617 | 0.556958 | 0.273364 |
valley | 70.126429 | 1.343469 | 2.123457 | 182.244021 | 182.811797 | 0.016280 | 0.004819 | 0.004638 | 0.138728 | 0.125285 |
cars | 141.749353 | 1.185837 | 1.893203 | 143.051520 | 135.966851 | 0.933087 | 0.122637 | 0.194635 | 0.281825 | 0.178372 |
# Grouped bars of training time per dataset; log scale because the times
# span several orders of magnitude across models.
px.bar(training_times[["xgboost", "lightgbm", "catboost"]],
       barmode="group",
       log_y=True,
       labels={"index": "Dataset", "value": "Training time", "variable": "Model"},
       template='xgridoff',
       color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                                "dodgerblue", "olivedrab", "orange"])
# Grouped bars of prediction time per dataset (log scale).
# FIX: the value-axis label said "Training time" (copy-paste from the
# training-time chart above) although this plots prediction_times.
px.bar(prediction_times[["xgboost", "lightgbm", "catboost"]], barmode="group", log_y=True, labels={
    "index": "Dataset",
    "value": "Prediction time",
    "variable": "Model",
},
    template='xgridoff',
    color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                             "dodgerblue", "olivedrab", "orange"])
print("Mean training times")
# Average training time in seconds per model, across all datasets.
training_times.mean(axis=0)
Mean training times
catboost 109.757471 lightgbm 1.242933 lightgbm_with_catboost_encoder 1.624173 xgboost_with_catboost_encoder 145.203303 xgboost 134.838395 dtype: float64
# Median training time of each model relative to xgboost on the same dataset.
# FIX: corrected "Mdian" typo in the printed heading.
print("Median training times comparing to xgboost training time")
training_times.assign(**{col_name: training_times[col_name] / training_times["xgboost"]
                         for col_name in training_times.columns}).median(axis=0)
Median training times comparing to xgboost training time
catboost 0.995860 lightgbm 0.007282 lightgbm_with_catboost_encoder 0.011801 xgboost_with_catboost_encoder 1.063619 xgboost 1.000000 dtype: float64
print("Mean prediction times")
# Average prediction time in seconds per model, across all datasets.
prediction_times.mean(axis=0)
Mean prediction times
catboost 0.260495 lightgbm 0.217328 lightgbm_with_catboost_encoder 0.242330 xgboost_with_catboost_encoder 0.264453 xgboost 0.155187 dtype: float64
# Median prediction time of each model relative to xgboost on the same dataset.
# FIX: corrected "Mdian" typo in the printed heading.
print("Median prediction times comparing to xgboost prediction time")
prediction_times.assign(**{col_name: prediction_times[col_name] / prediction_times["xgboost"]
                           for col_name in prediction_times.columns}).median(axis=0)
Median prediction times comparing to xgboost prediction time
catboost 0.600297 lightgbm 0.684511 lightgbm_with_catboost_encoder 0.728463 xgboost_with_catboost_encoder 1.443369 xgboost 1.000000 dtype: float64