from sklearn import datasets
import pandas as pd
pd.set_option("display.max_columns", 100)
from comparison.model_comparison import ModelComparison, ModelName
from comparison.comparison_datasets import TaskName
from comparison.tuned_model_comparison import TunedModelComparison
import plotly.express as px
import plotly.graph_objects as go
import json
# Load cross-validated scores for models run with tuned hyper-parameters and
# with library defaults, then combine both into a single dataframe
# (rows = datasets, columns = model/parameter-set combinations).
with open("tuned_perf_comparison.json", "r") as input_stream:
    tuned_perfs = json.load(input_stream)
with open("perf_comparison.json", "r") as input_stream:
    default_perfs = json.load(input_stream)

tuned_scores = {}
for dataset_name, models in tuned_perfs.items():
    tuned_scores[dataset_name] = {
        model_name + "_with_tuned_parameters": model_result["model_score"]
        for model_name, model_result in models.items()
    }

untuned_perfs = {}
for dataset_name, models in default_perfs.items():
    untuned_perfs[dataset_name] = {
        model_name + "_with_default_parameters": model_result["model_score"]
        for model_name, model_result in models.items()
    }

# NOTE(review): 0.773 looks like a hand-picked placeholder for missing scores
# -- confirm this fill value is intentional.
perfs_tuned_vs_untuned = (
    pd.concat([pd.DataFrame(untuned_perfs), pd.DataFrame(tuned_scores)])
    .transpose()
    .fillna(0.773)
)
perfs_tuned_vs_untuned
catboost_with_default_parameters | lightgbm_with_default_parameters | lightgbm_with_catboost_encoder_with_default_parameters | xgboost_with_catboost_encoder_with_default_parameters | xgboost_with_default_parameters | catboost_with_tuned_parameters | lightgbm_with_tuned_parameters | lightgbm_with_catboost_encoder_with_tuned_parameters | xgboost_with_catboost_encoder_with_tuned_parameters | xgboost_with_tuned_parameters | |
---|---|---|---|---|---|---|---|---|---|---|
california | 0.849406 | 0.835564 | 0.835564 | 0.831576 | 0.831576 | 0.859586 | 0.854280 | 0.854029 | 0.850704 | 0.849581 |
adult | 0.859957 | 0.858053 | 0.856251 | 0.853773 | 0.856517 | 0.860857 | 0.860653 | 0.859322 | 0.858830 | 0.859506 |
ukair | 0.822140 | 0.804285 | 0.801945 | 0.829402 | 0.836355 | 0.855069 | 0.835399 | 0.829692 | 0.840542 | 0.865997 |
diabetes | 0.753759 | 0.733049 | 0.733049 | 0.725222 | 0.725222 | 0.774761 | 0.774744 | 0.772198 | 0.776094 | 0.772198 |
bank | 0.909823 | 0.909358 | 0.907014 | 0.901285 | 0.905687 | 0.911084 | 0.910420 | 0.908606 | 0.908407 | 0.909093 |
dating | 0.867390 | 0.870016 | 0.868226 | 0.865719 | 0.871210 | 0.872760 | 0.872168 | 0.868585 | 0.866554 | 0.873596 |
valley | 0.535493 | 0.586621 | 0.586621 | 0.559428 | 0.559428 | 0.594807 | 0.622114 | 0.615529 | 0.673307 | 0.674151 |
cars | 0.535673 | 0.521252 | 0.518111 | 0.503700 | 0.481019 | 0.541005 | 0.529365 | 0.530834 | 0.530311 | 0.531778 |
# Per-dataset metadata, read from the catboost entry (these fields are
# dataset properties, so any model's entry would give the same values):
# row count, total number of category levels, and the share of features
# that are categorical.
dataset_lengths = {}
num_categories = {}
prop_categorical = {}
for dataset_name, models in default_perfs.items():
    dataset_stats = models["catboost"]
    dataset_lengths[dataset_name] = dataset_stats["dataset_length"]
    num_categories[dataset_name] = dataset_stats["num_categories"]
    prop_categorical[dataset_name] = (
        dataset_stats["num_categorical_features"] / float(dataset_stats["num_features"])
    )

# Rescale every score column relative to the xgboost-with-defaults baseline
# (baseline column itself becomes 1.0), then append the metadata columns.
baseline = perfs_tuned_vs_untuned["xgboost_with_default_parameters"]
perfs_tuned_vs_untuned_scaled = (
    perfs_tuned_vs_untuned
    .assign(**{column: perfs_tuned_vs_untuned[column] / baseline
               for column in perfs_tuned_vs_untuned.columns})
    .assign(length=pd.Series(dataset_lengths),
            categorical_features_proportion=pd.Series(prop_categorical),
            num_categories=pd.Series(num_categories))
)
perfs_tuned_vs_untuned_scaled
catboost_with_default_parameters | lightgbm_with_default_parameters | lightgbm_with_catboost_encoder_with_default_parameters | xgboost_with_catboost_encoder_with_default_parameters | xgboost_with_default_parameters | catboost_with_tuned_parameters | lightgbm_with_tuned_parameters | lightgbm_with_catboost_encoder_with_tuned_parameters | xgboost_with_catboost_encoder_with_tuned_parameters | xgboost_with_tuned_parameters | length | categorical_features_proportion | num_categories | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
california | 1.021441 | 1.004796 | 1.004796 | 1.000000 | 1.0 | 1.033683 | 1.027302 | 1.027000 | 1.023002 | 1.021651 | 20640 | 0.000000 | 0 |
adult | 1.004016 | 1.001793 | 0.999689 | 0.996797 | 1.0 | 1.005068 | 1.004829 | 1.003275 | 1.002701 | 1.003490 | 48842 | 0.857143 | 122 |
ukair | 0.983004 | 0.961656 | 0.958858 | 0.991687 | 1.0 | 1.022377 | 0.998858 | 0.992034 | 1.005006 | 1.035443 | 394299 | 0.555556 | 106 |
diabetes | 1.039350 | 1.010792 | 1.010792 | 1.000000 | 1.0 | 1.068308 | 1.068285 | 1.064774 | 1.070146 | 1.064774 | 768 | 0.000000 | 0 |
bank | 1.004567 | 1.004054 | 1.001465 | 0.995140 | 1.0 | 1.005959 | 1.005226 | 1.003224 | 1.003004 | 1.003761 | 45211 | 0.562500 | 44 |
dating | 0.995615 | 0.998630 | 0.996575 | 0.993697 | 1.0 | 1.001779 | 1.001099 | 0.996987 | 0.994656 | 1.002739 | 8378 | 0.508333 | 444 |
valley | 0.957214 | 1.048608 | 1.048608 | 1.000000 | 1.0 | 1.063240 | 1.112052 | 1.100282 | 1.203563 | 1.205072 | 1212 | 0.000000 | 0 |
cars | 1.113619 | 1.083640 | 1.077111 | 1.047150 | 1.0 | 1.124704 | 1.100506 | 1.103561 | 1.102473 | 1.105523 | 38531 | 0.793103 | 1246 |
# Scatter of default-parameter scores (relative to the xgboost-defaults
# baseline) against dataset size, one trace per model.
_default_traces = [
    ("xgboost_with_default_parameters", "#189FDD", "xgboost"),
    ("lightgbm_with_default_parameters", "#76B644", "lightgbm"),
    ("catboost_with_default_parameters", "#FFCC00", "catboost"),
]
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled[column],
                                 mode='markers',
                                 marker_color=color,
                                 name=label)
                      for column, color, label in _default_traces])
# Log scale: dataset sizes span three orders of magnitude (768 to ~400k rows).
fig.update_xaxes(type="log", title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with default parameters, given dataset length")
fig.show()
# Display the datasets ordered by their total number of category levels.
perfs_tuned_vs_untuned_scaled.sort_values("num_categories")
catboost_with_default_parameters | lightgbm_with_default_parameters | lightgbm_with_catboost_encoder_with_default_parameters | xgboost_with_catboost_encoder_with_default_parameters | xgboost_with_default_parameters | catboost_with_tuned_parameters | lightgbm_with_tuned_parameters | lightgbm_with_catboost_encoder_with_tuned_parameters | xgboost_with_catboost_encoder_with_tuned_parameters | xgboost_with_tuned_parameters | length | categorical_features_proportion | num_categories | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
california | 1.021441 | 1.004796 | 1.004796 | 1.000000 | 1.0 | 1.033683 | 1.027302 | 1.027000 | 1.023002 | 1.021651 | 20640 | 0.000000 | 0 |
diabetes | 1.039350 | 1.010792 | 1.010792 | 1.000000 | 1.0 | 1.068308 | 1.068285 | 1.064774 | 1.070146 | 1.064774 | 768 | 0.000000 | 0 |
valley | 0.957214 | 1.048608 | 1.048608 | 1.000000 | 1.0 | 1.063240 | 1.112052 | 1.100282 | 1.203563 | 1.205072 | 1212 | 0.000000 | 0 |
bank | 1.004567 | 1.004054 | 1.001465 | 0.995140 | 1.0 | 1.005959 | 1.005226 | 1.003224 | 1.003004 | 1.003761 | 45211 | 0.562500 | 44 |
ukair | 0.983004 | 0.961656 | 0.958858 | 0.991687 | 1.0 | 1.022377 | 0.998858 | 0.992034 | 1.005006 | 1.035443 | 394299 | 0.555556 | 106 |
adult | 1.004016 | 1.001793 | 0.999689 | 0.996797 | 1.0 | 1.005068 | 1.004829 | 1.003275 | 1.002701 | 1.003490 | 48842 | 0.857143 | 122 |
dating | 0.995615 | 0.998630 | 0.996575 | 0.993697 | 1.0 | 1.001779 | 1.001099 | 0.996987 | 0.994656 | 1.002739 | 8378 | 0.508333 | 444 |
cars | 1.113619 | 1.083640 | 1.077111 | 1.047150 | 1.0 | 1.124704 | 1.100506 | 1.103561 | 1.102473 | 1.105523 | 38531 | 0.793103 | 1246 |
# Scatter of TUNED-parameter scores (relative to the xgboost-defaults
# baseline) against dataset size, one trace per model.
# FIX: this figure's title says "tuned parameters" but the original plotted
# the *_with_default_parameters columns (copy-paste of the previous figure);
# switched the y-columns to the tuned scores to match the title.
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled["xgboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#189FDD",
                                 name="xgboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled["lightgbm_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#76B644",
                                 name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"],
                                 y=perfs_tuned_vs_untuned_scaled["catboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#FFCC00",
                                 name="catboost")
                      ])
fig.update_xaxes(type="log", title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with tuned parameters, given dataset length")
fig.show()
# Scatter of default-parameter scores (relative to the xgboost-defaults
# baseline) against the proportion of categorical features, one trace per model.
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["catboost_with_default_parameters"],
                                 mode='markers',
                                 marker_color="#FFCC00",
                                 name="catboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["lightgbm_with_default_parameters"],
                                 mode='markers',
                                 marker_color="#76B644",
                                 name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["xgboost_with_default_parameters"],
                                 mode='markers',
                                 marker_color="#189FDD",
                                 name="xgboost")
                      ])
# FIX: x-axis was mislabelled "Dataset length" (copy-paste from the previous
# figures); the x values are the categorical-feature proportion.
fig.update_xaxes(title="Proportion of categorical features")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with default parameters, given proportion of categorical features")
fig.show()
# Scatter of tuned-parameter scores (relative to the xgboost-defaults
# baseline) against the proportion of categorical features, one trace per model.
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["catboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#FFCC00",
                                 name="catboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["lightgbm_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#76B644",
                                 name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"],
                                 y=perfs_tuned_vs_untuned_scaled["xgboost_with_tuned_parameters"],
                                 mode='markers',
                                 marker_color="#189FDD",
                                 name="xgboost")
                      ])
# FIX: x-axis was mislabelled "Dataset length"; the x values are the
# categorical-feature proportion.
fig.update_xaxes(title="Proportion of categorical features")
fig.update_yaxes(title="Performance difference with xgboost")
fig.update_layout(title="Performance of models with tuned parameters, given proportion of categorical features")
fig.show()
# Grouped bars of raw (unscaled) scores per dataset, default vs tuned.
# Colors pair each model's default shade with a related tuned shade.
_score_columns = ["xgboost_with_default_parameters", "lightgbm_with_default_parameters",
                  "catboost_with_default_parameters", "xgboost_with_tuned_parameters",
                  "lightgbm_with_tuned_parameters", "catboost_with_tuned_parameters"]
px.bar(perfs_tuned_vs_untuned[_score_columns],
       labels={"index": "Dataset", "value": "Score", "variable": "Model"},
       barmode="group",
       template='xgridoff',
       color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                                "dodgerblue", "olivedrab", "orange"])
# Same grouped bar chart, but on the scores scaled to the
# xgboost-with-defaults baseline (baseline column is 1.0 everywhere).
_scaled_columns = ["xgboost_with_default_parameters", "lightgbm_with_default_parameters",
                   "catboost_with_default_parameters", "xgboost_with_tuned_parameters",
                   "lightgbm_with_tuned_parameters", "catboost_with_tuned_parameters"]
px.bar(perfs_tuned_vs_untuned_scaled[_scaled_columns],
       labels={"index": "Dataset", "value": "Score", "variable": "Model"},
       barmode="group",
       template='xgridoff',
       color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                                "dodgerblue", "olivedrab", "orange"])
# Average each model's baseline-relative score across datasets
# (metadata columns dropped first so only score columns are averaged).
print("Mean score of each model, comparing to xgboost")
_metadata_columns = ["categorical_features_proportion", "num_categories", "length"]
perfs_tuned_vs_untuned_scaled.drop(columns=_metadata_columns).dropna().mean(axis=0)
Mean score of each model, comparing to xgboost
catboost_with_default_parameters 1.014853 lightgbm_with_default_parameters 1.014246 lightgbm_with_catboost_encoder_with_default_parameters 1.012237 xgboost_with_catboost_encoder_with_default_parameters 1.003059 xgboost_with_default_parameters 1.000000 catboost_with_tuned_parameters 1.040640 lightgbm_with_tuned_parameters 1.039770 lightgbm_with_catboost_encoder_with_tuned_parameters 1.036392 xgboost_with_catboost_encoder_with_tuned_parameters 1.050569 xgboost_with_tuned_parameters 1.055307 dtype: float64
# Median of each model's baseline-relative score across datasets
# (more robust than the mean to the hill-valley outlier).
print("Median score of each model, comparing to xgboost")
_metadata_columns = ["categorical_features_proportion", "num_categories", "length"]
perfs_tuned_vs_untuned_scaled.drop(columns=_metadata_columns).dropna().median(axis=0)
Median score of each model, comparing to xgboost
catboost_with_default_parameters 1.004291 lightgbm_with_default_parameters 1.004425 lightgbm_with_catboost_encoder_with_default_parameters 1.003131 xgboost_with_catboost_encoder_with_default_parameters 0.998398 xgboost_with_default_parameters 1.000000 catboost_with_tuned_parameters 1.028030 lightgbm_with_tuned_parameters 1.016264 lightgbm_with_catboost_encoder_with_tuned_parameters 1.015137 xgboost_with_catboost_encoder_with_tuned_parameters 1.014004 xgboost_with_tuned_parameters 1.028547 dtype: float64
print("Mean rank of each default model")
# x.argsort().argsort() turns each row of scores into 0-based ranks
# (0 = lowest score, 2 = highest); averaging over rows gives each model's
# mean rank across datasets.
perfs_tuned_vs_untuned_scaled[["catboost_with_default_parameters", "lightgbm_with_default_parameters",
"xgboost_with_default_parameters"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()
Mean rank of each default model
catboost_with_default_parameters 1.375 lightgbm_with_default_parameters 1.000 xgboost_with_default_parameters 0.625 dtype: float64
print("Mean rank of each tuned model")
# Same double-argsort ranking as for the default models, applied to the
# tuned-parameter score columns (0 = lowest score, 2 = highest).
perfs_tuned_vs_untuned_scaled[["catboost_with_tuned_parameters", "lightgbm_with_tuned_parameters",
"xgboost_with_tuned_parameters"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()
Mean rank of each tuned model
catboost_with_tuned_parameters 1.500 lightgbm_with_tuned_parameters 0.625 xgboost_with_tuned_parameters 0.875 dtype: float64
# Training and prediction times (per dataset x model) from the
# default-parameter runs, shown side by side.
def _times_frame(metric_name):
    # One row per dataset, one column per model, for the given timing field.
    return pd.DataFrame({dataset_name: {model_name: model_result[metric_name]
                                        for model_name, model_result in models.items()}
                         for dataset_name, models in default_perfs.items()}).transpose()

training_times = _times_frame("training_time")
prediction_times = _times_frame("prediction_time")
times_df = pd.concat([training_times, prediction_times], axis=1)
times_df
catboost | lightgbm | lightgbm_with_catboost_encoder | xgboost_with_catboost_encoder | xgboost | catboost | lightgbm | lightgbm_with_catboost_encoder | xgboost_with_catboost_encoder | xgboost | |
---|---|---|---|---|---|---|---|---|---|---|
california | 24.689261 | 0.178142 | 0.171883 | 119.791898 | 118.781466 | 0.014021 | 0.031725 | 0.031709 | 0.048880 | 0.042857 |
adult | 149.724603 | 0.811872 | 1.547185 | 129.368296 | 112.526765 | 0.129134 | 0.100814 | 0.222690 | 0.194931 | 0.149172 |
ukair | 144.291018 | 3.167551 | 3.082787 | 65.907621 | 33.585652 | 0.725683 | 1.181819 | 1.214606 | 0.544002 | 0.187727 |
diabetes | 9.472824 | 0.070993 | 0.180932 | 151.992842 | 153.275499 | 0.011634 | 0.004420 | 0.010192 | 0.143612 | 0.157037 |
bank | 119.778596 | 0.450672 | 1.307890 | 128.355890 | 117.680134 | 0.064928 | 0.087015 | 0.091556 | 0.206689 | 0.127684 |
dating | 218.227686 | 2.734924 | 2.686051 | 240.914340 | 224.078994 | 0.189192 | 0.205373 | 0.168617 | 0.556958 | 0.273364 |
valley | 70.126429 | 1.343469 | 2.123457 | 182.244021 | 182.811797 | 0.016280 | 0.004819 | 0.004638 | 0.138728 | 0.125285 |
cars | 141.749353 | 1.185837 | 1.893203 | 143.051520 | 135.966851 | 0.933087 | 0.122637 | 0.194635 | 0.281825 | 0.178372 |
# Grouped bars of training time per dataset; log scale because the times
# span several orders of magnitude across models.
px.bar(training_times[["xgboost", "lightgbm", "catboost"]],
       barmode="group",
       log_y=True,
       labels={"index": "Dataset", "value": "Training time", "variable": "Model"},
       template='xgridoff',
       color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                                "dodgerblue", "olivedrab", "orange"])
# Grouped bars of prediction time per dataset (log scale).
# FIX: the value-axis label said "Training time" (copy-paste from the
# training-time chart above) although this plots prediction_times.
px.bar(prediction_times[["xgboost", "lightgbm", "catboost"]], barmode="group", log_y=True, labels={
    "index": "Dataset",
    "value": "Prediction time",
    "variable": "Model",
},
    template='xgridoff',
    color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                             "dodgerblue", "olivedrab", "orange"])
print("Mean training times")
# Average training time in seconds per model, across all datasets.
training_times.mean(axis=0)
Mean training times
catboost 109.757471 lightgbm 1.242933 lightgbm_with_catboost_encoder 1.624173 xgboost_with_catboost_encoder 145.203303 xgboost 134.838395 dtype: float64
# Median training time of each model relative to xgboost on the same dataset.
# FIX: corrected "Mdian" typo in the printed heading.
print("Median training times comparing to xgboost training time")
training_times.assign(**{col_name: training_times[col_name] / training_times["xgboost"]
                         for col_name in training_times.columns}).median(axis=0)
Median training times comparing to xgboost training time
catboost 0.995860 lightgbm 0.007282 lightgbm_with_catboost_encoder 0.011801 xgboost_with_catboost_encoder 1.063619 xgboost 1.000000 dtype: float64
print("Mean prediction times")
# Average prediction time in seconds per model, across all datasets.
prediction_times.mean(axis=0)
Mean prediction times
catboost 0.260495 lightgbm 0.217328 lightgbm_with_catboost_encoder 0.242330 xgboost_with_catboost_encoder 0.264453 xgboost 0.155187 dtype: float64
# Median prediction time of each model relative to xgboost on the same dataset.
# FIX: corrected "Mdian" typo in the printed heading.
print("Median prediction times comparing to xgboost prediction time")
prediction_times.assign(**{col_name: prediction_times[col_name] / prediction_times["xgboost"]
                           for col_name in prediction_times.columns}).median(axis=0)
Median prediction times comparing to xgboost prediction time
catboost 0.600297 lightgbm 0.684511 lightgbm_with_catboost_encoder 0.728463 xgboost_with_catboost_encoder 1.443369 xgboost 1.000000 dtype: float64