import numpy as np
import shap
import sklearn
from autogluon.tabular import TabularDataset, TabularPredictor
from fastcore.basics import store_attr
from kowaza.proba_calib import plot_calibration_curve
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.model_selection import train_test_split

## Census income
X, y = shap.datasets.adult()
X = X.values
print("Original dataframe shape", X.shape)

n_samples, n_features = X.shape

# Add noise: perturb every original feature with Gaussian noise scaled by 4,
# then append 100 * n_features columns that are pure Gaussian noise.
random_state = np.random.RandomState(0)
X = X + 4 * random_state.randn(n_samples, n_features)
X = np.c_[X, random_state.randn(n_samples, 100 * n_features)]

# The same RandomState instance is shared by both splits below, so the
# second split depends on the generator state left behind by the first.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=random_state
)
print("Noisy dataframe shape", X.shape)
print("Classes", np.unique(y))

# Carve the calibration set out of the held-out data. The "test" side of this
# split becomes the calibration set. An integer test_size requests an exact
# row count; the previous fraction (4000 / len(X_test)) is converted back with
# ceil() inside sklearn and could round up to 4001 because of float error.
n_calib = 4000
X_test, X_calib, y_test, y_calib = train_test_split(
    X_test,
    y_test,
    test_size=n_calib,
    random_state=random_state,
)
# Prints the train / calibration / test fractions of the full dataset.
print("学習・補正・テスト用データ比率", np.array([len(X_train), len(X_calib), len(X_test)]) / len(X))

tr_data = TabularDataset(X_train)
tr_data["y"] = y_train
# Bare expressions kept from the notebook; their values are discarded when
# this file runs as a plain script.
tr_data.head()

te_data = TabularDataset(X_test)
te_data["y"] = y_test
te_data.head()

tr_data.y.describe()
te_data.y.describe()

save_path = "trained-model"
# NOTE(review): the "toy" hyperparameter preset and time_limit=30 look chosen
# to keep the demo training run short - confirm against the AutoGluon docs.
predictor = TabularPredictor(label="y", path=save_path).fit(
    tr_data, hyperparameters="toy", time_limit=30
)


class AutoGluonWrapper:
    """Adapter exposing the interface sklearn needs from a trained AutoGluon model.

    It provides what is required to draw reliability curves and to perform
    probability calibration with sklearn.
    """

    def __init__(
        self,
        trained_model_path,  # path where the trained AutoGluon model is saved
        classes_,  # attribute required by the sklearn API
    ):
        # Stores the constructor arguments as same-named attributes.
        store_attr()
        # Filled in by load_model(); None means "not loaded yet".
        self.ag_model = None

    def load_model(self):
        """Load the trained AutoGluon model from ``trained_model_path``."""
        self.ag_model = TabularPredictor.load(self.trained_model_path)

    def fit(self, X=None, y=None):
        """Do nothing; this method exists because the sklearn API requires it.

        ``X`` and ``y`` are accepted and ignored so that sklearn-style
        ``fit(X, y)`` calls do not raise ``TypeError``. Training has already
        happened in AutoGluon.

        Returns:
            Always ``True``.
        """
        return True

    def predict_proba(self, X):
        """Return the predicted class probabilities for ``X``.

        The AutoGluon model is loaded on first use if ``load_model()`` has
        not been called yet.

        Args:
            X: Feature data; it is wrapped in a ``TabularDataset`` before
                being passed to the AutoGluon predictor.

        Returns:
            The ``.values`` of the predictor's ``predict_proba`` output
            (presumably one column per class - verify against AutoGluon).
        """
        if self.ag_model is None:
            self.load_model()
        features = TabularDataset(X)
        proba = self.ag_model.predict_proba(features)
        return proba.values


ag_ = AutoGluonWrapper(save_path, classes_=np.unique(y))
ag_.load_model()

# Reliability curve of the uncalibrated model on the test split.
plot_calibration_curve(dict(AutoGluon=ag_), X_test, y_test)

# cv="prefit" calibrates on top of the already-trained wrapper, using only the
# calibration split.
sigmoid = CalibratedClassifierCV(ag_, cv="prefit", method="sigmoid")
sigmoid.fit(X_calib, y_calib)

isotonic = CalibratedClassifierCV(ag_, cv="prefit", method="isotonic")
isotonic.fit(X_calib, y_calib)

# Compare the raw model against both calibrated variants on the test split.
plot_calibration_curve(
    dict(
        AutoGluon=ag_,
        Sigmoid=sigmoid,
        Isotonic=isotonic,
    ),
    X_test,
    y_test,
)