import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import brier_score_loss,log_loss
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV,calibration_curve
from sklearn import metrics
from sklearn.ensemble import GradientBoostingClassifier
import matplotlib.pyplot as plt
# Seed NumPy's global RNG. Every scikit-learn call below is left at
# random_state=None and therefore draws from this global state, so the seed
# makes the whole run reproducible.
np.random.seed(42)

# Synthetic binary classification problem: 10 features (5 informative,
# 5 redundant) with a very small class separation.
X, y = make_classification(
    n_samples=150000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    class_sep=0.05,
)

# 40% of the samples are used for model building, 60% are held out for the
# evaluation plots.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.60)

# With cv='prefit' the calibrator must be fitted on data the base estimator
# has not seen; calibrating on the training samples would learn the mapping
# from over-confident in-sample scores. Split the training portion in two:
# one half fits the classifier, the other half fits the calibrators.
X_fit, X_calib, y_fit, y_calib = train_test_split(
    X_train, y_train, test_size=0.50
)

# Base (uncalibrated) classifier.
clf = GradientBoostingClassifier()
clf.fit(X_fit, y_fit)

# Sigmoid calibration on top of the already-fitted classifier.
ccv_sig = CalibratedClassifierCV(clf, cv='prefit', method='sigmoid')
ccv_sig.fit(X_calib, y_calib)

# Isotonic calibration on top of the same already-fitted classifier.
ccv_iso = CalibratedClassifierCV(clf, cv='prefit', method='isotonic')
ccv_iso.fit(X_calib, y_calib)
# Positive-class probabilities on the held-out test set for the original
# classifier and for both calibrated wrappers. These three arrays are reused
# further down for the calibration plots.
clf_preds = clf.predict_proba(X_test)[:, 1]
ccv_preds_sig = ccv_sig.predict_proba(X_test)[:, 1]
ccv_preds_iso = ccv_iso.predict_proba(X_test)[:, 1]

# One ROC panel per model; each title names the calibration method so the
# two calibrated panels can be told apart.
roc_panels = [
    ('ROC Curve - original classifier', clf_preds),
    ('ROC Curve - Calibrated classifier (sigmoid)', ccv_preds_sig),
    ('ROC Curve - Calibrated classifier (isotonic)', ccv_preds_iso),
]

fig, axes = plt.subplots(1, 3, sharey=True)
for ax, (panel_title, scores) in zip(axes, roc_panels):
    fpr, tpr, _ = metrics.roc_curve(y_test, scores)
    auc_score = metrics.auc(fpr, tpr)
    ax.set_title(panel_title)
    ax.plot(fpr, tpr, label='AUC = {:.5f}'.format(auc_score))
    # Diagonal reference line from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], 'k:')
    ax.set_xlim([-0.1, 1.1])
    ax.set_ylim([-0.1, 1.1])
    ax.set_ylabel('True Positive Rate')
    ax.set_xlabel('False Positive Rate')
    ax.legend(loc='lower right')

# Resize through the explicit figure handle rather than plt.gcf(), and do not
# call plt.clf() after show(): with no current figure left, pyplot would
# create a new empty one.
fig.set_size_inches(15, 5)
plt.show()
# Calibration curves: the uncalibrated classifier against each calibration
# method, one panel per method, all evaluated on the held-out test set.
fig, axes = plt.subplots(1, 2, sharey=True)

# The uncalibrated baseline is identical in both panels, so compute its Brier
# score and calibration curve once.
base_score = brier_score_loss(y_test, clf_preds, pos_label=1)
base_fraction_of_positives, base_mean_predicted_value = calibration_curve(
    y_test, clf_preds, n_bins=30
)

calibration_panels = [
    ('Sigmoid', ccv_preds_sig),
    ('Isotonic', ccv_preds_iso),
]

for ax, (method_name, calibrated_preds) in zip(axes, calibration_panels):
    ax.set_xlim([-0.1, 1.1])
    ax.set_ylim([-0.1, 1.1])
    ax.plot([0, 1], [0, 1], "k:", label="Perfect calibration")

    # Uncalibrated classifier (red).
    ax.plot(
        base_mean_predicted_value,
        base_fraction_of_positives,
        "r-",
        label="No Calibration (Brier loss={:.3f})".format(base_score),
    )

    # Calibrated classifier for this panel (blue).
    clf_score = brier_score_loss(y_test, calibrated_preds, pos_label=1)
    fraction_of_positives, mean_predicted_value = calibration_curve(
        y_test, calibrated_preds, n_bins=30
    )
    ax.plot(
        mean_predicted_value,
        fraction_of_positives,
        "b-",
        label="{} Calibration (Brier loss={:.3f})".format(method_name, clf_score),
    )

    ax.set_xlabel('Mean predicted value')
    ax.set_ylabel('Fraction of positives')
    ax.legend(loc='lower right')
    ax.set_title('Original vs {} Calibration'.format(method_name), size=16)

# Layout and size are figure-level settings, so apply them once.
fig.subplots_adjust(top=0.85)
fig.set_size_inches(12, 6)
plt.show()