import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.calibration import calibration_curve as skcalibration_curve
def calibration_curve(y_true, y_prob, normalize=False, n_bins=5):
    """Compute the points of a reliability (calibration) curve.

    The scores are split into ``n_bins`` equal-width bins over [0, 1]. For
    every non-empty bin the function reports the mean of ``y_true`` and the
    mean of the scores that fell into that bin.

    Parameters
    ----------
    y_true : array-like
        Targets, one per sample; averaged per bin to obtain ``prob_true``.
    y_prob : array-like
        Scores, one per sample. Must lie in [0, 1] unless ``normalize`` is
        true.
    normalize : bool, default=False
        If true, min-max rescale ``y_prob`` into [0, 1] before binning.
    n_bins : int, default=5
        Number of equal-width bins.

    Returns
    -------
    prob_true : ndarray
        Mean of ``y_true`` in each non-empty bin.
    prob_pred : ndarray
        Mean of the (possibly rescaled) scores in each non-empty bin.

    Raises
    ------
    ValueError
        If ``normalize`` is false and ``y_prob`` has values outside [0, 1],
        or if ``normalize`` is true and all scores are identical (min-max
        scaling is undefined in that case).
    """
    # Accept lists/tuples as well as arrays; .min()/.max() below need ndarrays.
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)

    if normalize:
        lowest, highest = y_prob.min(), y_prob.max()
        if highest == lowest:
            # Would otherwise divide 0 by 0 and silently yield NaN scores.
            raise ValueError(
                "Cannot normalize y_prob: all values are identical."
            )
        y_prob = (y_prob - lowest) / (highest - lowest)
    elif np.any((y_prob < 0) | (y_prob > 1)):
        # Without this check, scores > 1 land in a spurious extra bin and
        # negative scores trigger an unrelated np.bincount error.
        raise ValueError(
            "y_prob has values outside [0, 1] and normalize is set to False."
        )

    # The tiny epsilon pushes the last edge just past 1 so that a score of
    # exactly 1.0 is assigned to the final bin rather than beyond it.
    bins = np.linspace(0, 1 + 1e-8, n_bins + 1)
    binids = np.digitize(y_prob, bins) - 1

    bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins))
    bin_true = np.bincount(binids, weights=y_true, minlength=len(bins))
    bin_total = np.bincount(binids, minlength=len(bins))

    # Drop empty bins to avoid dividing by zero.
    nonzero = bin_total != 0
    prob_true = bin_true[nonzero] / bin_total[nonzero]
    prob_pred = bin_sums[nonzero] / bin_total[nonzero]
    return prob_true, prob_pred
# Compare the local calibration_curve with scikit-learn's implementation on a
# binary subset of the iris data set.
X, y = load_iris(return_X_y=True)
X, y = X[y != 2], y[y != 2]  # drop label 2 so only labels 0 and 1 remain

# Case 1: scores taken from column 1 of predict_proba, passed through as-is.
clf = LogisticRegression(max_iter=10000).fit(X, y)
y_prob = clf.predict_proba(X)[:, 1]
prob_true1, prob_pred1 = calibration_curve(y, y_prob)
prob_true2, prob_pred2 = skcalibration_curve(y, y_prob)

# Case 2: decision_function scores, which must be rescaled into [0, 1] first.
# X and y are unchanged from above, so the data set is not reloaded.
clf = LinearSVC(max_iter=10000).fit(X, y)
y_prob = clf.decision_function(X)
prob_true1, prob_pred1 = calibration_curve(y, y_prob, normalize=True)
# scikit-learn deprecated the `normalize` argument in 1.1 and removed it in
# 1.3, so min-max scale explicitly; this is the same transformation the old
# flag applied and works on every scikit-learn version.
y_prob_scaled = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min())
prob_true2, prob_pred2 = skcalibration_curve(y, y_prob_scaled)