import numpy as np
from sklearn.datasets import load_boston, load_breast_cancer, load_iris
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.inspection import partial_dependence as skpartial_dependence
def partial_dependence(estimator, X, features, grid_resolution=100):
    """Compute brute-force partial dependence of ``estimator`` on one feature.

    For every value on a grid spanning the chosen feature, that feature's
    column is overwritten with the grid value for all rows of ``X``, the
    estimator is evaluated, and the predictions are averaged over the rows.

    Parameters
    ----------
    estimator : object
        Fitted estimator exposing ``_estimator_type``. When it equals
        ``"regressor"`` the ``predict`` method is used; otherwise
        ``predict_proba`` is used.
    X : numpy.ndarray
        2-D data array; it must support ``X[:, features]`` and ``.copy()``.
        ``X`` itself is never modified.
    features : int
        Column index of the feature to vary.
    grid_resolution : int, default=100
        Number of equally spaced grid points between the column's minimum and
        maximum. If the column has fewer distinct values than this, the
        sorted distinct values are used as the grid instead.

    Returns
    -------
    averaged_predictions : numpy.ndarray
        * regressor: shape ``(n_outputs, n_values)`` (one row for a
          single-output regressor);
        * binary classifier: shape ``(1, n_values)``, holding the second
          column of ``predict_proba``;
        * any other classifier: shape ``(n_classes, n_values)``.
    values : numpy.ndarray
        The grid values, in the order used for the columns above.

    Notes
    -----
    Grid values are assigned into a copy of ``X`` and are therefore cast to
    the dtype of ``X``.
    """
    column = X[:, features]
    # Distinct values are needed both for the size test and as the grid
    # itself, so compute them only once.
    distinct_values = np.unique(column)
    if len(distinct_values) < grid_resolution:
        values = distinct_values
    else:
        values = np.linspace(np.min(column), np.max(column),
                             num=grid_resolution, endpoint=True)

    is_regressor = estimator._estimator_type == "regressor"
    if is_regressor:
        prediction_method = estimator.predict
    else:  # e.g. estimator._estimator_type == "classifier"
        prediction_method = estimator.predict_proba

    # A single working copy is enough: only the studied column changes
    # between iterations and it is fully overwritten each time.
    X_eval = X.copy()
    averaged_predictions = []
    for value in values:
        X_eval[:, features] = value
        predictions = prediction_method(X_eval)
        averaged_predictions.append(np.mean(predictions, axis=0))
    # Rows become outputs/classes, columns become grid points.
    averaged_predictions = np.array(averaged_predictions).T

    if is_regressor:
        # Single-output regressors yield a 1-D array; promote it to one row.
        # Multi-output results are already (n_outputs, n_values) and must not
        # be flattened into a single row.
        if averaged_predictions.ndim == 1:
            averaged_predictions = averaged_predictions.reshape(1, -1)
    elif (estimator._estimator_type == "classifier"
          and averaged_predictions.shape[0] == 2):
        # Binary classification: keep only the second predict_proba column.
        averaged_predictions = averaged_predictions[1].reshape(1, -1)
    return averaged_predictions, values
# Cross-check the local partial_dependence against scikit-learn's
# implementation on three tasks. Each entry is
# (dataset loader, forest class, whether averaged predictions are compared).
# NOTE(review): load_boston is not available in recent scikit-learn releases,
# and ans2 is indexed positionally below -- confirm the pinned version.
for loader, forest_cls, compare_averages in (
    (load_boston, RandomForestRegressor, True),            # regression
    (load_breast_cancer, RandomForestClassifier, False),   # binary classification
    (load_iris, RandomForestClassifier, False),            # multiclass classification
):
    X, y = loader(return_X_y=True)
    clf = forest_cls(random_state=0).fit(X, y)
    for i in range(X.shape[1]):
        ans1 = partial_dependence(clf, X, i)
        ans2 = skpartial_dependence(clf, X, i, percentiles=(0, 1))
        if compare_averages:
            # Only the regression case checks the averaged predictions.
            assert np.allclose(ans1[0], ans2[0])
        # Every case checks that both implementations use the same grid.
        assert np.allclose(ans1[1], ans2[1][0])