import numpy as np
from sklearn.base import clone, is_regressor
from sklearn.datasets import load_diabetes, load_iris
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import validation_curve as skvalidation_curve

def validation_curve(estimator, X, y, param_name, param_range):
    """Re-implement a minimal validation curve with cross-validation.

    Regressors are scored with KFold, classifiers with StratifiedKFold
    (5 splits each, no shuffling), so repeated calls on the same data
    are deterministic. Returns (train_scores, test_scores), each of
    shape (len(param_range), n_splits).
    """
    if is_regressor(estimator):
        cv = KFold()
    else:  # classifier
        cv = StratifiedKFold()
    train_scores = np.zeros((len(param_range), cv.n_splits))
    test_scores = np.zeros((len(param_range), cv.n_splits))
    for i, param in enumerate(param_range):
        for j, (train, test) in enumerate(cv.split(X, y)):
            # Fit a fresh clone for every (parameter value, fold) pair so
            # nothing leaks across iterations.
            est = clone(estimator)
            est.set_params(**{param_name: param})
            est.fit(X[train], y[train])
            train_scores[i, j] = est.score(X[train], y[train])
            test_scores[i, j] = est.score(X[test], y[test])
    return train_scores, test_scores

# regression: identical calls must reproduce identical per-fold scores
# (load_boston was removed in scikit-learn 1.2, so load_diabetes is used instead)
X, y = load_diabetes(return_X_y=True)
reg = DecisionTreeRegressor(random_state=0)
ans1 = validation_curve(reg, X, y, "max_depth", [2, 4, 6, 8, 10])
ans2 = validation_curve(reg, X, y, "max_depth", [2, 4, 6, 8, 10])
assert np.allclose(ans1[0], ans2[0])
assert np.allclose(ans1[1], ans2[1])
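
# A minimal usage sketch: averaging the per-fold scores over axis=1 gives the
# usual validation-curve summary (one mean score per max_depth value), e.g.
# for plotting. `train_mean` / `test_mean` are illustrative names introduced
# here, not part of the original script.
train_mean = ans1[0].mean(axis=1)
test_mean = ans1[1].mean(axis=1)
assert train_mean.shape == test_mean.shape == (5,)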

# classification: same determinism check on iris with StratifiedKFold
X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(random_state=0)
ans1 = validation_curve(clf, X, y, "max_depth", [1, 2, 3, 4, 5])
ans2 = validation_curve(clf, X, y, "max_depth", [1, 2, 3, 4, 5])
assert np.allclose(ans1[0], ans2[0])
assert np.allclose(ans1[1], ans2[1])
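
# Optional sanity check against sklearn's own validation_curve (imported above
# as skvalidation_curve). This assumes the library defaults (5-fold CV, no
# shuffling, the estimator's .score as the metric) line up with the
# re-implementation above; it is a sketch, not a guarantee across versions.
# `sk_train` / `sk_test` are illustrative names.
sk_train, sk_test = skvalidation_curve(
    clf, X, y, param_name="max_depth", param_range=[1, 2, 3, 4, 5]
)
assert np.allclose(ans1[0], sk_train)
assert np.allclose(ans1[1], sk_test)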