import numpy as np
from sklearn.base import clone, is_regressor
from sklearn.datasets import load_diabetes, load_iris
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import validation_curve as skvalidation_curve

def validation_curve(estimator, X, y, param_name, param_range):
    """Re-implement a minimal validation curve with cross-validation.

    Regressors are scored with KFold, classifiers with StratifiedKFold
    (5 splits each, no shuffling), so repeated calls on the same data
    are deterministic. Returns (train_scores, test_scores), each of
    shape (len(param_range), n_splits).
    """
    if is_regressor(estimator):
        cv = KFold()
    else:  # classifier
        cv = StratifiedKFold()
    train_scores = np.zeros((len(param_range), cv.n_splits))
    test_scores = np.zeros((len(param_range), cv.n_splits))
    for i, param in enumerate(param_range):
        for j, (train, test) in enumerate(cv.split(X, y)):
            # Fit a fresh clone for every (parameter value, fold) pair so
            # nothing leaks across iterations.
            est = clone(estimator)
            est.set_params(**{param_name: param})
            est.fit(X[train], y[train])
            train_scores[i, j] = est.score(X[train], y[train])
            test_scores[i, j] = est.score(X[test], y[test])
    return train_scores, test_scores

# regression: identical calls must reproduce identical per-fold scores
# (load_boston was removed in scikit-learn 1.2, so load_diabetes is used instead)
X, y = load_diabetes(return_X_y=True)
reg = DecisionTreeRegressor(random_state=0)
ans1 = validation_curve(reg, X, y, "max_depth", [2, 4, 6, 8, 10])
ans2 = validation_curve(reg, X, y, "max_depth", [2, 4, 6, 8, 10])
assert np.allclose(ans1[0], ans2[0])
assert np.allclose(ans1[1], ans2[1])
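
# A minimal usage sketch: averaging the per-fold scores over axis=1 gives the
# usual validation-curve summary (one mean score per max_depth value), e.g.
# for plotting. `train_mean` / `test_mean` are illustrative names introduced
# here, not part of the original script.
train_mean = ans1[0].mean(axis=1)
test_mean = ans1[1].mean(axis=1)
assert train_mean.shape == test_mean.shape == (5,)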

# classification: same determinism check on iris with StratifiedKFold
X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(random_state=0)
ans1 = validation_curve(clf, X, y, "max_depth", [1, 2, 3, 4, 5])
ans2 = validation_curve(clf, X, y, "max_depth", [1, 2, 3, 4, 5])
assert np.allclose(ans1[0], ans2[0])
assert np.allclose(ans1[1], ans2[1])
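
# Optional sanity check against sklearn's own validation_curve (imported above
# as skvalidation_curve). This assumes the library defaults (5-fold CV, no
# shuffling, the estimator's .score as the metric) line up with the
# re-implementation above; it is a sketch, not a guarantee across versions.
# `sk_train` / `sk_test` are illustrative names.
sk_train, sk_test = skvalidation_curve(
    clf, X, y, param_name="max_depth", param_range=[1, 2, 3, 4, 5]
)
assert np.allclose(ans1[0], sk_train)
assert np.allclose(ans1[1], sk_test)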