import numpy as np
from itertools import product
from sklearn.base import clone
from sklearn.datasets import load_boston, load_iris
from sklearn.svm import SVC, SVR
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import GridSearchCV as skGridSearchCV
class GridSearchCV():
    def __init__(self, estimator, param_grid):
        self.estimator = estimator
        self.param_grid = param_grid

    def generate_grid(self):
        # Enumerate every candidate setting as a dict, taking the Cartesian
        # product of the parameter value lists (keys sorted for determinism).
        items = sorted(self.param_grid.items())
        keys, values = zip(*items)
        for v in product(*values):
            params = dict(zip(keys, v))
            yield params
    def fit(self, X, y):
        # Match sklearn's default cross-validation strategy: 5-fold KFold for
        # regressors, 5-fold StratifiedKFold for classifiers.
        if self.estimator._estimator_type == "regressor":
            cv = KFold()
        else:  # estimator._estimator_type == "classifier"
            cv = StratifiedKFold()
        train_scores, test_scores = [], []
        params = []
        for i, cur_param in enumerate(self.generate_grid()):
            cur_train_score, cur_test_score = [], []
            for j, (train, test) in enumerate(cv.split(X, y)):
                # Fit a fresh clone on each training fold and score both folds.
                est = clone(self.estimator)
                est.set_params(**cur_param)
                est.fit(X[train], y[train])
                cur_train_score.append(est.score(X[train], y[train]))
                cur_test_score.append(est.score(X[test], y[test]))
            # One entry per parameter combination (not per fold).
            params.append(cur_param)
            train_scores.append(cur_train_score)
            test_scores.append(cur_test_score)
        train_scores = np.array(train_scores)
        test_scores = np.array(test_scores)
        # Assemble cv_results_ with the same keys sklearn uses.
        cv_results = {}
        for i in range(cv.n_splits):
            cv_results["split" + str(i) + "_train_score"] = train_scores[:, i]
            cv_results["split" + str(i) + "_test_score"] = test_scores[:, i]
        cv_results["mean_train_score"] = np.mean(train_scores, axis=1)
        cv_results["std_train_score"] = np.std(train_scores, axis=1)
        cv_results["mean_test_score"] = np.mean(test_scores, axis=1)
        cv_results["std_test_score"] = np.std(test_scores, axis=1)
        cv_results["params"] = params
        self.cv_results_ = cv_results
        # Refit the best parameter combination on the full dataset.
        self.best_params_ = cv_results["params"][np.argmax(cv_results["mean_test_score"])]
        self.best_estimator_ = clone(self.estimator)
        self.best_estimator_.set_params(**self.best_params_)
        self.best_estimator_.fit(X, y)
        return self
    def decision_function(self, X):
        return self.best_estimator_.decision_function(X)

    def predict(self, X):
        return self.best_estimator_.predict(X)
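# Illustrative sanity check of the grid generator (an added sketch, not one of
# the comparisons below): a 3 x 3 param_grid should expand to 9 candidate dicts.
_demo_grid = list(GridSearchCV(SVC(), {"C": [0.1, 1, 10], "gamma": [0.1, 1, 10]}).generate_grid())
assert len(_demo_grid) == 9
assert {"C": 0.1, "gamma": 0.1} in _demo_grid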
# Regression: SVR on the Boston housing dataset (load_boston is available only
# in scikit-learn < 1.2), compared against sklearn's GridSearchCV.
X, y = load_boston(return_X_y=True)
param_grid = {"C":[0.1, 1, 10], "gamma":[0.1, 1, 10]}
clf1 = GridSearchCV(SVR(), param_grid).fit(X, y)
clf2 = skGridSearchCV(SVR(), param_grid, return_train_score=True).fit(X, y)
for i in range(5):
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_train_score"],
                       clf2.cv_results_["split" + str(i) + "_train_score"])
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_test_score"],
                       clf2.cv_results_["split" + str(i) + "_test_score"])
assert np.allclose(clf1.cv_results_["mean_train_score"], clf2.cv_results_["mean_train_score"])
assert np.allclose(clf1.cv_results_["std_train_score"], clf2.cv_results_["std_train_score"])
assert np.allclose(clf1.cv_results_["mean_test_score"], clf2.cv_results_["mean_test_score"])
assert np.allclose(clf1.cv_results_["std_test_score"], clf2.cv_results_["std_test_score"])
assert np.allclose(clf1.best_params_["C"], clf2.best_params_["C"])
assert np.allclose(clf1.best_params_["gamma"], clf2.best_params_["gamma"])
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.allclose(pred1, pred2)
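# Extra assumed check: since the mean test scores agree, the best of them
# should also equal sklearn's best_score_ attribute.
assert np.allclose(np.max(clf1.cv_results_["mean_test_score"]), clf2.best_score_)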
# Classification: SVC on the iris dataset, compared against sklearn's GridSearchCV.
X, y = load_iris(return_X_y=True)
param_grid = {"C":[0.1, 1, 10], "gamma":[0.1, 1, 10]}
clf1 = GridSearchCV(SVC(random_state=0), param_grid).fit(X, y)
clf2 = skGridSearchCV(SVC(random_state=0), param_grid, return_train_score=True).fit(X, y)
for i in range(5):
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_train_score"],
                       clf2.cv_results_["split" + str(i) + "_train_score"])
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_test_score"],
                       clf2.cv_results_["split" + str(i) + "_test_score"])
assert np.allclose(clf1.cv_results_["mean_train_score"], clf2.cv_results_["mean_train_score"])
assert np.allclose(clf1.cv_results_["std_train_score"], clf2.cv_results_["std_train_score"])
assert np.allclose(clf1.cv_results_["mean_test_score"], clf2.cv_results_["mean_test_score"])
assert np.allclose(clf1.cv_results_["std_test_score"], clf2.cv_results_["std_test_score"])
assert np.allclose(clf1.best_params_["C"], clf2.best_params_["C"])
assert np.allclose(clf1.best_params_["gamma"], clf2.best_params_["gamma"])
dec1 = clf1.decision_function(X)
dec2 = clf2.decision_function(X)
assert np.allclose(dec1, dec2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
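# Illustrative follow-up (assumed usage, not one of the comparisons above):
# because cv_results_ mirrors sklearn's layout, the usual inspection pattern
# recovers best_params_ from the stored per-candidate results.
best_idx = np.argmax(clf1.cv_results_["mean_test_score"])
assert clf1.cv_results_["params"][best_idx] == clf1.best_params_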