import numpy as np
from sklearn.base import clone
from sklearn.datasets import load_boston, load_iris
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import cross_val_predict as skcross_val_predict
def cross_val_predict(estimator, X, y, method="predict"):
    """Return out-of-fold predictions for every sample of ``X``.

    Minimal re-implementation of scikit-learn's ``cross_val_predict``: the
    data is split into folds, a fresh clone of ``estimator`` is fitted on
    each training part, and ``method`` is called on the matching held-out
    part. The per-fold outputs are then put back into the original sample
    order.

    Parameters
    ----------
    estimator : estimator object
        Must expose ``_estimator_type``, ``fit`` and the requested
        ``method``. It is cloned for every fold; the object passed in is
        never fitted.
    X, y : indexable
        Features and targets. Both are indexed with the integer index
        arrays produced by the splitter (``X[train]``, ``y[train]``).
    method : str, default="predict"
        Name of the estimator method called on each held-out part.

    Returns
    -------
    numpy.ndarray
        The concatenated outputs of ``method``, with row ``i``
        corresponding to sample ``i`` of ``X``.
    """
    # Regressors use plain K-fold; anything else is treated as a classifier
    # and split with class stratification.
    if estimator._estimator_type == "regressor":
        cv = KFold()
    else:
        cv = StratifiedKFold()

    fold_predictions = []
    fold_indices = []
    for train, test in cv.split(X, y):
        est = clone(estimator)
        est.fit(X[train], y[train])
        fold_predictions.append(getattr(est, method)(X[test]))
        fold_indices.append(test)

    predictions = np.concatenate(fold_predictions)
    test_indices = np.concatenate(fold_indices)

    # Predictions are stored in fold order. Build the inverse permutation so
    # that position i of the result holds the prediction for sample i.
    # np.intp is the index dtype; the old ``np.int`` alias was removed from
    # NumPy and raises AttributeError on current releases.
    inv_indices = np.empty(len(test_indices), dtype=np.intp)
    inv_indices[test_indices] = np.arange(len(test_indices))
    return predictions[inv_indices]
# Regression: the local cross_val_predict must reproduce scikit-learn's
# cross_val_predict for a random-forest regressor.
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
# pinned version, or switch to another regression dataset.
X, y = load_boston(return_X_y=True)
clf = RandomForestRegressor(random_state=0)
ans1, ans2 = (
    cross_val_predict(clf, X, y),
    skcross_val_predict(clf, X, y),
)
# Floating-point predictions are compared with a tolerance.
assert np.allclose(ans1, ans2)
# Classification: compare against scikit-learn on the iris data, first for
# predicted labels and then for class probabilities.
X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(random_state=0)
ans1, ans2 = (
    cross_val_predict(clf, X, y),
    skcross_val_predict(clf, X, y),
)
# Predicted labels must match exactly.
assert np.array_equal(ans1, ans2)
ans1, ans2 = (
    cross_val_predict(clf, X, y, method="predict_proba"),
    skcross_val_predict(clf, X, y, method="predict_proba"),
)
# Probabilities are floats, so compare with a tolerance.
assert np.allclose(ans1, ans2)