import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import TimeSeriesSplit as skTimeSeriesSplit
class TimeSeriesSplit:
    """Expanding-window cross-validation splitter for ordered samples.

    The samples are cut into ``n_splits + 1`` consecutive folds. Each split
    trains on every sample that precedes the test fold, so the training set
    grows from one split to the next while the test fold keeps a fixed size.

    Parameters
    ----------
    n_splits : int, default=5
        Number of (train, test) pairs to generate.
    """

    def __init__(self, n_splits=5):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        """Yield ``(train_indices, test_indices)`` pairs in temporal order.

        Parameters
        ----------
        X : array-like
            Data to split. Only its number of samples is used (``X.shape[0]``
            when available, otherwise ``len(X)``).
        y : ignored
            Unused; accepted for signature parity with scikit-learn splitters.
        groups : ignored
            Unused; accepted for signature parity with scikit-learn splitters.

        Yields
        ------
        train : numpy.ndarray
            Indices ``0 .. test_start - 1``.
        test : numpy.ndarray
            The ``test_size`` indices starting at ``test_start``.

        Raises
        ------
        ValueError
            If ``n_splits + 1`` exceeds the number of samples. The error is
            raised when iteration starts, because this is a generator.
        """
        n_samples = X.shape[0] if hasattr(X, "shape") else len(X)
        n_folds = self.n_splits + 1
        if n_folds > n_samples:
            # Without this guard test_size would be 0 and np.arange below
            # would be called with a zero step, failing with an opaque error.
            raise ValueError(
                f"Cannot have number of folds={n_folds} greater than the "
                f"number of samples={n_samples}."
            )

        indices = np.arange(n_samples)
        test_size = n_samples // n_folds
        # The samples left over by the integer division are absorbed by the
        # first training window, so every test fold has exactly test_size
        # samples and the last one ends at the final sample.
        first_test_start = test_size + n_samples % n_folds
        for test_start in np.arange(first_test_start, n_samples, test_size):
            yield (indices[:test_start],
                   indices[test_start:test_start + test_size])
# Sanity check: the hand-written splitter must produce exactly the same folds
# as scikit-learn's TimeSeriesSplit on the same data.
# NOTE(review): load_boston was removed in scikit-learn 1.2; on newer releases
# the import at the top of the file fails and another dataset is needed.
X, y = load_boston(return_X_y=True)
cv1 = TimeSeriesSplit(n_splits=5)
cv2 = skTimeSeriesSplit(n_splits=5)

# Materialize both sequences first: zip() stops at the shorter input, so a
# differing number of folds would otherwise go unnoticed.
splits1 = list(cv1.split(X, y))
splits2 = list(cv2.split(X, y))
assert len(splits1) == len(splits2)

for (train1, test1), (train2, test2) in zip(splits1, splits2):
    assert np.array_equal(train1, train2)
    assert np.array_equal(test1, test2)