Notebook

In [1]:

import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import TimeSeriesSplit as skTimeSeriesSplit

In [2]:

class TimeSeriesSplit:
    """Expanding-window cross-validation splitter for ordered samples.

    The samples are cut into ``n_splits + 1`` consecutive folds. Each split
    uses one fold as the test set and every sample before it as the training
    set, so training indices always precede test indices.

    Parameters
    ----------
    n_splits : int, default=5
        Number of (train, test) pairs produced by :meth:`split`.
    """

    def __init__(self, n_splits=5):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        """Yield ``(train_indices, test_indices)`` pairs in temporal order.

        Parameters
        ----------
        X : array-like
            Data to split. Only its number of samples is used, taken from
            ``X.shape[0]`` when available and from ``len(X)`` otherwise.
        y : object, default=None
            Ignored; accepted so the call shape matches other splitters.
        groups : object, default=None
            Ignored; accepted so the call shape matches other splitters.

        Yields
        ------
        train : numpy.ndarray
            Indices ``0 .. test_start - 1``.
        test : numpy.ndarray
            The ``test_size`` indices that follow the training indices.

        Raises
        ------
        ValueError
            If there are fewer samples than ``n_splits + 1`` folds. The error
            is raised when iteration starts, because this is a generator.
        """
        n_samples = X.shape[0] if hasattr(X, "shape") else len(X)
        n_folds = self.n_splits + 1
        if n_folds > n_samples:
            # Without this guard test_size would be 0 and np.arange would be
            # called with a zero step, failing with an unhelpful message.
            raise ValueError(
                "Cannot have number of folds={0} greater than the number "
                "of samples={1}.".format(n_folds, n_samples)
            )

        indices = np.arange(n_samples)
        test_size = n_samples // n_folds
        # The n_samples % n_folds leftover samples are absorbed into the first
        # training set, so every test fold has exactly test_size samples.
        first_test_start = test_size + n_samples % n_folds
        test_starts = np.arange(first_test_start, n_samples, test_size)
        for test_start in test_starts:
            yield (indices[:test_start],
                   indices[test_start:test_start + test_size])

In [3]:

# Check that the hand-written splitter reproduces scikit-learn's
# TimeSeriesSplit on a real dataset.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2; on newer versions substitute any dataset (only the sample count matters).
X, y = load_boston(return_X_y=True)
cv1 = TimeSeriesSplit(n_splits=5)
cv2 = skTimeSeriesSplit(n_splits=5)

# Materialise both sequences first: zip() stops at the shorter iterator, so a
# splitter that yields too few (or zero) splits would otherwise pass silently.
splits1 = list(cv1.split(X, y))
splits2 = list(cv2.split(X, y))
assert len(splits1) == len(splits2), (
    "split count differs: {0} vs {1}".format(len(splits1), len(splits2))
)
for (train1, test1), (train2, test2) in zip(splits1, splits2):
    assert np.array_equal(train1, train2)
    assert np.array_equal(test1, test2)