import numpy as np
from math import ceil, floor
from sklearn.datasets import load_boston
from sklearn.model_selection import ShuffleSplit as skShuffleSplit
class ShuffleSplit():
    """Random-permutation cross-validator that yields train/test index arrays.

    Each split draws a fresh permutation of the sample indices from a single
    ``numpy.random.RandomState`` seeded with ``random_state``. The first
    ``n_test`` permuted indices form the test set and the following
    ``n_train`` indices form the training set.

    Parameters
    ----------
    n_splits : int, default=10
        Number of (train, test) pairs to generate.
    train_size : float, default=0.9
        Fraction of the samples used for training; the resulting count is
        rounded down.
    test_size : float, default=0.1
        Fraction of the samples used for testing; the resulting count is
        rounded up.
    random_state : int or None, default=0
        Seed handed to ``numpy.random.RandomState``.
    """

    def __init__(self, n_splits=10,
                 train_size=0.9, test_size=0.1, random_state=0):
        self.n_splits = n_splits
        self.train_size = train_size
        self.test_size = test_size
        self.random_state = random_state

    def split(self, X, y=None):
        """Generate ``n_splits`` pairs of (train_indices, test_indices).

        Parameters
        ----------
        X : array-like exposing ``shape``
            Only ``X.shape[0]`` (the number of samples) is read.
        y : object, optional
            Ignored; accepted so the call signature matches other splitters.

        Yields
        ------
        train : numpy.ndarray
            Indices of the training samples for this split.
        test : numpy.ndarray
            Indices of the test samples for this split.

        Raises
        ------
        ValueError
            If the requested train and test counts together exceed the
            number of samples. Raised when iteration starts, because this
            method is a generator.
        """
        n_samples = X.shape[0]
        n_train = floor(self.train_size * n_samples)
        n_test = ceil(self.test_size * n_samples)
        # Without this check the training slice below would be silently
        # truncated whenever the two sets do not fit into the data.
        if n_train + n_test > n_samples:
            raise ValueError(
                "The sum of train_size and test_size = %d, should be smaller "
                "than or equal to the number of samples %d. Reduce test_size "
                "and/or train_size." % (n_train + n_test, n_samples))
        # One generator for all splits: every iteration advances the same
        # random stream, so successive splits differ but stay reproducible.
        rng = np.random.RandomState(self.random_state)
        for _ in range(self.n_splits):
            permutation = rng.permutation(n_samples)
            # Test indices come first in the permutation, train indices next.
            yield (permutation[n_test:(n_test + n_train)],
                   permutation[:n_test])
# Check the local ShuffleSplit against scikit-learn's implementation: first
# with both splitters' default sizes, then with explicit train/test fractions.
# NOTE(review): load_boston appears to have been removed from recent
# scikit-learn releases -- confirm the pinned version before running.
for split_kwargs in ({}, {"train_size": 0.5, "test_size": 0.2}):
    X, y = load_boston(return_X_y=True)
    cv1 = ShuffleSplit(n_splits=5, random_state=0, **split_kwargs)
    cv2 = skShuffleSplit(n_splits=5, random_state=0, **split_kwargs)
    # Walk both splitters in lockstep; every pair of index arrays must match.
    for (train1, test1), (train2, test2) in zip(cv1.split(X, y),
                                                cv2.split(X, y)):
        assert np.array_equal(train1, train2)
        assert np.array_equal(test1, test2)