import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Estimate the accuracy of a logistic-regression classifier on a synthetic
# classification dataset using 5-fold cross-validation.

# Generate 10,000 samples with 10 features each. The generator is seeded so
# that, together with the seeded KFold below, repeated runs evaluate the same
# data with the same splits and therefore report the same accuracies.
X, y = make_classification(n_samples=10000, n_features=10, random_state=42)

# Shuffle the samples before splitting them into 5 folds; the fixed seed keeps
# the fold assignment identical between runs.
kf = KFold(n_splits=5, random_state=42, shuffle=True)

# One accuracy score per fold, in the order the folds are evaluated.
accuracies = []
for train_index, test_index in kf.split(X):
    # kf.split yields integer index arrays; use them to slice out the
    # training and held-out portions of both features and labels.
    data_train = X[train_index]
    target_train = y[train_index]
    data_test = X[test_index]
    target_test = y[test_index]

    # If needed, do preprocessing here. Fit any preprocessing step on the
    # training fold only and then apply it to the test fold, so no information
    # from the held-out data leaks into training.

    # Train a fresh model for every fold so nothing carries over between folds.
    clf = LogisticRegression()
    clf.fit(data_train, target_train)

    # Score the model on the fold it has not seen during training.
    preds = clf.predict(data_test)
    accuracy = accuracy_score(target_test, preds)
    print('accuracy: {:.2f}'.format(accuracy))
    accuracies.append(accuracy)

# Summarise the per-fold scores with their mean.
average_accuracy = np.mean(accuracies)
print('average accuracy (over all folds): {:.2f}'.format(average_accuracy))
# Example output from one run:
# accuracy: 0.92
# accuracy: 0.92
# accuracy: 0.91
# accuracy: 0.92
# accuracy: 0.91
# average accuracy (over all folds): 0.92