import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import cross_val_score
# Regression problem: a default LinearRegression estimator paired with the
# diabetes dataset bundled with scikit-learn. return_X_y=True makes the
# loader hand back the (features, target) pair directly.
reg = LinearRegression()
X_reg, y_reg = load_diabetes(return_X_y=True)
# Classification problem: a default LogisticRegression estimator that
# predicts the 'Survived' column from three columns of the downloaded CSV.
clf = LogisticRegression()
df = pd.read_csv('http://bit.ly/kaggletrain')
X_clf, y_clf = df[['Pclass', 'Fare', 'SibSp']], df['Survived']
from sklearn.model_selection import KFold, StratifiedKFold

# Regression: 5-fold cross-validation scored with R^2. shuffle=True
# randomizes which rows land in each fold; random_state=1 pins that shuffle
# so the split is reproducible between runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)
reg_scores = cross_val_score(reg, X_reg, y_reg, cv=kf, scoring='r2')
# Print explicitly: a bare expression shows nothing when run as a script.
print(reg_scores)
# Per-fold scores recorded in the original source (exact values may vary
# with the installed library version):
# array([0.43843604, 0.38982527, 0.52792606, 0.47359858, 0.57449343])
# Classification: stratified 5-fold cross-validation scored with accuracy.
# StratifiedKFold keeps the class proportions of y roughly equal in every
# fold; shuffle=True with random_state=1 makes the split reproducible.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
clf_scores = cross_val_score(clf, X_clf, y_clf, cv=skf, scoring='accuracy')
# Print explicitly: a bare expression shows nothing when run as a script.
print(clf_scores)
# Per-fold scores recorded in the original source (exact values may vary
# with the installed library version):
# array([0.65363128, 0.7247191 , 0.66853933, 0.68539326, 0.65730337])
# © 2020 Data School. All rights reserved.