#!/usr/bin/env python
# coding: utf-8

# For an explanation of the code in this notebook, see the post
# [Repeated Cross-Validation](https://tensorflow.blog/2017/12/27/%EB%B0%98%EB%B3%B5-%EA%B5%90%EC%B0%A8-%EA%B2%80%EC%A6%9D/).

# In[1]:


# Print the Python, scikit-learn, NumPy and SciPy versions used for this run.
get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', '-v -p sklearn,numpy,scipy')


# In[2]:


import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[3]:


from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.linear_model import LogisticRegression


# In[4]:


iris = load_iris()
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases
# (1.5+) -- confirm against the installed version before upgrading.
logreg = LogisticRegression(solver='liblinear', multi_class='auto', max_iter=1000)


# RepeatedKFold

# In[5]:


# Baseline: a single 5-fold split.
kfold = KFold(n_splits=5)
scores = cross_val_score(logreg, iris.data, iris.target, cv=kfold)
scores, scores.mean()


# In[6]:


from sklearn.model_selection import RepeatedKFold

# 5 splits repeated 5 times; random_state fixes the shuffling between repeats.
rkfold = RepeatedKFold(n_splits=5, n_repeats=5, random_state=42)
scores = cross_val_score(logreg, iris.data, iris.target, cv=rkfold)
scores, scores.mean()


# In[7]:


# Distribution of the repeated K-fold scores.
plt.boxplot(scores)
plt.show()


# RepeatedStratifiedKFold

# In[8]:


# Baseline: a single stratified 5-fold split.
skfold = StratifiedKFold(n_splits=5)
scores = cross_val_score(logreg, iris.data, iris.target, cv=skfold)
scores, scores.mean()


# In[9]:


from sklearn.model_selection import RepeatedStratifiedKFold

# Stratified variant: 5 splits repeated 5 times with a fixed seed.
rskfold = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=42)
scores = cross_val_score(logreg, iris.data, iris.target, cv=rskfold)
scores, scores.mean()


# In[10]:


# Distribution of the repeated stratified K-fold scores.
plt.boxplot(scores)
plt.show()


# In[11]:


import inspect

from sklearn.model_selection import GridSearchCV, train_test_split

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)

# Candidate values for the regularization parameter C.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

# The `iid` keyword was removed from GridSearchCV in newer scikit-learn
# releases, where passing it raises TypeError. Keep `iid=False` only when the
# installed constructor still accepts it, so older versions behave as before.
grid_kwargs = {'cv': rskfold, 'return_train_score': True}
if 'iid' in inspect.signature(GridSearchCV).parameters:
    grid_kwargs['iid'] = False

grid_search = GridSearchCV(logreg, param_grid, **grid_kwargs)
grid_search.fit(X_train, y_train)


# In[12]:


# Held-out test score, selected parameters, and best cross-validation score.
grid_search.score(X_test, y_test), grid_search.best_params_, grid_search.best_score_


# In[13]:


# Show only the per-split entries of the cross-validation results.
for key, values in grid_search.cv_results_.items():
    if 'split' in key:
        print(key, values)