이 노트북의 코드에 대한 설명은 "다중 평가 지표: cross_validate()" 글을 참고하세요.
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
# Load the digits data and recast it as a binary problem: the label is True
# only for samples whose digit is 9. The split is seeded for reproducibility.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target == 9,
    random_state=42,
)
from sklearn.svm import SVC
# 3-fold cross-validation with no `scoring` argument, i.e. the estimator's
# default score is reported for each fold.
cross_val_score(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
)
array([0.90200445, 0.90200445, 0.90200445])
# Same 3-fold run, this time naming the metric explicitly.
cross_val_score(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring='accuracy',
)
array([0.90200445, 0.90200445, 0.90200445])
from sklearn.model_selection import cross_validate
# Score two metrics in a single 3-fold run. A list of metric names yields
# result keys of the form 'test_<name>' (and 'train_<name>' because
# train-set scores are requested as well).
cross_validate(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring=['accuracy', 'roc_auc'],
    return_train_score=True,
)
{'fit_time': array([0.03770995, 0.03589416, 0.03686881]), 'score_time': array([0.12240219, 0.11768389, 0.11690235]), 'test_accuracy': array([0.90200445, 0.90200445, 0.90200445]), 'train_accuracy': array([1., 1., 1.]), 'test_roc_auc': array([0.99657688, 0.99814815, 0.99943883]), 'train_roc_auc': array([1., 1., 1.])}
# With a one-element metric list and no train scores, picking out
# 'test_accuracy' leaves just the per-fold accuracy array.
cross_validate(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring=['accuracy'],
    return_train_score=False,
)['test_accuracy']
array([0.90200445, 0.90200445, 0.90200445])
# Passing a dict lets the caller choose the labels: the keys ('acc', 'ra')
# replace the metric names in the result keys ('test_acc', 'test_ra').
cross_validate(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring={'acc':'accuracy', 'ra':'roc_auc'},
    return_train_score=False,
)
{'fit_time': array([0.03640604, 0.03584003, 0.03449273]), 'score_time': array([0.11128712, 0.10693693, 0.11939406]), 'test_acc': array([0.90200445, 0.90200445, 0.90200445]), 'test_ra': array([0.99657688, 0.99814815, 0.99943883])}
from sklearn.model_selection import GridSearchCV
# Candidate values of the SVC `gamma` parameter to search over.
param_grid = {'gamma': [0.0001, 0.01, 0.1, 1, 10]}

# Grid search scored with a metric list; `refit` names which metric is used
# to pick the best candidate.
grid = GridSearchCV(
    SVC(),
    param_grid=param_grid,
    cv=3,
    scoring=['accuracy'],
    refit='accuracy',
    return_train_score=True,
)

# Kept as a bare expression so the fitted search object is displayed.
grid.fit(X_train, y_train)
GridSearchCV(cv=3, estimator=SVC(), param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]}, refit='accuracy', return_train_score=True, scoring=['accuracy'])
# Parameter setting ranked first on the refit metric ('accuracy').
grid.best_params_
{'gamma': 0.0001}
# Mean cross-validated score of the best parameter setting on the refit
# metric ('accuracy').
grid.best_score_
0.9651076466221232
# Show the full search results transposed: one column per candidate,
# one row per cv_results_ entry.
pd.DataFrame(grid.cv_results_).T
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
mean_fit_time | 0.006796 | 0.033728 | 0.036865 | 0.029152 | 0.028625 |
std_fit_time | 0.000076 | 0.000763 | 0.000255 | 0.000303 | 0.000275 |
mean_score_time | 0.012703 | 0.059657 | 0.060291 | 0.054508 | 0.054504 |
std_score_time | 0.001003 | 0.000545 | 0.000915 | 0.003451 | 0.001073 |
param_gamma | 0.0001 | 0.01 | 0.1 | 1 | 10 |
params | {'gamma': 0.0001} | {'gamma': 0.01} | {'gamma': 0.1} | {'gamma': 1} | {'gamma': 10} |
split0_test_accuracy | 0.966592 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split1_test_accuracy | 0.96882 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split2_test_accuracy | 0.959911 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
mean_test_accuracy | 0.965108 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
std_test_accuracy | 0.003785 | 0.0 | 0.0 | 0.0 | 0.0 |
rank_test_accuracy | 1 | 2 | 2 | 2 | 2 |
split0_train_accuracy | 0.975501 | 1.0 | 1.0 | 1.0 | 1.0 |
split1_train_accuracy | 0.962138 | 1.0 | 1.0 | 1.0 | 1.0 |
split2_train_accuracy | 0.974388 | 1.0 | 1.0 | 1.0 | 1.0 |
mean_train_accuracy | 0.970676 | 1.0 | 1.0 | 1.0 | 1.0 |
std_train_accuracy | 0.006054 | 0.0 | 0.0 | 0.0 | 0.0 |
# Multi-metric grid search with caller-chosen labels. Both metrics are
# recorded, but the best candidate is selected by the one named in `refit`
# ('ra', i.e. ROC AUC).
grid = GridSearchCV(
    SVC(),
    param_grid=param_grid,
    cv=3,
    scoring={'acc':'accuracy', 'ra':'roc_auc'},
    refit='ra',
    return_train_score=True,
)

# Kept as a bare expression so the fitted search object is displayed.
grid.fit(X_train, y_train)
GridSearchCV(cv=3, estimator=SVC(), param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]}, refit='ra', return_train_score=True, scoring={'acc': 'accuracy', 'ra': 'roc_auc'})
# Parameter setting ranked first on the refit metric ('ra', ROC AUC).
grid.best_params_
{'gamma': 0.01}
# Mean cross-validated score of the best parameter setting on the refit
# metric ('ra', ROC AUC).
grid.best_score_
0.9983352038907594
# Show the full search results transposed: one column per candidate,
# one row per cv_results_ entry (both 'acc' and 'ra' metrics).
pd.DataFrame(grid.cv_results_).T
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
mean_fit_time | 0.006864 | 0.034273 | 0.037054 | 0.030654 | 0.028933 |
std_fit_time | 0.000172 | 0.000251 | 0.000493 | 0.000555 | 0.000616 |
mean_score_time | 0.024493 | 0.114614 | 0.121366 | 0.11359 | 0.107006 |
std_score_time | 0.002559 | 0.001332 | 0.003262 | 0.003361 | 0.00633 |
param_gamma | 0.0001 | 0.01 | 0.1 | 1 | 10 |
params | {'gamma': 0.0001} | {'gamma': 0.01} | {'gamma': 0.1} | {'gamma': 1} | {'gamma': 10} |
split0_test_acc | 0.966592 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split1_test_acc | 0.96882 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split2_test_acc | 0.959911 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
mean_test_acc | 0.965108 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
std_test_acc | 0.003785 | 0.0 | 0.0 | 0.0 | 0.0 |
rank_test_acc | 1 | 2 | 2 | 2 | 2 |
split0_train_acc | 0.975501 | 1.0 | 1.0 | 1.0 | 1.0 |
split1_train_acc | 0.962138 | 1.0 | 1.0 | 1.0 | 1.0 |
split2_train_acc | 0.974388 | 1.0 | 1.0 | 1.0 | 1.0 |
mean_train_acc | 0.970676 | 1.0 | 1.0 | 1.0 | 1.0 |
std_train_acc | 0.006054 | 0.0 | 0.0 | 0.0 | 0.0 |
split0_test_ra | 0.98367 | 0.997419 | 0.934007 | 0.5 | 0.5 |
split1_test_ra | 0.987149 | 0.998148 | 0.912458 | 0.5 | 0.5 |
split2_test_ra | 0.994388 | 0.999439 | 0.910494 | 0.5 | 0.5 |
mean_test_ra | 0.988403 | 0.998335 | 0.918986 | 0.5 | 0.5 |
std_test_ra | 0.004465 | 0.000835 | 0.010651 | 0.0 | 0.0 |
rank_test_ra | 2 | 1 | 3 | 4 | 4 |
split0_train_ra | 0.992017 | 1.0 | 1.0 | 1.0 | 1.0 |
split1_train_ra | 0.994935 | 1.0 | 1.0 | 1.0 | 1.0 |
split2_train_ra | 0.98945 | 1.0 | 1.0 | 1.0 | 1.0 |
mean_train_ra | 0.992134 | 1.0 | 1.0 | 1.0 | 1.0 |
std_train_ra | 0.002241 | 0.0 | 0.0 | 0.0 | 0.0 |
# Estimator configured with the best parameters, as selected by the refit
# metric ('ra').
grid.best_estimator_
SVC(gamma=0.01)