%matplotlib inline
import sys
sys.path.append('..')
from preamble import *
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_iris
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)
gbrt = GradientBoostingClassifier(learning_rate=0.01, random_state=0)
gbrt.fit(X_train, y_train)
GradientBoostingClassifier(criterion='friedman_mse', init=None, learning_rate=0.01, loss='deviance', max_depth=3, max_features=None, max_leaf_nodes=None, min_impurity_split=1e-07, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, presort='auto', random_state=0, subsample=1.0, verbose=0, warm_start=False)
print("결정 함수의 결과 형태: {}".format(gbrt.decision_function(X_test).shape))
print("결정 함수 결과: \n{}".format(gbrt.decision_function(X_test)[:6, :]))
결정 함수의 결과 형태: (38, 3) 결정 함수 결과: [[-0.529 1.466 -0.504] [ 1.512 -0.496 -0.503] [-0.524 -0.468 1.52 ] [-0.529 1.466 -0.504] [-0.531 1.282 0.215] [ 1.512 -0.496 -0.503]]
print("가장 큰 결정 함수의 인덱스: \n{}".format(np.argmax(gbrt.decision_function(X_test), axis=1)))
print("예측:\n{}".format(gbrt.predict(X_test)))
가장 큰 결정 함수의 인덱스: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1 0] 예측: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1 0]
print("예측 확률: \n{}".format(gbrt.predict_proba(X_test)[:6]))
print("합: {}".format(gbrt.predict_proba(X_test)[:6].sum(axis=1)))
예측 확률: [[ 0.107 0.784 0.109] [ 0.789 0.106 0.105] [ 0.102 0.108 0.789] [ 0.107 0.784 0.109] [ 0.108 0.663 0.228] [ 0.789 0.106 0.105]] 합: [ 1. 1. 1. 1. 1. 1.]
print("가장 큰 결정 함수의 인덱스: \n{}".format(np.argmax(gbrt.predict_proba(X_test), axis=1)))
print("예측:\n{}".format(gbrt.predict(X_test)))
가장 큰 결정 함수의 인덱스: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1 0] 예측: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1 0]
from sklearn import linear_model
logreg = linear_model.LogisticRegression()
named_target = iris.target_names[y_train]
logreg.fit(X_train, named_target)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
print("훈련 데이터에 있는 클래스 종류: {}".format(logreg.classes_))
print("예측: {}".format(logreg.predict(X_test)[:10]))
argmax_dec_func = np.argmax(logreg.decision_function(X_test), axis=1)
print("가장 큰 결정 함수의 인덱스: {}".format(argmax_dec_func[:10]))
print("인덱스를 classes_에 여")
훈련 데이터에 있는 클래스 종류: ['setosa' 'versicolor' 'virginica'] 예측: ['versicolor' 'setosa' 'virginica' 'versicolor' 'versicolor' 'setosa' 'versicolor' 'virginica' 'versicolor' 'versicolor'] 가장 큰 결정 함수의 인덱스: [1 0 2 1 1 0 1 2 1 1]