from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset and carve out a held-out test split;
# random_state=0 fixes the shuffle so the split is repeatable.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=0,
)

# Baseline: an SVC with default hyper-parameters trained on the raw,
# unscaled features. fit() returns the estimator, so the call is chained.
svm = SVC().fit(X_train, y_train)

# Compare accuracy on the data the model saw against the held-out data.
train_accuracy = svm.score(X_train, y_train)
print('The accuracy on the training subset: {:.3f}'.format(train_accuracy))
test_accuracy = svm.score(X_test, y_test)
print('The accuracy on the test subset: {:.3f}'.format(test_accuracy))
The accuracy on the training subset: 1.000
The accuracy on the test subset: 0.629
import matplotlib.pyplot as plt
%matplotlib inline
# Per-feature extremes of the training data (axis=0 reduces over samples).
feature_minima = X_train.min(axis=0)
feature_maxima = X_train.max(axis=0)

# One marker per feature for each extreme; the log y-axis keeps features
# of very different magnitudes visible on the same plot.
plt.plot(feature_minima, 'o', label='Min')
plt.plot(feature_maxima, 'v', label='Max')
plt.yscale('log')
plt.xlabel('Feature Index')
plt.ylabel('Feature Magnitude in Log Scale')
# Kept as the final expression so the notebook still echoes the Legend object.
plt.legend(loc='upper right')
<matplotlib.legend.Legend at 0x234c9312da0>
# Manual min-max scaling: shift every feature so its training minimum is 0,
# then divide by the feature's training range so its maximum becomes 1.
min_train = X_train.min(axis=0)
# max(x) - min equals max(x - min), since subtracting a constant keeps order.
range_train = X_train.max(axis=0) - min_train
X_train_scaled = (X_train - min_train) / range_train

# Sanity check: every feature should now span exactly [0, 1].
scaled_minima = X_train_scaled.min(axis=0)
scaled_maxima = X_train_scaled.max(axis=0)
print('Minimum per feature\n{}'.format(scaled_minima))
print('Maximum per feature\n{}'.format(scaled_maxima))
Minimum per feature [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] Maximum per feature [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
# Scale the test set with the minimum and range computed on the TRAINING
# data, so both subsets go through exactly the same transformation.
X_test_scaled = (X_test - min_train)/range_train
# Start from a fresh default SVC and train it on the scaled training features.
svm = SVC()
# Left as a bare expression so the notebook echoes the fitted estimator.
svm.fit(X_train_scaled, y_train)
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape=None, degree=3, gamma='auto', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Accuracy of the SVC trained on scaled features, on both scaled subsets.
scaled_train_accuracy = svm.score(X_train_scaled, y_train)
print('The accuracy on the training subset: {:.3f}'.format(scaled_train_accuracy))
scaled_test_accuracy = svm.score(X_test_scaled, y_test)
print('The accuracy on the test subset: {:.3f}'.format(scaled_test_accuracy))
The accuracy on the training subset: 0.948
The accuracy on the test subset: 0.951
# Retrain on the scaled features with C raised to 1000, then report
# accuracy on both subsets. fit() returns the estimator, so it is chained.
svm = SVC(C=1000).fit(X_train_scaled, y_train)

accuracy_on_train = svm.score(X_train_scaled, y_train)
print('The accuracy on the training subset: {:.3f}'.format(accuracy_on_train))
accuracy_on_test = svm.score(X_test_scaled, y_test)
print('The accuracy on the test subset: {:.3f}'.format(accuracy_on_test))
The accuracy on the training subset: 0.988
The accuracy on the test subset: 0.972
# Raw decision-function scores for the scaled test set; show the first 20.
decision_values = svm.decision_function(X_test_scaled)
print('The decision function is:\n\n{}'.format(decision_values[:20]))
The decision function is: [ -4.85978922 1.99579027 3.72514352 2.2759514 4.90872748 3.38738403 5.5791361 3.65809059 4.35247953 7.58641901 1.03844859 1.83665248 6.57010973 0.18802654 0.78505785 -3.91506363 3.36498378 -8.28312921 -7.22330097 -13.2443725 ]
# Threshold the first 20 decision scores at zero: True where the score is
# strictly positive, False otherwise.
first_scores = svm.decision_function(X_test_scaled)[:20]
print('Thresholded decision function:\n\n{}'.format(first_scores > 0))
Thresholded decision function: [False True True True True True True True True True True True True True True False True False False False]
# Display the class labels stored on the fitted classifier; left as a bare
# expression so the notebook echoes the array.
svm.classes_
array([0, 1])