# Load the iris dataset, hold out a test split, and fit a baseline SVM.
from sklearn.datasets import load_iris
# `sklearn.cross_validation` was removed in scikit-learn 0.20; the splitting
# helpers now live in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

iris = load_iris()
X, y = iris.data, iris.target
# No random_state is given, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit on the training part only, then predict the held-out samples.
clf = SVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Fit a two-component PCA on the full feature matrix and project X onto it.
from sklearn.decomposition import PCA

# `fit` returns the estimator itself, so construction and fitting can be chained.
pca = PCA(n_components=2).fit(X)
X_pca = pca.transform(X)
import numpy as np
# Print arrays with two decimals so the score vectors below stay compact.
np.set_printoptions(precision=2)
# `sklearn.cross_validation` was removed in scikit-learn 0.20; use
# `sklearn.model_selection` instead.
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Score a fresh SVC with 5-fold cross-validation on the training data.
scores = cross_val_score(SVC(), X_train, y_train, cv=5)
print(scores)
# Example output (values vary with the random train/test split):
# [ 0.92 1. 0.96 1. 1. ]
# `sklearn.cross_validation` was removed in scikit-learn 0.20. Import
# `cross_val_score` from the same module as the splitter so both speak the
# same CV-iterator protocol.
from sklearn.model_selection import ShuffleSplit, cross_val_score

# The legacy constructor took the number of samples as its first argument;
# in `model_selection` the first argument is `n_splits`, so passing
# len(X_train) would request one split per sample. Spell out the legacy
# defaults instead: 10 random splits, each holding out 10% of the data.
cv_ss = ShuffleSplit(n_splits=10, test_size=0.1)
scores_shuffle_split = cross_val_score(SVC(), X_train, y_train, cv=cv_ss)
print(scores_shuffle_split)
# Example output (values vary because the splits are random):
# [ 1. 1. 1. 1. 1. 0.83 1. 0.92 1. 0.92]
# `LeaveOneLabelOut` (from the removed `sklearn.cross_validation`) was renamed
# to `LeaveOneGroupOut` in `sklearn.model_selection`.
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score

# Assign every training sample to one of three groups (0, 1, 2) in
# round-robin order.
labels = np.arange(len(X_train)) % 3
# The splitter no longer takes the labels in its constructor; they are passed
# to `cross_val_score` through `groups=` instead.
cv_label = LeaveOneGroupOut()
scores_pout = cross_val_score(SVC(), X_train, y_train, groups=labels, cv=cv_label)
import numpy as np
# `sklearn.grid_search` was removed in scikit-learn 0.20; `GridSearchCV` now
# lives in `sklearn.model_selection`.
from sklearn.model_selection import GridSearchCV

# Candidate values 10**-3 ... 10**2 for both C and gamma (arange excludes 3).
param_grid = {'C': 10. ** np.arange(-3, 3), 'gamma': 10. ** np.arange(-3, 3)}
grid = GridSearchCV(SVC(), param_grid=param_grid)
# The search only sees the training data; the test split is used solely for
# the final score below.
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.score(X_test, y_test))
# Example output (values vary with the random train/test split):
# {'C': 100.0, 'gamma': 0.01}
# 1.0
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Chain feature standardization and the SVM into a single estimator.
pipe = make_pipeline(StandardScaler(), SVC())
pipe.fit(X_train, y_train)
# As a bare expression the prediction is only displayed in an interactive
# session; in a plain script the result is computed and discarded.
pipe.predict(X_test)
# Example interactive output (values vary with the random train/test split):
# array([0, 0, 1, 2, 0, 2, 0, 1, 0, 2, 2, 1, 2, 2, 0, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1])