%matplotlib inline import numpy as np from matplotlib import pyplot as plt from sklearn import datasets faces = datasets.fetch_olivetti_faces() faces.data.shape fig = plt.figure(figsize=(8, 6)) # plot several images for i in range(15): ax = fig.add_subplot(3, 5, i + 1, xticks=[], yticks=[]) ax.imshow(faces.images[i], cmap=plt.cm.bone) from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split(faces.data, faces.target, random_state=0) print(X_train.shape, X_test.shape) from sklearn import decomposition pca = decomposition.RandomizedPCA(n_components=150, whiten=True) pca.fit(X_train) plt.imshow(pca.mean_.reshape(faces.images[0].shape), cmap=plt.cm.bone) print(pca.components_.shape) fig = plt.figure(figsize=(16, 6)) for i in range(30): ax = fig.add_subplot(3, 10, i + 1, xticks=[], yticks=[]) ax.imshow(pca.components_[i].reshape(faces.images[0].shape), cmap=plt.cm.bone) X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) print(X_train_pca.shape) print(X_test_pca.shape) from sklearn import svm clf = svm.SVC(C=5., gamma=0.001) clf.fit(X_train_pca, y_train) fig = plt.figure(figsize=(8, 6)) for i in range(15): ax = fig.add_subplot(3, 5, i + 1, xticks=[], yticks=[]) ax.imshow(X_test[i].reshape(faces.images[0].shape), cmap=plt.cm.bone) y_pred = clf.predict(X_test_pca[i])[0] color = ('black' if y_pred == y_test[i] else 'red') ax.set_title(faces.target[y_pred], fontsize='small', color=color) from sklearn import metrics y_pred = clf.predict(X_test_pca) print(metrics.classification_report(y_test, y_pred)) print(metrics.confusion_matrix(y_test, y_pred)) print(metrics.f1_score(y_test, y_pred)) from sklearn.pipeline import Pipeline clf = Pipeline([('pca', decomposition.RandomizedPCA(n_components=150, whiten=True)), ('svm', svm.LinearSVC(C=1.0))]) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print(metrics.confusion_matrix(y_pred, y_test))