Using scikit-learn Spectral clustering.
import numpy as np
import pylab as pl
Small utility function to display a gallery of images:
def plot_images(images):
    """Show up to the first 25 images as a 5x5 gallery on a new figure.

    Parameters
    ----------
    images : sliceable sequence of images
        Only ``images[:25]`` is drawn; each item is handed to ``pl.imshow``.
    """
    # Make gray the active colormap for the imshow calls below.
    pl.gray()
    pl.figure()
    for i, img in enumerate(images[:25]):
        # Subplot positions are 1-based while enumerate() counts from 0.
        pl.subplot(5, 5, i + 1)
        pl.imshow(img, interpolation="nearest")
        # Empty tick tuples hide the axis ticks on each thumbnail.
        pl.xticks(())
        pl.yticks(())
Let's load the digits dataset that comes with scikit-learn (as a CSV file with gray-level pixel values). Let's shuffle the dataset to make sure that the algorithm cannot exploit any ordering information.
from sklearn import datasets
from sklearn.utils import shuffle
# Load the bundled digits dataset.
digits = datasets.load_digits()

# Shuffle the images, the feature rows and the labels in a single call
# so that the three arrays are passed through shuffle() together.
images, data, target = shuffle(digits.images, digits.data, digits.target)

# Preview the first images of the shuffled set.
plot_images(images)
from sklearn import cluster, neighbors
n_clusters = 10

# Build a 10-nearest-neighbors graph over the samples; it is the matrix
# handed to SpectralClustering.fit below.
S = neighbors.kneighbors_graph(data, 10)
# "a is among b's nearest neighbors" does not imply the converse, so the raw
# graph is directed. Spectral clustering (and the ARPACK symmetric solver)
# expects a symmetric affinity, so average the graph with its transpose.
S = 0.5 * (S + S.T)

sc = cluster.SpectralClustering(n_clusters, mode='arpack', n_init=50)
sc.fit(S)
# Bare expression kept on purpose: in a notebook cell it displays the labels.
sc.labels_

# One gallery per cluster, showing the images assigned to that cluster.
for i in range(n_clusters):
    plot_images(images[sc.labels_ == i])
The following will run the cProfile
tool from the Python stdlib and display the output in a paged, tiled panel.
# IPython line magic (not plain Python): profile fitting a fresh
# SpectralClustering (10 clusters, arpack mode, n_init not overridden) on S.
%prun cluster.SpectralClustering(10, mode='arpack').fit(S)
from sklearn import svm, metrics
# Train on the first 500 shuffled samples and evaluate on the remainder.
X_train, y_train = data[:500], target[:500]
X_test, y_test = data[500:], target[500:]
clf = svm.SVC(gamma=0.001).fit(X_train, y_train)

# Predict once and reuse the result for both the report and the confusion
# matrix instead of running the classifier over the test set twice.
y_pred = clf.predict(X_test)

# Parenthesised single-argument print works on both Python 2 and Python 3.
print(metrics.classification_report(y_test, y_pred))
cm = metrics.confusion_matrix(y_test, y_pred)
print(cm)

# Render the confusion matrix as an image.
pl.imshow(cm)