%pylab inline # Display given faces in a grid def show_patches(ims, grid_size=(16, 6)): fig = plt.figure(figsize=grid_size) for i in range(min(len(ims), 30)): ax = fig.add_subplot(3, 10, i + 1, xticks=[], yticks=[]) ax.imshow(ims[i].reshape((8, 8)), cmap=plt.cm.bone, interpolation='nearest') import sklearn from sklearn.pipeline import Pipeline from sklearn import svm # Load data import cPickle def load_data(file): with open(file, 'rb') as f: data = cPickle.load(f) return data data = load_data("cifar-10-batches-py/data_batch_1") images = data['data'] # Reshape to go from length 3072 vector to 32x32 rgb images # order='F' deals with specifics of how the data is laid out images = images.reshape((-1, 32, 32, 3), order='F') labels = np.array(data['labels']) def get_classes(classes=[0, 1, 2], per_class=100): # Array of indices i where labels[i] is in classes indices = np.concatenate([np.where(labels == c)[0][:per_class] for c in classes]) return images[indices], labels[indices] # For speed, let's consider only 2 classes, 100 images per class for now classes = [0,1,2,3] X, Y = get_classes(classes, 100) # That's cars, cats, and birds in this case for c in classes: plt.imshow(images[labels == c][0], interpolation='nearest') plt.show() from sklearn.cross_validation import KFold from sklearn.metrics import accuracy_score def score(clf, X, Y, folds=2, verbose=False): predictions = np.zeros(len(Y)) for i, (train, test) in enumerate(KFold(len(X), n_folds=folds, shuffle=True)): clf.fit(X[train], Y[train]) predictions[test] = clf.predict(X[test]) if verbose: print("Fold {}: {}".format(i + 1, accuracy_score(Y[test], predictions[test]))) return accuracy_score(Y, predictions) from sklearn.feature_extraction import image def rgb2gray(rgb): return np.dot(rgb[...,:3], [0.299, 0.587, 0.144]) # Used to whiten patches - reduces variance, helps with classification from sklearn.decomposition import RandomizedPCA class PatchExtractor(sklearn.base.BaseEstimator): def __init__(self, patch_size=(8, 8)): self.patch_size = patch_size def _extract_patches(self, x): """ Extracts patches from given H x W image """ # Extract patches in a grid, reshape to proper size # Details not really important - written this way for speed return image.extract_patches(x, self.patch_size, self.patch_size[0]).reshape((-1, self.patch_size[0] * self.patch_size[1])) def fit(self, X, Y=None): patches = np.concatenate([self._extract_patches(rgb2gray(x)) for x in X]) print(patches.shape) #self.pca = RandomizedPCA(whiten=True) #self.pca.fit(patches) return self def transform(self, X, Y=None): return np.array([self._extract_patches(rgb2gray(x)) for x in X]) #return np.array([self.pca.transform(self._extract_patches(rgb2gray(x))) for x in X]) from sklearn.cluster import KMeans class Codebook(sklearn.base.BaseEstimator): def __init__(self, size=10): self.size = size self.clusterer = KMeans(n_clusters=size) def _get_histogram(self, x): """ Returns histogram of codewords for given features """ # Alternative method: return distance of each patch to cluster centers # return self.clusterer.transform(x).ravel() # Assign each patch to a cluster clusters = self.clusterer.predict(x) # Get the number of each patch type return np.bincount(clusters, minlength=self.size) def fit(self, X, Y=None): # print("Fitting clusterer") self.clusterer.fit(np.concatenate(X)) return self def transform(self, X, Y=None): return np.array([self._get_histogram(x) for x in X]) X, Y = get_classes([1, 3, 5, 7, 9], 100) patcher = PatchExtractor(patch_size=(8, 8)) codebook = Codebook(size=50) clf = svm.SVC(kernel='linear') pipeline = Pipeline([("Patch_extractor", patcher), ("Codebook", codebook), ("svm", clf)]) score(pipeline, X, Y, 2, verbose=True) X, Y = get_classes([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 900) patcher = PatchExtractor(patch_size=(8, 8)) codebook = Codebook(size=50) clf = svm.SVC(kernel='linear') pipeline = Pipeline([("Patch_extractor", patcher), ("Codebook", codebook), ("svm", clf)]) score(pipeline, X, Y, 2, verbose=True) # You end up with edge like features show_patches(codebook.clusterer.cluster_centers_) class RGBFeature(sklearn.base.BaseEstimator): """ Maps an image to its RGB color averages """ def fit(self, X, Y=None): return self def transform(self, X, Y=None): # Get average of each color channel return np.array([[np.average(X[:, :, i]) for i in range(3)] for x in X]) X, Y = get_classes([1, 3, 5, 7, 9], 500) rgb = RGBFeature() clf = svm.SVC(kernel='linear') pipeline = Pipeline([("RGB Average", rgb), ("svm", clf)]) score(pipeline, X, Y, 5, verbose=True)