#!/usr/bin/env python # coding: utf-8 # #### PCA and creepy guys # # # This is a small notebook intended to play around with the Olivetti dataset # In[1]: from sklearn.datasets import fetch_olivetti_faces from sklearn.decomposition import PCA import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') # In[2]: faces = fetch_olivetti_faces() print(faces.DESCR) # In[3]: # Here are the first ten guys of the dataset fig = plt.figure(figsize=(10, 10)) for i in range(10): ax = plt.subplot2grid((1, 10), (0, i)) ax.imshow(faces.data[i * 10].reshape(64, 64), cmap=plt.cm.gray) ax.axis('off') # In[4]: # Let's compute the PCA pca = PCA() pca.fit(faces.data) # In[5]: # Now, the creepy guys are in the components_ attribute. # Here are the first ten ones: fig = plt.figure(figsize=(10, 10)) for i in range(10): ax = plt.subplot2grid((1, 10), (0, i)) ax.imshow(pca.components_[i].reshape(64, 64), cmap=plt.cm.gray) ax.axis('off') # In[7]: # Reconstruction process from skimage.io import imsave face = faces.data[0] # we will reconstruct the first face # During the reconstruction process we are actually computing, at the kth frame, # a rank k approximation of the face. To get a rank k approximation of a face, # we need to first transform it into the 'latent space', and then # transform it back to the original space # Step 1: transform the face into the latent space. # It's now a vector with 400 components. The kth component gives the importance # of the kth creepy guy trans = pca.transform(face.reshape(1, -1)) # Reshape for scikit learn # Step 2: reconstruction. To build the kth frame, we use all the creepy guys # up until the kth one. # Warning: this will save 400 png images. for k in range(400): rank_k_approx = trans[:, :k].dot(pca.components_[:k]) + pca.mean_ imsave('{:>03}'.format(str(k)) + '.jpg', rank_k_approx.reshape(64, 64))