import numpy as np
import pylab as pl
from sklearn.datasets import load_digits
from sklearn.neighbors import KernelDensity
from sklearn.decomposition import PCA
from sklearn.grid_search import GridSearchCV
%pylab inline --no-import-all
Populating the interactive namespace from numpy and matplotlib
## Load the scikit-learn digits data and project it onto 15 principal components.
digits = load_digits()
X = digits.data
y = digits.target
## whiten must stay False: per the original note, enabling whitening causes
## a lot of information loss once samples go back through inverse_transform.
pca = PCA(whiten=False, n_components=15)
pca_X = pca.fit_transform(X)
# Same text as printing the two shape tuples separated by a space.
print("%s %s" % (pca_X.shape, y.shape))
(1797, 15) (1797,)
## estimation based on PCA
kd = KernelDensity()
params = dict(bandwidth = np.logspace(-3, 3, 30))
gs = GridSearchCV(kd, params, cv = 3)
%time gs.fit(pca_X)
print gs.best_score_
print gs.best_params_
CPU times: user 6.12 s, sys: 4 ms, total: 6.13 s Wall time: 6.13 s -28073.5244634 {'bandwidth': 3.2903445623126677}
## Sample new digits from the KDE fitted in PCA space and display them.
# No explicit kd.fit() is needed: GridSearchCV refits the best estimator on
# the full data it was given (refit defaults to True).
kd = gs.best_estimator_
new_pca_data = kd.sample(25, random_state=0)
# Map the 15-component samples back to 64 pixel values, then to 8x8 images.
new_data = pca.inverse_transform(new_pca_data)
new_images = new_data.reshape((-1, 8, 8))
print(new_images.shape)
# 5x5 grid, one generated digit per panel, tick marks hidden.
fig, axes = pl.subplots(nrows=5, ncols=5, figsize=(1 * 5, 1 * 5))
axes = axes.ravel()
fig.subplots_adjust(wspace=0.1, hspace=0.1)
for i, img in enumerate(new_images):
    ax = axes[i]
    ax.imshow(img, cmap=pl.cm.gray)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
(25, 8, 8)
## estimation based on raw images
kd = KernelDensity()
params = dict(bandwidth = np.logspace(-3, 3, 30))
gs = GridSearchCV(kd, params, cv = 3)
%time gs.fit(X)
print gs.best_score_
print gs.best_params_
CPU times: user 11.5 s, sys: 0 ns, total: 11.5 s Wall time: 11.5 s -88307.8552761 {'bandwidth': 2.0433597178569416}
## Sample new digits from the KDE fitted on the RAW images and display them.
# gs here is the grid search fitted on X (raw pixels), so the samples are
# already in pixel space and need no PCA inverse_transform.
# No explicit kd.fit() is needed: GridSearchCV refits the best estimator on
# the full data it was given (refit defaults to True).
kd = gs.best_estimator_
new_data = kd.sample(25, random_state=0)
new_images = new_data.reshape((-1, 8, 8))
print(new_images.shape)
# 5x5 grid, one generated digit per panel, tick marks hidden.
fig, axes = pl.subplots(nrows=5, ncols=5, figsize=(1 * 5, 1 * 5))
axes = axes.ravel()
fig.subplots_adjust(wspace=0.1, hspace=0.1)
for i, img in enumerate(new_images):
    ax = axes[i]
    ax.imshow(img, cmap=pl.cm.gray)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
(25, 8, 8)
## sampled based on PCA of ONE TYPE OF DIGITS
## estimation based on PCA
for digit in range(10):
kd = KernelDensity()
params = dict(bandwidth = np.logspace(-3, 3, 30))
gs = GridSearchCV(kd, params, cv = 3)
%time gs.fit(pca_X[y==digit])
print gs.best_score_
print gs.best_params_
kd = gs.best_estimator_
#kd.fit(pca_X)
new_pca_data = kd.sample(25, random_state =0)
new_data = pca.inverse_transform(new_pca_data)
new_images = new_data.reshape((-1, 8, 8))
print new_images.shape
fig, axes = pl.subplots(nrows = 5, ncols = 5, figsize = (1 * 5, 1 * 5))
axes = axes.ravel()
fig.subplots_adjust(wspace = 0.1, hspace = 0.1)
for i, img in enumerate(new_images):
axes[i].imshow(img, cmap = pl.cm.gray)
axes[i].get_xaxis().set_visible(False)
axes[i].get_yaxis().set_visible(False)
CPU times: user 200 ms, sys: 0 ns, total: 200 ms Wall time: 198 ms -2448.12988232 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 156 ms, sys: 0 ns, total: 156 ms Wall time: 155 ms -2765.30622637 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 152 ms, sys: 0 ns, total: 152 ms Wall time: 152 ms -2687.8595831 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 160 ms, sys: 0 ns, total: 160 ms Wall time: 158 ms -2804.41774729 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 156 ms, sys: 0 ns, total: 156 ms Wall time: 156 ms -2694.56145187 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 160 ms, sys: 0 ns, total: 160 ms Wall time: 158 ms -2698.40124215 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 156 ms, sys: 8 ms, total: 164 ms Wall time: 164 ms -2553.18987672 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 156 ms, sys: 0 ns, total: 156 ms Wall time: 156 ms -2674.16251989 {'bandwidth': 3.2903445623126677} (25, 8, 8) CPU times: user 152 ms, sys: 0 ns, total: 152 ms Wall time: 152 ms -2737.75685017 {'bandwidth': 5.2983169062837074} (25, 8, 8) CPU times: user 156 ms, sys: 0 ns, total: 156 ms Wall time: 156 ms -2778.99731487 {'bandwidth': 3.2903445623126677} (25, 8, 8)