Usage examples of VbMfa for dimensionality reduction, including image compression and clustering.
import numpy as np
import sys
import sklearn.datasets as ds
import matplotlib.pyplot as plt
import matplotlib.cm as plt_cm
import matplotlib.colors as plt_colt
import matplotlib.patches as plt_patches
import seaborn as sns
import pandas as pd
%matplotlib inline
%load_ext autoreload
%autoreload 2
import vbmfa.fa
import vbmfa.mfa
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
sns.set_style('darkgrid')
def plot_scatter(x, classes, ax=None):
ax = plt.gca() if ax is None else ax
cmap = plt_cm.jet
norm = plt_col.Normalize(vmin=np.min(classes), vmax=np.max(classes))
mapper = plt_cm.ScalarMappable(cmap=cmap, norm=norm)
colors = mapper.to_rgba(classes)
ax.scatter(x[0, :], x[1, :], color=colors, s=20)
def plot_mse(mse):
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(mse, linewidth=2, marker='s',markersize=5, markerfacecolor='red')
ax.set_xlabel('Iteration')
ax.set_ylabel('MSE')
def plot_grid(n, ncols=4, size=(5, 5)):
nrows = int(np.ceil(n/float(ncols)))
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0]*ncols, size[1]*nrows))
ax = ax.ravel()
return [fig, ax]
def plot_grid(n, ncols=4, size=(5, 5)):
nrows = int(np.ceil(n/float(ncols)))
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0]*ncols, size[1]*nrows))
ax = ax.ravel()
return [fig, ax]
def plot_compress(q, n=30):
np.random.seed(0)
fa = vbfa.VbFa(data_y, q)
fa.fit()
y = fa.x_to_y()
fig, ax = plot_grid(n, ncols=10)
dim = int(np.sqrt(fa.P))
for i in range(n):
ax[i].matshow(y[:, i].reshape(dim, dim), cmap='binary')
def plot_images(images, n=30, size=2):
fig, ax = plot_grid(n, ncols=10, size=(size, size))
dim = int(np.sqrt(images.shape[0]))
with sns.axes_style('white'):
for i in range(n):
ax[i].grid()
ax[i].set_axis_off()
ax[i].matshow(images[:, i].reshape(dim, dim), cmap='binary')
def cluster_labels(q_s, labels):
clust = q_s.argmax(0)
clust_t = pd.crosstab(clust, labels)
clust_label = np.array(clust_t).argmax(1)
return np.array([clust_label[x] for x in clust])
digits = ds.load_digits()
data_y = digits.data.transpose()
data_t = digits.target
plot_images(data_y)
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 1)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
MSE: 573.435
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 2)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
MSE: 519.046
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 10)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
MSE: 470.377
c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))
col_0 0 1 2 3 4 5 6 7 8 9 row_0 0 122 2 9 4 1 9 6 0 23 33 1 13 129 10 9 9 3 4 33 83 19 2 4 28 143 3 0 2 1 1 9 0 3 16 11 4 83 3 10 0 2 5 44 4 3 0 0 18 122 11 5 5 1 1 5 4 4 1 16 9 113 7 0 9 17 6 4 5 9 14 19 3 152 1 14 3 7 2 3 0 6 17 9 2 137 5 4 9 10 0 1 30 1 22 4 0 25 59
Not too bad: most elements lie on the diagonal.
show = [0, 1, 2, 3, 4, 5]
num = 10
fig, ax = plot_grid(len(show) * num, ncols=num, size=(2, 2))
y = mfa.x_to_y()
with sns.axes_style('whitegrid'):
for i in range(len(show)):
y_i = y[:, c_labels == show[i]]
for j in range(num):
k = int(i * 10 + j)
ax[k].matshow(y_i[:, j].reshape(8, 8), cmap='binary')
ax[k].set_axis_off()
ax[k].grid()
High-dimensional (64x64) images of faces from different perspectives. Contains in total 10 images for 40 different persons.
faces = ds.fetch_olivetti_faces()
num_samples = 5 # Only use the first n persons
images_per_sample = 10
n = num_samples * images_per_sample
data_y = faces.data.transpose()[:, :n]
data_t = faces.target[:n]
plot_images(data_y, 50)
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 1)
mfa.fit(verbose=True)
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y(), 50)
1: 43.587 2: 45.900 MSE: 45.900
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 5)
mfa.fit_highdim()
plot_images(mfa.x_to_y(), 50)
c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))
col_0 0 1 2 3 4 row_0 0 4 0 0 0 0 1 0 10 0 1 0 2 0 0 2 1 0 3 3 0 6 7 5 4 3 0 2 1 5