# Variational Bayesian Factor Analysis¶

Usage examples of VbFa for dimensionality reduction, including visualization and image compression.

## Import modules¶

In [1]:
import numpy as np

import sklearn.datasets as ds
from sklearn.decomposition import ProbabilisticPCA as PPCA
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import matplotlib.cm as plt_cm
import matplotlib.colors as plt_col
import matplotlib.patches as plt_patches
import seaborn as sns

%matplotlib inline

import vbmfa.fa as vbfa
In [2]:
sns.set_style('darkgrid')

## Helper functions¶

In [3]:
def plot_scatter(x, classes, ax=None):
ax = plt.gca() if ax is None else ax
cmap = plt_cm.jet
norm = plt_col.Normalize(vmin=np.min(classes), vmax=np.max(classes))
mapper = plt_cm.ScalarMappable(cmap=cmap, norm=norm)
colors = mapper.to_rgba(classes)
ax.scatter(x[0, :], x[1, :], color=colors, s=20)

def plot_mse(mse):
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(mse, linewidth=2, marker='s',markersize=5, markerfacecolor='red')
ax.set_xlabel('Iteration')
ax.set_ylabel('MSE')

def plot_grid(n, ncols=4, size=(5, 5)):
nrows = int(np.ceil(n/float(ncols)))
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0]*ncols, size[1]*nrows))
ax = ax.ravel()
return [fig, ax]

def plot_grid(n, ncols=4, size=(5, 5)):
nrows = int(np.ceil(n/float(ncols)))
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0]*ncols, size[1]*nrows))
ax = ax.ravel()
return [fig, ax]

def plot_compress(q, n=30):
np.random.seed(0)
fa = vbfa.VbFa(data_y, q)
fa.fit()
y = fa.x_to_y()
fig, ax = plot_grid(n, ncols=10)
dim = int(np.sqrt(fa.P))
for i in range(n):
ax[i].matshow(y[:, i].reshape(dim, dim), cmap='binary')

def plot_images(images, n=30, size=2):
fig, ax = plot_grid(n, ncols=10, size=(size, size))
dim = int(np.sqrt(images.shape[0]))
with sns.axes_style('white'):
for i in range(n):
ax[i].grid()
ax[i].set_axis_off()
ax[i].matshow(images[:, i].reshape(dim, dim), cmap='binary')

def plot_ve(ve):
x = np.arange(len(ve))
fig, ax = plt.subplots(figsize=(5, 4))
ax.set_xlabel('Factor')
ax.set_ylabel('% Variance explained')
ax.bar(x, ve)

# Digits¶

In [4]:
data_y = digits.data.transpose()
data_t = digits.target

## Visualization¶

### PCA¶

In [5]:
pca = PCA(n_components=2)
pca_x = pca.fit_transform(data_y.transpose())
plot_scatter(pca_x.transpose(), data_t)
print('MSE: {:.3f}'.format(np.linalg.norm(data_y.transpose() - pca.inverse_transform(pca_x))))
MSE: 1242.386

### VBFA¶

In [6]:
np.random.seed(0)
fa = vbfa.VbFa(data_y, 2)
mse = [fa.mse()]
maxit = 7
fig, ax = plot_grid(maxit + 1)
plot_scatter(fa.q_x.mean, data_t, ax[0])
fa.init()
for i in range(maxit):
fa.update()
j = i + 1
plot_scatter(fa.q_x.mean, data_t, ax[j])
ax[j].set_title('Iteration {}'.format(j))
mse.append(fa.mse())
plot_mse(mse)
print('MSE: {:f}'.format(mse[-1]))
MSE: 1249.675244

## Image compression¶

### 8 factors¶

In [7]:
def plot_images(images, n=30, size=2):
fig, ax = plot_grid(n, ncols=10, size=(size, size))
dim = int(np.sqrt(images.shape[0]))
with sns.axes_style('white'):
for i in range(n):
ax[i].grid()
ax[i].set_axis_off()
ax[i].matshow(images[:, i].reshape(dim, dim), cmap='binary')
In [8]:
np.random.seed(0)
fa = vbfa.VbFa(data_y, 8)
fa.fit()
fa.order_factors()
plot_images(fa.x_to_y())
In [9]:
plot_ve(fa.variance_explained(sort=False))
In [10]:
plot_scatter(fa.q_x.mean[:2, :], data_t)

### 32 factors¶

In [11]:
np.random.seed(0)
fa = vbfa.VbFa(data_y, 32)
fa.fit()
plot_images(fa.x_to_y())

### 64 factors¶

In [12]:
np.random.seed(0)
fa = vbfa.VbFa(data_y, 64)
fa.fit()
plot_images(fa.x_to_y())

# Olvetti faces¶

In [13]:
faces = ds.fetch_olivetti_faces()
data_y = faces.data.transpose()
data_t = faces.target
In [14]:
faces = ds.fetch_olivetti_faces

## Visualization¶

### PCA¶

In [15]:
pca = PCA(n_components=2)
pca_x = pca.fit_transform(data_y.transpose())
plot_scatter(pca_x.transpose(), data_t)
print('MSE: {:f}'.format(np.linalg.norm(data_y.transpose() - pca.inverse_transform(pca_x))))
MSE: 140.117264

### VBFA¶

In [16]:
np.random.seed(0)
fa = vbfa.VbFa(data_y, 2)
mse = [fa.mse()]
maxit = 3
fig, ax = plot_grid(maxit + 1)
plot_scatter(fa.q_x.mean, data_t, ax[0])
for i in range(maxit):
fa.update()
j = i + 1
plot_scatter(fa.q_x.mean, data_t, ax[j])
ax[j].set_title('Iteration {}'.format(j))
mse.append(fa.mse())
plot_mse(mse)
print('MSE: {:f}'.format(fa.mse()))
MSE: 142.345154