import numpy as np import sys import sklearn.datasets as ds import matplotlib.pyplot as plt import matplotlib.cm as plt_cm import matplotlib.colors as plt_colt import matplotlib.patches as plt_patches import seaborn as sns import pandas as pd %matplotlib inline %load_ext autoreload %autoreload 2 import vbmfa.fa import vbmfa.mfa sns.set_style('darkgrid') def plot_scatter(x, classes, ax=None): ax = plt.gca() if ax is None else ax cmap = plt_cm.jet norm = plt_col.Normalize(vmin=np.min(classes), vmax=np.max(classes)) mapper = plt_cm.ScalarMappable(cmap=cmap, norm=norm) colors = mapper.to_rgba(classes) ax.scatter(x[0, :], x[1, :], color=colors, s=20) def plot_mse(mse): fig, ax = plt.subplots(figsize=(10, 4)) ax.plot(mse, linewidth=2, marker='s',markersize=5, markerfacecolor='red') ax.set_xlabel('Iteration') ax.set_ylabel('MSE') def plot_grid(n, ncols=4, size=(5, 5)): nrows = int(np.ceil(n/float(ncols))) fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0]*ncols, size[1]*nrows)) ax = ax.ravel() return [fig, ax] def plot_grid(n, ncols=4, size=(5, 5)): nrows = int(np.ceil(n/float(ncols))) fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0]*ncols, size[1]*nrows)) ax = ax.ravel() return [fig, ax] def plot_compress(q, n=30): np.random.seed(0) fa = vbfa.VbFa(data_y, q) fa.fit() y = fa.x_to_y() fig, ax = plot_grid(n, ncols=10) dim = int(np.sqrt(fa.P)) for i in range(n): ax[i].matshow(y[:, i].reshape(dim, dim), cmap='binary') def plot_images(images, n=30, size=2): fig, ax = plot_grid(n, ncols=10, size=(size, size)) dim = int(np.sqrt(images.shape[0])) with sns.axes_style('white'): for i in range(n): ax[i].grid() ax[i].set_axis_off() ax[i].matshow(images[:, i].reshape(dim, dim), cmap='binary') def cluster_labels(q_s, labels): clust = q_s.argmax(0) clust_t = pd.crosstab(clust, labels) clust_label = np.array(clust_t).argmax(1) return np.array([clust_label[x] for x in clust]) digits = ds.load_digits() data_y = digits.data.transpose() data_t = digits.target 
# ---------------------------------------------------------------------------
# Digits: show the raw images, then reconstructions from models fitted with
# an increasing third constructor argument.
# ---------------------------------------------------------------------------
plot_images(data_y)

# NOTE(review): the third VbMfa argument is presumably the number of mixture
# components -- confirm against the vbmfa API.  After the loop ``mfa`` holds
# the last model (third argument 10), which the clustering below relies on.
for third_arg in (1, 2, 10):
    np.random.seed(0)
    mfa = vbmfa.mfa.VbMfa(data_y, 16, third_arg)
    mfa.fit()
    print('MSE: {:.3f}'.format(mfa.mse()))
    plot_images(mfa.x_to_y())

c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))

# One row per label in ``show``; each row holds up to ``num`` reconstructions
# of samples whose cluster was assigned that label.
show = [0, 1, 2, 3, 4, 5]
num = 10
# The style context must be active while the axes are created to have any
# effect.
with sns.axes_style('whitegrid'):
    fig, ax = plot_grid(len(show) * num, ncols=num, size=(2, 2))
y = mfa.x_to_y()
for i, label in enumerate(show):
    y_i = y[:, c_labels == label]
    for j in range(num):
        # Row stride is ``num`` (previously a hard-coded 10).
        k = i * num + j
        # A label may have fewer than ``num`` members (or none); leave the
        # remaining panels blank instead of raising IndexError.
        if j < y_i.shape[1]:
            ax[k].matshow(y_i[:, j].reshape(8, 8), cmap='binary')
        ax[k].set_axis_off()
        ax[k].grid(False)

# ---------------------------------------------------------------------------
# Olivetti faces
# ---------------------------------------------------------------------------
faces = ds.fetch_olivetti_faces()
num_samples = 5  # Only use the first n persons
images_per_sample = 10
n = num_samples * images_per_sample
data_y = faces.data.transpose()[:, :n]
data_t = faces.target[:n]
# Show all ``n`` selected images (previously the literal 50, which only
# matched while num_samples * images_per_sample == 50).
plot_images(data_y, n)

np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 1)
mfa.fit(verbose=True)
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y(), n)

np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 5)
mfa.fit_highdim()
plot_images(mfa.x_to_y(), n)

c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))