Some fun with t-SNE
We'll again use the Fashion-MNIST dataset.
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import torch
# conda install -c pytorch torchvision
import torchvision
# Note: if you cannot get torchvision installed
# using the command above, you can fall back to
# the Colab version of this notebook instead
# -- just be sure to download it and then upload
# the notebook to Blackboard when complete.
# Download (if not already cached under `root`) and load the
# Fashion-MNIST training split.
fMNIST = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
)

# Flatten every image into one feature vector with a single tensor
# reshape instead of a per-image Python loop, then convert to NumPy.
X = fMNIST.data
X = X.reshape(len(X), -1).numpy()
X = X / 255  # normalize

# `train_labels` is deprecated and emits a UserWarning saying it has been
# renamed to `targets`; read the labels from `targets` directly.
y = fMNIST.targets.numpy()
/anaconda3/lib/python3.7/site-packages/torchvision/datasets/mnist.py:45: UserWarning: train_labels has been renamed targets warnings.warn("train_labels has been renamed targets")
from sklearn.manifold import TSNE
# Embed the first 5000 samples into two dimensions with t-SNE
# (perplexity 200), then display the shape of the embedding.
tsne_subset = X[:5000]
tsne_model = TSNE(n_components=2, perplexity=200)
X_embedded = tsne_model.fit_transform(tsne_subset)
X_embedded.shape
(5000, 2)
# One matplotlib color per class label; the integer labels in `y` index
# into this list. Avoid 'w' (white): it is invisible on matplotlib's
# default white axes background, which would hide every point of label 7.
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', 'brown', 'orange', 'purple']

# Per-point colors for the same 5000 samples that were embedded.
c = [colors[y_i] for y_i in y[:5000]]

# Scatter the two t-SNE dimensions, colored by class label.
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=c)
<matplotlib.collections.PathCollection at 0x7fc644c38e48>
Let's compare this to a 2-D PCA projection of the same 5000 samples.
from sklearn.decomposition import PCA
# Project the first 5000 samples onto two principal components and
# plot them using the same per-point colors `c` as the t-SNE plot.
pca_model = PCA(n_components=2)
pca_X = pca_model.fit_transform(X[:5000])
pca_dim0, pca_dim1 = pca_X[:, 0], pca_X[:, 1]
plt.scatter(pca_dim0, pca_dim1, c=c)
<matplotlib.collections.PathCollection at 0x7fc644e00a20>