DS4420: Non-linear dimension reduction!

Some fun with t-SNE

We'll again use the Fashion-MNIST dataset.
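
As a quick reminder of what t-SNE starts from: for each point it builds a distribution over neighbors using a Gaussian kernel, with a per-point bandwidth chosen so the distribution's perplexity matches the value we pass in. The sketch below computes those conditional similarities p(j|i) with a single fixed bandwidth for brevity; the helper name and the sigma value are illustrative, not sklearn's implementation.

In [ ]:
import numpy as np

def conditional_similarities(X, sigma=1.0):
    # squared Euclidean distances between all pairs of points
    sq_dists = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1)
    P = np.exp(-sq_dists / (2 * sigma ** 2))
    np.fill_diagonal(P, 0.0)                 # a point is never its own neighbor
    return P / P.sum(axis=1, keepdims=True)  # normalize each row to get p(j|i)

P = conditional_similarities(np.random.randn(10, 3))
print(P.shape, P.sum(axis=1))  # each row sums to 1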

In [34]:
import numpy as np 
import sklearn
import matplotlib.pyplot as plt

import torch
# conda install -c pytorch torchvision
import torchvision

# note: if you cannot get torchvision installed 
# using the above sequence, you can resort to 
# the colab version here: 
# -- just be sure to download and then upload
# the notebook to blackboard when complete.
fMNIST = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True)  
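
Before flattening everything into vectors, it helps to eyeball a few of the raw 28x28 images; this extra cell is just a quick sketch.

In [ ]:
fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for ax, idx in zip(axes, range(5)):
    ax.imshow(fMNIST.data[idx], cmap='gray')  # each image is a 28x28 tensor
    ax.set_title(int(fMNIST.targets[idx]))    # numeric class label
    ax.axis('off')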
In [35]:
X = fMNIST.data
X = np.array([x_i.flatten().numpy() for x_i in X])  # flatten each 28x28 image to a 784-d vector
X = X / 255  # scale pixel values to [0, 1]
y = fMNIST.targets.numpy()  # class labels (train_labels is deprecated in favor of targets)
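
A quick sanity check on shapes and labels; torchvision's FashionMNIST also stores the human-readable class names in a .classes list.

In [ ]:
print(X.shape, y.shape)  # 60000 flattened 784-d images, one label each
print(fMNIST.classes)    # the 10 clothing categories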
In [36]:
from sklearn.manifold import TSNE
In [41]:
X_embedded = TSNE(n_components=2, perplexity=200).fit_transform(X[:5000])
In [42]:
X_embedded.shape
Out[42]:
(5000, 2)
In [43]:
# one color per class; avoid white, which is invisible on the default background
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', 'gray', 'orange', 'purple']
c = [colors[y_i] for y_i in y[:5000]]
In [44]:
plt.scatter(X_embedded[:,0], X_embedded[:,1], c=c)
Out[44]:
<matplotlib.collections.PathCollection at 0x7fc644c38e48>
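
The same embedding is easier to read with a legend mapping each color to its class name; this sketch just reuses the colors list and fMNIST.classes from above.

In [ ]:
fig, ax = plt.subplots(figsize=(8, 6))
for label in range(10):
    mask = y[:5000] == label
    ax.scatter(X_embedded[mask, 0], X_embedded[mask, 1],
               color=colors[label], s=5, label=fMNIST.classes[label])
ax.legend(markerscale=2, bbox_to_anchor=(1.02, 1), loc='upper left')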

Let's compare to PCA

In [29]:
from sklearn.decomposition import PCA
In [31]:
pca_X = PCA(n_components=2).fit_transform(X[:5000])
In [33]:
plt.scatter(pca_X[:,0], pca_X[:,1], c=c)
Out[33]:
<matplotlib.collections.PathCollection at 0x7fc644e00a20>
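
Two linear components keep only part of the pixel-space variance, which is one reason the PCA projection separates the classes less cleanly than t-SNE. A quick check (refitting here just to read off the ratios is illustrative):

In [ ]:
pca = PCA(n_components=2).fit(X[:5000])
print(pca.explained_variance_ratio_)        # variance captured by each component
print(pca.explained_variance_ratio_.sum())  # total fraction retained in 2-D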