Introduction to Principal Component Analysis

Solutions

Exercise: PCA implementation

In [ ]:

# PCA from scratch: eigendecomposition of the sample covariance matrix.
# Reads X and n from earlier cells.
# NOTE(review): assumes X has samples in rows / features in columns and that
# n == X.shape[0] — confirm against the cell that defines them.

Xmean = X.mean(axis=0, keepdims=True) # per-column mean, kept 2-D for broadcasting
Xc = X - Xmean # centre first: the covariance is defined on mean-removed data

S = (Xc.T @ Xc) / (n - 1) # cov matrix with n-1 degrees of freedom
# np.cov(X.T) gives the same matrix (np.cov centres the data internally)
print('S =\n', S)
print()

# S is symmetric, so eigh is the right solver: it guarantees real eigenvalues
# and orthonormal eigenvectors (eig may return complex values from round-off).
λ, v = np.linalg.eigh(S) # eigen system; column v[:, i] pairs with λ[i]
idx = λ.argsort()[::-1] # sort eigenvalues in descending order
λ = λ[idx]
print('λ =', λ[0], λ[1], λ.sum())
print()
print('VAR PC1:, ', λ[0]/λ.sum()) # the fraction of variance explained by PC1
print()

A = v[:, idx] # the projection matrix: column j is the eigenvector for λ[j]
print('A =\n', A)
print()

# Eigenvectors are the COLUMNS of A, so the scores are Xc @ A (not Xc @ A.T):
# column j of X_ is the data projected onto the j-th principal axis.
X_ = Xc @ A # project (ie transform)
print('X_ =\n', X_)

Exercise: Digits dataset

In [ ]:

# Build a PCA estimator with its default arguments, fit it on X, and keep the
# transformed data in X_ (later cells plot its first two columns).
# NOTE(review): PCA is presumably sklearn.decomposition.PCA — confirm against
# the notebook's import cell.
pca = PCA()
X_ = pca.fit_transform(X)

In [ ]:

# Draw every sample as its own label text, positioned at its first two
# transformed coordinates and coloured by label.
# NOTE(review): labels is presumably the integer digit class (0-9) of each
# row of X_ — it is used both as an index into colors and as the drawn text.
colors = sns.color_palette('Set1', 10)  # ten colours, indexed by label

PC1, PC2 = X_[:, 0], X_[:, 1]  # first and second column of the projection

fig, ax = plt.subplots(figsize=(12, 10))
for x, y, label in zip(PC1, PC2, labels):
    ax.text(x, y, str(label), fontdict={'fontsize': 10, 'color': colors[label]})

# ax.text does not autoscale the axes, so set the limits from the data
# with a margin of 1 on each side.
ax.set_xlim(PC1.min() - 1, PC1.max() + 1)
ax.set_ylim(PC2.min() - 1, PC2.max() + 1)

# Hide the tick marks on both axes.
ax.set_xticks([])
ax.set_yticks([])

pca_labels()  # helper defined elsewhere in the notebook
sns.despine()

Exercise: t-SNE

In [ ]:

# Build a TSNE estimator with its default arguments, fit it on X, and keep the
# resulting embedding in X_ (this overwrites any earlier X_).
# NOTE(review): TSNE is presumably sklearn.manifold.TSNE — confirm against
# the notebook's import cell.
tsne = TSNE()
X_ = tsne.fit_transform(X)

In [ ]:

# Same plot as for PCA, but on the t-SNE embedding: each sample is drawn as
# its label text at its two embedding coordinates, coloured by label.
colors = sns.color_palette('Set1', 10)  # ten colours, indexed by label

# The names PC1/PC2 are kept from the PCA cell; here they hold the first and
# second column of the t-SNE output.
PC1 = X_[:, 0]
PC2 = X_[:, 1]

fig, ax = plt.subplots(figsize=(12, 10))
for x, y, label in zip(PC1, PC2, labels):
    style = {'fontsize': 10, 'color': colors[label]}
    ax.text(x, y, str(label), fontdict=style)

# Text artists do not drive autoscaling; fix the view to the data range
# padded by 1 in every direction.
x_lo, x_hi = PC1.min() - 1, PC1.max() + 1
y_lo, y_hi = PC2.min() - 1, PC2.max() + 1
ax.set_xlim(x_lo, x_hi)
ax.set_ylim(y_lo, y_hi)

# Hide the tick marks on both axes.
ax.set_xticks([])
ax.set_yticks([])

# NOTE(review): pca_labels() is defined elsewhere; if it writes "PC1"/"PC2"
# axis labels they would be misleading on a t-SNE plot — confirm.
pca_labels()
sns.despine()