# Manual PCA via eigendecomposition of the sample covariance matrix.
# Assumes X is an (n_samples, n_features) array and n == X.shape[0] -- both are
# defined outside this section; TODO confirm.

# Center the columns first: X.T @ X / (n - 1) is the covariance matrix only for
# zero-mean data. If X was already centered upstream, this changes nothing.
Xmean = X.mean(axis=0, keepdims=True)
X_centered = X - Xmean
S = (X_centered.T @ X_centered) / (n - 1)  # cov matrix with n-1 degrees of freedom
# S = np.cov(X.T)  # equivalent (up to floating-point differences)
print('S =\n', S)
print()

# S is symmetric, so use eigh: it returns real eigenvalues and orthonormal
# eigenvectors, whereas the general-purpose eig gives no such guarantee.
λ, v = np.linalg.eigh(S)  # eigen system
idx = λ.argsort()[::-1]  # indices that sort the eigenvalues in descending order
λ = λ[idx]
print('λ =', λ[0], λ[1], λ.sum())
print()
print('VAR PC1:, ', λ[0]/λ.sum())  # fraction of total variance explained by PC1
print()
A = v[:, idx]  # projection matrix: column j is the eigenvector paired with λ[j]
print('A =\n', A)
print()
# Project (i.e. transform) the centered data onto the principal axes. The
# eigenvectors are the COLUMNS of A, so the scores are X_centered @ A; using
# A.T here would only be right if A happened to be symmetric.
X_ = X_centered @ A
print('X_ =\n', X_)
# Fit PCA on X, then draw every sample as its label text positioned at the
# sample's first two transformed coordinates.
pca = PCA()
X_ = pca.fit_transform(X)
PC1, PC2 = X_[:, 0], X_[:, 1]
colors = sns.color_palette('Set1', 10)  # each label is used as an index into this palette

fig, ax = plt.subplots(figsize=(12, 10))
for (pc1, pc2), s in zip(X_[:, :2], labels):
    c = colors[s]
    ax.text(pc1, pc2, str(s), fontdict={'fontsize': 10, 'color': c})

# ax.text does not autoscale the axes, so set the limits by hand (with a margin
# of 1 on every side) and remove the tick marks.
ax.set(
    xlim=(PC1.min() - 1, PC1.max() + 1),
    ylim=(PC2.min() - 1, PC2.max() + 1),
    xticks=[],
    yticks=[],
)
pca_labels()
sns.despine()
# Embed X with t-SNE, then draw every sample as its label text positioned at
# the sample's two embedding coordinates.
tsne = TSNE()
X_ = tsne.fit_transform(X)
# The names PC1/PC2 are kept from the PCA plot; here they hold the first and
# second t-SNE embedding columns.
PC1, PC2 = X_[:, 0], X_[:, 1]
colors = sns.color_palette('Set1', 10)  # each label is used as an index into this palette

fig, ax = plt.subplots(figsize=(12, 10))
for (pc1, pc2), s in zip(X_[:, :2], labels):
    c = colors[s]
    ax.text(pc1, pc2, str(s), fontdict={'fontsize': 10, 'color': c})

# ax.text does not autoscale the axes, so set the limits by hand (with a margin
# of 1 on every side) and remove the tick marks.
ax.set(
    xlim=(PC1.min() - 1, PC1.max() + 1),
    ylim=(PC2.min() - 1, PC2.max() + 1),
    xticks=[],
    yticks=[],
)
# NOTE(review): pca_labels() is defined outside this section; if it labels the
# axes as principal components, that wording would not fit a t-SNE embedding --
# confirm.
pca_labels()
sns.despine()