import numpy as np
from scipy.linalg import svd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA as skPCA
class PCA():
    """Minimal PCA built directly on a thin SVD of the data matrix.

    Nothing in this class subtracts a mean: ``fit`` decomposes ``X`` exactly
    as it is given, and ``transform`` / ``inverse_transform`` are plain
    matrix products. Callers that want classical PCA must centre ``X``
    themselves before calling ``fit`` and ``transform``.

    Attributes set by ``fit``:
        components_: first ``n_components`` rows of the third matrix
            returned by ``svd``.
        explained_variance_ratio_: squared leading singular values divided
            by the sum of all squared singular values.
    """

    def __init__(self, n_components=2):
        """Remember how many leading components ``fit`` should keep."""
        self.n_components = n_components

    def fit(self, X):
        """Decompose ``X`` and store the leading components; return ``self``."""
        # The left singular vectors are not needed, so they are discarded.
        _, singular_values, vh = svd(X, full_matrices=False)
        keep = self.n_components
        squared = np.square(singular_values)

        self.components_ = vh[:keep]
        self.explained_variance_ratio_ = squared[:keep] / np.sum(squared)
        return self

    def transform(self, X):
        """Project ``X`` onto the stored components."""
        basis = self.components_
        return np.dot(X, basis.T)

    def inverse_transform(self, X):
        """Map component-space coordinates ``X`` back to feature space."""
        basis = self.components_
        return np.dot(X, basis)
# Check the SVD-based PCA defined above against scikit-learn on the iris data.
X, _ = load_iris(return_X_y=True)
# The PCA class above never subtracts a mean, so centre the data up front.
X -= X.mean(axis=0)

pca1 = PCA().fit(X)
Xt1 = pca1.transform(X)
Xinv1 = pca1.inverse_transform(Xt1)

pca2 = skPCA(n_components=2).fit(X)
Xt2 = pca2.transform(X)
Xinv2 = pca2.inverse_transform(Xt2)

# A principal axis is only defined up to sign, so accept either orientation.
for ours, theirs in zip(pca1.components_, pca2.components_):
    assert np.allclose(ours, theirs) or np.allclose(ours, -theirs)
assert np.allclose(pca1.explained_variance_ratio_, pca2.explained_variance_ratio_)
# The projected columns inherit the same sign ambiguity as the axes.
for ours, theirs in zip(Xt1.T, Xt2.T):
    assert np.allclose(ours, theirs) or np.allclose(ours, -theirs)
# The reconstruction is sign-independent and must match directly.
assert np.allclose(Xinv1, Xinv2)
class PCA():
    """PCA via thin SVD that handles mean-centring itself.

    ``fit`` records the per-feature mean of the training data and decomposes
    the centred matrix. ``transform`` subtracts that stored mean before
    projecting, and ``inverse_transform`` adds it back after reconstructing.

    Attributes set by ``fit``:
        mean_: per-feature mean of the data passed to ``fit``.
        components_: first ``n_components`` rows of the third matrix
            returned by ``svd`` for the centred data.
        explained_variance_ratio_: squared leading singular values divided
            by the sum of all squared singular values.
    """

    def __init__(self, n_components=2):
        """Remember how many leading components ``fit`` should keep."""
        self.n_components = n_components

    def fit(self, X):
        """Learn the mean and leading components of ``X``; return ``self``."""
        self.mean_ = np.mean(X, axis=0)
        centered = X - self.mean_

        # The left singular vectors are not needed, so they are discarded.
        _, singular_values, vh = svd(centered, full_matrices=False)
        keep = self.n_components
        squared = np.square(singular_values)

        self.components_ = vh[:keep]
        self.explained_variance_ratio_ = squared[:keep] / np.sum(squared)
        return self

    def transform(self, X):
        """Centre ``X`` with the fitted mean and project it onto the components."""
        centered = X - self.mean_
        return np.dot(centered, self.components_.T)

    def inverse_transform(self, X):
        """Reconstruct feature-space data from component-space coordinates ``X``."""
        reconstructed = np.dot(X, self.components_)
        return reconstructed + self.mean_
# Check the mean-aware PCA defined above against scikit-learn on the iris data.
# That class centres the data inside fit/transform, so X is passed in raw.
X, _ = load_iris(return_X_y=True)

pca1 = PCA().fit(X)
Xt1 = pca1.transform(X)
Xinv1 = pca1.inverse_transform(Xt1)

pca2 = skPCA(n_components=2).fit(X)
Xt2 = pca2.transform(X)
Xinv2 = pca2.inverse_transform(Xt2)

# A principal axis is only defined up to sign, so accept either orientation.
for ours, theirs in zip(pca1.components_, pca2.components_):
    assert np.allclose(ours, theirs) or np.allclose(ours, -theirs)
assert np.allclose(pca1.explained_variance_ratio_, pca2.explained_variance_ratio_)
# The projected columns inherit the same sign ambiguity as the axes.
for ours, theirs in zip(Xt1.T, Xt2.T):
    assert np.allclose(ours, theirs) or np.allclose(ours, -theirs)
# The reconstruction is sign-independent and must match directly.
assert np.allclose(Xinv1, Xinv2)
class PCA():
    """PCA via eigendecomposition of the sample covariance matrix.

    ``fit`` diagonalises ``np.cov(X, rowvar=False)`` and keeps the
    eigenvectors belonging to the ``n_components`` largest eigenvalues.

    ``np.cov`` removes the feature means internally, so ``fit`` gives the
    same axes for centred and uncentred data. ``transform`` and
    ``inverse_transform`` are plain matrix products and do NOT subtract or
    restore a mean; centre the data first if classical PCA scores are wanted.

    Attributes set by ``fit``:
        components_: array of shape (n_components, n_features); each row is
            a unit-length principal axis, ordered by decreasing variance.
        explained_variance_ratio_: fraction of the total variance carried by
            each kept component.
    """

    def __init__(self, n_components=2):
        """Remember how many leading components ``fit`` should keep."""
        self.n_components = n_components

    def fit(self, X):
        """Estimate the principal axes of ``X`` (samples in rows); return ``self``."""
        # np.cov collapses to a 0-d array when X has a single feature;
        # atleast_2d keeps the decomposition below well-defined.
        covmat = np.atleast_2d(np.cov(X, rowvar=False))
        # The covariance matrix is symmetric, so use eigh: it guarantees real
        # eigenvalues and orthonormal eigenvectors (eig may return complex
        # values because of round-off).
        eigval, eigvec = np.linalg.eigh(covmat)
        # Indices of the largest eigenvalues, in descending order.
        order = np.argsort(eigval)[::-1][:self.n_components]
        # Eigenvectors are the COLUMNS of eigvec; store them as rows so that
        # components_ has the (n_components, n_features) layout used by
        # transform / inverse_transform.
        self.components_ = eigvec[:, order].T
        # Covariance eigenvalues already are the variances along each axis,
        # so the ratio must not square them again.
        self.explained_variance_ratio_ = eigval[order] / np.sum(eigval)
        return self

    def transform(self, X):
        """Project ``X`` onto the stored components (no centring is applied)."""
        return np.dot(X, self.components_.T)

    def inverse_transform(self, X):
        """Map component-space coordinates ``X`` back to feature space."""
        return np.dot(X, self.components_)
# Set up the comparison between the covariance-eigendecomposition PCA defined
# above and scikit-learn on the iris data.
X, _ = load_iris(return_X_y=True)
# That class's transform applies no mean subtraction, so centre the data here.
X -= X.mean(axis=0)

# Hand-written implementation (default of two components).
pca1 = PCA()
pca1.fit(X)
Xt1 = pca1.transform(X)
Xinv1 = pca1.inverse_transform(Xt1)

# scikit-learn reference with the same number of components.
pca2 = skPCA(n_components=2)
pca2.fit(X)
Xt2 = pca2.transform(X)
Xinv2 = pca2.inverse_transform(Xt2)