import numpy as np
from scipy.sparse.linalg import svds
from sklearn.datasets import load_iris
from sklearn.decomposition import TruncatedSVD as skTruncatedSVD
class TruncatedSVD:
    """Truncated singular value decomposition built on ``scipy.sparse.linalg.svds``.

    The data is NOT centered before the decomposition, so this is a plain
    low-rank factorization ``X ~= U * Sigma * VT`` rather than PCA.

    Parameters
    ----------
    n_components : int, default=2
        Number of singular triplets to keep. Passed to ``svds`` as ``k``;
        any restriction on its range is enforced by ``svds`` itself.

    Attributes (set by ``fit``)
    ---------------------------
    components_ : ndarray of shape (n_components, n_features)
        Right singular vectors, strongest component first.
    singular_values_ : ndarray of shape (n_components,)
        Singular values in descending order.
    explained_variance_ : ndarray of shape (n_components,)
        Per-component variance of the projected training data.
    explained_variance_ratio_ : ndarray of shape (n_components,)
        ``explained_variance_`` divided by the total per-feature variance
        of the training data.
    """

    def __init__(self, n_components=2):
        self.n_components = n_components

    def fit(self, X):
        """Compute the truncated SVD of ``X`` and store the fitted attributes.

        Parameters
        ----------
        X : dense array-like of shape (n_samples, n_features)
            Training data. Integer and boolean input is promoted to float
            because ``svds`` only accepts floating-point matrices.

        Returns
        -------
        self : TruncatedSVD
            The fitted estimator, to allow call chaining.
        """
        X = np.asarray(X)
        # svds rejects integer/bool matrices; promote them, but leave
        # float32 / complex input untouched.
        if not np.issubdtype(X.dtype, np.inexact):
            X = X.astype(float)

        U, Sigma, VT = svds(X, k=self.n_components)

        # Sort explicitly instead of blindly reversing: this keeps the
        # "largest singular value first" contract regardless of the order
        # in which the solver hands the triplets back.
        order = np.argsort(Sigma)[::-1]
        U, Sigma, VT = U[:, order], Sigma[order], VT[order]

        self.components_ = VT
        self.singular_values_ = Sigma
        # U * Sigma equals the projection of X onto the components.
        self.explained_variance_ = np.var(U * Sigma, axis=0)
        total_variance = np.var(X, axis=0).sum()
        self.explained_variance_ratio_ = self.explained_variance_ / total_variance
        return self

    def transform(self, X):
        """Project ``X`` onto the fitted components.

        Returns an array of shape (n_samples, n_components).
        """
        return np.dot(X, self.components_.T)

    def inverse_transform(self, X):
        """Map reduced data ``X`` back to the original feature space.

        Returns an array of shape (n_samples, n_features); this is the
        rank-``n_components`` approximation of the data that was projected.
        """
        return np.dot(X, self.components_)
# --- Sanity check: the local TruncatedSVD must agree with scikit-learn's ---
X, _ = load_iris(return_X_y=True)

# Fit, project and reconstruct with the local implementation.
svd1 = TruncatedSVD().fit(X)
Xt1 = svd1.transform(X)
Xinv1 = svd1.inverse_transform(Xt1)

# Same pipeline with the scikit-learn reference.
svd2 = skTruncatedSVD(n_components=2).fit(X)
Xt2 = svd2.transform(X)
Xinv2 = svd2.inverse_transform(Xt2)

# Singular vectors are only defined up to sign, so accept either orientation.
for ours, theirs in zip(svd1.components_, svd2.components_):
    assert np.allclose(ours, theirs) or np.allclose(ours, -theirs)

# Sign-independent quantities must match directly.
for attr in ("singular_values_", "explained_variance_", "explained_variance_ratio_"):
    assert np.allclose(getattr(svd1, attr), getattr(svd2, attr))

# Projected columns inherit the sign ambiguity of their component.
for ours, theirs in zip(Xt1.T, Xt2.T):
    assert np.allclose(ours, theirs) or np.allclose(ours, -theirs)

# The reconstruction is sign-independent (the two sign flips cancel out).
assert np.allclose(Xinv1, Xinv2)