import numpy as np
from numpy.linalg import eig
from scipy.spatial.distance import pdist,cdist, squareform
from sklearn.base import TransformerMixin, BaseEstimator
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
X, color = datasets.samples_generator.make_swiss_roll(n_samples=1500)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(*X.T, c=color, cmap=plt.cm.Spectral)
plt.show()
pdist
is much faster than cdist
. Thus, use pdist
for gram matrix calculation.
%time D = cdist(X,X,'euclid')
%time Y = pdist(X,'euclid')
print(D.shape,squareform(Y).shape)
CPU times: user 11.8 ms, sys: 985 µs, total: 12.8 ms Wall time: 14.8 ms CPU times: user 3.82 ms, sys: 0 ns, total: 3.82 ms Wall time: 3.83 ms (1500, 1500) (1500, 1500)
Kernel PCA is an eigenvalue decomposition of a gram matrix, which is calculated from a data matrix. Note that the gram matrix should be centerized in the higher dimensionality.
class KernelPCA(TransformerMixin, BaseEstimator):
def __init__(self, n_components=2, kernel='rbf', beta=1):
self.n_components = n_components
self.kernel = kernel
self.beta = beta# square of width of RBF function.
def fit(self, x, y=None):
D = pdist(x, self._rbf)
K = squareform(D) # gram matrix
n_samples = K.shape[0]
Jn = np.eye(n_samples) - np.ones(n_samples)/n_samples
K_cnt = Jn @ K # centrering K
w, v = eig(K_cnt)
self.x_train_ = x
self.explained_variance_ = w[:self.n_components]
self.explained_variance_ratio_ = w[:self.n_components] / w.sum()
self.alpha_ = v[:,:self.n_components]
return self
def transform(self, data):
K = cdist(self.x_train_, data, self._rbf)
return K.T @ self.alpha_
def _rbf(self,x,y):
return np.exp(-((x-y)**2.).sum()*self.beta)
for b in [0.1, 0.001]:
kpca = KernelPCA(n_components=2,beta=b)
kpca.fit(X)
A = kpca.transform(X)
with mpl.style.context('seaborn'):
fig = plt.figure()
ax = fig.add_subplot(111)# , projection='3d'
ax.scatter(*A.T, c=color, cmap=plt.cm.Spectral)
plt.axis('square')
plt.show()
/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py:138: ComplexWarning: Casting complex values to real discards the imaginary part return array(a, dtype, copy=False, order=order, subok=True)