#!/usr/bin/env python # coding: utf-8 # In[1]: import numpy as np import pandas as pd from lets_plot import * LetsPlot.setup_html() # In[2]: from sklearn.datasets import make_moons from sklearn.preprocessing import scale from sklearn.metrics.pairwise import check_pairwise_arrays from sklearn.metrics import euclidean_distances from sklearn.model_selection import train_test_split # In[3]: X, y = make_moons(noise = 0.1, n_samples = 1000) X = scale(X) # In[4]: data = {'x1':X.T[0], 'x2':X.T[1], 'target':y} # In[5]: p = ggplot() \ + geom_point(aes(x='x1', y='x2', fill='target', group='target'), data=data, size = 3, shape = 21, color='black') \ + scale_fill_manual(values=['red', 'blue']) \ + theme(legend_position='none') p # In[6]: def make_grid(X, grid_size=40): min_val, max_val = np.min(X), np.max(X) delta = (max_val - min_val) / grid_size xx = yy = np.arange(min_val, min_val + delta * (grid_size + 1), delta) grid = np.vstack(np.meshgrid(xx, yy)).reshape(2, - 1).T return grid # In[7]: grid_size = 40 grid = make_grid(X, grid_size=grid_size) data_grid = pd.DataFrame(grid, columns=['x1', 'x2']) data_grid['line'] = np.tile(np.arange(grid_size + 1), grid_size + 1) # In[8]: p += geom_path(aes(x='x1', y='x2', group='line'), data=data_grid, color='blue', alpha=0.5) p # In[9]: def kernel_rbf(x, xi = None, gamma = None): x, xi = check_pairwise_arrays(x, xi) if gamma == None: gamma = 1.0 / x.shape[1] K = - gamma * euclidean_distances (x, xi, squared = True) return np.exp (K) # In[10]: def kernel_poly(x, xi = None, degree = 3, gamma = None, coef0 = 1): x, xi = check_pairwise_arrays(x, xi) if gamma is None: gamma = 1.0 / x.shape[1] return (coef0 + gamma * np.dot (x, xi.T)) ** degree # In[11]: def kernel_centerer(K): n = K.shape[0] In = np.ones((n, n)) / n Kc = K - np.dot (In, K) - np.dot (K, In) + np.dot (np.dot (In, K), In) return Kc # In[12]: def kernel_eig(K, centered = True): if not centered: K = kernel_centerer (K) eig_val, eig_vec = np.linalg.eigh (K) # sort eigen values in descending order idx = np.argsort (eig_val)[::- 1] eig_val = eig_val[idx] eig_vec = eig_vec[:, idx] # zero eigenvectors with zero eigenvalues eig_vec = eig_vec[:, eig_val > 0] eig_val = eig_val[eig_val > 0] return eig_val, eig_vec # In[13]: class KernelTransformer(object): def __init__(self, kernel = 'rbf', gamma = None, degree = 3, coef0 = 1): self.gamma = gamma self.degree = degree self.coef0 = coef0 self.kernel = kernel def fit(self, X, y): self.X = X self.y = y # compute eigen vectors K = self.get_kernel (self.X) self.eig_val, self.eig_vec = kernel_eig(K, centered=False) return self def get_kernel(self, x, xi = None): x, xi = check_pairwise_arrays(x, xi) if self.kernel == 'poly': K = kernel_poly (x, xi, self.degree, self.gamma, self.coef0) elif self.kernel == 'linear': self.degree = 1 self.coef0 = 0 self.gamma = 1 K = kernel_poly (x, xi, self.degree, self.gamma, self.coef0) elif self.kernel == 'rbf': K = kernel_rbf (x, xi, self.gamma) return K def transform(self, X): K = self.get_kernel (X, self.X) return np.dot(K, self.eig_vec / np.sqrt(self.eig_val)) # In[14]: transformer = KernelTransformer(kernel='rbf', gamma=2, degree=3, coef0=0.) transformer.fit(X, y); # In[15]: X_transformed = transformer.transform(X)[:,:2] feature_data = {'x1':X_transformed[:, 0], 'x2':X_transformed[:, 1], 'target':y} # In[16]: p = ggplot() \ + geom_point(aes(x='x1', y='x2', fill='target', group='target'), data=feature_data, size = 3, shape = 21, color='black') \ + scale_fill_manual(values =['red', 'blue']) \ + theme(legend_position='none') p # In[17]: transformed_grid = transformer.transform(grid) transformed_grid = transformed_grid[:,:2] data_grid = pd.DataFrame (transformed_grid, columns=['x1', 'x2']) data_grid['line'] = np.tile(np.arange(grid_size + 1), grid_size + 1) # In[18]: p += geom_path(aes(x='x1', y='x2', group='line'), data=data_grid, color='blue', alpha=0.5) p # In[19]: X_train, X_test, y_train, y_test = train_test_split (X, y, test_size = 0.25) dat = {'x':X_train.T[0], 'y':X_train.T[1], 'variable':y_train} # In[20]: p = ggplot() \ + geom_point(aes (x='x1', y='x2', fill='target', group='target'), data=data, size = 3, shape = 21, color='black') \ + scale_fill_manual(values =['red', 'blue']) \ + theme(legend_position='none') p # In[21]: def step_function(x, margin = 0, label =[0, 1]): return np.where (x >= margin, label[1], label[0]) # In[22]: class KernelClassifier(object): def __init__(self, kernel = 'rbf', gamma = None, degree = 3, coef0 = 1): self.gamma = gamma self.degree = degree self.coef0 = coef0 self.kernel = kernel def fit(self, X, y): self.X = X self.y = y # count number of points in subsets self.n = np.array([np.count_nonzero(1 + np.array (np.where (self.y == self.y[i]))) for i in range(len(self.y))]) # compute bias a = self.get_kernel(self.X, self.X) b = step_function(self.y, 0.5,[1, - 1]) / self.n ** 2 / 2.0 self.b = np.sum(np.multiply (a, b)) return self def get_kernel(self, x, xi): if self.kernel == 'poly': return kernel_poly(x, xi, self.degree, self.gamma, self.coef0) elif self.kernel == 'linear': self.degree = 1 self.coef0 = 0 self.gamma = 1 return kernel_poly(x, xi, self.degree, self.gamma, self.coef0) elif self.kernel == 'rbf': return kernel_rbf (x, xi, self.gamma) def predict(self, X): a = self.get_kernel (X, self.X) b = step_function (self.y, 0.5,[- 1, 1]) / self.n y = np.sum(np.multiply (a, b), axis = 1) + self.b return step_function (y) def decision_function(self, X): a = self.get_kernel (X, self.X) b = step_function (self.y, 0.5,[- 1, 1]) / self.n y = np.sum (np.multiply (a, b), axis = 1) + self.b return y # In[23]: classifier = KernelClassifier(kernel='rbf', gamma=6.) classifier.fit(X_train, y_train); # In[24]: def get_grid_points(X, resolution=0.05): x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1 x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 return np.meshgrid(np.arange(x1_min, x1_max, resolution), np.arange(x2_min, x2_max, resolution)) # In[25]: xx1, xx2 = get_grid_points(X_train) Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T) xx1, xx2, z = xx1.reshape(- 1), xx2.reshape(- 1), Z.reshape(- 1) region_data = {'x1':xx1, 'x2':xx2, 'region':z + 1} # In[26]: p += geom_raster(aes(x='x1', y='x2',group='region', fill='region'), data=region_data, alpha=0.3) \ + scale_fill_manual(values =['red', 'blue']) p += theme(axis_text='blank', axis_ticks='blank', axis_line='blank', axis_title='blank', legend_position='none') p += ggsize(500, 500) p # In[27]: test_data = pd.DataFrame(np.column_stack ((X_test, y_test)), columns =['x1', 'x2', 'target']) # In[28]: p += geom_point(aes(x='x1', y='x2', color='target'), data=test_data, size=5, alpha=0.5, shape=16) \ + scale_color_manual(values =['red', 'blue']) p