#!/usr/bin/env python
# coding: utf-8

# # A Network Tour of Data Science
# ###       Xavier Bresson, Winter 2016/17
# ## Exercise 4 - Code 2 : Unsupervised Learning
# ## Unsupervised Clustering with Kernel K-Means
#
# Notebook export: sets up the environment, loads the raw MNIST data and
# prints its basic dimensions. The exercise cells that follow are left for
# the reader to fill in.

# In[1]:

# Load libraries

# Math
import numpy as np

# Visualization
# NOTE: get_ipython() is only available when this file runs under IPython;
# the matplotlib magic is issued before pyplot is imported, as in the notebook.
get_ipython().run_line_magic('matplotlib', 'notebook')
import matplotlib.pyplot as plt
# A limit of 0 turns off matplotlib's "too many open figures" warning.
plt.rcParams.update({'figure.max_open_warning': 0})
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy import ndimage

# Print output of LFR code
import subprocess

# Sparse matrix
import scipy.sparse
import scipy.sparse.linalg

# 3D visualization
import pylab
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot

# Import data
import scipy.io

# Import functions in lib folder
import sys
sys.path.insert(1, 'lib')

# Import helper functions
# autoreload mode 2 re-imports modules before each cell execution, so edits
# to the helper library are picked up without restarting the kernel.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
from lib.utils import (
    construct_kernel,
    compute_kernel_kmeans_EM,
    compute_kernel_kmeans_spectral,
    compute_purity,
)

# Import distance function
import sklearn.metrics.pairwise

# Remove warnings
import warnings
warnings.filterwarnings("ignore")

# In[2]:

# Load MNIST raw data images
mat = scipy.io.loadmat('datasets/mnist_raw_data.mat')
X = mat['Xraw']
# First axis is used as the number of samples, second as the feature dimension.
n, d = X.shape[0], X.shape[1]
# Shift the ground-truth labels down by one and flatten them
# (presumably the file stores 1-based labels -- TODO confirm).
Cgt = (mat['Cgt'] - 1).squeeze()
# Number of distinct ground-truth labels.
nc = len(np.unique(Cgt))
print('Number of data =', n)
print('Data dimensionality =', d)
print('Number of classes =', nc)

# **Question 1a:** What is the clustering accuracy of standard/linear K-Means?
# Hint: You may use the function *Ker = construct_kernel(X, 'linear')* to compute the
# linear kernel, *C_kmeans, En_kmeans = compute_kernel_kmeans_EM(n_classes, Ker, Theta, 10)*
# with *Theta = np.ones(n)* to run the standard K-Means algorithm, and
# *accuracy = compute_purity(C_computed, C_solution, n_clusters)*, which returns the
# accuracy.

# In[3]:

# Your code here

# **Question 1b:** What is the clustering accuracy for the kernel K-Means algorithm with
# (1) Gaussian Kernel for the EM approach and the Spectral approach?
# (2) Polynomial Kernel for the EM approach and the Spectral approach?
# Hint: You may use the functions *Ker = construct_kernel(X, 'gaussian')* and *Ker = construct_kernel(X, 'polynomial', [1, 0, 2])* to compute the non-linear kernels.
# Hint: You may use the functions *C_kmeans, __ = compute_kernel_kmeans_EM(K, Ker, Theta, 10)* for the EM kernel K-Means algorithm and *C_kmeans, __ = compute_kernel_kmeans_spectral(K, Ker, Theta, 10)* for the Spectral kernel K-Means algorithm, where K is the number of clusters (n_classes above).
# In[4]:

# Your code here

# **Question 1c:** What is the clustering accuracy for the kernel K-Means algorithm with
# (1) KNN_Gaussian Kernel for the EM approach and the Spectral approach?
# (2) KNN_Cosine_Binary Kernel for the EM approach and the Spectral approach?
# You can test with the value KNN_kernel = 50.
# Hint: You may use the functions *Ker = construct_kernel(X, 'kNN_gaussian', KNN_kernel)*
# and *Ker = construct_kernel(X, 'kNN_cosine_binary', KNN_kernel)* to compute the
# non-linear kernels.

# In[5]:

# Your code here

# In[6]: