#!/usr/bin/env python
# coding: utf-8
"""K-means clustering, K-means image compression and PCA walkthrough.

Notebook export that runs three demonstrations with scikit-learn:

1. Cluster the 2-D points stored under key ``'X'`` in ``ex7data2.mat``
   into 3 groups and plot them together with the fitted centroids.
2. Compress ``bird_small.png`` by replacing every pixel with the nearest
   of 16 K-means colour centroids.
3. Project the points stored under key ``'X'`` in ``ex7data1.mat`` onto
   their first principal component and plot the reconstruction.

The three data files are opened by relative path, so they must be present
in the current working directory.
"""

# In[1]:

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io
import sklearn
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# The inline-plot magic only exists inside IPython/Jupyter; when the file is
# run as a plain script `get_ipython` is undefined, so skip it.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# # K-means Clustering

# In[3]:

data1 = scipy.io.loadmat('ex7data2.mat')

# In[4]:

plt.figure(figsize=(8, 6))
plt.scatter(data1['X'][:, 0], data1['X'][:, 1])
plt.show()

# In[6]:

clf = KMeans(n_clusters=3)
clf.fit(data1['X'])
clf.cluster_centers_

# In[7]:

# Points are coloured by their assigned cluster; centroids are drawn as
# large crosses on top.
plt.figure(figsize=(8, 6))
plt.scatter(data1['X'][:, 0], data1['X'][:, 1], c=clf.labels_, alpha=0.3)
plt.scatter(clf.cluster_centers_[:, 0], clf.cluster_centers_[:, 1],
            c=[1, 2, 3], marker='x', s=300)
plt.show()


# # Image compression with K-means

# In[9]:

bird = mpimg.imread('bird_small.png')

# In[10]:

plt.imshow(bird)
plt.show()

# In[11]:

# imread returns an array indexed (row, column, channel); flatten it to one
# row per pixel so each pixel's colour is a sample for K-means.
rows, cols, channels = bird.shape
data = bird.reshape(rows * cols, channels)

# In[12]:

clf2 = KMeans(n_clusters=16)
clf2.fit(data)
print(clf2.cluster_centers_)

# In[13]:

# Replace every pixel by the centroid of its cluster, then restore the
# original image shape.
compressed = clf2.cluster_centers_[clf2.labels_].reshape(rows, cols, channels)

# In[14]:

plt.imshow(compressed)
plt.show()


# # Principal Component Analysis

# In[15]:

data2 = scipy.io.loadmat('ex7data1.mat')

# In[16]:

plt.figure(figsize=(8, 6))
plt.title('Example Dataset 1')
plt.scatter(data2['X'][:, 0], data2['X'][:, 1])
plt.show()

# In[18]:

# Standardise the features first, then keep a single principal component.
clf3 = Pipeline([('zscore', StandardScaler()),
                 ('pca', PCA(n_components=1))])

# In[19]:

X = clf3.fit_transform(data2['X'])
print(clf3.named_steps['pca'].components_)

# In[20]:

# Undo the pipeline step by step: map the 1-D projection back into the
# standardised feature space with the PCA's own inverse_transform (which
# also restores the PCA mean and works for any n_components), then undo the
# z-score scaling to get back to the original units.
X_standardised = clf3.named_steps['pca'].inverse_transform(X)
X_inverse = clf3.named_steps['zscore'].inverse_transform(X_standardised)

# In[21]:

# Original points with their rank-1 reconstruction overlaid in red.
plt.figure(figsize=(8, 6))
plt.title('Example Dataset 1')
plt.scatter(data2['X'][:, 0], data2['X'][:, 1])
plt.scatter(X_inverse[:, 0], X_inverse[:, 1], c='r', marker='o')
plt.show()

# In[ ]: