import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy import linalg
# --- K-means on a 2-D toy dataset -------------------------------------------
# loadmat returns a dict keyed by MATLAB variable name (plus '__*__' metadata).
data = loadmat('data/data.mat')
data.keys()
# Notebook output: dict_keys(['__header__', '__version__', '__globals__', 'X'])

X = data['X']
print('X:', X.shape)
# Notebook output: X: (300, 2)

# Partition the samples into three clusters.
km1 = KMeans(n_clusters=3)
km1.fit(X)
# Notebook output (estimator repr; kept as a comment because executing it
# would build a second, unused estimator with since-removed keyword arguments):
#   KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
#          n_clusters=3, n_init=10, n_jobs=1, precompute_distances='auto',
#          random_state=None, tol=0.0001, verbose=0)

# Colour every sample by its assigned cluster, then overlay the centroids
# as black '+' markers.
plt.scatter(X[:, 0], X[:, 1], s=40, c=km1.labels_, cmap=plt.cm.prism)
plt.title('K-Means Clustering Results with K=3')
plt.scatter(
    km1.cluster_centers_[:, 0],
    km1.cluster_centers_[:, 1],
    marker='+',
    s=100,
    c='k',
    linewidth=2,
)
# --- Image compression by colour quantisation with K-means -------------------
img = plt.imread('data/bird_small.png')
img_shape = img.shape
img_shape
# Notebook output: (128, 128, 3)

# NOTE(review): matplotlib documents that PNG files are read as floats in
# [0, 1], so this divide (undone by the `* 255` at display time below) looks
# redundant -- confirm before removing; it is kept to preserve behaviour.
A = img/255.0

# Flatten to one row per pixel and one column per colour channel. The sizes
# come from img_shape rather than a hard-coded 128*128 so other image
# dimensions work too.
n_channels = img_shape[2]
AA = A.reshape(-1, n_channels)
AA.shape
# Notebook output: (16384, 3)

# Learn a 16-colour palette: each cluster centre is one palette colour.
km2 = KMeans(n_clusters=16)
km2.fit(AA)
# Notebook output (estimator repr; kept as a comment because executing it
# would build a second, unused estimator with since-removed keyword arguments):
#   KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
#          n_clusters=16, n_init=10, n_jobs=1, precompute_distances='auto',
#          random_state=None, tol=0.0001, verbose=0)

# Replace every pixel by the centre of the cluster it was assigned to and
# restore the original image layout.
B = km2.cluster_centers_[km2.labels_].reshape(img_shape)

# Show the original and the quantised image side by side, without axes.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 9))
ax1.imshow(img)
ax1.set_title('Original')
ax2.imshow(B*255)  # undo the /255.0 scaling applied before clustering
ax2.set_title('Compressed, with 16 colors')
for ax in fig.axes:
    ax.axis('off')