%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import scipy.io
# Load the first example dataset; the samples are stored under the 'X' key
# of the dict returned by loadmat.
data1 = scipy.io.loadmat('ex7data2.mat')

# Scatter the raw points: column 0 on the x-axis, column 1 on the y-axis.
points_x, points_y = data1['X'][:, 0], data1['X'][:, 1]
plt.figure(figsize=(8, 6))
plt.scatter(points_x, points_y)
plt.show()
from sklearn.cluster import KMeans
# Partition the samples in data1['X'] into three clusters.
clf = KMeans(n_clusters=3)
clf.fit(data1['X'])
# Bare expression: only has a visible effect as the last expression of a
# notebook cell, where it displays the fitted centroids.
clf.cluster_centers_

# Draw the samples colored by their assigned cluster, then overlay the
# centroids as large crosses (one scalar color value per centroid).
centers = clf.cluster_centers_
plt.figure(figsize=(8, 6))
plt.scatter(data1['X'][:, 0], data1['X'][:, 1], c=clf.labels_, alpha=0.3)
plt.scatter(centers[:, 0], centers[:, 1], c=[1, 2, 3], marker='x', s=300)
plt.show()
import matplotlib.image as mpimg
# Read the image into an array and display the original.
bird = mpimg.imread('bird_small.png')
plt.imshow(bird)
plt.show()

# Flatten the image to one row per pixel so that each pixel's channel vector
# becomes a sample for K-means.
# NOTE(review): image arrays are normally (rows, cols, channels), so `w` is
# presumably the height and `h` the width -- confirm. The names are kept
# as-is; the reshape below round-trips correctly either way.
w, h, d = bird.shape
data = bird.reshape(w * h, d)

# Cluster the pixel colors into a 16-entry palette.
clf2 = KMeans(n_clusters=16)
clf2.fit(data)
# Parenthesized so this line is valid in Python 3 as well as Python 2.
print(clf2.cluster_centers_)

# Replace every pixel by the centroid of its cluster and restore the
# original image shape.
compressed = clf2.cluster_centers_[clf2.labels_].reshape(w, h, d)
plt.imshow(compressed)
plt.show()
# Load the second example dataset; the samples are stored under the 'X' key
# of the dict returned by loadmat.
data2 = scipy.io.loadmat('ex7data1.mat')

# Scatter the raw samples: column 0 on the x-axis, column 1 on the y-axis.
feature_0, feature_1 = data2['X'][:, 0], data2['X'][:, 1]
plt.figure(figsize=(8, 6))
plt.scatter(feature_0, feature_1)
plt.title('Example Dataset 1')
plt.show()
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# Standardize each feature, then keep a single principal component.
clf3 = Pipeline([
    ('zscore', StandardScaler()),
    ('pca', PCA(n_components=1)),
])
X = clf3.fit_transform(data2['X'])
# Parenthesized so this line is valid in Python 3 as well as Python 2.
print(clf3.named_steps['pca'].components_)

# Map the 1-D projection back into the original feature space by undoing the
# pipeline steps in reverse order (PCA first, then the scaler). This replaces
# a manual `components_ * X` product, which only produced the right shape
# through broadcasting because n_components == 1.
X_inverse = clf3.inverse_transform(X)

# Overlay the reconstructed points (red) on the original samples.
plt.figure(figsize=(8, 6))
plt.title('Example Dataset 1')
plt.scatter(data2['X'][:, 0], data2['X'][:, 1])
plt.scatter(X_inverse[:, 0], X_inverse[:, 1], c='r', marker='o')
plt.show()