In [1]:
%load_ext watermark
%watermark -a "Romell D.Z." -u -d -p numpy,pandas,matplotlib,sklearn
Romell D.Z. 
last updated: 2019-01-20 

numpy 1.14.6
pandas 0.23.4
matplotlib 2.2.2
sklearn 0.20.0

2. Unsupervised Learning

In [3]:
import warnings
warnings.simplefilter('ignore' )

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from __future__ import division
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = (12,6) 
In [6]:
from sklearn import datasets
X, y = datasets.make_blobs(centers=4,n_samples=1500,n_features=2,cluster_std=1.5)
plt.scatter(X[:,0],X[:,1],c='gray')
Out[6]:
<matplotlib.collections.PathCollection at 0x1a19841e10>
In [7]:
from sklearn.cluster import KMeans
model = KMeans(n_clusters=4).fit(X)
model.cluster_centers_
Out[7]:
array([[-6.52524497,  1.12372811],
       [ 7.02736545,  5.64171268],
       [ 0.96321496, -4.15451572],
       [-8.38441908, -4.63891145]])
In [8]:
plt.scatter(X[:,0],X[:,1],c=model.labels_,cmap=plt.cm.viridis,alpha=.2)
centroids_x = model.cluster_centers_[:,0]
centroids_y = model.cluster_centers_[:,1]
plt.scatter(centroids_x,centroids_y,marker='D',c='r',s=50)
plt.savefig('snapshot/centroid_clusters',bbox_inches='tight',dpi=100);
In [23]:
X_test = np.random.random([3,2]) * 20 - 10
plt.scatter(X[:,0],X[:,1],c='gray',alpha=.2)
plt.scatter(list(zip(*X_test))[0],list(zip(*X_test))[1],c='b');
In [24]:
fig, ax = plt.subplots(1,2,figsize=(15,7))
model = KMeans(n_clusters=3)
model.fit(X)
ax[0].scatter(X[:,0],X[:,1],c=model.labels_,cmap=plt.cm.viridis,alpha=.2)
p_predict = model.predict(X_test)
centroids_x = model.cluster_centers_[:,0]
centroids_y = model.cluster_centers_[:,1]
ax[0].scatter(list(zip(*X_test))[0],list(zip(*X_test))[1],c=p_predict,cmap=plt.cm.viridis,marker='X',s=200)
ax[0].scatter(centroids_x,centroids_y,marker='D',c='r',s=50)

model = KMeans(n_clusters=4).fit(X)
p_predict = model.predict(X_test)
centroids_x = model.cluster_centers_[:,0]
centroids_y = model.cluster_centers_[:,1]
ax[1].scatter(X[:,0],X[:,1],c=model.labels_,cmap=plt.cm.viridis,alpha=.2)#kmeans.cluster_centers_
ax[1].scatter(list(zip(*X_test))[0],list(zip(*X_test))[1],c=p_predict,cmap=plt.cm.viridis,marker='X',s=200)
ax[1].scatter(centroids_x,centroids_y,marker='D',c='r',s=50)
plt.savefig('snapshot/kmeans_clusters',bbox_inches='tight',dpi=100);