# K-means examples¶

In [1]:
# Numerical arrays.
import numpy as np

# Machine learning - KMeans.
import sklearn.cluster as skcl

# Plotting.
import matplotlib.pyplot as plt

In [2]:
# Data set.
X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])

In [3]:
# Plot the data set.
plt.plot(X[:,0], X[:,1], 'x')
# Set reasonable limits.
plt.xlim([-2,14])
plt.ylim([-2,6])

Out[3]:
(-2.0, 6.0)
In [4]:
# Perform kmeans fitting.
kmeans = skcl.KMeans(n_clusters=2, random_state=0).fit(X)

In [5]:
# See the labels of the points.
kmeans.labels_

Out[5]:
array([1, 1, 1, 0, 0, 0])
In [6]:
# Plot the data set.
plt.plot(X[kmeans.labels_ == 0][:,0], X[kmeans.labels_ == 0][:,1], 'gx')
plt.plot(X[kmeans.labels_ == 1][:,0], X[kmeans.labels_ == 1][:,1], 'rx')
# Set reasonable limits.
plt.xlim([-2,14])
plt.ylim([-2,6])

Out[6]:
(-2.0, 6.0)
In [7]:
# Predict the cluster for two points.
newvals = np.array([[0, 0], [12, 3]])
predictions = kmeans.predict(newvals)
predictions

Out[7]:
array([1, 0])
In [8]:
# Plot the data set.
plt.plot(X[kmeans.labels_ == 0][:,0], X[kmeans.labels_ == 0][:,1], 'gx')
plt.plot(X[kmeans.labels_ == 1][:,0], X[kmeans.labels_ == 1][:,1], 'rx')
plt.plot(newvals[:,0], newvals[:,1], 'bo')
# Set reasonable limits.
plt.xlim([-2,14])
plt.ylim([-2,6])

Out[8]:
(-2.0, 6.0)
In [9]:
# Plot the data set.
plt.plot(X[kmeans.labels_ == 0][:,0], X[kmeans.labels_ == 0][:,1], 'gx')
plt.plot(X[kmeans.labels_ == 1][:,0], X[kmeans.labels_ == 1][:,1], 'rx')
plt.plot(newvals[predictions == 0][:,0], newvals[predictions == 0][:,1], 'go')
plt.plot(newvals[predictions == 1][:,0], newvals[predictions == 1][:,1], 'ro')
# Set reasonable limits.
plt.xlim([-2,14])
plt.ylim([-2,6])

Out[9]:
(-2.0, 6.0)
In [10]:
cent = kmeans.cluster_centers_
cent

Out[10]:
array([[10.,  2.],
[ 1.,  2.]])
In [11]:
# Plot the data set.
plt.plot(X[kmeans.labels_ == 0][:,0], X[kmeans.labels_ == 0][:,1], 'gx')
plt.plot(X[kmeans.labels_ == 1][:,0], X[kmeans.labels_ == 1][:,1], 'rx')
plt.plot(newvals[predictions == 0][:,0], newvals[predictions == 0][:,1], 'go')
plt.plot(newvals[predictions == 1][:,0], newvals[predictions == 1][:,1], 'ro')
plt.plot(cent[:,0], cent[:,1], 'k.')
# Set reasonable limits.
plt.xlim([-2,14])
plt.ylim([-2,6])

Out[11]:
(-2.0, 6.0)

#### My own dataset¶

In [12]:
# Data set.

# Two centre points.
c1 = np.array([1.0,  2.0])
c2 = np.array([5.0, 12.0])

# Create points randomly around the centre points.
c1x = np.random.normal(c1[0], 2.0, 10)
c1y = np.random.normal(c1[1], 2.0, 10)
c1p = np.vstack([c1x, c1y]).T
c2x = np.random.normal(c2[0], 2.0, 10)
c2y = np.random.normal(c2[1], 2.0, 10)
c2p = np.vstack([c2x, c2y]).T

# Merge the two lists of values.
myX = np.concatenate([c1p, c2p])
# Shuffle the points.
np.random.shuffle(myX)

myX

Out[12]:
array([[-0.60350908,  0.80600677],
[ 5.79766518, 10.527845  ],
[ 2.93118138, -2.27208718],
[ 6.25648216, 12.45203601],
[ 5.79633889,  8.5543678 ],
[ 6.59360945,  9.0939594 ],
[ 0.10517133,  2.36381443],
[ 2.30112082,  0.29770193],
[ 2.48206805, 11.06734928],
[ 4.02188676, 16.24798429],
[ 2.92692029, -0.29308455],
[ 2.70377258,  4.6306797 ],
[-0.25069006, -0.30494735],
[-0.4425126 ,  3.44875223],
[ 2.27559788,  5.89615065],
[ 5.24168866, 14.26724582],
[-4.02183793,  1.87040957],
[-1.48560208,  2.56505284],
[ 2.3880615 , 13.0276387 ],
[ 4.94471303, 11.85252002]])
In [13]:
# Plot the data set.
plt.plot(myX[:,0], myX[:,1], 'x')

Out[13]:
[<matplotlib.lines.Line2D at 0x25f83b60fd0>]
In [14]:
# Perform kmeans fitting.
mykmeans = skcl.KMeans(n_clusters=2, random_state=0).fit(myX)

In [15]:
# Plot the data set.
plt.plot(myX[mykmeans.labels_ == 0][:,0], myX[mykmeans.labels_ == 0][:,1], 'gx')
plt.plot(myX[mykmeans.labels_ == 1][:,0], myX[mykmeans.labels_ == 1][:,1], 'rx')

Out[15]:
[<matplotlib.lines.Line2D at 0x25f83bcc1f0>]
In [16]:
# Create new dummy points for classification.
# mynewvals = np.array([[0, 0], [6, 10]])
myxvals = np.linspace(-1.0, 7.0, 10)
myyvals = np.linspace(-1.0, 15.0, 10)
mynewvals = np.vstack([myxvals, myyvals]).T
mynewvals

Out[16]:
array([[-1.        , -1.        ],
[-0.11111111,  0.77777778],
[ 0.77777778,  2.55555556],
[ 1.66666667,  4.33333333],
[ 2.55555556,  6.11111111],
[ 3.44444444,  7.88888889],
[ 4.33333333,  9.66666667],
[ 5.22222222, 11.44444444],
[ 6.11111111, 13.22222222],
[ 7.        , 15.        ]])
In [17]:
# Predict the cluster for two points.
mypredictions = mykmeans.predict(mynewvals)
mypredictions

Out[17]:
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
In [18]:
# Plot the data set.
plt.plot(myX[mykmeans.labels_ == 0][:,0], myX[mykmeans.labels_ == 0][:,1], 'gx')
plt.plot(myX[mykmeans.labels_ == 1][:,0], myX[mykmeans.labels_ == 1][:,1], 'rx')
plt.plot(mynewvals[mypredictions == 0][:,0], mynewvals[mypredictions == 0][:,1], 'go')
plt.plot(mynewvals[mypredictions == 1][:,0], mynewvals[mypredictions == 1][:,1], 'ro')

Out[18]:
[<matplotlib.lines.Line2D at 0x25f83c37100>]
In [19]:
# The centres of clusters.
mycent = mykmeans.cluster_centers_
mycent

Out[19]:
array([[ 0.58541932,  1.72804082],
[ 4.83583485, 11.89899404]])
In [20]:
# Plot the data set.
plt.plot(myX[mykmeans.labels_ == 0][:,0], myX[mykmeans.labels_ == 0][:,1], 'gx')
plt.plot(myX[mykmeans.labels_ == 1][:,0], myX[mykmeans.labels_ == 1][:,1], 'rx')
plt.plot(mynewvals[mypredictions == 0][:,0], mynewvals[mypredictions == 0][:,1], 'go')
plt.plot(mynewvals[mypredictions == 1][:,0], mynewvals[mypredictions == 1][:,1], 'ro')
plt.plot(mycent[:,0], mycent[:,1], 'k.')

Out[20]:
[<matplotlib.lines.Line2D at 0x25f83c97af0>]