import random as pyrandom
# Session-wide plotting defaults.
# Figures are created with an 8 x 8 size.
figsize(8,8)
# Images are drawn with a grayscale colormap and without smoothing between
# pixels (nearest-neighbour interpolation); both "image" settings are applied
# in a single rc() call.
matplotlib.rc("image",cmap="gray",interpolation="nearest")
# Matplotlib format strings (colour letter + '+' marker), one per cluster
# index; plotclusters cycles through them.  All entries are distinct so that
# two cluster indices inside one cycle never share a colour.
ccolors = ['c+','g+','b+','y+','r+','m+','k+']


def plotclusters(data,centers=None):
    """Scatter-plot the rows of `data` on axes fixed to [0,100] x [0,100].

    data    -- 2-D array; column 0 is plotted as x, column 1 as y.
    centers -- optional array of cluster labels used as a row mask on `data`
               (`data[centers==i]`).  Labels are expected to be non-negative
               integers, since they are matched against
               range(amax(centers)+1).  When omitted, every point is drawn
               with the same blue '+' marker.
    """
    xlim([0,100]); ylim([0,100])
    if centers is None:
        plot(data[:,0],data[:,1],'b+')
        return
    for i in range(amax(centers)+1):
        # Boolean row mask selecting the points assigned to cluster i.
        members = centers==i
        # Wrap around the palette when there are more clusters than colours.
        plot(data[members,0],data[members,1],ccolors[i%len(ccolors)])
# Synthetic data set: four blobs of 1000 points each, every blob being
# randn() noise scaled by 8 and shifted to one of the centres below.
blob_centers = [[70,75],[35,30],[25,70],[70,20]]
blobs = []
for center in blob_centers:
    blobs.append(8*randn(1000,2)+array(center))
# r_ indexed with a tuple stacks the blobs row-wise into one array.
data = r_[tuple(blobs)]
# shuffle() is called for its in-place effect on `data`; its result is unused.
shuffle(data)
plotclusters(data)
from scipy.cluster import vq
# Run k-means on the full data set for k = 1..9 and keep the second value
# returned by vq.kmeans (the distortion) for each k.
nclusters = range(1,10)
errors = [vq.kmeans(data,k)[1] for k in nclusters]
# Distortion as a function of the number of clusters, on a fixed y range.
ylim((0,30))
plot(nclusters,errors)
# How much the distortion falls each time one more cluster is allowed:
# element j is errors[j] - errors[j+1].
errors = array(errors)
improvement = errors[:-1]-errors[1:]
plot(improvement)
# Out: [<matplotlib.lines.Line2D at 0x2925ed0>]
from scipy.cluster import vq
# For every k in 1..9, run k-means on 10 random half-size subsamples of the
# data and record the mean distortion over those runs.
nclusters = range(1,10)
merrors = []
for i in nclusters:
    terrors = []
    for j in range(10):
        # Draw row indices instead of rows: on Python 3 random.sample()
        # raises TypeError for a NumPy array because it is not a registered
        # Sequence.  Sampling range(len(data)) picks the same rows and works
        # on both Python 2 and 3.
        rows = pyrandom.sample(range(len(data)),len(data)//2)
        sample = data[rows]
        vectors,error = vq.kmeans(sample,i)
        terrors.append(error)
    merrors.append(mean(terrors))
# Mean distortion as a function of the number of clusters.
ylim((0,30))
plot(nclusters,merrors)
# Out: [<matplotlib.lines.Line2D at 0x292edd0>]
# Synthetic data set: four blobs of 1000 points each.  randn() noise is
# multiplied by diag([2,12]), i.e. scaled by 2 along x and by 12 along y,
# then shifted to one of the centres below (all at y = 50).
stretch = diag([2,12])
column_centers = [[60,50],[40,50],[20,50],[80,50]]
columns = []
for center in column_centers:
    columns.append(dot(randn(1000,2),stretch)+array(center))
# r_ indexed with a tuple stacks the blobs row-wise into one array.
data = r_[tuple(columns)]
# shuffle() is called for its in-place effect on `data`; its result is unused.
shuffle(data)
plotclusters(data)
from scipy.cluster import vq
# Same sweep as for the first data set: k-means on the full data for
# k = 1..9, keeping the distortion (second value returned by vq.kmeans).
nclusters = range(1,10)
errors = [vq.kmeans(data,k)[1] for k in nclusters]
# Distortion as a function of the number of clusters, on a fixed y range.
ylim((0,30))
plot(nclusters,errors)
# Decrease in distortion obtained by each additional cluster:
# element j is errors[j] - errors[j+1].
errors = array(errors)
improvement = errors[:-1]-errors[1:]
plot(improvement)
# Out: [<matplotlib.lines.Line2D at 0x2af13d0>]
# Regenerate the four-blob data set: 1000 points per blob, randn() noise
# scaled by 8 and shifted to one of the centres below.
blob_centers = [[70,75],[35,30],[25,70],[70,20]]
blobs = []
for center in blob_centers:
    blobs.append(8*randn(1000,2)+array(center))
# r_ indexed with a tuple stacks the blobs row-wise into one array.
data = r_[tuple(blobs)]
# shuffle() is called for its in-place effect on `data`; its result is unused.
shuffle(data)
plotclusters(data)
from scipy.cluster import vq
# Fit k-means with k = 3 on 10 random half-size subsamples and scatter all
# resulting centroids, to see how consistently they land in the same places.
nclusters = range(3,4)
errors = []
for i in nclusters:
    allvs = []
    for j in range(10):
        # Draw row indices instead of rows: on Python 3 random.sample()
        # raises TypeError for a NumPy array because it is not a registered
        # Sequence.  Sampling range(len(data)) picks the same rows and works
        # on both Python 2 and 3.
        rows = pyrandom.sample(range(len(data)),len(data)//2)
        subset = data[rows]
        vectors,error = vq.kmeans(subset,i)
        # Pool the centroids of this run with those of the previous runs.
        allvs += list(vectors)
    allvs = array(allvs)
    plot(allvs[:,0],allvs[:,1],'r+')
xlim((0,100)); ylim((0,100))
# Out: (0, 100)
from scipy.cluster import vq
# Fit k-means with k = 7 on 10 random half-size subsamples and scatter all
# resulting centroids, to see how consistently they land in the same places.
nclusters = range(7,8)
errors = []
for i in nclusters:
    allvs = []
    for j in range(10):
        # Draw row indices instead of rows: on Python 3 random.sample()
        # raises TypeError for a NumPy array because it is not a registered
        # Sequence.  Sampling range(len(data)) picks the same rows and works
        # on both Python 2 and 3.
        rows = pyrandom.sample(range(len(data)),len(data)//2)
        subset = data[rows]
        vectors,error = vq.kmeans(subset,i)
        # Pool the centroids of this run with those of the previous runs.
        allvs += list(vectors)
    allvs = array(allvs)
    plot(allvs[:,0],allvs[:,1],'r+')
xlim((0,100)); ylim((0,100))
# Out: (0, 100)
# For every k in 1..9: fit k-means on 10 random half-size subsamples, pool the
# centroids of all runs, then cluster the pooled centroids into k groups.  The
# distortion of that second clustering ("spread") is recorded for each k.
spreads = []
nclusters = range(1,10)
for i in nclusters:
    allvs = []
    for j in range(10):
        # Draw row indices instead of rows: on Python 3 random.sample()
        # raises TypeError for a NumPy array because it is not a registered
        # Sequence.  Sampling range(len(data)) picks the same rows and works
        # on both Python 2 and 3.
        rows = pyrandom.sample(range(len(data)),len(data)//2)
        subset = data[rows]
        vectors,error = vq.kmeans(subset,i)
        allvs += list(vectors)
    # Only the distortion of clustering the pooled centroids is kept.
    _,spread = vq.kmeans(array(allvs),i)
    spreads.append(spread)
plot(nclusters,spreads)
# Out: [<matplotlib.lines.Line2D at 0x30d3490>]
# Synthetic data set: four blobs of 1000 points each, randn() noise scaled by
# 8, centred on the corners of the square (30,30)-(70,70).  The rows are left
# in generation order (no shuffle here).
corner_centers = [[70,70],[30,30],[30,70],[70,30]]
corner_blobs = []
for center in corner_centers:
    corner_blobs.append(8*randn(1000,2)+array(center))
# r_ indexed with a tuple stacks the blobs row-wise into one array.
data = r_[tuple(corner_blobs)]
plotclusters(data)
from scipy.cluster import vq
# Fit k-means with k = 3 on 10 random half-size subsamples of the current
# data and scatter all resulting centroids.
nclusters = range(3,4)
errors = []
for i in nclusters:
    allvs = []
    for j in range(10):
        # Draw row indices instead of rows: on Python 3 random.sample()
        # raises TypeError for a NumPy array because it is not a registered
        # Sequence.  Sampling range(len(data)) picks the same rows and works
        # on both Python 2 and 3.
        rows = pyrandom.sample(range(len(data)),len(data)//2)
        subset = data[rows]
        vectors,error = vq.kmeans(subset,i)
        # Pool the centroids of this run with those of the previous runs.
        allvs += list(vectors)
    allvs = array(allvs)
    plot(allvs[:,0],allvs[:,1],'r+')
xlim((0,100)); ylim((0,100))
# Out: (0, 100)
# For every k in 1..9: fit k-means on 10 random half-size subsamples, pool the
# centroids of all runs, then cluster the pooled centroids into k groups.  The
# distortion of that second clustering ("spread") is recorded for each k.
spreads = []
nclusters = range(1,10)
for i in nclusters:
    allvs = []
    for j in range(10):
        # Draw row indices instead of rows: on Python 3 random.sample()
        # raises TypeError for a NumPy array because it is not a registered
        # Sequence.  Sampling range(len(data)) picks the same rows and works
        # on both Python 2 and 3.
        rows = pyrandom.sample(range(len(data)),len(data)//2)
        subset = data[rows]
        vectors,error = vq.kmeans(subset,i)
        allvs += list(vectors)
    # Only the distortion of clustering the pooled centroids is kept.
    _,spread = vq.kmeans(array(allvs),i)
    spreads.append(spread)
plot(nclusters,spreads)
# Out: [<matplotlib.lines.Line2D at 0x2c85b50>]