from sklearn import datasets

# select the "prism" colormap for the scatters that are colored via c=...
plt.prism()

# Read the arrays by attribute instead of unpacking ``.values()``: the
# unpacking only works if the returned mapping has exactly five entries in
# one particular order, which is not something the code controls.
iris = datasets.load_iris()
data, target = iris.data, iris.target

spfs = (4,4)  # figure size shared by the 4x4 scatter grids
s = 3         # marker size shared by the 4x4 scatter grids

# Two 4x4 grids of pairwise feature scatters (column i against column j):
# first with the default marker color, then colored by ``target``.
# ``c=None`` is matplotlib's default, i.e. the same as not passing ``c``.
for point_colors, title, out_path in (
        (None, "$X$", "presentation/pca-pics/iris-all-nocolor.pdf"),
        (target, "$X$, $Y$", "presentation/pca-pics/iris-all.pdf"),
):
    plt.figure(figsize=spfs)
    for i in range(4):
        for j in range(4):
            plt.subplot(4, 4, i*4 + j + 1)
            plt.scatter(data[:,i], data[:,j], c=point_colors, alpha=.6, s=s)
            # hide the tick marks on every panel
            plt.xticks(())
            plt.yticks(())
    plt.suptitle(title)
    plt.savefig(out_path)
from sklearn.decomposition import PCA

# Reduce the iris data to its first two principal components.
pca = PCA(n_components=2)
plt.figure(figsize=(3,3))
data2 = pca.fit_transform(data)
plt.scatter(data2[:,0], data2[:,1])
plt.title("$X_{PCA}$")
plt.savefig("presentation/pca-pics/iris-2d-nocolor.pdf")

# Same projection colored by ``target``. No new figure is opened, so these
# markers are drawn over the uncolored ones on the same axes. The estimator
# is not refitted here: ``data2`` from above is reused.
plt.scatter(data2[:,0], data2[:,1], c=target)
plt.savefig("presentation/pca-pics/iris-2d.pdf")
print(pca.components_)

# Map the 2-D projection back into the original feature space.
data3 = pca.inverse_transform(data2)

# Two 4x4 grids of pairwise scatters of the back-projected data: the first
# colored by ``target`` and untitled, the second uncolored with a title.
# ``c=None`` is matplotlib's default, i.e. the same as not passing ``c``.
for point_colors, title, out_path in (
        (target, None, "presentation/pca-pics/iris-bt.pdf"),
        # raw string: "\m" is not a valid escape sequence in a normal literal
        (None, r"$X_{\mathrm{clean}}$", "presentation/pca-pics/iris-bt-nocolor.pdf"),
):
    plt.figure(figsize=spfs)
    for i in range(4):
        for j in range(4):
            plt.subplot(4, 4, i*4 + j + 1)
            plt.scatter(data3[:,i], data3[:,j], c=point_colors, alpha=.6, s=s)
            plt.xticks(())
            plt.yticks(())
    if title is not None:
        plt.suptitle(title)
    plt.savefig(out_path)
from sklearn.cluster import KMeans

# Cluster the iris data into three groups.
kmeans = KMeans(n_clusters=3)
labels = kmeans.fit(data).labels_
centers = kmeans.cluster_centers_

# Open a fresh figure: without it the scatter lands on whatever axes are
# current, i.e. the last panel of the previously drawn 4x4 grid.
plt.figure()
plt.scatter(data[:,0], data[:,1], c=labels)
# center k is given color value k, the same value used for points labeled k
plt.scatter(centers[:,0], centers[:,1], c=[0,1,2], s=100)
plt.savefig("presentation/kmeans-pics/cluster-centers.pdf")

# Uncolored variant on its own figure rather than on top of the colored one.
plt.figure()
plt.scatter(data[:,0], data[:,1])
plt.scatter(centers[:,0], centers[:,1], s=100)
plt.savefig("presentation/kmeans-pics/cluster-centers-nocolor.pdf")
from matplotlib.patches import Ellipse
def plotEllipse(pos,P,edge,face,line_width):
    """Add an ellipse derived from the 2x2 matrix ``P`` to the current axes.

    The singular value decomposition of ``P`` supplies the geometry: the
    first left singular vector gives the rotation angle, and the full
    width/height are twice the square roots of the two singular values.

    Args:
        pos: center of the ellipse, passed to the patch as ``xy``.
        P: 2x2 matrix; the callers in this file pass covariance-like matrices.
        edge: edge color of the patch.
        face: face color of the patch.
        line_width: outline width, passed to the patch as ``lw``.

    Returns:
        The ``Ellipse`` patch that was added to the current axes.
    """
    # Use the explicitly namespaced numpy/pyplot functions, like the rest of
    # the file, instead of relying on bare star-imported names.
    U, s, _ = np.linalg.svd(P)
    # angle of the first left singular vector, in degrees
    orient = np.degrees(np.arctan2(U[1,0], U[0,0]))
    ellipsePlot = Ellipse(xy=pos,
                          width=2.0*np.sqrt(s[0]),
                          height=2.0*np.sqrt(s[1]),
                          angle=orient,
                          facecolor=face, edgecolor=edge, lw=line_width)
    plt.gca().add_patch(ellipsePlot)
    return ellipsePlot
# --- raw 2-D point cloud -------------------------------------------------
plt.figure(figsize=(3,3))
plt.xlim(-4,6)
plt.ylim(-4,6)
# NOTE(review): this matrix is not symmetric (off-diagonals are -4 and -1),
# while numpy documents ``cov`` as symmetric positive-semidefinite. Left
# unchanged so the generated figures stay the same -- confirm the intent.
Cov = [[2,-4],[-1,4]]
print(np.linalg.det(Cov))
X = np.random.multivariate_normal([1,2], Cov, size=500)
plt.scatter(X[:,0], X[:,1], s=2, alpha=0.7)
plt.savefig("presentation/pca-pics/pointcloud-2d.pdf")

# --- the cloud modeled as an ellipse -------------------------------------
# ``plt.figsize`` is not a pyplot function; open the new 3x3 figure
# explicitly instead.
plt.figure(figsize=(3,3))
pca = PCA()
pca.fit(X)  # only the fitted attributes are used below
plt.xlim(-4,6)
plt.ylim(-4,6)
# rows of C: the principal axes, each scaled by its explained variance
C = (pca.explained_variance_ * pca.components_.T).T
mu = pca.mean_
plotEllipse(mu, C.T, 'k', 'none', 2)
plt.scatter(X[:,0], X[:,1], s=2, alpha=.2)
plt.scatter([mu[0]], [mu[1]], s=40, c='red')
# the point cloud is modeled as an ellipse
plt.savefig("presentation/pca-pics/pointcloud-2d-model.pdf")

# --- first principal direction -------------------------------------------
# rows of C are now scaled by the square root of the explained variance
C = (np.sqrt(pca.explained_variance_) * pca.components_.T).T
a1 = plt.arrow(mu[0], mu[1], C[0,0], C[0,1], fc="b", ec='b',
               head_width=.5, head_length=.5, width=.1,
               length_includes_head=True)
# this is the direction with the maximum variance
plt.savefig("presentation/pca-pics/pointcloud-2d-vecs-1a.pdf")

# --- projection onto the first axis --------------------------------------
a1.set_visible(False)
# line through the mean along the first principal direction
plt.plot(
    [mu[0] - 10*C[0,0], mu[0] + 10*C[0,0]],
    [mu[1] - 10*C[0,1], mu[1] + 10*C[0,1]])
L = []
for i in range(10):
    # part of (mu - x) along the second component: the arrow from the point
    # to the line drawn above
    d = np.dot(mu - X[i,:], pca.components_[1,:]) * pca.components_[1,:]
    L.append(plt.arrow(X[i,0], X[i,1], d[0], d[1],
                       head_width=.1, head_length=.1))
# we can project all points to this axis, and only use "little" information
plt.savefig("presentation/pca-pics/pointcloud-2d-vecs-proj1.pdf")

# --- second principal direction ------------------------------------------
# Hide the projection arrows with an explicit loop; ``map`` with a lambda is
# lazy on Python 3 and would never run the calls.
for projection_arrow in L:
    projection_arrow.set_visible(False)
plt.arrow(mu[0], mu[1], C[1,0], C[1,1], fc="b", ec='b',
          head_width=.5, head_length=.5, width=.1,
          length_includes_head=True)
# the next axis has less variance, and even less error
plt.savefig("presentation/pca-pics/pointcloud-2d-vecs-2a.pdf")
fs = (3,3)  # figure size for every step figure
s = 2       # default marker size for the step figures


def _set_centered_limits():
    """Apply the (-4, 6) viewing window shifted by the fitted mean ``mu``."""
    plt.xlim(-4-mu[0], 6-mu[0])
    plt.ylim(-4-mu[1], 6-mu[1])


def _draw_projection_arrows(axis, count=10):
    """Draw arrows for the first ``count`` rows of ``X3`` along ``axis``.

    Each arrow starts at the point and spans minus the point's component
    along ``axis``, so it ends where that component is zero.
    """
    axis = np.asarray(axis)
    for point in X3[:count]:
        d = np.dot(-point, axis) * axis
        plt.arrow(point[0], point[1], d[0], d[1],
                  head_width=.1, head_length=.1)


# step 1: the raw cloud and its mean
plt.figure(figsize=fs)
pca = PCA()
X3 = pca.fit_transform(X)  # the points expressed in the principal axes
plt.xlim(-4,6)
plt.ylim(-4,6)
# rows of C: the principal axes, each scaled by its explained variance
C = (pca.explained_variance_ * pca.components_.T).T
mu = pca.mean_
plt.scatter(X[:,0], X[:,1], s=s, alpha=.5)
plt.scatter([mu[0]], [mu[1]], s=40, c='red')
plt.savefig("presentation/pca-pics/pointcloud-2d-step1.pdf")

# step 2: subtract the mean
plt.figure(figsize=fs)
X2 = X - mu
plt.scatter(X2[:,0], X2[:,1], s=s, alpha=.5)
_set_centered_limits()
plt.scatter([0], [0], s=40, c='red')
plt.savefig("presentation/pca-pics/pointcloud-2d-step2.pdf")

# step 3: the centered cloud with its ellipse
plt.figure(figsize=fs)
plt.scatter(X2[:,0], X2[:,1], s=s, alpha=.5)
_set_centered_limits()
plotEllipse([0,0], C.T, 'k', 'none', 2)
plt.scatter([0], [0], s=40, c='red')
plt.savefig("presentation/pca-pics/pointcloud-2d-step3.pdf")

# step 4: the transformed points; the previous ellipse in gray, the ellipse
# of the transformed data in black
plt.figure(figsize=fs)
_set_centered_limits()
plt.scatter(X3[:,0], X3[:,1], s=4, alpha=.5)
plotEllipse([0,0], C.T, 'gray', 'none', 2)
plotEllipse([0,0], np.cov(X3, rowvar=False).T, 'k', 'none', 2)
plt.scatter([0], [0], s=40, c='red')
plt.savefig("presentation/pca-pics/pointcloud-2d-step4.pdf")

# step 5: arrows that remove the second coordinate of the first ten points
plt.figure(figsize=fs)
_set_centered_limits()
plt.scatter(X3[:,0], X3[:,1], s=s, alpha=.2)
plotEllipse([0,0], np.cov(X3, rowvar=False).T, 'k', 'none', 2)
plt.scatter([0], [0], s=40, c='red')
_draw_projection_arrows([0,1])
plt.savefig("presentation/pca-pics/pointcloud-2d-step5.pdf")

# step 6: arrows that remove the first coordinate of the first ten points
plt.figure(figsize=fs)
_set_centered_limits()
plt.scatter(X3[:,0], X3[:,1], s=s, alpha=.2)
plotEllipse([0,0], np.cov(X3, rowvar=False).T, 'k', 'none', 2)
plt.scatter([0], [0], s=40, c='red')
_draw_projection_arrows([1,0])
plt.savefig("presentation/pca-pics/pointcloud-2d-step6.pdf")