import matplotlib.pyplot as plt import os import numpy as np def simpleaxis(ax): #make adjustements to figures ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.get_xaxis().tick_bottom() ax.get_yaxis().tick_left() ax.set_xlabel('x',fontsize=fs) ax.set_ylabel('y',fontsize=fs) ax.set_xticks((0,1,2,3)) ax.set_yticks((0,1,2,3)) ax.set_xticklabels((' 0',' 1',' 2', '3'),fontsize=fs) ax.set_yticklabels((' 0',' 1',' 2', '3'),fontsize=fs) #short-hand notation of magnitude function (norm) on a vector. mag = lambda x: math.sqrt(sum(i**2 for i in x)) fs = 15 fw = 'bold' resdir = '/Users/cplanting/Projects/Courses/Math BCN/presentation' a = np.array([2,3]) figure1 = plt.figure(1,figsize=[5,5]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,4], ylim=[0,4]) plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = a, textcoords = 'data', arrowprops = dict(facecolor='red', arrowstyle='<-', lw=3., color='red')) plt.annotate( 'a', xy=(1, 1.7), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) simpleaxis(ax1) plt.tight_layout() figure1.savefig(os.path.join(resdir,'figure1.png')) figure1 = plt.figure(1,figsize=[5,5]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,4], ylim=[0,4]) v = np.array([1,3]) w = np.array([2,1]) #draw v plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = v, textcoords = 'data', arrowprops = dict(facecolor='red', arrowstyle='<-', lw=3., color='red')) plt.annotate( 'v', xy=(0.45, 1.8), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) #draw w plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = w, textcoords = 'data', arrowprops = dict(facecolor='blue', arrowstyle='<-', lw=3., color='blue')) plt.annotate( 'w', xy=(1, 0.7), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) simpleaxis(ax1) plt.tight_layout() figure1.savefig(os.path.join(resdir,'figure2a.png')) figure1 = plt.figure(1,figsize=[5,5]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,4], ylim=[0,4]) #draw v plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = v, textcoords = 'data', arrowprops = dict(facecolor='red', arrowstyle='<-', lw=3., color='red')) plt.annotate( 'v', xy=(0.45, 1.8), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) #draw w plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = w, textcoords = 'data', arrowprops = dict(facecolor='blue', arrowstyle='<-', lw=3., color='blue')) plt.annotate( 'w', xy=(1, 0.7), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) #calculate v+w vw = v + w print vw #draw help-lines and ... plt.annotate( '', xy=(1,3), xycoords = 'data', xytext = vw, textcoords = 'data', arrowprops = dict(facecolor='k', arrowstyle='<-', lw=0.5, color='k')) plt.annotate( '', xy=(2,1), xycoords = 'data', xytext = vw, textcoords = 'data', arrowprops = dict(facecolor='k', arrowstyle='<-', lw=0.5, color='k')) #... draw 'vw' plt.annotate( 'v+w', xy=(1.5, 2.75), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = vw, textcoords = 'data', arrowprops = dict(facecolor='green', arrowstyle='<-', lw=3., color='green')) simpleaxis(ax1) plt.tight_layout() figure1.savefig(os.path.join(resdir,'figure2b.png')) figure1 = plt.figure(1,figsize=[5,5]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,4], ylim=[0,4]) v1 = np.array([0.5,1]) plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = v1, textcoords = 'data', arrowprops = dict(facecolor='red', arrowstyle='<-', lw=3., color='red')) plt.annotate( 'v1', xy=(1, 1), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) simpleaxis(ax1) plt.tight_layout() figure1.savefig(os.path.join(resdir,'figure3a.png')) figure1 = plt.figure(1,figsize=[5,5]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,4], ylim=[0,4]) v1= np.array([0.5,1]) v13 = 3*v1 plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = v1, textcoords = 'data', arrowprops = dict(facecolor='red', arrowstyle='<-', lw=3., color='red')) plt.annotate( 'v1', xy=(1, 1), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw,color='r') plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = v13, textcoords = 'data', arrowprops = dict(facecolor='green', arrowstyle='<-', lw=2., color='green')) plt.annotate( '3v1', xy=(1.5, 2), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw,color='green') simpleaxis(ax1) plt.tight_layout() figure1.savefig(os.path.join(resdir,'figure3b.png')) figure1 = plt.figure(1,figsize=[5,5]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,1.5], ylim=[0,3.5]) v = np.array([1,3]) w= np.array([1,1]) #draw v plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = v, textcoords = 'data',arrowprops = dict(facecolor='red', arrowstyle='<-', lw=3., color='red')) plt.annotate( 'v', xy=(0.45, 1.8), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) #draw w plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = w, textcoords = 'data', arrowprops = dict(facecolor='blue', arrowstyle='<-', lw=3., color='blue')) plt.annotate( 'w', xy=(1, 0.7), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) simpleaxis(ax1) plt.tight_layout() figure1.savefig(os.path.join(resdir,'figure4a.png')) v = np.array([1,3]) w = np.array([1,1]) # calculate the projection p of w on v: p = v * (np.dot(v,w))/(mag(v)**2) print p figure1 = plt.figure(1,figsize=[5,5]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,1.5], ylim=[0,3.5]) #draw v plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = v, textcoords = 'data', arrowprops = dict(facecolor='red', arrowstyle='<-', lw=3., color='red')) plt.annotate( 'v', xy=(0.45, 1.8), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) #draw w plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = w, textcoords = 'data', arrowprops = dict(facecolor='blue', arrowstyle='<-', lw=3., color='blue')) plt.annotate( 'w', xy=(1, 0.7), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) #draw p plt.annotate( '', xy=(0,0), xycoords = 'data', xytext = p, textcoords = 'data', arrowprops = dict(facecolor='green', arrowstyle='<-', lw=3., color='green')) #draw line from p to w plt.plot([p[0],w[0]],[p[1],w[1]],'--',color='k') plt.annotate( 'p', xy=(0.1, 0.8), xycoords = 'data', xytext = (0, 10), textcoords = 'offset points', fontsize=fs,fontweight=fw) simpleaxis(ax1) plt.tight_layout() figure1.savefig(os.path.join(resdir,'figure4b.png')) #[source: http://nbviewer.ipython.org/github/mwaskom/Psych216/blob/master/vector_dot.ipynb] import seaborn seaborn.set() colors = seaborn.color_palette() #import utils from mpl_toolkits.mplot3d import Axes3D # First create two random vectors in 3 dimensional space v1 = rand(3, 1) v2 = rand(3, 1) # And scale them to unit length v1 = v1 / norm(v1) v2 = v2 / norm(v2) # Plot the vectors o = zeros(3) # origin # We'll use the object oriented plotting interface f = figure(figsize=(8, 8)) ax = f.add_subplot(111, projection="3d", axisbg="white") ax.plot(*[[o[i], v1[i]] for i in range(3)], linewidth=3, label="vector1") ax.plot(*[[o[i], v2[i]] for i in range(3)], linewidth=3, label="vector2") for axisl in ["x", "y", "z"]: getattr(ax, "set_%slabel" % axisl)(axisl) # Here's a fun trick legend(); f = figure(figsize=(8, 8)) ax = f.add_subplot(111, projection="3d", axisbg="white") ax.plot(*[[o[i], 2*v1[i]] for i in range(3)], linewidth=3, label="vector1") ax.plot(*[[o[i], 2*v2[i]] for i in range(3)], linewidth=3, label="vector2") for axisl in ["x", "y", "z"]: getattr(ax, "set_%slabel" % axisl)(axisl) # Here's a fun trick legend() for i in range(100): # generate a point that is a weighted sum of the 2 vectors w1 = randn(1) w2 = randn(1) point = w1 * v1 + w2 * v2 ax.plot(*point, marker=".", color="k") # We can find a vector that is orthogonal to the plane defined by v1 and v2 # by taking the vector cross product. See the wikipedia page for a # definition of cross product v3 = cross(v1.reshape(1, 3), v2.reshape(1, 3)).squeeze() # Must be right shape for cross() ax.plot(*[[o[i], 2*v3[i]] for i in range(3)], linewidth=3, label="orthogonal vector") legend(); #Since we're using unit vectors, the angle theta becomes simply a fnction of the inner product of the two vectors theta = arccos(dot(v2.T, v1)).squeeze() # and radians can be converted to degrees theta_deg = theta * (180 / pi) print theta, theta_deg age = 20*np.random.random_sample(size=100) #age between 0 and 20 y #impose correlation shoe = 10 + 1*age + 20*np.random.random(size=100) fs = 13 figure1 = plt.figure(1,figsize=[6,4]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[0,20], ylim=[0,50]) plt.plot(age,shoe, 'bo') ax1.spines['top'].set_visible(False) ax1.spines['right'].set_visible(False) ax1.get_xaxis().tick_bottom() ax1.get_yaxis().tick_left() ax1.set_xlabel('age',fontsize=fs) ax1.set_ylabel('shoe-size',fontsize=fs) age_demean = pylab.demean(age, axis=0) shoe_demean = pylab.demean(shoe, axis=0) figure1 = plt.figure(1,figsize=[6,4]) ax1 = figure1.add_subplot(111,autoscale_on=False, xlim=[-10,10], ylim=[-25,25]) plt.plot(age_demean,shoe_demean, 'ro') ax1.spines['top'].set_visible(False) ax1.spines['right'].set_visible(False) ax1.get_xaxis().tick_bottom() ax1.get_yaxis().tick_left() ax1.set_xlabel('age (demeaned)',fontsize=fs) ax1.set_ylabel('shoe-size (demeaned)',fontsize=fs) #inner product of age and shoe: c1 = np.dot(age_demean,shoe_demean) #respective norms (function mag) of the two vectors: age_norm = mag(age_demean) shoe_norm = mag(shoe_demean) #compare them: print 'standard deviation (np.std) : ' + str(np.std(age_demean)) print 'scaled norm of vector : ' + str(age_norm/sqrt(len(age))) print 'covariance (np.cov) : ' + str(np.cov(age_demean,shoe_demean)[0,1]) print 'scaled inner product : ' + str(c1/(len(age)-1)) #calculate the correlation by taking the inner product and dividing it by the norm of x multiplied by the norm of y. #also, calculate based on np.corrcoeff. corr = c1/(age_norm * shoe_norm) print 'correlation (np.corrcoeff) : ' + str(np.corrcoef(age_demean,shoe_demean)[0,1]) print 'correlation based on inproduct: ' + str(corr) matrix = np.vstack([age,shoe]).T figure1 = plt.figure(1,figsize=[4,8]) ax1 = figure1.add_subplot(211,autoscale_on=False, xlim=[0,1], ylim=[0,100]) ax1.matshow(matrix, cmap=cm.gray,interpolation='nearest') ax1.set_aspect('auto') cov_matrix = np.cov(matrix.T) ax2 = figure1.add_subplot(212,autoscale_on=False, xlim=[0,1], ylim=[0,1]) ax2.imshow(cov_matrix, cmap=cm.gray,interpolation='nearest') print cov_matrix #try to make vectors orthogonal: from scipy.linalg import sqrtm, inv # source: http://en.wikipedia.org/wiki/Orthogonal_matrix#Nearest_orthogonal_matrix def sym(w): return w.dot(inv(sqrtm(w.T.dot(w)))) matrix_orth = sym(matrix) #print matrix.shape figure1 = plt.figure(1,figsize=[4,8]) ax1 = figure1.add_subplot(121,autoscale_on=False, xlim=[0,1], ylim=[0,100]) ax1.matshow(matrix, cmap=cm.gray,interpolation='nearest') ax1.set_aspect('auto') ax2 = figure1.add_subplot(122,autoscale_on=False, xlim=[0,1], ylim=[0,100]) ax2.matshow(matrix_orth, cmap=cm.gray,interpolation='nearest') ax2.set_aspect('auto') print 'original inner prod : ' + str(np.dot(matrix[:,0],matrix[:,1])) print 'inner prod after orthogonalisation : ' + str(np.dot(matrix_orth[:,0],matrix_orth[:,1])) print 'correlation original : ' + str(np.corrcoef(matrix[:,0],matrix[:,1])[0,1]) print 'after orthogonalisation : ' + str(np.corrcoef(matrix_orth[:,0],matrix_orth[:,1])[0,1]) figure1 = plt.figure(1,figsize=[8,4]) ax1 = figure1.add_subplot(121,autoscale_on=True) plt.plot(matrix[:,0],matrix[:,1], 'ro') ax1.spines['top'].set_visible(False) ax1.spines['right'].set_visible(False) ax1.get_xaxis().tick_bottom() ax1.get_yaxis().tick_left() ax1.set_xlabel('age (demeaned)',fontsize=fs) ax1.set_ylabel('shoe-size (demeaned)',fontsize=fs) ax2 = figure1.add_subplot(122,autoscale_on=True) plt.plot(matrix_orth[:,0],matrix_orth[:,1], 'ro') ax2.spines['top'].set_visible(False) ax2.spines['right'].set_visible(False) ax2.get_xaxis().tick_bottom() ax2.get_yaxis().tick_left() ax2.set_xlabel('age (demeaned)',fontsize=fs) ax2.set_ylabel('shoe-size (demeaned)',fontsize=fs) #[source: http://nbviewer.ipython.org/url/atwallab.cshl.edu/teaching/PCA.ipynb] N=500 # number of datapoints sx=8 # standard deviation in the x direction sy=3 # standard deviation in the y direction x=np.random.normal(0,sx,N) # random samples from gaussian in x-direction y=np.random.normal(0,sy,N) # random samples from gaussian in y-direction axs=max(abs(x))*1.2 # axes plotting range t=linspace(-axs,axs,1000) # range of mesh points for contour plot cx,cy=np.meshgrid(t,t) # mesh array z=(1/(2*pi*sx*sy))*exp(-((cx*cx)/(2*sx*sx))-((cy*cy)/(2*sy*sy))) # actual 2d gaussian plt.figure(figsize=(10,5)) plt.xlim([-axs,axs]) plt.ylim([-axs,axs]) cm = plt.cm.RdBu # coloring scheme for contour plots plt.subplot(1,2,1) contour(t,t,z,linspace(0.0000001,1/(2*pi*sx*sy),20),cmap=cm) plt.grid() plt.title('contour map of 2d gaussian distribution\n $\sigma_x=$ %d, $\sigma_y=$ %d' % (sx,sy),fontsize=12) plt.xlabel('x',fontsize=12) plt.ylabel('y',fontsize=12) plt.subplot(1,2,2) contour(t,t,z,linspace(0.0000001,1/(2*pi*sx*sy),20),cmap=cm) plt.plot(x,y,'bo',markersize=5) plt.grid() plt.title('%d random samples from the 2d gaussian\n distribution' % N, fontsize=12) plt.xlabel('x',fontsize=12) plt.ylabel('y',fontsize=12) plt.show() degrees=30 # theta degrees theta=2*pi*degrees/360 # convert degrees to radians M=array([[cos(theta),-sin(theta)],[sin(theta),cos(theta)]]) #rotation matrix represented as 2d array print 'rotation matrix, \n M=', print M xyp=dot(M,vstack([x,y])).T # matrix multiplication xd=xyp[:,0] # new x-coordinates yd=xyp[:,1] # new y-coordinates plt.figure(figsize=(5,5)) plt.plot(xd,yd,'bo',markersize=5) plt.xlim([-axs,axs]) plt.ylim([-axs,axs]) plt.grid() plt.title('%d random samples from 2d gaussian distribution \n rotated by $\Theta$= %d degrees anticlockwise' % (N,degrees)) plt.xlabel('x') plt.ylabel('y') plt.show() cov(xyp.T) # covariance corrcoef(xyp.T) # correlation w,v=eig(cov(xyp.T)) w v v.shape ah=0.1 # size of arrow head f=1.1 # axes range plt.figure(figsize=(5,5)) plt.subplot(111,aspect='equal') plt.arrow(0,0,v[0,0],v[1,0],color='r',linewidth=2,head_width=ah,head_length=ah) plt.arrow(0,0,v[0,1],v[1,1],color='r',linewidth=2,head_width=ah,head_length=ah) plt.text(f*v[0,0],f*v[1,0],r'Eigenvector 1, $\vec{v_1}$ = %.2f $\vec{x}$ + %.2f $\vec{y}$' % (v[0,0],v[1,0]), fontsize=15) plt.text(f*v[0,1],f*v[1,1],r'Eigenvector 2, $\vec{v_1}$ = %.2f $\vec{x}$ + %.2f $\vec{y}$' % (v[0,1],v[1,1]), fontsize=15) plt.plot() plt.xlim([-f,f]) plt.ylim([-f,f]) plt.xlabel('x',fontsize=15) plt.ylabel('y',fontsize=15) plt.grid() plt.show() plt.figure(figsize=(5,5)) plt.plot(xd,yd,'bo',markersize=5,zorder=1,) plt.axis('equal') plt.xlim([-axs,axs]) plt.ylim([-axs,axs]) plt.grid() plt.title('Principal Components (eigenvectors) of random data', fontsize=12) plt.xlabel('x', fontsize=12) plt.ylabel('y', fontsize=12) plt.arrow(0,0,v[0,0]*sqrt(w[0]),v[1,0]*sqrt(w[0]),color='r',linewidth=2,head_width=1,head_length=1) plt.arrow(0,0,v[0,1]*sqrt(w[1]),v[1,1]*sqrt(w[1]),color='r',linewidth=2,head_width=1,head_length=1) plt.show() var_exp=100*w/sum(w) plt.figure(figsize=(10,5)) plt.subplot(1,2,1) plt.bar(np.arange(len(w)),w,width=0.6,color='r') plt.ylabel('Eigenvalue',fontsize=12) plt.xticks(np.arange(len(w))+0.3,np.arange(len(w))+1) plt.xlabel('Principal Component',fontsize=12) plt.subplot(1,2,2) plt.bar(np.arange(len(w)),var_exp,width=0.6,color='g') plt.ylabel('Variance explained (%)',fontsize=12) plt.xticks(np.arange(len(w))+0.3,np.arange(len(w))+1) plt.xlabel('Principal Component',fontsize=12) plt.show() a=dot(inv(v),xyp.T) plt.figure(figsize=(5,5)) plt.axis('equal') plt.grid() plot(a[0],a[1],'bo',markersize=5) plt.xlim([-axs,axs]) plt.ylim([-axs,axs]) plt.xlabel('Principal Component 1',fontsize=15) plt.ylabel('Principal Component 2',fontsize=15) plt.show() cov(a)