#!/usr/bin/env python
# coding: utf-8

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')

from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import RandomizedPCA
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import CCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.svm import LinearSVC
from scipy.stats import pearsonr
from random import shuffle

import cPickle, gzip
import numpy as np
import time


def time_f(f, n):
    # average wall-clock time of n calls to f
    start = time.time()
    for i in xrange(n):
        f()
    return (time.time() - start) / n


# download http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
f = gzip.open('mnist.pkl.gz', 'rb')
(mnist, labels), _, _ = cPickle.load(f)
f.close()


# # Monte Carlo integration

# In[ ]:


def estimate_pi(n=1000):
    # the quarter of the unit circle inside [0,1]^2 has area pi/4, so
    # pi is roughly 4 * P[x^2 + y^2 <= 1] for (x, y) uniform on [0,1]^2
    return np.mean(np.sum(np.power(np.random.rand(int(n), 2), 2), 1) <= 1) * 4


# In[ ]:


np.random.seed(0)

n_arr = np.logspace(1, 6, 10)
p_arr = np.zeros(len(n_arr))
n_reps = 10

for rep in xrange(n_reps):
    for i in xrange(len(n_arr)):
        p_arr[i] += np.abs(estimate_pi(n_arr[i]) - np.pi) / n_reps


# In[ ]:


plt.plot(np.log(n_arr), np.log(p_arr));
plt.xlabel('log number of samples');
plt.ylabel('log absolute error');


# # Truncated SVD

# In[ ]:


def my_svd(A, k):
    # randomized truncated SVD in the style of Halko et al.: sketch the range
    # of A with a Gaussian test matrix, orthonormalize the sketch, and take an
    # exact SVD of the small projected matrix B = Q^T A
    Omega = np.random.randn(A.shape[1], 2 * k)
    Y = np.dot(A, Omega)
    Q, _ = np.linalg.qr(Y)
    B = np.dot(Q.T, A)
    S, Sigma, V = np.linalg.svd(B)
    U = np.dot(Q, S)
    U = U[:, 0:k]
    Sigma = np.diag(Sigma[0:k])
    V = V[0:k, :]
    return np.dot(np.dot(U, Sigma), V)


t_n = [125, 250, 500, 1000, 2000]

t_my = np.zeros(len(t_n))
t_random = np.zeros(len(t_n))
t_arpack = np.zeros(len(t_n))
t_random_e = np.zeros(len(t_n))
t_arpack_e = np.zeros(len(t_n))
t_my_e = np.zeros(len(t_n))

for n in xrange(len(t_n)):
    # low-rank test matrix of size t_n[n] x t_n[n] with rank 2*t_n[n]/10
    A = np.dot(np.random.randn(t_n[n], 2 * t_n[n] / 10),
               np.random.randn(2 * t_n[n] / 10, t_n[n]))

    def f():
        my_svd(A, t_n[n] / 10)
    t_my[n] = time_f(f, 10)
    t_my_e[n] = np.linalg.norm(my_svd(A, t_n[n] / 10) - A) / t_n[n]

    svd = TruncatedSVD(t_n[n] / 10, 'randomized')

    def f():
        svd.fit(A)
    t_random[n] = time_f(f, 10)
    t_random_e[n] = np.linalg.norm(svd.inverse_transform(svd.transform(A)) - A) / t_n[n]

    svd = TruncatedSVD(t_n[n] / 10, 'arpack')

    def f():
        svd.fit(A)
    t_arpack[n] = time_f(f, 10)
    t_arpack_e[n] = np.linalg.norm(svd.inverse_transform(svd.transform(A)) - A) / t_n[n]


# In[ ]:


plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(t_n, t_my, label='My beauty');
plt.plot(t_n, t_arpack, label='ARPACK');
plt.plot(t_n, t_random, label='Halko et al.');
plt.xlabel('m')
plt.ylabel('running time')
plt.legend();
plt.subplot(1, 2, 2)
plt.plot(t_n, t_my_e, label='My beauty');
plt.plot(t_n, t_arpack_e, label='ARPACK');
plt.plot(t_n, t_random_e, label='Halko et al.');
plt.xlabel('m')
plt.ylabel('reconstruction error')
plt.legend();


# # Dimensionality reduction

# In[ ]:


f = gzip.open('mnist.pkl.gz', 'rb')
(mnist, labels), _, _ = cPickle.load(f)
f.close()

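# For a random projection matrix with entries $A_{ij} \sim \mathcal{N}(0, 1/k)$, as
# constructed below, squared distances are preserved in expectation,
# \begin{equation*}
# \mathbb{E}\,\|x A - x' A\|^2 = \|x - x'\|^2,
# \end{equation*}
# which is the Johnson-Lindenstrauss intuition probed by the histograms of $d_z / d_x$ below.
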
# In[ ]:


i = 1
plt.figure(figsize=(10, 10))
for k in [10, 50, 100, 200]:
    plt.subplot(2, 2, i)
    x1 = mnist[np.random.permutation(mnist.shape[0])[0:1000]]
    x2 = mnist[np.random.permutation(mnist.shape[0])[0:1000]]
    dx = pairwise_distances(x2, x2).ravel()

    # random projection to k dimensions
    A = np.random.randn(x1.shape[1], k) * 1. / np.sqrt(k)
    Ai = np.linalg.pinv(A, 1e-3)
    z2 = np.dot(x2, A)
    xz = np.dot(z2, Ai)
    dz = pairwise_distances(z2, z2).ravel()

    # PCA to k dimensions, fitted on an independent sample
    pca = PCA(k).fit(x1)
    p2 = pca.transform(x2)
    xp = pca.inverse_transform(p2)
    dp = pairwise_distances(p2, p2).ravel()

    plt.hist(dz / (dx + 1e-6), bins=100, alpha=0.5, normed=True, label='random');
    #plt.hist(dp / (dx + 1e-6), bins=100, alpha=0.5, normed=True, label='pca');
    plt.title(str(k) + ' projections')
    plt.legend()
    i = i + 1
plt.tight_layout()


# ## Beware! There are many ways of being "far away" from something!

# In[ ]:


k = 10  # ten latent components

A = np.random.randn(x1.shape[1], k) * 1. / np.sqrt(k)
Ai = np.linalg.pinv(A, 1e-3)
z2 = np.dot(x2, A)
xz = np.dot(z2, Ai)
dz = pairwise_distances(z2, z2).ravel()

pca = PCA(k).fit(x1)
p2 = pca.transform(x2)
xp = pca.inverse_transform(p2)
dp = pairwise_distances(p2, p2).ravel()

# PCA reconstructions
plt.figure(figsize=(10, 10))
for i in xrange(100):
    plt.subplot(10, 10, i + 1)
    plt.imshow(xp[i].reshape(28, 28), cmap='Greys_r')
    plt.axis('off')
plt.show()

# random-projection reconstructions
plt.figure(figsize=(10, 10))
for i in xrange(100):
    plt.subplot(10, 10, i + 1)
    plt.imshow(xz[i].reshape(28, 28), cmap='Greys_r')
    plt.axis('off')
plt.show()


# # Kernel methods and random features

# In[ ]:


np.random.seed(1)


def random_features(d, k, gamma):
    # random Fourier features for the RBF kernel with parameter gamma
    w = np.random.randn(d, k) * np.sqrt(2. * gamma)
    b = 2. * np.pi * np.random.rand(k)

    def foo(x):
        return np.sqrt(2. / k) * np.cos(np.dot(x, w) + b)
    return foo


def estimate_gamma(x, n=100):
    # median heuristic for the RBF kernel parameter
    x_sub = x[np.random.permutation(x.shape[0])[0:n]]
    return 1. / np.median(np.power(pairwise_distances(x_sub, x_sub), 2).flatten())


x = mnist[np.random.permutation(mnist.shape[0])[0:100]]
g = estimate_gamma(x)

plt.figure(figsize=(6, 6))
plt.subplot(2, 2, 1)
K = rbf_kernel(x, x, g)
plt.imshow(K)
plt.title('exact kernel')
plt.axis('off');

p = 1
for k in [100, 1000, 10000]:
    plt.subplot(2, 2, p + 1)
    rf = random_features(x.shape[1], k, g)
    plt.imshow(np.dot(rf(x), rf(x).T))
    plt.title(k)
    plt.axis('off');
    p += 1
plt.tight_layout()

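# A rough numerical check of the approximation $k(x, x') \approx z(x)^\top z(x')$ visualized
# above, reusing `random_features` and `estimate_gamma`; the names `x_check`, `g_check` and
# `rf_check` are just local to this sketch.

# In[ ]:


x_check = mnist[np.random.permutation(mnist.shape[0])[0:100]]
g_check = estimate_gamma(x_check)
K_exact = rbf_kernel(x_check, x_check, g_check)
for k in [100, 1000, 10000]:
    rf_check = random_features(x_check.shape[1], k, g_check)
    Z = rf_check(x_check)
    # largest entrywise deviation from the exact kernel matrix
    print k, np.abs(np.dot(Z, Z.T) - K_exact).max()
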
# ## Does it blend on MNIST?

# In[ ]:


f = gzip.open('mnist.pkl.gz', 'rb')
(x_tr, y_tr), (x_va, y_va), (x_te, y_te) = cPickle.load(f)
f.close()

x_tr = np.vstack((x_tr, x_va))
y_tr = np.hstack((y_tr, y_va))

rf = random_features(x_tr.shape[1], 2048, estimate_gamma(x_tr))
z_tr = rf(x_tr)
z_va = rf(x_va)
z_te = rf(x_te)


# In[ ]:


svm = LinearSVC().fit(z_tr, y_tr)
print np.mean(svm.predict(z_te) == y_te)


# ## How do we deal with uncertainty?

# The predictive distribution of Bayesian linear regression is
# \begin{equation*}
# p(f | x, X, y) = \mathcal{N} \left( \sigma^{-2}_n x^\top A^{-1} X y,\; x^\top A^{-1} x \right),
# \end{equation*}
# where
# \begin{equation*}
# A = \sigma^{-2}_n X X^\top + \Sigma_p^{-1}
# \end{equation*}
# and the prior over the weights is
# \begin{equation*}
# w \sim \mathcal{N}(0, \Sigma_p).
# \end{equation*}

# In[ ]:


def bayesian_linear_regression(X, y, Sigma_p=None, sigma_n=None):
    if Sigma_p is None:
        Sigma_p = np.diag(np.repeat(1, X.shape[1]))
    if sigma_n is None:
        sigma_n = 1
    A = np.power(sigma_n, -2) * np.dot(X.T, X) + np.linalg.inv(Sigma_p)
    Ai = np.linalg.inv(A)
    C = np.dot(np.dot(Ai, X.T), y)

    def mean(x):
        return np.power(sigma_n, -2) * np.dot(x, C)

    def variance(x):
        # predictive variance x^T A^{-1} x from the equation above
        return np.diag(np.dot(np.dot(x, Ai), x.T))[:, np.newaxis]
    return (mean, variance)


def f(x):
    return np.sin(5 * x) + np.random.randn(x.shape[0], 1) * 0.1


def estimate_gamma(x, n=100):
    x_sub = x[np.random.permutation(x.shape[0])[0:n]]
    return 1. / np.median(np.power(pairwise_distances(x_sub, x_sub), 2).flatten())


np.random.seed(0)

N = 1000
K = 1000

x_tr = np.random.randn(N, 1)
y_tr = f(x_tr)

s_x = StandardScaler().fit(x_tr)
s_y = StandardScaler().fit(y_tr)
x_tr = s_x.transform(x_tr)
y_tr = s_y.transform(y_tr)

rf = random_features(1, K, estimate_gamma(x_tr))
z_tr = rf(x_tr)
s_z = StandardScaler().fit(z_tr)
z_tr = s_z.transform(z_tr)

(mean, variance) = bayesian_linear_regression(z_tr, y_tr, None, 1)

x_te = 0.75 * np.sort(np.random.randn(N, 1), 0)
#x_te = 1.5*np.sort(np.random.randn(N,1),0)
y_te = f(x_te)
x_te = s_x.transform(x_te)
y_te = s_y.transform(y_te)
z_te = rf(x_te)
z_te = s_z.transform(z_te)

plt.plot(x_te, y_te, '.', label='test data')
plt.plot(x_tr, y_tr, '.', color='red', label='training data')
plt.plot(x_te, mean(z_te), lw=3, label='prediction')
plt.fill(np.concatenate([x_te, x_te[::-1]]),
         np.concatenate([mean(z_te) - 1.9600 * np.sqrt(variance(z_te)),
                        (mean(z_te) + 1.9600 * np.sqrt(variance(z_te)))[::-1]]),
         alpha=.25, fc='g', ec='None', label='95% confidence interval')
plt.xlim(np.min(x_te), np.max(x_te));
plt.legend(loc='best')
plt.title(np.power(y_te - mean(z_te), 2).mean())


# # Nonlinear component analysis

# In[ ]:


f = gzip.open('mnist.pkl.gz', 'rb')
(x_tr, y_tr), (x_va, y_va), (x_te, y_te) = cPickle.load(f)
f.close()

x_tr = np.vstack((x_tr, x_va))
y_tr = np.hstack((y_tr, y_va))

rf = random_features(x_tr.shape[1], 2048, estimate_gamma(x_tr))
z_tr = rf(x_tr)
z_va = rf(x_va)
z_te = rf(x_te)

xz_tr = z_tr
xz_te = z_te

# project the random features to 10 principal components ...
pca = RandomizedPCA(10).fit(z_tr)
p_tr = pca.transform(z_tr)
p_te = pca.transform(z_te)

# ... and reconstruct the original images from them with ridge regression
# on a second layer of random features
rf = random_features(p_tr.shape[1], 2048, estimate_gamma(p_tr))
pz_tr = rf(p_tr)
pz_te = rf(p_te)

ridge = Ridge().fit(pz_tr, x_tr)
rec_tr = ridge.predict(pz_tr)
rec_te = ridge.predict(pz_te)


# In[ ]:


plt.figure(figsize=(10, 10))
for i in xrange(100):
    plt.subplot(10, 10, i + 1)
    plt.imshow(rec_tr[i].reshape(28, 28), cmap='Greys_r')
    plt.axis('off')
plt.show()

plt.figure(figsize=(10, 10))
for i in xrange(100):
    plt.subplot(10, 10, i + 1)
    plt.imshow(rec_te[i].reshape(28, 28), cmap='Greys_r')
    plt.axis('off')
plt.show()


# # Dependence measurement
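# The statistic computed below is a randomized analogue of kernel canonical correlation:
# map $x$ and $y$ through independent random feature maps $z_x(\cdot)$ and $z_y(\cdot)$ and
# take the correlation of the first pair of canonical variates,
# \begin{equation*}
# \rho = \max_{\alpha, \beta}\ \mathrm{corr}\left( \alpha^\top z_x(x),\ \beta^\top z_y(y) \right),
# \end{equation*}
# which can pick up nonlinear dependence that the plain Pearson correlation of $x$ and $y$ misses.
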
# In[ ]:


def random_features(d, k, gamma):
    w = np.random.randn(d, k) * np.sqrt(2. * gamma)
    b = 2. * np.pi * np.random.rand(k)

    def foo(x):
        return np.sqrt(2. / k) * np.cos(np.dot(x, w) + b)
    return foo


def experiment_sin(n=1000, k=100, freq=3, noise=.1):
    x = np.random.randn(n, 1)
    y = np.sin(freq * x) + noise * np.random.randn(n, 1)
    #t = 2*np.pi*np.random.rand(n,1)
    #x = np.cos(freq*t)+noise*np.random.randn(n,1)
    #y = np.sin(freq*t)+noise*np.random.randn(n,1)

    rf_x = random_features(x.shape[1], k, estimate_gamma(x))
    z_x = rf_x(x)
    rf_y = random_features(y.shape[1], k, estimate_gamma(y))
    z_y = rf_y(y)

    f, g = CCA(n_components=1).fit_transform(z_x, z_y)
    return x, y, f, g


# In[ ]:


x, y, f, g = experiment_sin()

plt.figure(figsize=(8, 8))
plt.subplot(2, 2, 1)
plt.plot(x, y, '.')
plt.title('Correlation: ' + str(pearsonr(x, y)[0][0]))
plt.subplot(2, 2, 2)
plt.plot(f, g, '.')
plt.title('Correlation: ' + str(pearsonr(f, g)[0][0]))
plt.subplot(2, 2, 3)
plt.plot(x, f, '.')
plt.title('x-transformation')
plt.xlabel('x')
plt.ylabel('f')
plt.subplot(2, 2, 4)
plt.plot(y, g, '.')
plt.title('y-transformation')
plt.xlabel('y')
plt.ylabel('g')
plt.tight_layout()


# In[ ]:


freqs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
n_reps = 10
corr_freqs = np.zeros(len(freqs))

for i in xrange(n_reps):
    for fi in xrange(len(freqs)):
        x, y, f, g = experiment_sin(freq=freqs[fi])
        corr_freqs[fi] += pearsonr(f, g)[0][0] / n_reps

plt.plot(freqs, corr_freqs)
plt.title('Degradation with respect to frequency')


# # Low-dimensional kernel mean embeddings

# ## Two-sample testing
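# Given samples $X = \{x_i\}_{i=1}^n$ and $Y = \{y_j\}_{j=1}^m$, the (biased) maximum mean
# discrepancy estimate computed by `mmd` below is
# \begin{equation*}
# \widehat{\mathrm{MMD}}^2(X, Y) = \frac{1}{n^2} \sum_{i,j} k(x_i, x_j)
# - \frac{2}{n m} \sum_{i,j} k(x_i, y_j) + \frac{1}{m^2} \sum_{i,j} k(y_i, y_j),
# \end{equation*}
# and its random-feature approximation `rmmd` is simply the squared distance between the
# empirical feature means, $\|\bar{z}(X) - \bar{z}(Y)\|^2$.
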
# In[ ]:


f = gzip.open('mnist.pkl.gz', 'rb')
(x_tr, y_tr), (x_va, y_va), (x_te, y_te) = cPickle.load(f)
f.close()

x5_tr = x_tr[np.where(y_tr == 5)]
x8_tr = x_tr[np.where(y_tr == 8)]
x5_va = x_va[np.where(y_va == 5)]
x8_va = x_va[np.where(y_va == 8)]
x5_te = x_te[np.where(y_te == 5)]
x8_te = x_te[np.where(y_te == 8)]

gamma = estimate_gamma(x5_tr, 100)
rf = random_features(x5_tr.shape[1], 100, gamma)


def mmd(x, y, gamma):
    # exact (biased) MMD^2 estimate from the full kernel matrices
    return (rbf_kernel(x, gamma=gamma).mean()
            - 2 * rbf_kernel(x, y, gamma=gamma).mean()
            + rbf_kernel(y, gamma=gamma).mean())


def rmmd(x, y, rf):
    # random-feature approximation: squared distance between feature means
    return np.power(np.linalg.norm(rf(x).mean(0) - rf(y).mean(0)), 2)


# In[ ]:


print mmd(x5_tr, x8_tr, gamma)
print rmmd(x5_tr, x8_tr, rf)


# In[ ]:


print mmd(x5_tr, x5_va, gamma)
print rmmd(x5_tr, x5_va, rf)


# In[ ]:


def f():
    mmd(x5_tr, x5_va, gamma)
print time_f(f, 10)


def f():
    rmmd(x5_tr, x5_va, rf)
print time_f(f, 10)


# ## Distribution classification

# In[ ]:


from sklearn.ensemble import RandomForestClassifier as RFC
from scipy.interpolate import UnivariateSpline as sp
from sklearn.preprocessing import scale
from sklearn.mixture import GMM


def continuous_cause(n, k, p1, p2):
    # sample a cause variable from a random k-component Gaussian mixture
    g = GMM(k)
    g.means_ = p1 * np.random.randn(k, 1)
    g.covars_ = np.abs(p2 * np.random.randn(k, 1) + 1)
    g.weights_ = abs(np.random.rand(k, 1))
    g.weights_ = g.weights_ / sum(g.weights_)
    return scale(g.sample(n))


def noise(n, v):
    return v * np.random.randn(n, 1)


def mechanism(x, d):
    # random smooth mechanism: a spline through d random knots
    g = np.linspace(np.min(x) - np.std(x), np.max(x) + np.std(x), d)
    return sp(g, np.random.randn(d))(x.flatten())[:, np.newaxis]


def pair(n, k, p1, p2, v, d):
    x = continuous_cause(n, k, p1, p2)
    y = scale(scale(mechanism(x, d)) + noise(n, v))
    return (x, y)


def synset(N):
    # N cause->effect samples labelled 0 plus their flipped copies labelled 1
    res_x_fw = []
    res_x_bw = []
    for i in range(N):
        n = np.int(np.power(10, np.random.uniform(2, 4)))
        k = np.random.randint(1, 5)
        p1 = np.random.uniform(0, 5)
        p2 = np.random.uniform(0, 5)
        v = np.random.uniform(0, 5)
        d = np.random.randint(4, 5)
        (x, y) = pair(n, k, p1, p2, v, d)
        res_x_fw += [np.hstack((x, y))]
        res_x_bw += [np.hstack((y, x))]
    return (res_x_fw + res_x_bw, np.hstack((np.zeros(N), np.ones(N))))


def kdist(args):
    # random-feature embedding of a joint sample: means of cos features
    # of x, of y, and of x+y
    x, w = args
    fx = np.dot(x[:, 0][:, np.newaxis], w[0, :][np.newaxis, :])
    fy = np.dot(x[:, 1][:, np.newaxis], w[1, :][np.newaxis, :])
    return np.hstack((np.cos(fx).mean(0), np.cos(fy).mean(0), np.cos(fx + fy).mean(0)))


def fdist(x, w):
    return np.array([kdist((scale(xi), w)) for xi in x])


def gen_random_parameter(M=100, G=[0.1, 1, 10]):
    # random frequencies at several bandwidths, shared across all samples
    return np.vstack([np.sqrt(g) * np.random.randn(M, 2) for g in G]).T


def gen_dataset(w, N=100):
    x, y = synset(N)
    z = fdist(x, w)
    p = np.random.permutation(2 * N)
    x2 = []
    for i in xrange(len(p)):
        x2.append(x[p[i]])
    return x2, y[p], z[p]


# In[ ]:


w = gen_random_parameter()
x, y, z = gen_dataset(w)

plt.figure(figsize=(8, 8))
p = np.random.permutation(len(x))
for i in xrange(25):
    plt.subplot(5, 5, i + 1)
    plt.plot(x[p[i]][:, 0], x[p[i]][:, 1], '.')
    plt.title(y[p[i]])
    plt.axis('off')
plt.tight_layout()


# In[ ]:


_, y_tr, z_tr = gen_dataset(w, N=1000)
_, y_te, z_te = gen_dataset(w, N=1000)


# In[ ]:


from sklearn.ensemble import RandomForestClassifier
print np.mean(RandomForestClassifier(n_estimators=500).fit(z_tr, y_tr).predict(z_te) == y_te)