%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import RandomizedPCA
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import CCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.svm import LinearSVC
from scipy.stats import pearsonr
from random import shuffle
import cPickle, gzip
import numpy as np
import time
def time_f(f,n):
start = time.time()
for i in xrange(n):
f()
return (time.time()-start)/n
# download http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
f = gzip.open('mnist.pkl.gz', 'rb')
(mnist,labels), _, _ = cPickle.load(f)
f.close()
def estimate_pi(n = 1000):
return np.mean(np.sum(np.power(np.random.rand(int(n),2),2),1) <= 1)*4
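A one-line justification: for $u, v \sim \mathcal{U}[0,1]$, the point $(u,v)$ falls inside the quarter of the unit disc with probability equal to its area, so

$$\Pr\left[u^2 + v^2 \leq 1\right] = \frac{\pi}{4}, \qquad \hat{\pi} = \frac{4}{n}\sum_{i=1}^{n} \mathbb{1}\{u_i^2 + v_i^2 \leq 1\},$$

which is exactly the sample mean computed above.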
np.random.seed(0)
n_arr = np.logspace(1,6,10)
p_arr = np.zeros(len(n_arr))
n_reps = 10
for rep in xrange(n_reps):
for i in xrange(len(n_arr)):
p_arr[i] += np.abs(estimate_pi(n_arr[i])-np.pi)/n_reps
plt.plot(np.log(n_arr), np.log(p_arr));
plt.xlabel('log number of samples');
plt.ylabel('log absolute error');
def my_svd(A, k):
    # sketch the range of A with a Gaussian test matrix (oversampling to 2k)
    Omega = np.random.randn(A.shape[1],2*k)
    Y = np.dot(A,Omega)
    # orthonormal basis Q for the sketch, so A is approximately Q Q^T A
    Q, _ = np.linalg.qr(Y)
    # SVD of the small matrix B = Q^T A, then lift back: U = Q U_tilde
    B = np.dot(Q.T, A)
    U_tilde, Sigma, V = np.linalg.svd(B, full_matrices=False)
    U = np.dot(Q, U_tilde)[:,0:k]
    Sigma = np.diag(Sigma[0:k])
    V = V[0:k,:]
    # return the rank-k reconstruction of A
    return np.dot(np.dot(U,Sigma),V)
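In matrix form, `my_svd` follows the randomized recipe of Halko et al. (with oversampling $2k$): sketch, orthogonalize, factorize the small matrix, lift back,

$$Y = A\Omega, \qquad Y = QR, \qquad B = Q^\top A = \tilde{U} \Sigma V^\top, \qquad A \approx (Q\tilde{U})_{:,1:k}\,\Sigma_{1:k}\,V_{1:k,:},$$

so the only dense SVD happens on the $2k \times n$ matrix $B$ rather than on $A$ itself.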
t_n = [125,250,500,1000,2000]
t_my = np.zeros(len(t_n))
t_random = np.zeros(len(t_n))
t_arpack = np.zeros(len(t_n))
t_random_e = np.zeros(len(t_n))
t_arpack_e = np.zeros(len(t_n))
t_my_e = np.zeros(len(t_n))
for n in xrange(len(t_n)):
A = np.dot(np.random.randn(t_n[n],2*t_n[n]/10),np.random.randn(2*t_n[n]/10,t_n[n]))
def f(): my_svd(A, t_n[n]/10)
t_my[n] = time_f(f,10)
t_my_e[n] = np.linalg.norm(my_svd(A, t_n[n]/10)-A)/t_n[n]
svd = TruncatedSVD(t_n[n]/10, 'randomized')
def f(): svd.fit(A)
t_random[n] = time_f(f,10)
t_random_e[n] = np.linalg.norm(svd.inverse_transform(svd.transform(A))-A)/t_n[n]
svd = TruncatedSVD(t_n[n]/10, 'arpack')
def f(): svd.fit(A)
t_arpack[n] = time_f(f,10)
t_arpack_e[n] = np.linalg.norm(svd.inverse_transform(svd.transform(A))-A)/t_n[n]
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(t_n,t_my, label='My beauty');
plt.plot(t_n,t_arpack, label='LAPACK');
plt.plot(t_n,t_random, label='Halko et al.');
plt.xlabel('m')
plt.ylabel('running time')
plt.legend();
plt.subplot(1,2,2)
plt.plot(t_n,t_my_e, label='My beauty');
plt.plot(t_n,t_arpack_e, label='LAPACK');
plt.plot(t_n,t_random_e, label='Halko et al.');
plt.xlabel('m')
plt.ylabel('reconstruction error')
plt.legend();
i = 1
plt.figure(figsize=(10,10))
for k in [10,50,100,200]:
plt.subplot(2,2,i)
x1 = mnist[np.random.permutation(mnist.shape[0])[0:1000]]
x2 = mnist[np.random.permutation(mnist.shape[0])[0:1000]]
dx = pairwise_distances(x2,x2).ravel()
A = np.random.randn(x1.shape[1],k)*1./np.sqrt(k)
Ai = np.linalg.pinv(A, 1e-3)
z2 = np.dot(x2,A)
xz = np.dot(z2,Ai)
dz = pairwise_distances(z2,z2).ravel()
pca = PCA(k).fit(x1)
p2 = pca.transform(x2)
xp = pca.inverse_transform(p2)
dp = pairwise_distances(p2,p2).ravel()
plt.hist(dz/(dx+1e-6),bins=100, alpha=0.5, normed=True, label='random');
#plt.hist(dp/(dx+1e-6),bins=100, alpha=0.5, normed=True, label='pca');
plt.title(str(k) + ' projections')
plt.legend()
i = i+1
plt.tight_layout()
k = 10 # ten latent components
A = np.random.randn(x1.shape[1],k)*1./np.sqrt(k)
Ai = np.linalg.pinv(A, 1e-3)
z2 = np.dot(x2,A)
xz = np.dot(z2,Ai)
dz = pairwise_distances(z2,z2).ravel()
pca = PCA(k).fit(x1)
p2 = pca.transform(x2)
xp = pca.inverse_transform(p2)
dp = pairwise_distances(p2,p2).ravel()
plt.figure(figsize=(10,10))
for i in xrange(100):
plt.subplot(10,10,i+1)
plt.imshow(xp[i].reshape(28,28), cmap='Greys_r')
plt.axis('off')
plt.show()
plt.figure(figsize=(10,10))
for i in xrange(100):
plt.subplot(10,10,i+1)
plt.imshow(xz[i].reshape(28,28), cmap='Greys_r')
plt.axis('off')
plt.show()
np.random.seed(1)
def random_features(d,k,gamma):
w = np.random.randn(d,k)*np.sqrt(2.*gamma)
b = 2.*np.pi*np.random.rand(k)
def foo(x):
return np.sqrt(2./k)*np.cos(np.dot(x,w)+b)
return foo
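These are random Fourier features in the sense of Rahimi and Recht: with the columns of $W$ drawn from $\mathcal{N}(0, 2\gamma I)$ and $b \sim \mathcal{U}[0, 2\pi]$, Bochner's theorem gives

$$\mathbb{E}_{W,b}\left[ z(x)^\top z(y) \right] = \exp\left(-\gamma \|x - y\|^2\right), \qquad z(x) = \sqrt{\tfrac{2}{k}}\,\cos(W^\top x + b),$$

so inner products of the $k$-dimensional features approximate the RBF kernel computed exactly by `rbf_kernel(x, y, gamma)`.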
def estimate_gamma(x,n=100):
    # median heuristic: gamma = 1/median of the squared pairwise distances
    x_sub = x[np.random.permutation(x.shape[0])[0:n]]
    return 1./np.median(np.power(pairwise_distances(x_sub,x_sub),2).flatten())
x = mnist[np.random.permutation(mnist.shape[0])[0:100]]
g = estimate_gamma(x)
plt.figure(figsize=(6,6))
plt.subplot(2,2,1)
K = rbf_kernel(x,x,g)
plt.imshow(K)
plt.title('exact kernel')
plt.axis('off');
p = 1
for k in [100,1000,10000]:
plt.subplot(2,2,p+1)
rf = random_features(x.shape[1], k,g)
plt.imshow(np.dot(rf(x),rf(x).T))
plt.title(k)
plt.axis('off');
p += 1
plt.tight_layout()
f = gzip.open('mnist.pkl.gz', 'rb')
(x_tr, y_tr), (x_va, y_va), (x_te, y_te) = cPickle.load(f)
f.close()
x_tr = np.vstack((x_tr,x_va))
y_tr = np.hstack((y_tr,y_va))
rf = random_features(x_tr.shape[1], 2048, estimate_gamma(x_tr))
z_tr = rf(x_tr)
z_va = rf(x_va)
z_te = rf(x_te)
svm = LinearSVC().fit(z_tr,y_tr)
print np.mean(svm.predict(z_te)==y_te)
The predictive distribution of Bayesian linear regression is $p(f_* \mid x_*, X, y) = \mathcal{N}\left(\sigma_n^{-2}\, x_*^\top A^{-1} X^\top y,\; x_*^\top A^{-1} x_*\right)$, where $A = \sigma_n^{-2} X^\top X + \Sigma_p^{-1}$ and the rows of $X$ are the training points.
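In the experiment below, the raw input $x$ is replaced by (standardized) random Fourier features $z(x)$, so this performs approximately Gaussian process regression with the RBF kernel: for a weight prior $w \sim \mathcal{N}(0, \Sigma_p)$ with $\Sigma_p = I$,

$$f(x) = w^\top z(x) \;\Longrightarrow\; \mathrm{Cov}\left[f(x), f(y)\right] = z(x)^\top z(y) \approx \exp\left(-\gamma\|x-y\|^2\right).$$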
def bayesian_linear_regression(X,y,Sigma_p=None,sigma_n=None):
    if Sigma_p is None:
        Sigma_p = np.diag(np.repeat(1,X.shape[1]))
    if sigma_n is None:
        sigma_n = 1
    # A = sigma_n^{-2} X^T X + Sigma_p^{-1}, as in the predictive distribution above
    A = np.power(sigma_n,-2)*np.dot(X.T,X)+np.linalg.inv(Sigma_p)
    Ai = np.linalg.inv(A)
    C = np.dot(np.dot(Ai,X.T),y)
    def mean(x):
        return np.power(sigma_n,-2)*np.dot(x,C)
    def variance(x):
        # predictive variance x A^{-1} x^T, one diagonal entry per test point
        return np.diag(np.dot(np.dot(x,Ai),x.T))[:,np.newaxis]
    return (mean, variance)
def f(x):
return np.sin(5*x)+np.random.randn(x.shape[0],1)*0.1
np.random.seed(0)
N = 1000
K = 1000
x_tr = np.random.randn(N,1)
y_tr = f(x_tr)
s_x = StandardScaler().fit(x_tr)
s_y = StandardScaler().fit(y_tr)
x_tr = s_x.transform(x_tr)
y_tr = s_y.transform(y_tr)
rf = random_features(1,K,estimate_gamma(x_tr))
z_tr = rf(x_tr)
s_z = StandardScaler().fit(z_tr)
z_tr = s_z.transform(z_tr)
(mean, variance) = bayesian_linear_regression(z_tr,y_tr,None,1)
x_te = 0.75*np.sort(np.random.randn(N,1),0)
#x_te = 1.5*np.sort(np.random.randn(N,1),0)
y_te = f(x_te)
x_te = s_x.transform(x_te)
y_te = s_y.transform(y_te)
z_te = rf(x_te)
z_te = s_z.transform(z_te)
plt.plot(x_te,y_te,'.', label='test data')
plt.plot(x_tr,y_tr,'.',color='red', label='training data')
plt.plot(x_te,mean(z_te),lw=3, label='prediction')
plt.fill(np.concatenate([x_te, x_te[::-1]]),
np.concatenate([mean(z_te) - 1.9600 * np.sqrt(variance(z_te)),
(mean(z_te) + 1.9600 * np.sqrt(variance(z_te)))[::-1]]),
alpha=.25, fc='g', ec='None', label='95% confidence interval')
plt.xlim(np.min(x_te),np.max(x_te));
plt.legend(loc='best')
plt.title('test MSE: ' + str(np.power(y_te-mean(z_te),2).mean()))
f = gzip.open('mnist.pkl.gz', 'rb')
(x_tr, y_tr), (x_va, y_va), (x_te, y_te) = cPickle.load(f)
f.close()
x_tr = np.vstack((x_tr,x_va))
y_tr = np.hstack((y_tr,y_va))
rf = random_features(x_tr.shape[1], 2048, estimate_gamma(x_tr))
z_tr = rf(x_tr)
z_va = rf(x_va)
z_te = rf(x_te)
xz_tr = z_tr
xz_te = z_te
# compress the 2048 random features down to 10 principal components
pca = RandomizedPCA(10).fit(z_tr)
p_tr = pca.transform(z_tr)
p_te = pca.transform(z_te)
# decode: ridge regression from random features of the 10-dim codes back to pixels
rf = random_features(p_tr.shape[1], 2048, estimate_gamma(p_tr))
pz_tr = rf(p_tr)
pz_te = rf(p_te)
ridge = Ridge().fit(pz_tr, x_tr)
rec_tr = ridge.predict(pz_tr)
rec_te = ridge.predict(pz_te)
plt.figure(figsize=(10,10))
for i in xrange(100):
plt.subplot(10,10,i+1)
plt.imshow(rec_tr[i].reshape(28,28), cmap='Greys_r')
plt.axis('off')
plt.show()
plt.figure(figsize=(10,10))
for i in xrange(100):
plt.subplot(10,10,i+1)
plt.imshow(rec_te[i].reshape(28,28), cmap='Greys_r')
plt.axis('off')
plt.show()
def experiment_sin(n=1000,k=100,freq=3,noise=.1):
    x = np.random.randn(n,1)
    y = np.sin(freq*x)+noise*np.random.randn(n,1)
    #t = 2*np.pi*np.random.rand(n,1)
    #x = np.cos(freq*t)+noise*np.random.randn(n,1)
    #y = np.sin(freq*t)+noise*np.random.randn(n,1)
    rf_x = random_features(x.shape[1],k,estimate_gamma(x))
    z_x = rf_x(x)
    rf_y = random_features(y.shape[1],k,estimate_gamma(y))
    z_y = rf_y(y)
    f, g = CCA(n_components=1).fit_transform(z_x,z_y)
    return x, y, f, g
x, y, f, g = experiment_sin()
plt.figure(figsize=(8,8))
plt.subplot(2,2,1)
plt.plot(x,y,'.')
plt.title('Correlation: ' + str(pearsonr(x,y)[0][0]))
plt.subplot(2,2,2)
plt.plot(f,g,'.')
plt.title('Correlation: ' + str(pearsonr(f,g)[0][0]))
plt.subplot(2,2,3)
plt.plot(x,f,'.')
plt.title('x-transformation')
plt.xlabel('x')
plt.ylabel('f')
plt.subplot(2,2,4)
plt.plot(y,g,'.')
plt.title('y-transformation')
plt.xlabel('y')
plt.ylabel('g')
plt.tight_layout()
freqs = [1,2,3,4,5,6,7,8,9,10]
corr_freqs = np.zeros(len(freqs))
n_reps = 10
for rep in xrange(n_reps):
    for fi in xrange(len(freqs)):
        x, y, f, g = experiment_sin(freq=freqs[fi])
        corr_freqs[fi] += pearsonr(f,g)[0][0]/n_reps
plt.plot(freqs, corr_freqs)
plt.title('Degradation with respect to frequency')
f = gzip.open('mnist.pkl.gz', 'rb')
(x_tr,y_tr), (x_va,y_va), (x_te,y_te) = cPickle.load(f)
f.close()
x5_tr = x_tr[np.where(y_tr==5)]
x8_tr = x_tr[np.where(y_tr==8)]
x5_va = x_va[np.where(y_va==5)]
x8_va = x_va[np.where(y_va==8)]
x5_te = x_te[np.where(y_te==5)]
x8_te = x_te[np.where(y_te==8)]
gamma = estimate_gamma(x5_tr, 100)
rf = random_features(x5_tr.shape[1],100,gamma)
def mmd(x,y,gamma):
return rbf_kernel(x,gamma=gamma).mean()-\
2*rbf_kernel(x,y,gamma=gamma).mean()+\
rbf_kernel(y,gamma=gamma).mean()
def rmmd(x,y,rf):
return np.power(np.linalg.norm(rf(x).mean(0)-rf(y).mean(0)),2)
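Both functions estimate the squared maximum mean discrepancy between the two samples. `mmd` computes the biased V-statistic of

$$\mathrm{MMD}^2(P,Q) = \mathbb{E}_{x,x'}\left[k(x,x')\right] - 2\,\mathbb{E}_{x,y}\left[k(x,y)\right] + \mathbb{E}_{y,y'}\left[k(y,y')\right],$$

while `rmmd` uses the random-feature identity $\mathrm{MMD}^2(P,Q) = \|\mu_P - \mu_Q\|_{\mathcal{H}}^2 \approx \|\bar{z}(x) - \bar{z}(y)\|_2^2$, with $\bar{z}$ the feature map averaged over each sample; this drops the cost from quadratic to linear in the number of points.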
print mmd(x5_tr,x8_tr,gamma)
print rmmd(x5_tr,x8_tr,rf)
print mmd(x5_tr,x5_va,gamma)
print rmmd(x5_tr,x5_va,rf)
def f(): mmd(x5_tr,x5_va,gamma)
print time_f(f,10)
def f(): rmmd(x5_tr,x5_va,rf)
print time_f(f,10)
from sklearn.ensemble import RandomForestClassifier
from scipy.interpolate import UnivariateSpline as sp
from sklearn.preprocessing import scale
from sklearn.mixture import GMM
def continuous_cause(n,k,p1,p2):
g = GMM(k)
g.means_ = p1*np.random.randn(k,1)
g.covars_ = np.abs(p2*np.random.randn(k,1)+1)
g.weights_ = abs(np.random.rand(k,1))
g.weights_ = g.weights_/sum(g.weights_)
return scale(g.sample(n))
def noise(n,v):
return v*np.random.randn(n,1)
def mechanism(x,d):
g = np.linspace(min(x)-np.std(x),max(x)+np.std(x),d)
return sp(g,np.random.randn(d))(x.flatten())[:,np.newaxis]
def pair(n,k,p1,p2,v,d):
x = continuous_cause(n,k,p1,p2)
y = scale(scale(mechanism(x,d))+noise(n,v))
return (x,y)
def synset(N):
    # N scatterplots in the causal direction (label 0) and N flipped copies (label 1)
    res_x_fw = []; res_x_bw = []
    for i in range(N):
        n = np.int(np.power(10,np.random.uniform(2,4)))
        k = np.random.randint(1,5)
        p1 = np.random.uniform(0,5)
        p2 = np.random.uniform(0,5)
        v = np.random.uniform(0,5)
        d = np.random.randint(4,5)
        (x,y) = pair(n,k,p1,p2,v,d)
        res_x_fw += [np.hstack((x,y))]
        res_x_bw += [np.hstack((y,x))]
    return (res_x_fw + res_x_bw,np.hstack((np.zeros(N),np.ones(N))))
def kdist(args):
x,w = args
fx = np.dot(x[:,0][:,np.newaxis],w[0,:][np.newaxis,:])
fy = np.dot(x[:,1][:,np.newaxis],w[1,:][np.newaxis,:])
return np.hstack((np.cos(fx).mean(0),np.cos(fy).mean(0),np.cos(fx+fy).mean(0)))
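`kdist` turns a whole scatterplot $S = \{(x_i, y_i)\}$ into one fixed-length vector: random-feature mean embeddings of the two marginals and of the joint,

$$m(S) = \Big(\overline{\cos(w_x x)},\; \overline{\cos(w_y y)},\; \overline{\cos(w_x x + w_y y)}\Big),$$

where the bar denotes the empirical mean over the sample and the random frequencies are drawn at several bandwidths (`gen_random_parameter` below mixes $G = [0.1, 1, 10]$). A standard classifier trained on these vectors can then try to tell causal from anticausal orderings.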
def fdist(x,w):
return np.array([kdist((scale(xi),w)) for xi in x])
def gen_random_parameter(M=100, G=[0.1,1,10]):
return np.vstack([np.sqrt(g)*np.random.randn(M,2) for g in G]).T
def gen_dataset(w,N=100):
    x,y = synset(N)
    z = fdist(x,w)
    # shuffle scatterplots, labels, and feature vectors consistently
    p = np.random.permutation(2*N)
    x2 = [x[i] for i in p]
    return x2,y[p],z[p]
w = gen_random_parameter()
x,y,z = gen_dataset(w)
plt.figure(figsize=(8,8))
p = np.random.permutation(len(x))
for i in xrange(25):
plt.subplot(5,5,i+1)
plt.plot(x[p[i]][:,0],x[p[i]][:,1],'.')
plt.title(y[p[i]])
plt.axis('off')
plt.tight_layout()
_,y_tr,z_tr = gen_dataset(w, N=1000)
_,y_te,z_te = gen_dataset(w, N=1000)
print np.mean(RandomForestClassifier(n_estimators=500).fit(z_tr,y_tr).predict(z_te)==y_te)