import theano
import theano.tensor as T
from sklearn import decomposition
import numpy as np
from numpy import linalg
import pandas as pd

## load data
data2d = np.loadtxt('data/pca_2d/pcaData.txt').T
print data2d.shape
plot(data2d[:, 0], data2d[:, 1], 'bo')
title('Raw data')
print data2d.mean(axis = 0)
print data2d.var(axis = 0)

normalized_data = data2d - data2d.mean(axis = 0) # featurewise/columnwise DC removal
print normalized_data.shape
plot(normalized_data[:, 0], normalized_data[:, 1], 'bo')
title('Normalized (feature-wise) data')
## remove DC - instance-wise NOT NCESSARY IN THIS EXAMPLE
data2d = data2d - np.mean(data2d, axis = 1).reshape(-1, 1)
plot(data2d[:, 0], data2d[:, 1], 'bo')
## find PCA basis 
## SVD == Eigen-decomposition for sysmetric & positive definite matrices (covariance matrix)
## The PC directions will be the columns of U
## The PC vectors will be the PC directions from the CENTER of the normalized data
U, S, V = linalg.svd(np.dot(normalized_data.T, normalized_data) / normalized_data.shape[0])
print U.shape, S.shape, V.shape
center = data2d.mean(axis = 0)
pc1, pc2 = U.T
print pc1
plot(normalized_data[:, 0], normalized_data[:, 1], 'g.')
plot([center[0], pc1[0]], [center[1], pc1[1]], 'b->')
plot([center[0], pc2[0]], [center[1], pc2[1]], 'b->')
title('PC of normalized data')
print np.dot(pc1, pc2), ' dot product == 0 means they are othogonal!!'

## calcualte xRot - X projection on the PC
xRot = np.dot(normalized_data, U)
print xRot.shape
plot(xRot[:, 0], xRot[:, 1], 'bo')
title('xRot')
ylim(-0.3, 0.5)
xlim(-1, 1.1)

## dimension reduction by PCA
## by selecting only the first pc
#xRot_pc1 = xRot[:, 1] = 0 # ignore the second PC ????
xRot_pc1 = xRot.copy() ## deep copy
xRot_pc1[:, 1] = 0
#print xRot
#print xRot_pc1
xHat = np.dot(xRot_pc1, U.T)
print xHat.shape
plot(xHat[:, 0], xHat[:, 1], 'bo')
title('xHat')

## PCA WHITENING - NORMALIZE U by S matrix diagonal elements
whitened_pcs = U / np.sqrt(S)
print whitened_pcs
print U
whitened_X = np.dot(normalized_data, whitened_pcs)
#whitened_X = np.dot(np.dot(normalized_data, whitened_pcs), U.T)
print whitened_X.shape
plot(whitened_X[:, 0], whitened_X[:, 1], 'bo')

## ZCA WHITENING - NORMALIZE U by S matrix diagonal elements
whitened_pcs = U / np.sqrt(S)
print whitened_pcs
print U
whitened_Z = np.dot(np.dot(normalized_data, whitened_pcs), U.T)
print whitened_Z.shape
plot(whitened_Z[:, 0], whitened_Z[:, 1], 'bo')

## load data
from scipy import io
images = io.loadmat('data/pca_exercise/IMAGES_RAW.mat')['IMAGESr']
print images.shape
gray()
"""
fig, axes = subplots(nrows = 5, ncols = 2, figsize = (1 *2, 1 * 5))
axes = axes.flatten()
for i in range(images.shape[2]):
    axes[i].imshow(images[:,:,i])
"""

## get random patches of 10000 x 12 x 12
patches = np.zeros((10000, 12, 12))
im_index = np.random.randint(low = 0, high = 10, size = 10000)
x_index = np.random.randint(low = 0, high = 512 - 12, size = 10000)
y_index = np.random.randint(low = 0, high = 512 - 12, size = 10000)
for i, (im, x, y) in enumerate(zip(im_index, x_index, y_index)):
    patches[i] = images[x:x+12, y:y+12, im]
print patches.shape
feats = patches.reshape(10000, -1)
print feats.shape

## patchwise zero-mean of the data
print feats.mean(axis = 1)
normalized_feats = feats - feats.mean(axis = 1).reshape(-1, 1)
print normalized_feats.shape
print normalized_feats.mean(axis = 1)

## PCA
cov_mat = np.dot(normalized_feats.T, normalized_feats) / normalized_feats.shape[0]
U, S, V = linalg.svd(cov_mat)
print U.shape, S.shape, V.shape

rot_feats = np.dot(normalized_feats, U)
rot_mat = np.dot(rot_feats.T, rot_feats) / rot_feats.shape[0]
fig, axes = subplots(nrows = 1, ncols = 2, figsize = (3 * 2, 3 * 1))
axes[0].imshow(cov_mat, cmap = cm.jet)
axes[0].set_title('cov before PC rotation')
axes[1].imshow(rot_mat, cmap = cm.jet)
axes[1].set_title('cov after PC rotation (diagonal)')

## find k to retain 90% and 99% variance
explained_variance = S.cumsum() / S.sum()
print explained_variance
k90 = np.argwhere(explained_variance >= 0.9)[0][0] + 1
k99 = np.argwhere(explained_variance >= 0.99)[0][0] + 1
print k90, k99

## get the ZCA
rot_feats90 = rot_feats.copy()
rot_feats90[:, k90:] = 0
rot_feats99 = rot_feats.copy()
rot_feats99[:, k99:] = 0
#feats90 = np.dot(rot_feats90, U.T)
#feats99 = np.dot(rot_feats99, U.T)
print feats90.shape, feats99.shape

## plotting - the first 25 patches
## original
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('original')
for i in xrange(9):
    axes[i].imshow(feats[i].reshape(12, 12), interpolation='none')
    axes[i].get_xaxis().set_visible(False)
    axes[i].get_yaxis().set_visible(False)
## 99% variation
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('99%')
for i in xrange(9):
    axes[i].imshow(feats99[i].reshape(12, 12), interpolation='none')
    axes[i].get_xaxis().set_visible(False)
    axes[i].get_yaxis().set_visible(False)
## 90% variation
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('90%')
for i in xrange(9):
    axes[i].imshow(feats90[i].reshape(12, 12), interpolation='none')
    axes[i].get_xaxis().set_visible(False)
    axes[i].get_yaxis().set_visible(False)

## ZCA whitening
epsilon = 0.1
white_U = U / (np.sqrt(S) + epsilon)
feats_zca = np.dot(np.dot(feats, white_U), U.T)
print feats_zca.shape
## plotting - the first 25 patches
## original
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('original')
for i in xrange(9):
    axes[i].imshow(feats[i].reshape(12, 12), interpolation='none')
    axes[i].get_xaxis().set_visible(False)
    axes[i].get_yaxis().set_visible(False)
## ZCA
figure()
fig, axes = subplots(nrows = 3, ncols = 3, figsize=(1 * 3, 1 * 3))
axes = axes.flatten()
fig.suptitle('ZCA whitening -- enhanced edges')
for i in xrange(9):
    axes[i].imshow(feats_zca[i].reshape(12, 12), interpolation='none')
    axes[i].get_xaxis().set_visible(False)
    axes[i].get_yaxis().set_visible(False)

## covariance of pca whitening (with/without reguarlization)
epsilon = 0.
white_U = U / (np.sqrt(S) + epsilon)
feats_pca = np.dot(feats, white_U)
figure(figsize = (4, 4))
imshow(np.dot(feats_pca.T, feats_pca) / feats_pca.shape[0], cmap = cm.jet)

epsilon = 1.
white_U = U / (np.sqrt(S) + epsilon)
feats_pca = np.dot(feats, white_U)
figure(figsize = (4, 4))
imshow(np.dot(feats_pca.T, feats_pca) / feats_pca.shape[0], cmap = cm.jet)