#!/usr/bin/env python
# coding: utf-8

# This notebook reproduces the experiment in Section 5.7. of the paper

# # efficient planar affine canonicalization
# 
# submitted to *Pattern Recognition*

# ## convolutional network with canonicalized inputs

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')

import matplotlib.pyplot as plt
import numpy             as np
import numpy.linalg      as la

import os
import urllib

def save(name):
    plt.savefig('results/'+name+'.pdf',bbox_inches='tight')

def figsize(width=6,height=4):
    '''Change figure size in the notebook'''
    plt.rc('figure', figsize = (width,height))


# In[2]:


import umucv.canonic as can
import cv2 as cv


# ## MNIST dataset

# In[3]:


# wget http://robot.inf.um.es/material/mnist.npz
if not os.path.isfile('mnist.npz'):
    filepath, _ = urllib.request.urlretrieve('http://robot.inf.um.es/material/mnist.npz', 'mnist.npz')
    statinfo = os.stat(filepath)
    print('Succesfully downloaded mnist.npz ({} bytes).'.format(statinfo.st_size))

mnist = np.load("mnist.npz")
xl,yl,xt,yt = [mnist[d] for d in ['xl', 'yl', 'xt', 'yt']]
print(xl.shape, yl.shape)
print(xt.shape, yt.shape)

cl = np.argmax(yl,axis=1)
ct = np.argmax(yt,axis=1)


# In[4]:


def shdig(v):
    '''Show mnist image'''
    x = np.reshape(v,[28,28])
    plt.imshow(1-x, 'gray', vmin=0, vmax=1, interpolation="nearest");


# In[5]:


shdig(xl[5])


# Show a few samples:

# In[6]:


figsize(6,6)
plt.imshow(-np.bmat([[ x.reshape(28,28) for x in xl[10*k:10*(k+1)] ] for k in range(10)]),'gray'); plt.axis('off');


# In[7]:


# optionally merge 6 and 9
if False:
    yl[cl==9] = np.array([0,0,0,0,0,0,1,0,0,0])
    yt[ct==9] = np.array([0,0,0,0,0,0,1,0,0,0])
    cl[cl==9] = 6
    ct[ct==9] = 6


# ## canonicalization

# Generate always only one canonic version.

# In[8]:


def only1Canonic(x,  sz=(100,100), eps_r=0.2, eps_t=0.75, eps_s=0.04, eps_w=10):
    _, H, W = can.whitenedMoments(x, th=eps_w)
    SBar, S, K   = can.KSsignature(W)
    Ts    = [ t @ H for t in can.canonicTransforms((SBar,K),eps_r=eps_r, eps_t=eps_t, eps_s=eps_s, debug=False) ]
    return can.warpW(x, Ts[0], sz=sz, sigma=3)


# In[9]:


# from vectorized to vectorized, same size
def cano(v):
    return np.ravel(only1Canonic(np.reshape(v,[28,28]),(28,28)))


# In[10]:


figsize()
shdig(xl[37])


# In[11]:


shdig(cano(xl[37]))


# We canonicalize the whole data set and save it in compressed numpy format.

# In[12]:


import os

if not os.path.isfile('mnist_canonic.npz'):
    xlc = xl.copy()
    for k in range(len(xl)):
        if k%5000 == 0: print(k)
        xlc[k] = cano(xl[k])

    xtc = xt.copy()
    for k in range(len(xt)):
        if k%1000 == 0: print(k)
        xtc[k] = cano(xt[k])

    np.savez_compressed('mnist_canonic',xlc=xlc,yl=yl,xtc=xtc,yt=yt)
else:
    mnist_canonic = np.load("mnist_canonic.npz")
    xlc,yl,xtc,yt = [mnist_canonic[d] for d in ['xlc', 'yl', 'xtc', 'yt']]

xlc=xlc.reshape(-1,28,28,1)
xtc=xtc.reshape(-1,28,28,1)


# In[13]:


print(xlc.shape)
print(xtc.shape)


# We can take a look at the canonic versions:

# In[14]:


def samp(k):
    sel = xlc
    return np.pad(np.bmat([[ x.reshape(28,28) for x in sel[10*k:10*(k+1)] ] for k in range(10)]),10,'constant')


# In[15]:


figsize(6,6)
plt.imshow(-samp(0),'gray'); plt.axis('off');


# ## convolutional network

# In[16]:


import tensorflow as tf

from IPython.display import clear_output


# We first define some utilities:

# In[17]:


def mkSaver(path):
    saver = tf.train.Saver(tf.all_variables())
    def save(n):
          saver.save(sess, path+'/model', global_step=n)
    def restore():
        saver.restore(sess, tf.train.latest_checkpoint(path))
    return save,restore


# In[18]:


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')


# We create the network described in the paper.

# In[19]:


g = tf.Graph()

with g.as_default():
    
    x = tf.placeholder("float", shape=[None, 784])
    y_ = tf.placeholder("float", shape=[None, 10])
    
    x_image = tf.reshape(x, [-1,28,28,1])
    
    
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    
    
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)
    
    
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)


    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])

    y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    
    cost = -tf.reduce_sum(y_*tf.log(y))
    
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)
    
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    
    init = tf.initialize_all_variables()
    save,restore = mkSaver('models/convo')
    
sess = tf.Session(graph=g)
sess.run(init)


# Convenience functions for evaluation of accuracy and training:

# In[20]:


def accu(X,Y):
    xb = X.reshape(-1,1000,784)
    yb = Y.reshape(-1,1000,10)
    acc = np.mean([sess.run(accuracy,feed_dict={x: xb[k], y_: yb[k], keep_prob: 1.0})
                   for k in range(len(xb))])
    return acc


# In[21]:


def epoch(X,Y, batch=100):
    n = len(X)
    perm = list(range(n))
    np.random.shuffle(perm)
    xb = X[perm]
    xb = xb.reshape(-1,batch,784)
    yb = Y[perm]
    yb = yb.reshape(-1,batch,10)
    for k in range(n//batch):
        sess.run(train_step,feed_dict={x: xb[k], y_: yb[k], keep_prob: 0.5})


# After initialization:

# In[22]:


accu(xtc,yt)


# After $\sim 50$ epochs:

# In[28]:


for p in range(10):
    epoch(xlc,yl)
    print(accu(xtc,yt))


# In[23]:


#save(6000)
restore()


# In[24]:


accu(xtc,yt), accu(xlc,yl)


# We get $97\%$ accuray in the test set. (There is slight overtraining as the training set is almost perfectly memorized.)

# We can now easily use the machine for prediction:

# In[25]:


dig = xtc[1235]

figsize()
shdig(dig)

pred = sess.run(y, feed_dict={x: dig.reshape(1,-1), keep_prob: 1.0})

print(pred)

np.argmax(pred,axis=1)


# ### confusion matrix

# In[26]:


from sklearn.metrics import confusion_matrix


# In[27]:


def maqpred(xs):
    return np.argmax(sess.run(y, feed_dict={x: xs, keep_prob: 1.0}),axis=1)


# In[28]:


def maqprobs(xs):
    return sess.run(y, feed_dict={x: xs, keep_prob: 1.0})


# In[29]:


ypred = maqpred(xtc.reshape(-1,784))
ypred


# In[30]:


ytrue = ct
ytrue


# In[31]:


confusion_matrix(ytrue, ypred)


# In[32]:


if False:
    from tabulate import tabulate

    print(tabulate(confusion_matrix(np.argmax(yt,axis=1), maqpred(xtc)), tablefmt='latex'))


# In[33]:


mistakes = ypred != ytrue
mistakes.sum()


# In[34]:


figsize(8,8)
plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xtc[mistakes][10*k:10*(k+1)] ] 
                             for k in range(18)]),10,'constant'),'gray', interpolation= 'nearest');
plt.axis('off');
#save('errorscan')


# In[35]:


figsize(8,8)
plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xt[mistakes][10*k:10*(k+1)] ] 
                             for k in range(18)]),10,'constant'),'gray', interpolation= 'nearest');
plt.axis('off');
#save('errorsorig')


# In[36]:


figsize(8,8)
plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xtc[np.logical_not(mistakes)][20*k:20*(k+1)] ] 
                             for k in range(20)]),10,'constant'),'gray', interpolation= 'nearest');
plt.axis('off');
#save('noerrorscan')


# ### real image

# In[37]:


from skimage import io

path = "http://robot.inf.um.es/material/va/images/"

view = io.imread(path+"handdigits.png")


# In[38]:


figsize()
plt.imshow(view);


# In[54]:


def findShapes(x, th=128, area=(100,1000)):
    ret, gt = cv.threshold(cv.cvtColor(view,cv.COLOR_RGB2GRAY),th,255,cv.THRESH_BINARY_INV)
    n,cc = cv.connectedComponents(gt.copy())
    ok = [ cc==k for k in range(1,n) ]
    def extract(mask):
        thing = np.argwhere(mask)
        (x1, y1), (x2, y2) = thing.min(0), thing.max(0)
        box = mask[x1:x2+1,y1:y2+1].astype(np.float32)
        return ((x1+x2)/2,(y1+y2)/2), box
    detected = [ extract(s) for s in ok ]
    return [ p for p in detected if area[0] < p[1].sum() < area[1] ]


# In[55]:


detected = findShapes(view,125,(50,500))
print(len(detected))


# In[56]:


figsize(12,4)
plt.imshow(-np.hstack([can.normalize(p,(100,100)) for _,p in detected]),'gray'); plt.axis('off');
#save('hand1')


# In[42]:


plt.imshow(-np.hstack([only1Canonic(p,sz=(28,28),eps_w=11) for _,p in detected]),'gray'); plt.axis('off');
#save('hand2')


# In[43]:


maqpred([only1Canonic(p,sz=(28,28),eps_w=11).ravel() for _,p in detected])


# In[44]:


figsize()
plt.imshow(view)

for (x1,y1), box in detected:    
    [l] = maqpred([only1Canonic(box,sz=(28,28)).ravel()])
    plt.annotate(l,(y1,x1),color='red',fontsize=14)

#save('hand3b')


# In[45]:


for _, box in detected:
    [l] = maqprobs([only1Canonic(box,sz=(28,28)).ravel()])
    print(np.argmax(l), max(l/l.sum()))


# ### discrimination 6 vs 9

# In[46]:


sum(maqpred(xtc[ct==6].reshape(-1,28*28)) == 9)


# In[47]:


sum(maqpred(xtc[ct==9].reshape(-1,28*28)) == 6)


# ### synthetic weak perspective views

# In[48]:


from umucv.htrans import tilt
from numpy.random import randint

def can1(x):
    return only1Canonic(x,(28,28))

views = [(0,0)]+[(a,b) for a in range(0,360,30) for b in range(0,360,45)]

# f:  fov  (degrees, e.g. 30)
# t:  tilt (degrees, e.g. 40)
# sz: desired size (default = same as input)
def mkVariations(f,t,x,sz=None):
    if sz:
        xn =  can.normalize(x,sz)
    else:
        xn = x
    return [can1(xn)]+[can1(tilt(f, a, t, b, xn)) for a in range(0,360,30) for b in range(0,360,45)]

def randomView(f,t,x,sz=None):
    if sz:
        xn =  can.normalize(x,sz)
    else:
        xn = x
    a,b = views[randint(len(views))]
    return can1(tilt(f, a, t, b, xn)) 


# In[50]:


# full expansion of test set
if False:
    bigx = np.array([ mkVariations(30,40,x.reshape(28,28),(100,100)) for x in xtc[:5000]]).reshape(-1,28,28,1)
    bigy = np.array([ [y for k in range(97)] for y in yt[:5000]]).reshape(-1,10)
    bigc = np.argmax(bigy,axis=1)

    print(bigx.shape)
    print(bigy.shape)
    print(bigc.shape)

    accu(bigx,bigy)


# Canonicalization of randomly chosen synthetic perspective views:

# In[51]:


ranx = np.array([ randomView(30,40,x.reshape(28,28),(100,100)) for x in xtc]).reshape(-1,28,28,1)


# In[52]:


accu(ranx,yt)


# ### end

# In[53]:


# custom notebook style
from IPython.display import HTML
HTML(open('nb1.css').read())