#!/usr/bin/env python # coding: utf-8 # This notebook reproduces the experiment in Section 5.7. of the paper # # efficient planar affine canonicalization # # submitted to *Pattern Recognition* # ## convolutional network with canonicalized inputs # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import numpy as np import numpy.linalg as la import os import urllib def save(name): plt.savefig('results/'+name+'.pdf',bbox_inches='tight') def figsize(width=6,height=4): '''Change figure size in the notebook''' plt.rc('figure', figsize = (width,height)) # In[2]: import umucv.canonic as can import cv2 as cv # ## MNIST dataset # In[3]: # wget http://robot.inf.um.es/material/mnist.npz if not os.path.isfile('mnist.npz'): filepath, _ = urllib.request.urlretrieve('http://robot.inf.um.es/material/mnist.npz', 'mnist.npz') statinfo = os.stat(filepath) print('Succesfully downloaded mnist.npz ({} bytes).'.format(statinfo.st_size)) mnist = np.load("mnist.npz") xl,yl,xt,yt = [mnist[d] for d in ['xl', 'yl', 'xt', 'yt']] print(xl.shape, yl.shape) print(xt.shape, yt.shape) cl = np.argmax(yl,axis=1) ct = np.argmax(yt,axis=1) # In[4]: def shdig(v): '''Show mnist image''' x = np.reshape(v,[28,28]) plt.imshow(1-x, 'gray', vmin=0, vmax=1, interpolation="nearest"); # In[5]: shdig(xl[5]) # Show a few samples: # In[6]: figsize(6,6) plt.imshow(-np.bmat([[ x.reshape(28,28) for x in xl[10*k:10*(k+1)] ] for k in range(10)]),'gray'); plt.axis('off'); # In[7]: # optionally merge 6 and 9 if False: yl[cl==9] = np.array([0,0,0,0,0,0,1,0,0,0]) yt[ct==9] = np.array([0,0,0,0,0,0,1,0,0,0]) cl[cl==9] = 6 ct[ct==9] = 6 # ## canonicalization # Generate always only one canonic version. # In[8]: def only1Canonic(x, sz=(100,100), eps_r=0.2, eps_t=0.75, eps_s=0.04, eps_w=10): _, H, W = can.whitenedMoments(x, th=eps_w) SBar, S, K = can.KSsignature(W) Ts = [ t @ H for t in can.canonicTransforms((SBar,K),eps_r=eps_r, eps_t=eps_t, eps_s=eps_s, debug=False) ] return can.warpW(x, Ts[0], sz=sz, sigma=3) # In[9]: # from vectorized to vectorized, same size def cano(v): return np.ravel(only1Canonic(np.reshape(v,[28,28]),(28,28))) # In[10]: figsize() shdig(xl[37]) # In[11]: shdig(cano(xl[37])) # We canonicalize the whole data set and save it in compressed numpy format. # In[12]: import os if not os.path.isfile('mnist_canonic.npz'): xlc = xl.copy() for k in range(len(xl)): if k%5000 == 0: print(k) xlc[k] = cano(xl[k]) xtc = xt.copy() for k in range(len(xt)): if k%1000 == 0: print(k) xtc[k] = cano(xt[k]) np.savez_compressed('mnist_canonic',xlc=xlc,yl=yl,xtc=xtc,yt=yt) else: mnist_canonic = np.load("mnist_canonic.npz") xlc,yl,xtc,yt = [mnist_canonic[d] for d in ['xlc', 'yl', 'xtc', 'yt']] xlc=xlc.reshape(-1,28,28,1) xtc=xtc.reshape(-1,28,28,1) # In[13]: print(xlc.shape) print(xtc.shape) # We can take a look at the canonic versions: # In[14]: def samp(k): sel = xlc return np.pad(np.bmat([[ x.reshape(28,28) for x in sel[10*k:10*(k+1)] ] for k in range(10)]),10,'constant') # In[15]: figsize(6,6) plt.imshow(-samp(0),'gray'); plt.axis('off'); # ## convolutional network # In[16]: import tensorflow as tf from IPython.display import clear_output # We first define some utilities: # In[17]: def mkSaver(path): saver = tf.train.Saver(tf.all_variables()) def save(n): saver.save(sess, path+'/model', global_step=n) def restore(): saver.restore(sess, tf.train.latest_checkpoint(path)) return save,restore # In[18]: def weight_variable(shape): initial = tf.truncated_normal(shape, stddev=0.1) return tf.Variable(initial) def bias_variable(shape): initial = tf.constant(0.1, shape=shape) return tf.Variable(initial) def conv2d(x, W): return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') def max_pool_2x2(x): return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # We create the network described in the paper. # In[19]: g = tf.Graph() with g.as_default(): x = tf.placeholder("float", shape=[None, 784]) y_ = tf.placeholder("float", shape=[None, 10]) x_image = tf.reshape(x, [-1,28,28,1]) W_conv1 = weight_variable([5, 5, 1, 32]) b_conv1 = bias_variable([32]) h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) h_pool1 = max_pool_2x2(h_conv1) W_conv2 = weight_variable([5, 5, 32, 64]) b_conv2 = bias_variable([64]) h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) h_pool2 = max_pool_2x2(h_conv2) W_fc1 = weight_variable([7 * 7 * 64, 1024]) b_fc1 = bias_variable([1024]) h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64]) h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) keep_prob = tf.placeholder("float") h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) W_fc2 = weight_variable([1024, 10]) b_fc2 = bias_variable([10]) y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) cost = -tf.reduce_sum(y_*tf.log(y)) train_step = tf.train.AdamOptimizer(1e-4).minimize(cost) correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) init = tf.initialize_all_variables() save,restore = mkSaver('models/convo') sess = tf.Session(graph=g) sess.run(init) # Convenience functions for evaluation of accuracy and training: # In[20]: def accu(X,Y): xb = X.reshape(-1,1000,784) yb = Y.reshape(-1,1000,10) acc = np.mean([sess.run(accuracy,feed_dict={x: xb[k], y_: yb[k], keep_prob: 1.0}) for k in range(len(xb))]) return acc # In[21]: def epoch(X,Y, batch=100): n = len(X) perm = list(range(n)) np.random.shuffle(perm) xb = X[perm] xb = xb.reshape(-1,batch,784) yb = Y[perm] yb = yb.reshape(-1,batch,10) for k in range(n//batch): sess.run(train_step,feed_dict={x: xb[k], y_: yb[k], keep_prob: 0.5}) # After initialization: # In[22]: accu(xtc,yt) # After $\sim 50$ epochs: # In[28]: for p in range(10): epoch(xlc,yl) print(accu(xtc,yt)) # In[23]: #save(6000) restore() # In[24]: accu(xtc,yt), accu(xlc,yl) # We get $97\%$ accuray in the test set. (There is slight overtraining as the training set is almost perfectly memorized.) # We can now easily use the machine for prediction: # In[25]: dig = xtc[1235] figsize() shdig(dig) pred = sess.run(y, feed_dict={x: dig.reshape(1,-1), keep_prob: 1.0}) print(pred) np.argmax(pred,axis=1) # ### confusion matrix # In[26]: from sklearn.metrics import confusion_matrix # In[27]: def maqpred(xs): return np.argmax(sess.run(y, feed_dict={x: xs, keep_prob: 1.0}),axis=1) # In[28]: def maqprobs(xs): return sess.run(y, feed_dict={x: xs, keep_prob: 1.0}) # In[29]: ypred = maqpred(xtc.reshape(-1,784)) ypred # In[30]: ytrue = ct ytrue # In[31]: confusion_matrix(ytrue, ypred) # In[32]: if False: from tabulate import tabulate print(tabulate(confusion_matrix(np.argmax(yt,axis=1), maqpred(xtc)), tablefmt='latex')) # In[33]: mistakes = ypred != ytrue mistakes.sum() # In[34]: figsize(8,8) plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xtc[mistakes][10*k:10*(k+1)] ] for k in range(18)]),10,'constant'),'gray', interpolation= 'nearest'); plt.axis('off'); #save('errorscan') # In[35]: figsize(8,8) plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xt[mistakes][10*k:10*(k+1)] ] for k in range(18)]),10,'constant'),'gray', interpolation= 'nearest'); plt.axis('off'); #save('errorsorig') # In[36]: figsize(8,8) plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xtc[np.logical_not(mistakes)][20*k:20*(k+1)] ] for k in range(20)]),10,'constant'),'gray', interpolation= 'nearest'); plt.axis('off'); #save('noerrorscan') # ### real image # In[37]: from skimage import io path = "http://robot.inf.um.es/material/va/images/" view = io.imread(path+"handdigits.png") # In[38]: figsize() plt.imshow(view); # In[54]: def findShapes(x, th=128, area=(100,1000)): ret, gt = cv.threshold(cv.cvtColor(view,cv.COLOR_RGB2GRAY),th,255,cv.THRESH_BINARY_INV) n,cc = cv.connectedComponents(gt.copy()) ok = [ cc==k for k in range(1,n) ] def extract(mask): thing = np.argwhere(mask) (x1, y1), (x2, y2) = thing.min(0), thing.max(0) box = mask[x1:x2+1,y1:y2+1].astype(np.float32) return ((x1+x2)/2,(y1+y2)/2), box detected = [ extract(s) for s in ok ] return [ p for p in detected if area[0] < p[1].sum() < area[1] ] # In[55]: detected = findShapes(view,125,(50,500)) print(len(detected)) # In[56]: figsize(12,4) plt.imshow(-np.hstack([can.normalize(p,(100,100)) for _,p in detected]),'gray'); plt.axis('off'); #save('hand1') # In[42]: plt.imshow(-np.hstack([only1Canonic(p,sz=(28,28),eps_w=11) for _,p in detected]),'gray'); plt.axis('off'); #save('hand2') # In[43]: maqpred([only1Canonic(p,sz=(28,28),eps_w=11).ravel() for _,p in detected]) # In[44]: figsize() plt.imshow(view) for (x1,y1), box in detected: [l] = maqpred([only1Canonic(box,sz=(28,28)).ravel()]) plt.annotate(l,(y1,x1),color='red',fontsize=14) #save('hand3b') # In[45]: for _, box in detected: [l] = maqprobs([only1Canonic(box,sz=(28,28)).ravel()]) print(np.argmax(l), max(l/l.sum())) # ### discrimination 6 vs 9 # In[46]: sum(maqpred(xtc[ct==6].reshape(-1,28*28)) == 9) # In[47]: sum(maqpred(xtc[ct==9].reshape(-1,28*28)) == 6) # ### synthetic weak perspective views # In[48]: from umucv.htrans import tilt from numpy.random import randint def can1(x): return only1Canonic(x,(28,28)) views = [(0,0)]+[(a,b) for a in range(0,360,30) for b in range(0,360,45)] # f: fov (degrees, e.g. 30) # t: tilt (degrees, e.g. 40) # sz: desired size (default = same as input) def mkVariations(f,t,x,sz=None): if sz: xn = can.normalize(x,sz) else: xn = x return [can1(xn)]+[can1(tilt(f, a, t, b, xn)) for a in range(0,360,30) for b in range(0,360,45)] def randomView(f,t,x,sz=None): if sz: xn = can.normalize(x,sz) else: xn = x a,b = views[randint(len(views))] return can1(tilt(f, a, t, b, xn)) # In[50]: # full expansion of test set if False: bigx = np.array([ mkVariations(30,40,x.reshape(28,28),(100,100)) for x in xtc[:5000]]).reshape(-1,28,28,1) bigy = np.array([ [y for k in range(97)] for y in yt[:5000]]).reshape(-1,10) bigc = np.argmax(bigy,axis=1) print(bigx.shape) print(bigy.shape) print(bigc.shape) accu(bigx,bigy) # Canonicalization of randomly chosen synthetic perspective views: # In[51]: ranx = np.array([ randomView(30,40,x.reshape(28,28),(100,100)) for x in xtc]).reshape(-1,28,28,1) # In[52]: accu(ranx,yt) # ### end # In[53]: # custom notebook style from IPython.display import HTML HTML(open('nb1.css').read())