This notebook reproduces the experiment in Section 5.7. of the paper
submitted to Pattern Recognition
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import os
import urllib
def save(name):
plt.savefig('results/'+name+'.pdf',bbox_inches='tight')
def figsize(width=6,height=4):
'''Change figure size in the notebook'''
plt.rc('figure', figsize = (width,height))
import umucv.canonic as can
import cv2 as cv
# wget http://robot.inf.um.es/material/mnist.npz
if not os.path.isfile('mnist.npz'):
filepath, _ = urllib.request.urlretrieve('http://robot.inf.um.es/material/mnist.npz', 'mnist.npz')
statinfo = os.stat(filepath)
print('Succesfully downloaded mnist.npz ({} bytes).'.format(statinfo.st_size))
mnist = np.load("mnist.npz")
xl,yl,xt,yt = [mnist[d] for d in ['xl', 'yl', 'xt', 'yt']]
print(xl.shape, yl.shape)
print(xt.shape, yt.shape)
cl = np.argmax(yl,axis=1)
ct = np.argmax(yt,axis=1)
(60000, 784) (60000, 10) (10000, 784) (10000, 10)
def shdig(v):
'''Show mnist image'''
x = np.reshape(v,[28,28])
plt.imshow(1-x, 'gray', vmin=0, vmax=1, interpolation="nearest");
shdig(xl[5])
Show a few samples:
figsize(6,6)
plt.imshow(-np.bmat([[ x.reshape(28,28) for x in xl[10*k:10*(k+1)] ] for k in range(10)]),'gray'); plt.axis('off');
# optionally merge 6 and 9
if False:
yl[cl==9] = np.array([0,0,0,0,0,0,1,0,0,0])
yt[ct==9] = np.array([0,0,0,0,0,0,1,0,0,0])
cl[cl==9] = 6
ct[ct==9] = 6
Generate always only one canonic version.
def only1Canonic(x, sz=(100,100), eps_r=0.2, eps_t=0.75, eps_s=0.04, eps_w=10):
_, H, W = can.whitenedMoments(x, th=eps_w)
SBar, S, K = can.KSsignature(W)
Ts = [ t @ H for t in can.canonicTransforms((SBar,K),eps_r=eps_r, eps_t=eps_t, eps_s=eps_s, debug=False) ]
return can.warpW(x, Ts[0], sz=sz, sigma=3)
# from vectorized to vectorized, same size
def cano(v):
return np.ravel(only1Canonic(np.reshape(v,[28,28]),(28,28)))
figsize()
shdig(xl[37])
shdig(cano(xl[37]))
We canonicalize the whole data set and save it in compressed numpy format.
import os
if not os.path.isfile('mnist_canonic.npz'):
xlc = xl.copy()
for k in range(len(xl)):
if k%5000 == 0: print(k)
xlc[k] = cano(xl[k])
xtc = xt.copy()
for k in range(len(xt)):
if k%1000 == 0: print(k)
xtc[k] = cano(xt[k])
np.savez_compressed('mnist_canonic',xlc=xlc,yl=yl,xtc=xtc,yt=yt)
else:
mnist_canonic = np.load("mnist_canonic.npz")
xlc,yl,xtc,yt = [mnist_canonic[d] for d in ['xlc', 'yl', 'xtc', 'yt']]
xlc=xlc.reshape(-1,28,28,1)
xtc=xtc.reshape(-1,28,28,1)
print(xlc.shape)
print(xtc.shape)
(60000, 28, 28, 1) (10000, 28, 28, 1)
We can take a look at the canonic versions:
def samp(k):
sel = xlc
return np.pad(np.bmat([[ x.reshape(28,28) for x in sel[10*k:10*(k+1)] ] for k in range(10)]),10,'constant')
figsize(6,6)
plt.imshow(-samp(0),'gray'); plt.axis('off');
import tensorflow as tf
from IPython.display import clear_output
We first define some utilities:
def mkSaver(path):
saver = tf.train.Saver(tf.all_variables())
def save(n):
saver.save(sess, path+'/model', global_step=n)
def restore():
saver.restore(sess, tf.train.latest_checkpoint(path))
return save,restore
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
We create the network described in the paper.
g = tf.Graph()
with g.as_default():
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])
x_image = tf.reshape(x, [-1,28,28,1])
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cost = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.initialize_all_variables()
save,restore = mkSaver('models/convo')
sess = tf.Session(graph=g)
sess.run(init)
Convenience functions for evaluation of accuracy and training:
def accu(X,Y):
xb = X.reshape(-1,1000,784)
yb = Y.reshape(-1,1000,10)
acc = np.mean([sess.run(accuracy,feed_dict={x: xb[k], y_: yb[k], keep_prob: 1.0})
for k in range(len(xb))])
return acc
def epoch(X,Y, batch=100):
n = len(X)
perm = list(range(n))
np.random.shuffle(perm)
xb = X[perm]
xb = xb.reshape(-1,batch,784)
yb = Y[perm]
yb = yb.reshape(-1,batch,10)
for k in range(n//batch):
sess.run(train_step,feed_dict={x: xb[k], y_: yb[k], keep_prob: 0.5})
After initialization:
accu(xtc,yt)
0.0656
After $\sim 50$ epochs:
for p in range(10):
epoch(xlc,yl)
print(accu(xtc,yt))
0.9705 0.9705 0.971 0.9715 0.9707 0.9717 0.9675 0.9716 0.9702 0.9718
#save(6000)
restore()
accu(xtc,yt), accu(xlc,yl)
(0.97180003, 0.99703336)
We get $97\%$ accuray in the test set. (There is slight overtraining as the training set is almost perfectly memorized.)
We can now easily use the machine for prediction:
dig = xtc[1235]
figsize()
shdig(dig)
pred = sess.run(y, feed_dict={x: dig.reshape(1,-1), keep_prob: 1.0})
print(pred)
np.argmax(pred,axis=1)
[[ 4.16875388e-12 8.19726286e-13 9.36033138e-14 5.43449119e-07 7.14271697e-10 9.99995708e-01 1.57225681e-08 1.19578758e-09 1.10670202e-08 3.63725940e-06]]
array([5])
from sklearn.metrics import confusion_matrix
def maqpred(xs):
return np.argmax(sess.run(y, feed_dict={x: xs, keep_prob: 1.0}),axis=1)
def maqprobs(xs):
return sess.run(y, feed_dict={x: xs, keep_prob: 1.0})
ypred = maqpred(xtc.reshape(-1,784))
ypred
array([7, 2, 1, ..., 4, 5, 6])
ytrue = ct
ytrue
array([7, 2, 1, ..., 4, 5, 6])
confusion_matrix(ytrue, ypred)
array([[ 973, 0, 1, 0, 0, 1, 1, 1, 0, 3], [ 0, 1128, 0, 0, 1, 0, 0, 6, 0, 0], [ 3, 1, 1012, 1, 0, 0, 0, 9, 3, 3], [ 3, 0, 3, 996, 3, 1, 1, 1, 2, 0], [ 0, 1, 2, 0, 968, 1, 2, 1, 4, 3], [ 2, 0, 0, 2, 6, 868, 2, 3, 0, 9], [ 2, 0, 0, 0, 2, 3, 896, 17, 0, 38], [ 0, 1, 9, 1, 10, 2, 0, 1004, 0, 1], [ 2, 0, 3, 4, 3, 0, 1, 1, 958, 2], [ 0, 2, 1, 1, 4, 15, 66, 2, 3, 915]])
if False:
from tabulate import tabulate
print(tabulate(confusion_matrix(np.argmax(yt,axis=1), maqpred(xtc)), tablefmt='latex'))
mistakes = ypred != ytrue
mistakes.sum()
282
figsize(8,8)
plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xtc[mistakes][10*k:10*(k+1)] ]
for k in range(18)]),10,'constant'),'gray', interpolation= 'nearest');
plt.axis('off');
#save('errorscan')
figsize(8,8)
plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xt[mistakes][10*k:10*(k+1)] ]
for k in range(18)]),10,'constant'),'gray', interpolation= 'nearest');
plt.axis('off');
#save('errorsorig')
figsize(8,8)
plt.imshow(- np.pad(np.bmat([[ x.reshape(28,28) for x in xtc[np.logical_not(mistakes)][20*k:20*(k+1)] ]
for k in range(20)]),10,'constant'),'gray', interpolation= 'nearest');
plt.axis('off');
#save('noerrorscan')
from skimage import io
path = "http://robot.inf.um.es/material/va/images/"
view = io.imread(path+"handdigits.png")
figsize()
plt.imshow(view);
def findShapes(x, th=128, area=(100,1000)):
ret, gt = cv.threshold(cv.cvtColor(view,cv.COLOR_RGB2GRAY),th,255,cv.THRESH_BINARY_INV)
n,cc = cv.connectedComponents(gt.copy())
ok = [ cc==k for k in range(1,n) ]
def extract(mask):
thing = np.argwhere(mask)
(x1, y1), (x2, y2) = thing.min(0), thing.max(0)
box = mask[x1:x2+1,y1:y2+1].astype(np.float32)
return ((x1+x2)/2,(y1+y2)/2), box
detected = [ extract(s) for s in ok ]
return [ p for p in detected if area[0] < p[1].sum() < area[1] ]
detected = findShapes(view,125,(50,500))
print(len(detected))
20
figsize(12,4)
plt.imshow(-np.hstack([can.normalize(p,(100,100)) for _,p in detected]),'gray'); plt.axis('off');
#save('hand1')
plt.imshow(-np.hstack([only1Canonic(p,sz=(28,28),eps_w=11) for _,p in detected]),'gray'); plt.axis('off');
#save('hand2')
maqpred([only1Canonic(p,sz=(28,28),eps_w=11).ravel() for _,p in detected])
array([0, 1, 2, 9, 3, 8, 4, 7, 5, 6, 6, 5, 7, 8, 4, 9, 3, 2, 1, 0])
figsize()
plt.imshow(view)
for (x1,y1), box in detected:
[l] = maqpred([only1Canonic(box,sz=(28,28)).ravel()])
plt.annotate(l,(y1,x1),color='red',fontsize=14)
#save('hand3b')
for _, box in detected:
[l] = maqprobs([only1Canonic(box,sz=(28,28)).ravel()])
print(np.argmax(l), max(l/l.sum()))
0 1.0 1 1.0 2 0.999998 9 0.999696 3 1.0 8 1.0 4 0.994683 7 0.999999 5 0.999769 6 0.601738 6 0.971846 5 1.0 7 0.999997 8 1.0 4 0.999999 9 0.998966 3 1.0 2 1.0 1 0.989828 0 1.0
sum(maqpred(xtc[ct==6].reshape(-1,28*28)) == 9)
38
sum(maqpred(xtc[ct==9].reshape(-1,28*28)) == 6)
66
from umucv.htrans import tilt
from numpy.random import randint
def can1(x):
return only1Canonic(x,(28,28))
views = [(0,0)]+[(a,b) for a in range(0,360,30) for b in range(0,360,45)]
# f: fov (degrees, e.g. 30)
# t: tilt (degrees, e.g. 40)
# sz: desired size (default = same as input)
def mkVariations(f,t,x,sz=None):
if sz:
xn = can.normalize(x,sz)
else:
xn = x
return [can1(xn)]+[can1(tilt(f, a, t, b, xn)) for a in range(0,360,30) for b in range(0,360,45)]
def randomView(f,t,x,sz=None):
if sz:
xn = can.normalize(x,sz)
else:
xn = x
a,b = views[randint(len(views))]
return can1(tilt(f, a, t, b, xn))
# full expansion of test set
if False:
bigx = np.array([ mkVariations(30,40,x.reshape(28,28),(100,100)) for x in xtc[:5000]]).reshape(-1,28,28,1)
bigy = np.array([ [y for k in range(97)] for y in yt[:5000]]).reshape(-1,10)
bigc = np.argmax(bigy,axis=1)
print(bigx.shape)
print(bigy.shape)
print(bigc.shape)
accu(bigx,bigy)
Canonicalization of randomly chosen synthetic perspective views:
ranx = np.array([ randomView(30,40,x.reshape(28,28),(100,100)) for x in xtc]).reshape(-1,28,28,1)
accu(ranx,yt)
0.95679998
# custom notebook style
from IPython.display import HTML
HTML(open('nb1.css').read())