MAT 201A Winter 2016 HW 3 Qiaodong Cui
%pylab inline
rcParams['figure.figsize'] = (10, 10) #wide graphs by default
from __future__ import print_function
from __future__ import division
from scipy.io import wavfile
%matplotlib inline
Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment. warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
# Load the digit sheet (mnist.png) and display it with a grayscale colormap.
img = imread("mnist.png")
figure(figsize = (20,40))
imshow(img, cmap=cm.gray)
# Last expression of the cell: echoes the array dimensions of the sheet.
img.shape
(420, 980)
# Load the four single-digit template images, one file per digit.
dig0, dig1, dig2, dig3 = [
    imread(path) for path in ("0.png", "1.png", "2.png", "3.png")
]
# Convert an image to a 2-D array.
def convert(dig):
    """Collapse a multi-channel image into a single 2-D intensity array.

    The result is the plain per-pixel mean over every channel on the last
    axis (for a 4-channel image this is the same average as before, alpha
    included).

    Parameters
    ----------
    dig : array_like
        Either an (H, W, C) image with any number of channels, or an
        (H, W) image that is already single-channel.

    Returns
    -------
    numpy.ndarray
        (H, W) array of channel means. A 2-D input is returned unchanged
        (as an ndarray).
    """
    import numpy as np  # local import keeps the function self-contained

    dig = np.asarray(dig)
    if dig.ndim == 2:
        # Already single-channel: nothing to average.
        return dig
    if not np.issubdtype(dig.dtype, np.floating):
        # Integer images (e.g. uint8) would wrap around if the channels were
        # summed in their own dtype, so promote to float first.
        dig = dig.astype(float)
    # Divide by the actual channel count instead of assuming four channels.
    return dig.sum(axis=2) / float(dig.shape[2])
# Flatten every template to a single channel.
dig0, dig1, dig2, dig3 = [convert(d) for d in (dig0, dig1, dig2, dig3)]

# Show the four templates side by side in a 1x4 grid.
for slot, template in enumerate((dig0, dig1, dig2, dig3), 1):
    subplot(1, 4, slot)
    imshow(template, cmap=cm.gray)

# Last expression of the cell: echoes the template dimensions.
dig0.shape
(24, 24)
# Cross-correlate the whole sheet with the digit-0 template and plot the
# raw response ('full' is correlate2d's default output mode).
from scipy.signal import correlate2d

cc = correlate2d(img, dig0, mode='full')
figure(figsize=(20, 40))
imshow(cc)
<matplotlib.image.AxesImage at 0x113d02510>
# Display the binary detection mask: 1 where the response exceeds 100, else 0.
detections = where(cc > 100, 1, 0)
figure(figsize=(20, 40))
imshow(detections, cmap=cm.gray, interpolation='nearest')
<matplotlib.image.AxesImage at 0x11307e090>
# Blend the detection mask with the original sheet to better visualise the
# result.
# Threshold into a separate array so the raw correlation in `cc` is kept and
# this cell can be re-run without thresholding an already-thresholded mask.
cc_mask = where(cc > 100, 1, 0)

# The 'full' correlation output is larger than img by (template size - 1)
# along each axis. Shift the sheet by half the template size (12 px for the
# 24x24 digit) so it sits inside the larger canvas.
row_off = dig0.shape[0] // 2
col_off = dig0.shape[1] // 2

blend = zeros((cc.shape[0], cc.shape[1], 3))
blend[row_off:row_off + img.shape[0], col_off:col_off + img.shape[1], 0] = img  # red channel: original sheet
blend[:, :, 1] = cc_mask  # green channel: thresholded cross-correlation

figure(figsize=(20, 40))
# Blended result: the green dots appear near the zeros, but there are some
# misclassifications.
imshow(blend)
<matplotlib.image.AxesImage at 0x10ab36ad0>
# Cross-correlate an image with a template and show the thresholded result
# blended over the image.
def Blend_show(img, target, wei):
    """Display where `target` matches `img`, overlaid on the image itself.

    The template is first turned into a signed kernel: pixels brighter than
    0.8 become +1 and all others become -0.5, so image content that falls
    outside the template shape lowers the score. The kernel is then
    cross-correlated with `img`, the response is thresholded at `wei`, and
    an RGB figure is drawn with the image in the red channel and the
    detections in the green channel.

    Parameters
    ----------
    img : 2-D array
        Image to search.
    target : 2-D array
        Template to look for.
    wei : number
        Threshold; correlation values strictly greater than this are shown
        as detections.

    Returns
    -------
    None
        The function only draws a new figure.
    """
    from scipy.signal import correlate2d

    # The -0.5 penalty for pixels outside the template can be tuned.
    kernel = where(target > 0.8, 1, -0.5)
    response = correlate2d(img, kernel)  # default mode is 'full'
    hits = where(response > wei, 1, 0)

    # The 'full' output is larger than img by (template size - 1) per axis.
    # Offset the image by half the template size (12 px for a 24x24 template)
    # instead of a fixed 12, so templates of other sizes still fit the canvas.
    row_off = kernel.shape[0] // 2
    col_off = kernel.shape[1] // 2

    blend = zeros((response.shape[0], response.shape[1], 3))
    blend[row_off:row_off + img.shape[0], col_off:col_off + img.shape[1], 0] = img
    blend[:, :, 1] = hits
    figure(figsize=(20, 40))
    imshow(blend)
# Run the detector once per template, each with its own hand-picked threshold.
for template, threshold in (
    (dig0, 60),  # digit 0: new cross-correlation result
    (dig1, 7),   # digit 1: a good result, but slightly rotated 1s are missed
    (dig2, 40),  # digit 2: the most frequently misclassified digit is 7
    (dig3, 40),  # digit 3: some 3s are missed because they are too thin or rotated
):
    Blend_show(img, template, threshold)
In summary, cross-correlation can do some pattern recognition, but it performs poorly, as the examples above show. One likely reason is that it is not invariant to rotation: slightly rotated digits are not recognized. Because it only performs a multiply-and-add over each region, it can only recognize digits that closely match the template image. This kind of recognition is invariant only to translation, so it cannot distinguish external 'noise' (such as a slight rotation) from the internal structure of the digit. Other machine-learning methods such as CNNs or SVMs perform far better.