Notebook

In [414]:

# interactive tool, put on top so we can find it easily
ks = "abcd"
kwargs = {}
for i in range(4):
    kwargs["p"+ks[i]]=widgets.IntSliderWidget(min=0, max=SW if i%2 else SH, step=1, value=p[i])
    kwargs["q"+ks[i]]=widgets.IntSliderWidget(min=0, max=slide_w if i%2 else slide_h, step=1, value=q[i])
kwargs['blur_factorx']=widgets.IntSliderWidget(min=0, max=40, step=1, value=blur_factor)
i = interact(iicompare, **kwargs)

(126, 1, 443, 631) , (6, 12, 1125, 2000) , 12 0.973290029063

In [3]:

%matplotlib inline
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
try:
    import cv
    CAP_FRAME_COUNT = cv.CV_CAP_PROP_FRAME_COUNT
    CAP_FRAME_WIDTH  = cv.CV_CAP_PROP_FRAME_WIDTH
    CAP_FRAME_HEIGHT = cv.CV_CAP_PROP_FRAME_HEIGHT
    CAP_FPS = cv.CV_CAP_PROP_FPS
    CAP_POS_FRAMES = cv.CV_CAP_PROP_POS_FRAMES
except:
    CAP_FRAME_COUNT = cv2.CAP_PROP_FRAME_COUNT
    CAP_FRAME_WIDTH  = cv2.CAP_PROP_FRAME_WIDTH
    CAP_FRAME_HEIGHT = cv2.CAP_PROP_FRAME_HEIGHT
    CAP_FPS = cv2.CAP_PROP_FPS
    CAP_POS_FRAMES = cv2.CAP_PROP_POS_FRAMES
    
    print "no cv"
from IPython.html.widgets import interact, interactive, fixed
from IPython.display import clear_output, display, HTML
from IPython.html import widgets

In [4]:

NAME = "openstack"
fn_base = "%s/%s"%(NAME,NAME)
if  os.path.isfile(fn_base+".mp4"):
    fn = fn_base+".mp4"
elif os.path.isfile(fn_base+".avi"):
    fn = fn_base+".avi"
else:
    print "original video file does not exist"    
v2 = cv2.VideoCapture(fn)
SW, SH = v2.get(CAP_FRAME_WIDTH), v2.get(CAP_FRAME_HEIGHT)
FRAMES = v2.get(CAP_FRAME_COUNT)
v2.get(CAP_FPS), v2.get(CAP_FRAME_WIDTH), v2.get(CAP_FRAME_HEIGHT), v2.get(CAP_FRAME_COUNT)

original video file does not exist

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-dcdde810701a> in <module>()
      7 else:
      8     print "original video file does not exist"
----> 9 v2 = cv2.VideoCapture(fn)
     10 SW, SH = v2.get(CAP_FRAME_WIDTH), v2.get(CAP_FRAME_HEIGHT)
     11 FRAMES = v2.get(CAP_FRAME_COUNT)

NameError: name 'fn' is not defined

In [403]:

from io import BytesIO
import PIL
from IPython.display import display, Image

def display_img_array(ima, cvt=None, **kwargs):
    if cvt:
        ima = cv2.cvtColor(ima, cvt)
    im = PIL.Image.fromarray(ima)
    bio = BytesIO()
    im.save(bio, format='png')
    display(Image(bio.getvalue(), format='png', **kwargs))

In [404]:

frame = None
frame_n = 0
slide_n = 0
slide = None

In [405]:

# select frame
def select_frame(n):
    global frame, frame_n
    frame_n = n
    v2.set(CAP_POS_FRAMES, n)
    ret, frame = v2.read()
    display_img_array(frame, width=480, cvt = cv2.COLOR_BGR2RGB)
interact(select_frame, n=widgets.IntSliderWidget(min=0, max=FRAMES-1, step=1, value=frame_n))

In [406]:

def load_original_slides(name):
    original_slides = []
    i = 0
    text = widgets.TextWidget()
    display(text)
    while True:    
        img = cv2.imread("%s/%s-%d.png"%(name, name, i))        
        if img is None:
            break
        text.value = "%s, %d, %d"%(img.shape, len(original_slides),i)
        original_slides.append(img)
        i+=1
    return original_slides

In [407]:

def normalize(im):    
    im=cv2.cvtColor(im, cv2.CV_32F)
    im=cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    #im=cv2.equalizeHist(im)
    return im
original_slides = load_original_slides(NAME)
#normalized_slides = [cv2.blur(normalize(s), (11,11))  for s in original_slides]

In [408]:

# select slide
def select_slide(n):
    global slide, slide_n
    slide_n = n
    slide = original_slides[n]
    display_img_array(slide, width=480, cvt=cv2.COLOR_BGR2RGB)
interact(select_slide, n=widgets.IntSliderWidget(min=0, max=len(original_slides)-1, step=1, value=slide_n))

slide_h, slide_w = slide.shape[:2]
print slide_w, slide_h

In [368]:

p=(0,0, SH, SW)
q=(0,0,slide_h,slide_w)
blur_factor=16
p,q = (11, 135, 1080, 1753) , (0, 0, 2113, 2844) 
#p,q = (10, 159, 1080, 1750) , (0, 35, 2115, 2844) #ls
#p,q =  (10, 159, 1080, 1754) , (0, 39, 2112, 2844)   #graphtool

In [369]:

def gray2vector(img):
    v = img.reshape(-1).astype(float) 
    v = v - np.average(v) 
    return v/np.linalg.norm(v)
# interactive tool to find p,q
def i_compare(img1, img2, px, qx, blur_factorx):
    global p,q, blur_factor
    p = px
    q = qx
    blur_factor = blur_factorx
    gray1 = normalize(img1)[p[0]:p[2],p[1]:p[3]]    
    gray2 = cv2.blur(normalize(img2), (blur_factor,blur_factor)) [q[0]:q[2], q[1]:q[3]]
    vector1 = gray2vector(cv2.resize(gray1, (128,128)))
    vector2 = gray2vector(cv2.resize(gray2, (128,128)))                                
    score = np.dot(vector1, vector2)
    print p, ",", q, ",", blur_factor, score
    
    r1 = cv2.resize(gray1, (512,512), interpolation = cv2.INTER_CUBIC)
    r2 = cv2.resize(gray2, (512,512), interpolation = cv2.INTER_CUBIC)  
    img3 = cv2.addWeighted(r1, 0.5, r2, 0.5, 0)
    r3 = np.concatenate((r1[:256,:256],r2[:256,256:]), axis=1)
    r4 = np.concatenate((r2[256:,:256],r1[256:,256:]), axis=1)
    r5 = np.concatenate((r3,r4), axis=0)
    img5 = np.concatenate((r1[:256,:],r2[256:,:]), axis=0)
    out = np.concatenate((img3,r5), axis=1)    
    if 1:
        img6 = img1.copy()
        poly =  [(p[1],p[0]), (p[1],p[2]), (p[3],p[2]), (p[3],p[0])]
        cv2.polylines(img6, np.int32([poly]), True, (0,255,255), 3)
        display_img_array(img6, width=500, cvt=cv2.COLOR_BGR2RGB)
    display_img_array(out, width=800)

In [370]:

def iicompare(pa, pb, pc, pd, qa, qb,qc, qd, blur_factorx):
    i_compare(frame, original_slides[slide_n], (pa, pb, pc, pd), (qa, qb,qc, qd), blur_factorx)
ks = "abcd"
kwargs = {}
for i in range(4):
    kwargs["p"+ks[i]]=widgets.IntSliderWidget(min=0, max=SW if i%2 else SH, step=1, value=p[i])
    kwargs["q"+ks[i]]=widgets.IntSliderWidget(min=0, max=slide_w if i%2 else slide_h, step=1, value=q[i])
kwargs['blur_factorx']=widgets.IntSliderWidget(min=0, max=40, step=1, value=blur_factor)
i = interact(iicompare, **kwargs)

(11, 135, 1080, 1753) , (0, 0, 2113, 2844) , 16 0.0162748352803

In [409]:

# Find key points
#orb = cv2.ORB()
sift = cv2.SIFT()
img = original_slides[slide_n]
kp1, des1 = sift.detectAndCompute(frame, None)
kp2, des2 = sift.detectAndCompute(img, None)
k1 = cv2.drawKeypoints(frame, kp1)
display_img_array(k1,cvt=cv2.COLOR_BGR2RGB)
k2 = cv2.drawKeypoints(img, kp2)
display_img_array(k2,cvt=cv2.COLOR_BGR2RGB)

In [410]:

# Match Key points
if 1: # Brute Force Match
    #bf = cv2.BFMatcher(cv2.NORM_HAMMING) #, crossCheck=True)
    bf = cv2.BFMatcher(cv2.NORM_L2) #, crossCheck=True)
    matches = bf.knnMatch(des1, des2, k=2)
else: #Flann Match
    index_params= dict(algorithm = 0, tree=5)
    search_params = dict(checks=50) 
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

In [411]:

# Draw Matches
H1, W1 = frame.shape[:2]
slide_h, slide_w = H2,W2 = img.shape[:2]
#print img.shape, frame.shape
from random import randint
img2 = cv2.resize(img, ( 800, H2*800/W2))
NH1 = H1*800/W1
frame2 = cv2.resize(frame, (800, NH1))
outimg = np.concatenate((frame2, img2), axis=0)
def draw_match_line(pt1, pt2):
    draw1(pt1)
    draw2(pt2)
    pt1 = pt1[0]*800/W1, pt1[1]*800/W1
    pt2 = pt2[0]*800/W2, pt2[1]*800/W2+NH1
    poly =  [pt1, pt2]
    cv2.polylines(outimg, np.int32([poly]), True, (randint(0,255), randint(0,255), randint(0,255)), 1)
def draw1(pt):
    pt = np.float32(pt)*800/W1
    pt = tuple(np.int32(pt))
    cv2.circle(outimg, pt, 3, (0,0,255))
def draw2(pt2):
    pt2 = int(pt2[0]*800/W2), int(pt2[1]*800/W2+NH1)
    cv2.circle(outimg, pt2, 3, (0,0,255))
    #cv2.polylines(outimg, np.int32([poly]), True, (0,255,255), 3)
m2 = []
for x in matches:
    if len(x)==2 and x[0].distance> 0.6*x[1].distance:
        continue
    if len(x)==0:
        continue
    m2.append(x[0])
m2.sort(key=lambda x:x.distance)
for x in m2[:20]:
    #print x.queryIdx, x.trainIdx
    pt1 = kp1[x.queryIdx].pt
    pt2 = kp2[x.trainIdx].pt
    # print kp1[x.queryIdx].angle,  kp2[x.trainIdx].angle
    print pt1, pt2
    if abs(kp1[x.queryIdx].angle-kp2[x.trainIdx].angle)<30 or abs(kp1[x.queryIdx].angle-kp2[x.trainIdx].angle)>330:
        draw_match_line(pt1,pt2)
display_img_array(outimg, cvt=cv2.COLOR_BGR2RGB)

(161.99903869628906, 224.96372985839844) (518.3925170898438, 354.91278076171875)
(331.57733154296875, 225.7040557861328) (1053.552490234375, 356.93701171875)
(189.8707733154297, 267.4158935546875) (611.198486328125, 505.308349609375)
(276.41949462890625, 225.6761474609375) (881.190673828125, 356.9822082519531)
(214.9832763671875, 225.5186767578125) (686.2059936523438, 356.61883544921875)
(214.6118927001953, 255.2736053466797) (687.6724243164062, 461.0347595214844)
(185.77655029296875, 281.2881774902344) (594.4990234375, 553.4267578125)
(241.54348754882812, 297.7165832519531) (770.9839477539062, 610.8389282226562)
(242.3095703125, 323.9939270019531) (773.7705078125, 704.0308837890625)
(203.71231079101562, 338.2104797363281) (658.8325805664062, 753.5855712890625)
(302.68109130859375, 224.83151245117188) (960.25927734375, 354.49188232421875)
(187.41436767578125, 295.66485595703125) (601.0609130859375, 602.9800415039062)
(107.53614807128906, 239.78465270996094) (351.013427734375, 406.6133117675781)
(431.9972839355469, 225.6212158203125) (1368.0989990234375, 356.9618225097656)
(203.71231079101562, 338.2104797363281) (658.8325805664062, 753.5855712890625)
(289.3377685546875, 236.04367065429688) (918.9222412109375, 391.6338806152344)
(185.77655029296875, 281.2881774902344) (594.4990234375, 553.4267578125)
(270.7696838378906, 296.1000061035156) (866.5843505859375, 606.5394287109375)
(415.5755615234375, 338.00750732421875) (1317.5523681640625, 752.4063110351562)
(174.84124755859375, 235.9998779296875) (561.5839233398438, 393.2644958496094)

In [412]:

# Use Matches to estimate p,q 
a = None
b = None
for x in m2[:20]:
    pt1 = kp1[x.queryIdx].pt
    pt2 = kp2[x.trainIdx].pt    
    if abs(kp1[x.queryIdx].angle-kp2[x.trainIdx].angle)<30 or abs(kp1[x.queryIdx].angle-kp2[x.trainIdx].angle)>330:
        #print pt1, pt2
        v = np.array(pt2)
        if a is None:
            a = v
        else:        
            a = np.vstack([a, v])        
        v = np.array([pt1[1], pt1[0]])
        if b is None:
            b = v
        else:
            b = np.vstack([b, v])            
a = np.hstack([a, np.ones((a.shape[0], 1))])
r = np.linalg.lstsq(a,b)[0]
#print a
#print b
def compute_p(r,q):
    est = np.dot (np.array([[q[1],q[0],1], (q[3],q[2], 1)]), r)
    return tuple(est.astype(int)[0])+tuple(est.astype(int)[1])  

def estimate0(img1, img2, r, q):
    p = compute_p(r, q)
    gray1 = normalize(img1)[max(0,p[0]):p[2],max(0,p[1]):p[3]]    
    gray2 = cv2.blur(normalize(img2), (blur_factor,blur_factor)) [max(0,q[0]):q[2], max(0,q[1]):q[3]]
    L = 256
    vector1 = gray2vector(cv2.resize(gray1, (L,L)))
    vector2 = gray2vector(cv2.resize(gray2, (L,L)))   
    score = np.dot(vector1, vector2)
    #print p, ",", q, ",", blur_factor, score, n1, n2
    return p, score

def estimate(img1, img2, r):
    q = [0,0,slide_h, slide_w]
    while True:
        p = compute_p(r, q)
        if p[0]<0:
            q[0]+=1
        elif p[1]<0:
            q[1]+=1
        elif p[2]>H1:
            q[2]-=1
        elif p[3]>W1:
            q[3]-=1
        else:
            break        
    best_q = tuple(q)
    print r,q 
    best_p, best_score = estimate0(img1, img2, r, q)
    #print best_p, best_q, best_score
    for i in range(4):
        while True:
            q = list(best_q)
            p = best_p
            while  compute_p(r,q)[i] == best_p[i]:
                if i < 2:                
                    q[i]+=1
                else:
                    q[i]-=1               
            p, score = estimate0(img1, img2, r, q)            
            if score > best_score:
                best_score, best_p, best_q = score, p, tuple(q)                
            else:
                print best_score, best_p,best_q
                break
    return best_p, best_q
        
p,q =  estimate(frame, img, r)
q2 = [0,0,slide_h, slide_w]

while True:
    p2 = compute_p(r, q2)
    if p2[0]<0:
        q2[0]+=1
    elif p2[1]<0:
        q2[1]+=1
    elif p2[2]>H1:
        q2[2]-=1
    elif p2[3]>W1:
        q2[3]-=1
    else:
        break
        
print "p1,q1, blur_factor=%s, %s, %s"%(p,q, blur_factor)
print "p2,q2=%s, %s"%(p2,q2)

[[  1.19798445e-04   3.18916565e-01]
 [  2.83634885e-01  -3.54573670e-03]
 [  1.24323924e+02  -2.69111058e+00]] [0, 6, 1125, 2000]
0.935130642066 (126, 0, 443, 631) (6, 6, 1125, 2000)
0.945250312301 (126, 1, 443, 631) (6, 12, 1125, 2000)
0.945250312301 (126, 1, 443, 631) (6, 12, 1125, 2000)
0.945250312301 (126, 1, 443, 631) (6, 12, 1125, 2000)
p1,q1, blur_factor=(126, 1, 443, 631), (6, 12, 1125, 2000), 14
p2,q2=(124, 0, 443, 631), [0, 6, 1125, 2000]

In [315]:

print a
print b
print r

[[  1.02217798e+03   1.10026440e+03   1.00000000e+00]
 [  1.16708862e+03   1.09134570e+03   1.00000000e+00]
 [  1.91477148e+03   1.11786914e+03   1.00000000e+00]
 [  9.24786865e+02   1.10114709e+03   1.00000000e+00]
 [  1.16344507e+03   1.09387708e+03   1.00000000e+00]
 [  1.04745715e+03   1.06944519e+03   1.00000000e+00]
 [  1.80476355e+03   1.05811792e+03   1.00000000e+00]
 [  1.80867627e+03   1.14804822e+03   1.00000000e+00]
 [  1.19675635e+03   1.11490979e+03   1.00000000e+00]
 [  1.04745715e+03   1.06944519e+03   1.00000000e+00]
 [  1.13015344e+03   1.11507751e+03   1.00000000e+00]
 [  1.10480042e+03   1.07255151e+03   1.00000000e+00]
 [  1.72565710e+03   1.10005322e+03   1.00000000e+00]
 [  1.02930823e+03   1.10832776e+03   1.00000000e+00]
 [  1.16344507e+03   1.09387708e+03   1.00000000e+00]
 [  1.13015344e+03   1.11507751e+03   1.00000000e+00]
 [  1.65626440e+03   1.13446399e+03   1.00000000e+00]
 [  1.04783923e+03   1.13166785e+03   1.00000000e+00]
 [  1.10579199e+03   1.16299585e+03   1.00000000e+00]
 [  1.10579199e+03   1.16299585e+03   1.00000000e+00]]
[[ 564.23040771  508.11703491]
 [ 559.6427002   582.16644287]
 [ 570.67175293  960.28912354]
 [ 564.472229    458.94497681]
 [ 560.58129883  579.92053223]
 [ 550.10772705  520.59680176]
 [ 547.01080322  903.74041748]
 [ 583.57598877  905.43280029]
 [ 571.074646    596.10461426]
 [ 550.10772705  520.59680176]
 [ 571.00958252  563.27270508]
 [ 552.94390869  551.15997314]
 [ 564.16503906  864.84680176]
 [ 570.29431152  513.76660156]
 [ 560.58129883  579.92053223]
 [ 570.97912598  563.41577148]
 [ 579.70501709  831.00036621]
 [ 578.63458252  520.81652832]
 [ 591.18731689  551.18096924]
 [ 591.01019287  551.35614014]]
[[ -4.98581195e-04   5.05893600e-01]
 [  4.31578739e-01   5.78007709e-03]
 [  9.00070427e+01  -1.49415219e+01]]

In [88]:

# Compare rendered video with original one
_NAME = "graphtool"
_v1 = cv2.VideoCapture("%s/better_%s.avi"%(_NAME, _NAME))
_v2 = cv2.VideoCapture("%s/%s.avi"%(_NAME, _NAME))
_SW, _SH = _v2.get(cv.CV_CAP_PROP_FRAME_WIDTH), _v2.get(cv.CV_CAP_PROP_FRAME_HEIGHT)
_FRAMES = _v2.get(cv.CV_CAP_PROP_FRAME_COUNT)
def compare_frame(n):    
    _v1.set(cv.CV_CAP_PROP_POS_FRAMES, n)
    _v2.set(cv.CV_CAP_PROP_POS_FRAMES, n)
    frame1, frame2 = _v1.read()[1],  _v2.read()[1]
    display_img_array(np.concatenate((frame1, frame2), axis=1), width=1600, cvt=cv.CV_BGR2RGB)
interact(compare_frame, n=widgets.IntSliderWidget(min=0, max=_FRAMES-1, step=1, value=0))

In [167]:

# Try pyopencl
def test1(img1, img2):    
    blur_factor = 10
    gray1 = normalize(img1)[p[0]:p[2],p[1]:p[3]]    
    gray2 = cv2.blur(normalize(img2), (blur_factor,blur_factor)) [q[0]:q[2], q[1]:q[3]]
    L = 256
    vector1 = gray2vector(cv2.resize(gray1, (L,L)))
    vector2 = gray2vector(cv2.resize(gray2, (L,L)))   
    score = np.dot(vector1, vector2)
    #print p, ",", q, ",", blur_factor, score, n1, n2
%timeit test1(frame, original_slides[slide_n])
%load_ext pyopencl.ipython_ext
import pyopencl as cl
from pyopencl.reduction import ReductionKernel
import pyopencl.array
ctx = cl.create_some_context(interactive=True)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
krnl = ReductionKernel(ctx, np.float32, neutral="0",
        reduce_expr="a+b", map_expr="x[i]*y[i]",
        arguments="__global float *x, __global float *y")
def test2(img1, img2):
    blur_factor = 10
    gray1 = normalize(img1)[p[0]:p[2],p[1]:p[3]]
    gray2 = cv2.blur(normalize(img2), (blur_factor,blur_factor)) [q[0]:q[2], q[1]:q[3]]
    L = 256
    vector1 = gray2vector(cv2.resize(gray1, (L,L)))
    vector2 = gray2vector(cv2.resize(gray2, (L,L)))
    clv1 = cl.array.to_device(queue, vector1)
    clv2 = cl.array.to_device(queue, vector2)    
    #score = krnl(clv1, clv2, queue=queue).get()
    score = cl.array.dot(clv1, clv2, queue=queue)
    #print p, ",", q, ",", blur_factor, score, n1, n2
%timeit test2(frame, original_slides[slide_n])

10 loops, best of 3: 107 ms per loop