In [1]:
%matplotlib inline
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import sys
# Capture-property constant names differ between the legacy `cv` API and
# the modern `cv2` API; resolve them once here so the rest of the notebook
# is version-agnostic.
# Fix: the old code also did an unconditional `import cv` *above* this
# try block, which crashed on cv2-only installs and defeated the fallback.
try:
    import cv
    CAP_FRAME_COUNT = cv.CV_CAP_PROP_FRAME_COUNT
    CAP_FRAME_WIDTH  = cv.CV_CAP_PROP_FRAME_WIDTH
    CAP_FRAME_HEIGHT = cv.CV_CAP_PROP_FRAME_HEIGHT
    CAP_FPS = cv.CV_CAP_PROP_FPS
    CAP_POS_FRAMES = cv.CV_CAP_PROP_POS_FRAMES
except (ImportError, AttributeError):
    # No legacy bindings (or they lack these constants): use the cv2 names.
    CAP_FRAME_COUNT = cv2.CAP_PROP_FRAME_COUNT
    CAP_FRAME_WIDTH  = cv2.CAP_PROP_FRAME_WIDTH
    CAP_FRAME_HEIGHT = cv2.CAP_PROP_FRAME_HEIGHT
    CAP_FPS = cv2.CAP_PROP_FPS
    CAP_POS_FRAMES = cv2.CAP_PROP_POS_FRAMES
In [2]:
from IPython.html.widgets import interact, interactive, fixed
from IPython.display import clear_output, display, HTML
from IPython.html import widgets
In [3]:
from io import BytesIO
import PIL
from IPython.display import display, Image
def img_to_png(ima, cvt=None):
    """Encode an image array as PNG bytes.

    ima : image array (as produced by cv2 / numpy).
    cvt : optional cv2 color-conversion code applied before encoding.
    Returns the PNG file contents as a byte string.
    """
    if cvt:
        ima = cv2.cvtColor(ima, cvt)
    buf = BytesIO()
    PIL.Image.fromarray(ima).save(buf, format='png')
    return buf.getvalue()
def display_img_array(ima, cvt=None, **kwargs):
    """Render an image array inline via IPython's rich display.

    Extra keyword arguments (e.g. width) are forwarded to IPython Image.
    """
    png_bytes = img_to_png(ima, cvt=cvt)
    display(Image(png_bytes, format='png', **kwargs))
In [4]:
def normalize(im):    
    """Convert a BGR frame to a grayscale image used for slide matching."""
    # NOTE(review): cv2.CV_32F is a *depth* constant, not a color-conversion
    # code; passing it to cvtColor is suspicious. The captured outputs show
    # the notebook ran successfully with the OpenCV build it was written
    # against, so the call is left untouched -- confirm the intent (float
    # conversion?) before reusing this elsewhere.
    im=cv2.cvtColor(im, cv2.CV_32F)
    im=cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # Histogram equalization was tried and disabled.
    #im=cv2.equalizeHist(im)
    return im
In [ ]:
def gray2vector(img):
    """Flatten a grayscale image into a zero-mean, unit-norm 1-D vector.

    img : 2-D (or any-shape) numeric array.
    Returns a float vector of length img.size with mean 0 and L2 norm 1,
    suitable for cosine-similarity comparison via dot products.

    Fix: a constant image previously divided by a zero norm and produced
    a NaN vector, which silently poisoned every downstream dot product;
    such images now map to the zero vector (similarity 0 with everything).
    """
    v = img.reshape(-1).astype(float)
    v = v - np.average(v)
    n = np.linalg.norm(v)
    if n == 0:
        return v  # already all zeros
    return v / n
In [ ]:
def diff_i(gray, i):
    """Cosine similarity between frame `gray` and slide number `i`."""
    # NOTE(review): reads module-global `slides_v` (unlike compare_slides,
    # which takes it as a parameter) -- looks like leftover scratch code;
    # no caller is visible in this file.
    v1 = gray2vector(gray)
    return np.dot(v1, slides_v[i])

def compare_slides(gray, slides_v):
    """Find the best-matching slide for a grayscale frame.

    gray     : cropped, normalized frame image.
    slides_v : matrix whose rows are unit slide vectors.
    Returns (best_score, best_index), where best_score is the highest
    cosine similarity across all slides.
    """
    frame_vec = gray2vector(gray)
    scores = np.dot(slides_v, frame_vec)
    best = np.argmax(scores)
    return scores[best], best

def compare_absdiff(gray):
    """Rank all slides (module-global `slides`) by summed absolute pixel
    difference against `gray`; most similar slide first.

    Returns a sorted list of (difference_sum, slide_index) pairs.
    """
    diffs = [(cv2.absdiff(s, gray).sum(), i) for i, s in enumerate(slides)]
    return sorted(diffs)
In [57]:
def frame_to_time(f, r):
    """Format frame number `f` at frame rate `r` (frames/s) as "M:SS.s"."""
    total_seconds = f / r
    minutes = int(total_seconds / 60)
    seconds = total_seconds - 60 * minutes
    return "%d:%04.1f" % (minutes, seconds)
def sync_video( fn, p, slides, slides_v, threshold=0.8, step=10, dark=1000, STOP=-1, debug=False):
    cap = cv2.VideoCapture(fn)
    frame_rate = cap.get(cv.CV_CAP_PROP_FPS)
    print "frame_rate", frame_rate
    num_of_frames = cap.get(cv.CV_CAP_PROP_FRAME_COUNT)
    frame_index =-1
    last_slide = -1
    last_start = -1
    frame_list = []
    progress = widgets.IntProgressWidget(min=0, max = num_of_frames - 1, value=0)
    progress_text = widgets.TextWidget()
    progress.set_css('background', 'black')
    display(progress)
    display(progress_text)   
    img_widget = widgets.ImageWidget()
    display(img_widget)
    slide_h, slide_w = original_slides[0].shape[:2]
    while cap.isOpened():
        frame_index +=1
        ret, frame = cap.read()
        if not ret:
            break        
        if frame_index%step ==0:
            if STOP!=-1 and frame_index > STOP:
                break                        
            gray = cv2.resize(normalize(frame)[p[0]:p[2],p[1]:p[3]], (256,256))        
            darklevel = np.linalg.norm(gray.reshape(-1).astype(float)) 
            if darklevel < dark:
                # too dark                
                this_slide, v, i = -1, -1, 0
            else:            
                v, i = compare_slides(gray, slides_v)
                this_slide = i if v > threshold else -1 
            if debug:
                if i>=0:
                    frame2 = frame.copy()            
                    frame2[p[0]:p[2], p[1]:p[3]] = cv2.resize(original_slides[i][q[0]:q[2], q[1]:q[3]], (p[3]-p[1], p[2]-p[0]))
                    outp = np.concatenate( (frame2, cv2.addWeighted(frame,0.5,frame2, 0.5,0), frame), axis = 1)
                    display_img_array(outp, width=1200, cvt=cv2.COLOR_BGR2RGB)
                else:
                    display_img_array(frame, width=400, cvt=cv2.COLOR_BGR2RGB)
                print v,i
            if frame_index%100 ==0:                
                frame2 = original_slides[this_slide] if this_slide >=0 else np.zeros_like(original_slides[0])
                frame2 = cv2.resize(frame2, (slide_w*frame.shape[0]/slide_h, frame.shape[0]))                                
                outp = np.concatenate( (frame, frame2), axis = 1)
                img_widget.value = img_to_png(outp, cvt=cv2.COLOR_BGR2RGB)
                img_widget.height=600
                progress.value = frame_index            
                progress_text.value = "%d/%d (%.1f)"%(frame_index, num_of_frames, 100.0*frame_index/num_of_frames)
                if i >=0:
                    progress_text.value += " match: %d, %s"%(i, v)
                    
            if this_slide != last_slide:
                # update
                frame_list.append( (last_start,  frame_index-1, last_slide))
                
                # display information                
                if last_slide >=0:                
                    fl = frame_list[-1]
                    t1, t2 = frame_to_time(fl[0], frame_rate), frame_to_time(fl[1], frame_rate)
                    print fl, "=(%s, %s)"%(t1,t2), "v=%f"%v, "dark=%d"%darklevel
                last_start = frame_index                            
        last_slide = this_slide
    # last update
    frame_list.append( (last_start,  frame_index-1, last_slide))
    cap.release()
    return frame_list
In [70]:
def write_file(fn, p,q, outfn, original_slides, sync_result, M=20, fourcc="XVID", SKIP=None, WH=None):    
    # W,H = 1920, 1080
    cap = cv2.VideoCapture(fn)
    SW, SH = int(cap.get(cv.CV_CAP_PROP_FRAME_WIDTH)), int(cap.get(cv.CV_CAP_PROP_FRAME_HEIGHT))
    if WH==None:
        W, H = SW, SH
    else:
        W,H = WH
    print "(W,H)", W,H
    sys.stdout.flush()
    p2 = ( p[0]*H/SH, p[1]*W/SW, p[2]*H/SH, p[3]*W/SW)
    pw, ph = p2[3]-p2[1], p2[2]-p2[0]
    print p2, q
    fourcc = cv.FOURCC(*fourcc)
    num_of_frames = cap.get(cv.CV_CAP_PROP_FRAME_COUNT)
    frame_rate = cap.get(cv.CV_CAP_PROP_FPS)
    print "frame_rate", frame_rate
    sys.stdout.flush()
    out = cv2.VideoWriter(outfn, fourcc, frame_rate, (W, H))
    frame_index =-1
    last_slide = -1
    last_start = -1
    frame_list = []
    result_index = 0
    progress = widgets.IntProgressWidget(min=0, max = num_of_frames - 1, value=0)
    progress_text = widgets.TextWidget()
    progress.set_css('background', 'black')
    display(progress)
    display(progress_text)
    img_widget = widgets.ImageWidget()
    display(img_widget)
    while cap.isOpened():
        frame_index +=1
        ret, frame = cap.read()
        if not ret:
            break
        while result_index < len(sync_result) and sync_result[result_index][1] < frame_index:
            result_index += 1            
        the_slide = (-1,-1,-1) if result_index >= len(sync_result) else sync_result[result_index]
        if SKIP and the_slide[2] in SKIP:
            the_slide = (-1,-1,-1)
        original_frame = cv2.resize(frame, (W, H), interpolation = cv2.INTER_CUBIC)
        if the_slide[2] >=0 and the_slide[1]-the_slide[0]>3*M:
            slide = original_slides[the_slide[2]]
            inner_frame = cv2.resize(slide[q[0]:q[2], q[1]:q[3]],  (pw, ph), interpolation = cv2.INTER_CUBIC )
            d = min(frame_index-the_slide[0], the_slide[1]-frame_index)
            out_frame = original_frame.copy()
            out_frame[p2[0]:p2[2], p2[1]:p2[3]] = inner_frame
            if d < M:
                out_frame = cv2.addWeighted(out_frame, d*1.0/M , original_frame, 1- d*1.0/M, 0)
        else:
            out_frame = original_frame
        out.write(out_frame)
        if frame_index%100 ==0:
            progress.value = frame_index            
            progress_text.value = "%d/%d (%.1f)"%(frame_index, num_of_frames, 100.0*frame_index/num_of_frames)
            disp_frame = np.concatenate((out_frame[:, :W/2], original_frame[:,W/2:]), axis=1)            
            img_widget.value = img_to_png(disp_frame, cvt=cv2.COLOR_BGR2RGB)  
            img_widget.width = "800"
    cap.release()
    out.release()
            
In [ ]:
import os.path
def load_original_slides(name):
    original_slides = []
    i = 0
    #progress = widgets.IntProgressWidget(min=0, max = num_of_frames - 1, value=0)
    progress_text = widgets.TextWidget()
    #progress.set_css('background', 'black')
    #display(progress)
    display(progress_text)
    while True:         
        progress_text.value = "loading %d"%i
        img = cv2.imread("%s/%s-%d.png"%(name, name, i))
        if img is None:
            break        
        original_slides.append(img)
        i+=1
    print "load original slides", len(original_slides)
    return original_slides
def prepare_slides(original_slides, q, blur_factor):
    normalized_slides = (cv2.blur(normalize(s), (blur_factor, blur_factor))  for s in original_slides)
    slides = [cv2.resize(s[q[0]:q[2], q[1]:q[3]], (256,256), interpolation = cv2.INTER_CUBIC)  for s in normalized_slides]
    slides_v = np.array([gray2vector(s) for s in slides])
    print "slides prepared"
    return slides, slides_v
    
In [ ]:
# Module-level cache: the loaded slide deck, the talk name it belongs to,
# and the last sync result -- lets repeated auto_sync calls skip passes.
original_slides, original_slides_name, result = None, None, None
In [73]:
def auto_sync(NAME, p1, q1, blur_factor, p2=None, q2=None,  threshold=0.8, step=10, dark=1500, STOP=-1, debug=False, 
              SKIP=None, M=20, PASS=[3],  fourcc="XVID",  EXT="avi", WH=None):
    global original_slides, result, original_slides_name
    print "NAME=", NAME       
    if 0 in PASS or not os.path.isfile("%s/%s-0.png"%(NAME,NAME)) : # 0 Extract PDF
        print "extract slides"
        sys.stdout.flush()
        print os.system("convert -density 200 %s/%s.pdf  %s/%s.png"%(NAME,NAME,NAME,NAME))
    result_slides_name = original_slides_name
    if 1 in PASS or original_slides_name != NAME:
        print "load original png"
        original_slides = load_original_slides(NAME)
        original_slides_name = NAME    
    fn_base = "%s/%s"%(NAME,NAME)    
    if  os.path.isfile(fn_base+".mp4"):
        fn = fn_base+".mp4"
    elif os.path.isfile(fn_base+".avi"):
        fn = fn_base+".avi"
    else:
        print "original video file does not exist"
        return
    outfn = "%s/better_%s.%s"%(NAME, NAME, EXT)
        

    if 2 in PASS or result_slides_name != NAME: # Sync Video and Slides
        print "prepare slides"
        slides, slides_v = prepare_slides(original_slides, q1, blur_factor)        
        print "syncing video"
        result = sync_video(fn, p1, slides, slides_v, threshold=threshold, step=step, dark=dark, STOP=STOP, debug=debug)
        print "sync_video done"   
        
    if p2 is None:   # full screen
        p2 = p1
    if q2 is None:  # full screen
        q2 = q1
        
    if 3 in PASS or original_slides_name != NAME:
        print "start writing and converting"
        TEMP_OUT = "temp_out."+EXT
        write_file(fn, p1, q1, TEMP_OUT, original_slides, result, M=M, fourcc=fourcc, SKIP=SKIP, WH=WH)
        print "write done"
        sys.stdout.flush()
        retcode = os.system("avconv -y -i %s -i %s -map 0:v -map 1:a -c:v copy -c:a copy %s"%(TEMP_OUT, fn, outfn))
        print "covert done", retcode
In [ ]:
# Driver cell: p1 = frame crop (y0, x0, y1, x1) showing the slide,
# q1 = crop within the slide images, blur_factor = slide pre-blur.
p1,q1, blur_factor = (10, 160, 1080, 1754) , (0, 40, 2112, 2844) , 10 
p2, q2 = p1, q1
# PASS=[2] re-runs only the sync pass using the cached slide deck.
auto_sync("tulip",  p1,q1, blur_factor, p2,q2, threshold=0.9, M=5, fourcc="x264", EXT="mp4", SKIP=[28], PASS=[2])
In [ ]:
p1,q1,blur_factor = (10, 159, 1080, 1754) , (0, 39, 2112, 2844) , 16
p2, q2 =  (10, 131, 1080, 1750) , (0, 0, 2115, 2844),
auto_sync("graphtool", p1,q1, blur_factor, p2, q2, threshold=0.7, SKIP=[7, 90, 91,92,93,94,95,96,97], PASS=[2])
In [ ]:
p1,q1, blur_factor = (10, 159, 1080, 1750) , (0, 35, 2115, 2844), 16
p2,q2 = (10, 138, 1080, 1750) , (0, 0, 2115, 2844) # test abcdefg
auto_sync("ls",  p1,q1, blur_factor, p2,q2, threshold=0.8, SKIP=[4,35,36] , PASS=[2])
In [ ]:
p1,q1, blur_factor = (211, 281, 958, 1723) , (0, 68, 2133, 2838) , 18
p2,q2 = None, None 
auto_sync("fabric",  p1,q1, blur_factor, p2,q2, threshold=0.8, SKIP=[19], M=40)
In [ ]:
p1,q1, blur_factor = (145, 160, 954, 1751) , (0, 27, 1125, 2000) , 12
p2, q2 = None, None
auto_sync("vote",  p1,q1, blur_factor, p2,q2, threshold=0.75, M=2)
In [ ]:
p1,q1, blur_factor = (11, 144, 1080, 1752) , (0, 0, 1485, 2000) , 16
p2, q2 = p1, q1
auto_sync("mezz",  p1,q1, blur_factor, p2,q2, threshold=0.9, M=20)
In [ ]:
p1,q1, blur_factor = (11, 135, 1080, 1753) , (0, 0, 2113, 2844) , 16
p2, q2 = p1, q1
auto_sync("summly",  p1,q1, blur_factor, p2,q2, threshold=0.935, M=20)
In [ ]:
p1,q1, blur_factor = (19, 160, 1067, 1672) , (0, 80, 2070, 2844), 36
p2, q2 = p1, q1
auto_sync("StreetVoice",  p1,q1, blur_factor, p2,q2, threshold=0.5, M=20)
In [ ]:
p1,q1, blur_factor = (19, 108, 1080, 1688) , (0, 0, 2084, 2844), 26
p2, q2 = p1, q1
auto_sync("grs",  p1,q1, blur_factor, p2,q2, threshold=0.7, M=20)
In [ ]:
p1,q1, blur_factor=(73, 7, 1028, 1429), (8, 32, 2133, 2842), 13
p2,q2=(69, 0, 1028, 1430), [0, 17, 2133, 2844]
auto_sync("dmhs",  p1,q1, blur_factor, p2,q2, threshold=0.6, M=20, SKIP=[0, 6, 17], PASS=[3])
In [ ]:
p1,q1, blur_factor=(87, 5, 404, 633), (9, 28, 1125, 2000), 14
p2,q2=(84, 0, 404, 633), [0, 9, 1125, 2000]
auto_sync("present",  p1,q1, blur_factor, p2,q2, threshold=0.7, M=20, WH=(1440,1080), PASS=[2,3])
In [ ]:
p1,q1, blur_factor=(33, 4, 453, 635), (8, 30, 2119, 2844), 18
p2,q2=(31, 0, 456, 635), [0, 8, 2133, 2844]
auto_sync("hai",  p1,q1, blur_factor, p2,q2, threshold=0.7, M=20, WH=(1440,1080))
In [76]:
p1,q1, blur_factor=(34, 2, 451, 629), (4, 8, 1500, 2000), 14
p2,q2=(32, 0, 451, 629), [0, 0, 1500, 2000]
auto_sync("vim",  p1,q1, blur_factor, p2,q2, threshold=0.8, M=20, WH=(1440,1080))
NAME= vim
load original png
load original slides 17
prepare slides
slides prepared
syncing video
frame_rate 29.97002997
(510, 1489, 0) =(0:17.0, 0:49.7) v=0.880750 dark=10112
(1490, 2859, 1) =(0:49.7, 1:35.4) v=0.874783 dark=7511
(2860, 3809, 2) =(1:35.4, 2:07.1) v=0.992393 dark=40593
(3810, 4639, 3) =(2:07.1, 2:34.8) v=0.880929 dark=9820
(4640, 5879, 4) =(2:34.8, 3:16.2) v=0.874664 dark=9693
(5880, 6299, 5) =(3:16.2, 3:30.2) v=0.314825 dark=27658
(9020, 9049, 5) =(5:01.0, 5:01.9) v=0.894895 dark=8972
(9050, 9919, 6) =(5:02.0, 5:31.0) v=0.314081 dark=27138
(10510, 10569, 6) =(5:50.7, 5:52.7) v=0.891409 dark=9972
(10570, 11779, 7) =(5:52.7, 6:33.0) v=0.312722 dark=26447
(15110, 15209, 7) =(8:24.2, 8:27.5) v=0.314112 dark=26469
(17290, 17909, 7) =(9:36.9, 9:57.6) v=0.318960 dark=24984
(19270, 19319, 7) =(10:43.0, 10:44.6) v=0.886876 dark=8421
(19320, 19349, 8) =(10:44.6, 10:45.6) v=0.890548 dark=9772
(19350, 20189, 7) =(10:45.6, 11:13.6) v=0.886495 dark=8399
(20190, 20369, 8) =(11:13.7, 11:19.6) v=0.674841 dark=7892
(20380, 20689, 8) =(11:20.0, 11:30.3) v=0.300560 dark=26469
(21740, 22079, 8) =(12:05.4, 12:16.7) v=0.882859 dark=8892
(22080, 22389, 9) =(12:16.7, 12:27.0) v=0.314180 dark=26330
(23570, 23749, 9) =(13:06.5, 13:12.4) v=0.282210 dark=22812
(24200, 24289, 9) =(13:27.5, 13:30.4) v=0.332195 dark=24593
(24850, 24909, 9) =(13:49.2, 13:51.1) v=0.884287 dark=8687
(24910, 25269, 10) =(13:51.2, 14:03.1) v=0.312524 dark=26418
(26450, 26489, 10) =(14:42.5, 14:43.8) v=0.236490 dark=11876
(26630, 26639, 9) =(14:48.6, 14:48.9) v=0.720991 dark=7883
(26650, 27539, 10) =(14:49.2, 15:18.9) v=0.670329 dark=7777
(27550, 27689, 10) =(15:19.3, 15:23.9) v=0.884074 dark=7866
(27690, 27899, 11) =(15:23.9, 15:30.9) v=0.885193 dark=8865
(27900, 27949, 10) =(15:30.9, 15:32.6) v=0.883748 dark=7886
(27950, 28209, 11) =(15:32.6, 15:41.2) v=0.352887 dark=13769
(29280, 29289, 11) =(16:17.0, 16:17.3) v=0.323675 dark=12192
(29440, 31019, 11) =(16:22.3, 17:15.0) v=0.883191 dark=8352
(31020, 31909, 12) =(17:15.0, 17:44.7) v=0.271830 dark=26401
(32770, 33209, 12) =(18:13.4, 18:28.1) v=0.885606 dark=8130
(33210, 33759, 13) =(18:28.1, 18:46.4) v=0.313255 dark=27302
(35280, 35349, 13) =(19:37.2, 19:39.5) v=0.880311 dark=9045
(35350, 35369, 14) =(19:39.5, 19:40.1) v=0.883706 dark=7905
(35370, 35509, 13) =(19:40.2, 19:44.8) v=0.296882 dark=21428
(35850, 35899, 13) =(19:56.2, 19:57.8) v=0.879523 dark=9066
(35900, 35949, 14) =(19:57.9, 19:59.5) v=0.884199 dark=7829
(35950, 35959, 13) =(19:59.5, 19:59.8) v=0.879959 dark=8882
(35960, 36919, 14) =(19:59.9, 20:31.9) v=0.306256 dark=24196
(37580, 37589, 14) =(20:53.9, 20:54.2) v=0.288880 dark=10194
(39230, 39269, 14) =(21:49.0, 21:50.3) v=0.340925 dark=11169
(42970, 42999, 14) =(23:53.8, 23:54.7) v=0.477629 dark=8324
(43100, 43119, 14) =(23:58.1, 23:58.7) v=0.223015 dark=2467
(43130, 43249, 15) =(23:59.1, 24:03.1) v=0.299696 dark=11126
(46260, 46309, 15) =(25:43.5, 25:45.2) v=0.303056 dark=12248
(47900, 47939, 15) =(26:38.3, 26:39.6) v=0.397188 dark=23937
(49930, 49949, 15) =(27:46.0, 27:46.6) v=0.319250 dark=6208
(49960, 50129, 16) =(27:47.0, 27:52.6) v=0.312347 dark=13905
sync_video done
start writing and converting
(W,H) 1440 1080
(76, 4, 1014, 1415) (4, 8, 1500, 2000)
frame_rate 29.97002997
write done
covert done 0
In [80]:
# Driver cell for the "openstack" talk; SKIP lists slide indices never
# to overlay, WH forces a 1440x1080 output resolution.
p1,q1, blur_factor=(126, 1, 443, 631), (6, 12, 1125, 2000), 12
p2,q2=(124, 0, 443, 631), [0, 6, 1125, 2000]
auto_sync("openstack",  p1,q1, blur_factor, p2,q2, threshold=0.7, M=20, WH=(1440,1080), SKIP=[3,14,35,36])
NAME= openstack
start writing and converting
(W,H) 1440 1080
(283, 2, 996, 1419) (6, 12, 1125, 2000)
frame_rate 29.97002997
write done
covert done 0
In [79]:
result
Out[79]:
[(-1, 1819, -1),
 (1820, 3119, 2),
 (3120, 4229, 3),
 (4230, 5089, 4),
 (5090, 7119, 5),
 (7120, 9649, 6),
 (9650, 12159, 7),
 (12160, 14419, 8),
 (14420, 17279, 9),
 (17280, 19809, 10),
 (19810, 22339, 11),
 (22340, 22589, 12),
 (22590, 23319, 13),
 (23320, 23399, 35),
 (23400, 23859, 36),
 (23860, 24239, 3),
 (24240, 25979, 14),
 (25980, 26689, -1),
 (26690, 28429, 16),
 (28430, 29339, 17),
 (29340, 30389, 18),
 (30390, 30849, 19),
 (30850, 30889, 20),
 (30890, 31949, 21),
 (31950, 33049, 20),
 (33050, 33079, 21),
 (33080, 33239, 22),
 (33240, 33259, 23),
 (33260, 35739, 24),
 (35740, 36519, 25),
 (36520, 37089, 26),
 (37090, 38029, 27),
 (38030, 39539, -1),
 (39540, 39749, 27),
 (39750, 40119, 28),
 (40120, 40379, 29),
 (40380, 41169, 30),
 (41170, 43199, -1),
 (43200, 43269, 30),
 (43270, 43299, 31),
 (43300, 43309, -1),
 (43310, 44159, 32),
 (44160, 44859, 33),
 (44860, 44869, 32),
 (44870, 44879, 31),
 (44880, 44889, 32),
 (44890, 44899, 33),
 (44900, 45709, 34),
 (45710, 45949, 35),
 (45950, 48038, -1)]