%matplotlib inline import numpy as np import cv2 import matplotlib.pyplot as plt import os import cv import sys try: import cv CAP_FRAME_COUNT = cv.CV_CAP_PROP_FRAME_COUNT CAP_FRAME_WIDTH = cv.CV_CAP_PROP_FRAME_WIDTH CAP_FRAME_HEIGHT = cv.CV_CAP_PROP_FRAME_HEIGHT CAP_FPS = cv.CV_CAP_PROP_FPS CAP_POS_FRAMES = cv.CV_CAP_PROP_POS_FRAMES except: CAP_FRAME_COUNT = cv2.CAP_PROP_FRAME_COUNT CAP_FRAME_WIDTH = cv2.CAP_PROP_FRAME_WIDTH CAP_FRAME_HEIGHT = cv2.CAP_PROP_FRAME_HEIGHT CAP_FPS = cv2.CAP_PROP_FPS CAP_POS_FRAMES = cv2.CAP_PROP_POS_FRAMES from IPython.html.widgets import interact, interactive, fixed from IPython.display import clear_output, display, HTML from IPython.html import widgets from io import BytesIO import PIL from IPython.display import display, Image def img_to_png(ima, cvt=None): if cvt: ima = cv2.cvtColor(ima, cvt) im = PIL.Image.fromarray(ima) bio = BytesIO() im.save(bio, format='png') return bio.getvalue() def display_img_array(ima, cvt=None, **kwargs): display(Image(img_to_png(ima, cvt=cvt) , format='png', **kwargs)) def normalize(im): im=cv2.cvtColor(im, cv2.CV_32F) im=cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) #im=cv2.equalizeHist(im) return im def gray2vector(img): v = img.reshape(-1).astype(float) v = v - np.average(v) return v/np.linalg.norm(v) def diff_i(gray, i): v1 = gray2vector(gray) return np.dot(v1, slides_v[i]) def compare_slides(gray, slides_v): v1 = gray2vector(gray) r = np.dot(slides_v, v1) i = np.argmax(r) return r[i], i def compare_absdiff(gray): return sorted( (cv2.absdiff(slides[i], gray).sum() , i) for i in range(len(slides))) def frame_to_time(f, r): s = f/r m = int(s/60) s = s -60*m return "%d:%04.1f"%(m,s) def sync_video( fn, p, slides, slides_v, threshold=0.8, step=10, dark=1000, STOP=-1, debug=False): cap = cv2.VideoCapture(fn) frame_rate = cap.get(cv.CV_CAP_PROP_FPS) print "frame_rate", frame_rate num_of_frames = cap.get(cv.CV_CAP_PROP_FRAME_COUNT) frame_index =-1 last_slide = -1 last_start = -1 frame_list = [] progress = widgets.IntProgressWidget(min=0, max = num_of_frames - 1, value=0) progress_text = widgets.TextWidget() progress.set_css('background', 'black') display(progress) display(progress_text) img_widget = widgets.ImageWidget() display(img_widget) slide_h, slide_w = original_slides[0].shape[:2] while cap.isOpened(): frame_index +=1 ret, frame = cap.read() if not ret: break if frame_index%step ==0: if STOP!=-1 and frame_index > STOP: break gray = cv2.resize(normalize(frame)[p[0]:p[2],p[1]:p[3]], (256,256)) darklevel = np.linalg.norm(gray.reshape(-1).astype(float)) if darklevel < dark: # too dark this_slide, v, i = -1, -1, 0 else: v, i = compare_slides(gray, slides_v) this_slide = i if v > threshold else -1 if debug: if i>=0: frame2 = frame.copy() frame2[p[0]:p[2], p[1]:p[3]] = cv2.resize(original_slides[i][q[0]:q[2], q[1]:q[3]], (p[3]-p[1], p[2]-p[0])) outp = np.concatenate( (frame2, cv2.addWeighted(frame,0.5,frame2, 0.5,0), frame), axis = 1) display_img_array(outp, width=1200, cvt=cv2.COLOR_BGR2RGB) else: display_img_array(frame, width=400, cvt=cv2.COLOR_BGR2RGB) print v,i if frame_index%100 ==0: frame2 = original_slides[this_slide] if this_slide >=0 else np.zeros_like(original_slides[0]) frame2 = cv2.resize(frame2, (slide_w*frame.shape[0]/slide_h, frame.shape[0])) outp = np.concatenate( (frame, frame2), axis = 1) img_widget.value = img_to_png(outp, cvt=cv2.COLOR_BGR2RGB) img_widget.height=600 progress.value = frame_index progress_text.value = "%d/%d (%.1f)"%(frame_index, num_of_frames, 100.0*frame_index/num_of_frames) if i >=0: progress_text.value += " match: %d, %s"%(i, v) if this_slide != last_slide: # update frame_list.append( (last_start, frame_index-1, last_slide)) # display information if last_slide >=0: fl = frame_list[-1] t1, t2 = frame_to_time(fl[0], frame_rate), frame_to_time(fl[1], frame_rate) print fl, "=(%s, %s)"%(t1,t2), "v=%f"%v, "dark=%d"%darklevel last_start = frame_index last_slide = this_slide # last update frame_list.append( (last_start, frame_index-1, last_slide)) cap.release() return frame_list def write_file(fn, p,q, outfn, original_slides, sync_result, M=20, fourcc="XVID", SKIP=None, WH=None): # W,H = 1920, 1080 cap = cv2.VideoCapture(fn) SW, SH = int(cap.get(cv.CV_CAP_PROP_FRAME_WIDTH)), int(cap.get(cv.CV_CAP_PROP_FRAME_HEIGHT)) if WH==None: W, H = SW, SH else: W,H = WH print "(W,H)", W,H sys.stdout.flush() p2 = ( p[0]*H/SH, p[1]*W/SW, p[2]*H/SH, p[3]*W/SW) pw, ph = p2[3]-p2[1], p2[2]-p2[0] print p2, q fourcc = cv.FOURCC(*fourcc) num_of_frames = cap.get(cv.CV_CAP_PROP_FRAME_COUNT) frame_rate = cap.get(cv.CV_CAP_PROP_FPS) print "frame_rate", frame_rate sys.stdout.flush() out = cv2.VideoWriter(outfn, fourcc, frame_rate, (W, H)) frame_index =-1 last_slide = -1 last_start = -1 frame_list = [] result_index = 0 progress = widgets.IntProgressWidget(min=0, max = num_of_frames - 1, value=0) progress_text = widgets.TextWidget() progress.set_css('background', 'black') display(progress) display(progress_text) img_widget = widgets.ImageWidget() display(img_widget) while cap.isOpened(): frame_index +=1 ret, frame = cap.read() if not ret: break while result_index < len(sync_result) and sync_result[result_index][1] < frame_index: result_index += 1 the_slide = (-1,-1,-1) if result_index >= len(sync_result) else sync_result[result_index] if SKIP and the_slide[2] in SKIP: the_slide = (-1,-1,-1) original_frame = cv2.resize(frame, (W, H), interpolation = cv2.INTER_CUBIC) if the_slide[2] >=0 and the_slide[1]-the_slide[0]>3*M: slide = original_slides[the_slide[2]] inner_frame = cv2.resize(slide[q[0]:q[2], q[1]:q[3]], (pw, ph), interpolation = cv2.INTER_CUBIC ) d = min(frame_index-the_slide[0], the_slide[1]-frame_index) out_frame = original_frame.copy() out_frame[p2[0]:p2[2], p2[1]:p2[3]] = inner_frame if d < M: out_frame = cv2.addWeighted(out_frame, d*1.0/M , original_frame, 1- d*1.0/M, 0) else: out_frame = original_frame out.write(out_frame) if frame_index%100 ==0: progress.value = frame_index progress_text.value = "%d/%d (%.1f)"%(frame_index, num_of_frames, 100.0*frame_index/num_of_frames) disp_frame = np.concatenate((out_frame[:, :W/2], original_frame[:,W/2:]), axis=1) img_widget.value = img_to_png(disp_frame, cvt=cv2.COLOR_BGR2RGB) img_widget.width = "800" cap.release() out.release() import os.path def load_original_slides(name): original_slides = [] i = 0 #progress = widgets.IntProgressWidget(min=0, max = num_of_frames - 1, value=0) progress_text = widgets.TextWidget() #progress.set_css('background', 'black') #display(progress) display(progress_text) while True: progress_text.value = "loading %d"%i img = cv2.imread("%s/%s-%d.png"%(name, name, i)) if img is None: break original_slides.append(img) i+=1 print "load original slides", len(original_slides) return original_slides def prepare_slides(original_slides, q, blur_factor): normalized_slides = (cv2.blur(normalize(s), (blur_factor, blur_factor)) for s in original_slides) slides = [cv2.resize(s[q[0]:q[2], q[1]:q[3]], (256,256), interpolation = cv2.INTER_CUBIC) for s in normalized_slides] slides_v = np.array([gray2vector(s) for s in slides]) print "slides prepared" return slides, slides_v original_slides, original_slides_name, result = None, None, None def auto_sync(NAME, p1, q1, blur_factor, p2=None, q2=None, threshold=0.8, step=10, dark=1500, STOP=-1, debug=False, SKIP=None, M=20, PASS=[3], fourcc="XVID", EXT="avi", WH=None): global original_slides, result, original_slides_name print "NAME=", NAME if 0 in PASS or not os.path.isfile("%s/%s-0.png"%(NAME,NAME)) : # 0 Extract PDF print "extract slides" sys.stdout.flush() print os.system("convert -density 200 %s/%s.pdf %s/%s.png"%(NAME,NAME,NAME,NAME)) result_slides_name = original_slides_name if 1 in PASS or original_slides_name != NAME: print "load original png" original_slides = load_original_slides(NAME) original_slides_name = NAME fn_base = "%s/%s"%(NAME,NAME) if os.path.isfile(fn_base+".mp4"): fn = fn_base+".mp4" elif os.path.isfile(fn_base+".avi"): fn = fn_base+".avi" else: print "original video file does not exist" return outfn = "%s/better_%s.%s"%(NAME, NAME, EXT) if 2 in PASS or result_slides_name != NAME: # Sync Video and Slides print "prepare slides" slides, slides_v = prepare_slides(original_slides, q1, blur_factor) print "syncing video" result = sync_video(fn, p1, slides, slides_v, threshold=threshold, step=step, dark=dark, STOP=STOP, debug=debug) print "sync_video done" if p2 is None: # full screen p2 = p1 if q2 is None: # full screen q2 = q1 if 3 in PASS or original_slides_name != NAME: print "start writing and converting" TEMP_OUT = "temp_out."+EXT write_file(fn, p1, q1, TEMP_OUT, original_slides, result, M=M, fourcc=fourcc, SKIP=SKIP, WH=WH) print "write done" sys.stdout.flush() retcode = os.system("avconv -y -i %s -i %s -map 0:v -map 1:a -c:v copy -c:a copy %s"%(TEMP_OUT, fn, outfn)) print "covert done", retcode p1,q1, blur_factor = (10, 160, 1080, 1754) , (0, 40, 2112, 2844) , 10 p2, q2 = p1, q1 auto_sync("tulip", p1,q1, blur_factor, p2,q2, threshold=0.9, M=5, fourcc="x264", EXT="mp4", SKIP=[28], PASS=[2]) p1,q1,blur_factor = (10, 159, 1080, 1754) , (0, 39, 2112, 2844) , 16 p2, q2 = (10, 131, 1080, 1750) , (0, 0, 2115, 2844), auto_sync("graphtool", p1,q1, blur_factor, p2, q2, threshold=0.7, SKIP=[7, 90, 91,92,93,94,95,96,97], PASS=[2]) p1,q1, blur_factor = (10, 159, 1080, 1750) , (0, 35, 2115, 2844), 16 p2,q2 = (10, 138, 1080, 1750) , (0, 0, 2115, 2844) # test abcdefg auto_sync("ls", p1,q1, blur_factor, p2,q2, threshold=0.8, SKIP=[4,35,36] , PASS=[2]) p1,q1, blur_factor = (211, 281, 958, 1723) , (0, 68, 2133, 2838) , 18 p2,q2 = None, None auto_sync("fabric", p1,q1, blur_factor, p2,q2, threshold=0.8, SKIP=[19], M=40) p1,q1, blur_factor = (145, 160, 954, 1751) , (0, 27, 1125, 2000) , 12 p2, q2 = None, None auto_sync("vote", p1,q1, blur_factor, p2,q2, threshold=0.75, M=2) p1,q1, blur_factor = (11, 144, 1080, 1752) , (0, 0, 1485, 2000) , 16 p2, q2 = p1, q1 auto_sync("mezz", p1,q1, blur_factor, p2,q2, threshold=0.9, M=20) p1,q1, blur_factor = (11, 135, 1080, 1753) , (0, 0, 2113, 2844) , 16 p2, q2 = p1, q1 auto_sync("summly", p1,q1, blur_factor, p2,q2, threshold=0.935, M=20) p1,q1, blur_factor = (19, 160, 1067, 1672) , (0, 80, 2070, 2844), 36 p2, q2 = p1, q1 auto_sync("StreetVoice", p1,q1, blur_factor, p2,q2, threshold=0.5, M=20) p1,q1, blur_factor = (19, 108, 1080, 1688) , (0, 0, 2084, 2844), 26 p2, q2 = p1, q1 auto_sync("grs", p1,q1, blur_factor, p2,q2, threshold=0.7, M=20) p1,q1, blur_factor=(73, 7, 1028, 1429), (8, 32, 2133, 2842), 13 p2,q2=(69, 0, 1028, 1430), [0, 17, 2133, 2844] auto_sync("dmhs", p1,q1, blur_factor, p2,q2, threshold=0.6, M=20, SKIP=[0, 6, 17], PASS=[3]) p1,q1, blur_factor=(87, 5, 404, 633), (9, 28, 1125, 2000), 14 p2,q2=(84, 0, 404, 633), [0, 9, 1125, 2000] auto_sync("present", p1,q1, blur_factor, p2,q2, threshold=0.7, M=20, WH=(1440,1080), PASS=[2,3]) p1,q1, blur_factor=(33, 4, 453, 635), (8, 30, 2119, 2844), 18 p2,q2=(31, 0, 456, 635), [0, 8, 2133, 2844] auto_sync("hai", p1,q1, blur_factor, p2,q2, threshold=0.7, M=20, WH=(1440,1080)) p1,q1, blur_factor=(34, 2, 451, 629), (4, 8, 1500, 2000), 14 p2,q2=(32, 0, 451, 629), [0, 0, 1500, 2000] auto_sync("vim", p1,q1, blur_factor, p2,q2, threshold=0.8, M=20, WH=(1440,1080)) p1,q1, blur_factor=(126, 1, 443, 631), (6, 12, 1125, 2000), 12 p2,q2=(124, 0, 443, 631), [0, 6, 1125, 2000] auto_sync("openstack", p1,q1, blur_factor, p2,q2, threshold=0.7, M=20, WH=(1440,1080), SKIP=[3,14,35,36]) result