#!/usr/bin/env python
# coding: utf-8

# # 2. Background Removal with SVD

# **Our goal today**: ![background removal](images/surveillance3.png)

# ### Load and Format the Data

# Let's use the real video 003 dataset from the [BMC 2012 Background Models Challenge Dataset](http://bmc.iut-auvergne.com/?page_id=24).

# Import the needed libraries:

# In[1]:
import imageio
imageio.plugins.ffmpeg.download()

# In[2]:
import moviepy.editor as mpe
import numpy as np
import scipy
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt

# In[3]:
np.set_printoptions(precision=4, suppress=True)

# In[4]:
video = mpe.VideoFileClip("movie/Video_003.avi")

# In[5]:
video.subclip(0, 50).ipython_display(width=300)

# In[6]:
video.duration

# ### Helper Methods

# In[7]:
def create_data_matrix_from_video(clip, fps=5, scale=50):
    """Sample the clip at `fps` frames per second, convert each frame to grayscale,
    rescale it, flatten it, and stack the frames as the columns of one matrix."""
    return np.vstack([scipy.misc.imresize(rgb2gray(clip.get_frame(i / float(fps))).astype(int),
                                          scale).flatten()
                      for i in range(fps * int(clip.duration))]).T

# In[8]:
def rgb2gray(rgb):
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

# ### Format the Data

# An image from one moment in time is 120 pixels by 160 pixels (when scaled). We can *unroll* that picture into a single tall column. So instead of a 2D picture that is $120 \times 160$, we have a $19{,}200 \times 1$ column.
#
# This isn't very human-readable, but it's handy because it lets us place the images from different times side by side as columns, putting the whole video into one matrix. If we took a video image every hundredth of a second for 100 seconds (so 10,000 different images, each from a different point in time), we'd have a $19{,}200 \times 10{,}000$ matrix representing the video!

# In[9]:
scale = 0.50   # Adjust scale to change the resolution of the images
dims = (int(240 * scale), int(320 * scale))
fps = 60       # frames per second

# In[30]:
M = create_data_matrix_from_video(video.subclip(0, 100), fps, scale)
# M = np.load("movie/med_res_surveillance_matrix_60fps.npy")

# In[31]:
print(dims, M.shape)

# In[12]:
plt.imshow(np.reshape(M[:, 140], dims), cmap='gray');

# Since `create_data_matrix_from_video` is somewhat slow, we will save our matrix. In general, whenever you have slow pre-processing steps, it's a good idea to save the results for future use.

# In[25]:
np.save("movie/med_res_surveillance_matrix_60fps.npy", M)

# In[23]:
plt.figure(figsize=(12, 12))
plt.imshow(M[::3, :], cmap='gray')

# **Questions**: What are those wavy black lines? What are the horizontal lines?

# ## Singular Value Decomposition

# ### Intro to SVD

# “a convenient way for breaking a matrix into simpler, meaningful pieces we care about” – [David Austin](http://www.ams.org/samplings/feature-column/fcarc-svd)
#
# “the most important linear algebra concept I don’t remember learning” - [Daniel Lemire](http://lemire.me/blog/2010/07/05/the-five-most-important-algorithms/)
#
# **Applications of SVD**:
# - Principal Component Analysis
# - Data compression
# - Pseudo-inverse
# - Collaborative Filtering
# - Topic Modeling
# - Background Removal
# - Removing Corrupted Data

# In[22]:
U, s, V = np.linalg.svd(M, full_matrices=False)

# This is really slow, so you may want to save your result to use in the future.

# In[23]:
np.save("movie/U.npy", U)
np.save("movie/s.npy", s)
np.save("movie/V.npy", V)

# In the future, you can just load what you've saved:

# In[25]:
U = np.load("movie/U.npy")
s = np.load("movie/s.npy")
V = np.load("movie/V.npy")
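# Because both the data-matrix construction and the full SVD are slow, it can help to wrap
# this "compute once, then load from disk" pattern in a small helper. The sketch below is
# just an illustration (the helper name `compute_or_load` and the example usage are my own,
# not part of the original notebook): it loads a saved `.npy` file if it exists, and
# otherwise runs the computation and caches the result.

# In[ ]:
import os

def compute_or_load(path, compute_fn):
    """Load the array cached at `path` if it exists; otherwise compute it, save it, and return it."""
    if os.path.exists(path):
        return np.load(path)
    result = compute_fn()
    np.save(path, result)
    return result

# Hypothetical usage with the matrix built above:
# M = compute_or_load("movie/med_res_surveillance_matrix_60fps.npy",
#                     lambda: create_data_matrix_from_video(video.subclip(0, 100), fps, scale))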
# What do $U$, $S$, and $V$ look like?

# In[26]:
U.shape, s.shape, V.shape

# #### Exercise

# Check that $U$, $S$, and $V$ are a decomposition of $M$.

# In[33]:
# Exercise (one possible solution): rebuild M from the factors.
reconstructed_matrix = U @ np.diag(s) @ V

# They are! :-)

# In[32]:
np.allclose(M, reconstructed_matrix)

# In[32]:
np.set_printoptions(suppress=True, precision=0)

# #### Properties of S

# `s` holds the singular values: it is the diagonal of a *diagonal matrix*.

# In[ ]:
np.diag(s[:6])

# Do you notice anything about the order of the values in $s$?

# In[34]:
s[0:2000:50]

# In[35]:
len(s)

# In[36]:
s[700]

# In[37]:
np.set_printoptions(suppress=True, precision=4)

# $U$ is a giant matrix, so let's just look at a tiny bit of it:

# In[38]:
U[:5, :5]

# ### Finding the background

# In[39]:
U.shape, s.shape, V.shape

# In[40]:
# Rank-1 approximation: the outer product of the first left and right singular vectors, scaled by the first singular value
low_rank = np.expand_dims(U[:, 0], 1) * s[0] * np.expand_dims(V[0, :], 0)

# In[360]:
plt.figure(figsize=(12, 12))
plt.imshow(low_rank, cmap='gray')

# In[41]:
plt.imshow(np.reshape(low_rank[:, 0], dims), cmap='gray');

# How do we get the people from here?

# In[42]:
plt.imshow(np.reshape(M[:, 0] - low_rank[:, 0], dims), cmap='gray');

# High-resolution version:

# In[43]:
plt.imshow(np.reshape(M[:, 140] - low_rank[:, 140], dims), cmap='gray');
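# To recap the "Finding the background" steps in one place, here is a minimal sketch of a
# helper that splits a data matrix into a rank-1 background and a foreground residual. The
# function name `split_background_foreground` is my own, for illustration; it simply packages
# the SVD-based computation we just did cell by cell.

# In[ ]:
def split_background_foreground(data_matrix):
    """Return (background, foreground), where background is the rank-1 SVD approximation."""
    u, sing_vals, vt = np.linalg.svd(data_matrix, full_matrices=False)
    background = np.expand_dims(u[:, 0], 1) * sing_vals[0] * np.expand_dims(vt[0, :], 0)
    foreground = data_matrix - background
    return background, foreground

# Hypothetical usage:
# background, people = split_background_foreground(M)
# plt.imshow(np.reshape(people[:, 140], dims), cmap='gray');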
# #### Make Video

# I was inspired by [the work of fast.ai student Samir Moussa](http://forums.fast.ai/t/part-3-background-removal-with-robust-pca/4286) to make videos of the people.

# In[70]:
from moviepy.video.io.bindings import mplfig_to_npimage

# In[75]:
def make_video(matrix, dims, filename):
    mat_reshaped = np.reshape(matrix, (dims[0], dims[1], -1))
    fig, ax = plt.subplots()
    def make_frame(t):
        ax.clear()
        ax.imshow(mat_reshaped[..., int(t * fps)])
        return mplfig_to_npimage(fig)
    animation = mpe.VideoClip(make_frame, duration=int(10))
    animation.write_videofile('videos/' + filename + '.mp4', fps=fps)

# In[296]:
make_video(M - low_rank, dims, "figures2")

# In[554]:
mpe.VideoFileClip("videos/figures2.mp4").subclip(0, 10).ipython_display(width=300)

# ### Speed of SVD for different size matrices

# In[4]:
import timeit
import pandas as pd

# In[5]:
m_array = np.array([100, int(1e3), int(1e4)])
n_array = np.array([100, int(1e3), int(1e4)])

# In[6]:
index = pd.MultiIndex.from_product([m_array, n_array], names=['# rows', '# cols'])

# In[7]:
pd.options.display.float_format = '{:,.3f}'.format
df = pd.DataFrame(index=m_array, columns=n_array)

# In[10]:
# %%prun
for m in m_array:
    for n in n_array:
        A = np.random.uniform(-40, 40, [m, n])
        t = timeit.timeit('np.linalg.svd(A, full_matrices=False)', number=3, globals=globals())
        df.loc[m, n] = t

# In[12]:
df / 3   # average time per SVD (timeit ran each one 3 times)

# ### 2 Backgrounds in 1 Video

# We'll now use the real video 008 dataset from the [BMC 2012 Background Models Challenge Dataset](http://bmc.iut-auvergne.com/?page_id=24), in addition to the video 003 that we used above.

# In[34]:
from moviepy.editor import concatenate_videoclips

# In[39]:
video2 = mpe.VideoFileClip("movie/Video_008.avi")

# In[40]:
concat_video = concatenate_videoclips([video2.subclip(0, 20), video.subclip(0, 10)])
concat_video.write_videofile("movie/concatenated_video.mp4")

# In[35]:
concat_video = mpe.VideoFileClip("movie/concatenated_video.mp4")

# ### Now back to our background removal problem:

# In[41]:
concat_video.ipython_display(width=300, maxduration=160)

# In[42]:
scale = 0.5   # Adjust scale to change the resolution of the images
dims = (int(240 * scale), int(320 * scale))

# In[43]:
N = create_data_matrix_from_video(concat_video, fps, scale)
# N = np.load("low_res_traffic_matrix.npy")

# In[44]:
np.save("med_res_concat_video.npy", N)

# In[45]:
N.shape

# In[46]:
plt.imshow(np.reshape(N[:, 200], dims), cmap='gray');

# In[47]:
U_concat, s_concat, V_concat = np.linalg.svd(N, full_matrices=False)

# This is slow, so you may want to save your result to use in the future.

# In[48]:
np.save("movie/U_concat.npy", U_concat)
np.save("movie/s_concat.npy", s_concat)
np.save("movie/V_concat.npy", V_concat)

# In the future, you can just load what you've saved:

# In[49]:
U_concat = np.load("movie/U_concat.npy")
s_concat = np.load("movie/s_concat.npy")
V_concat = np.load("movie/V_concat.npy")

# In[50]:
# Rank-10 approximation: with two different backgrounds in the video, a single component is no longer enough
low_rank = U_concat[:, :10] @ np.diag(s_concat[:10]) @ V_concat[:10, :]

# The top few components of U:

# In[51]:
plt.imshow(np.reshape(U_concat[:, 1], dims), cmap='gray')

# In[52]:
plt.imshow(np.reshape(U_concat[:, 2], dims), cmap='gray')

# In[53]:
plt.imshow(np.reshape(U_concat[:, 3], dims), cmap='gray')

# Background removal:

# In[54]:
plt.imshow(np.reshape((N - low_rank)[:, -40], dims), cmap='gray')

# In[55]:
plt.imshow(np.reshape((N - low_rank)[:, 240], dims), cmap='gray')

# ### Aside about data compression

# Suppose we keep only 700 singular values (remember, there are 6,000 singular values in total):

# In[56]:
s[0:1000:50]

# In[57]:
k = 700
compressed_M = U[:, :k] @ np.diag(s[:k]) @ V[:k, :]

# In[66]:
plt.figure(figsize=(12, 12))
plt.imshow(compressed_M, cmap='gray')

# In[58]:
plt.imshow(np.reshape(compressed_M[:, 140], dims), cmap='gray');

# In[59]:
np.allclose(compressed_M, M)

# In[60]:
np.linalg.norm(M - compressed_M)

# In[61]:
U[:, :k].shape, s[:k].shape, V[:k, :].shape

# Fraction of storage needed = (data in U, s, V for 700 singular values) / (original matrix):

# In[50]:
((19200 + 1 + 6000) * 700) / (19200 * 6000)

# We only need to store about 15.3% as much data, and the reconstruction is still very close to the original. That's great!

# ### That's pretty neat!!! but...

# The runtime complexity of SVD is $\mathcal{O}(\min(m^2 n,\; m n^2))$.

# **Downside: this was really slow (also, we threw away a lot of our calculation).**

# In[51]:
get_ipython().run_line_magic('time', 'u, s, v = np.linalg.svd(M, full_matrices=False)')

# In[52]:
M.shape
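# Tying back to the data-compression aside: the storage arithmetic above generalizes. Keeping
# a rank-k SVD of an m x n matrix means storing k*(m + n + 1) numbers instead of m*n. Below is
# a minimal sketch of that calculation as a function (the name `storage_fraction` is my own,
# for illustration only):

# In[ ]:
def storage_fraction(m, n, k):
    """Fraction of an m x n matrix's storage needed for its rank-k SVD factors U[:, :k], s[:k], V[:k, :]."""
    return k * (m + n + 1) / (m * n)

# Hypothetical usage, matching the calculation above:
# storage_fraction(19200, 6000, 700)   # ~0.153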