#!/usr/bin/env python # coding: utf-8 # Image Approximation with Orthogonal Bases # ========================================= # # *Important:* Please read the [installation page](http://gpeyre.github.io/numerical-tours/installation_python/) for details about how to install the toolboxes. # $\newcommand{\dotp}[2]{\langle #1, #2 \rangle}$ # $\newcommand{\enscond}[2]{\lbrace #1, #2 \rbrace}$ # $\newcommand{\pd}[2]{ \frac{ \partial #1}{\partial #2} }$ # $\newcommand{\umin}[1]{\underset{#1}{\min}\;}$ # $\newcommand{\umax}[1]{\underset{#1}{\max}\;}$ # $\newcommand{\umin}[1]{\underset{#1}{\min}\;}$ # $\newcommand{\uargmin}[1]{\underset{#1}{argmin}\;}$ # $\newcommand{\norm}[1]{\|#1\|}$ # $\newcommand{\abs}[1]{\left|#1\right|}$ # $\newcommand{\choice}[1]{ \left\{ \begin{array}{l} #1 \end{array} \right. }$ # $\newcommand{\pa}[1]{\left(#1\right)}$ # $\newcommand{\diag}[1]{{diag}\left( #1 \right)}$ # $\newcommand{\qandq}{\quad\text{and}\quad}$ # $\newcommand{\qwhereq}{\quad\text{where}\quad}$ # $\newcommand{\qifq}{ \quad \text{if} \quad }$ # $\newcommand{\qarrq}{ \quad \Longrightarrow \quad }$ # $\newcommand{\ZZ}{\mathbb{Z}}$ # $\newcommand{\CC}{\mathbb{C}}$ # $\newcommand{\RR}{\mathbb{R}}$ # $\newcommand{\EE}{\mathbb{E}}$ # $\newcommand{\Zz}{\mathcal{Z}}$ # $\newcommand{\Ww}{\mathcal{W}}$ # $\newcommand{\Vv}{\mathcal{V}}$ # $\newcommand{\Nn}{\mathcal{N}}$ # $\newcommand{\NN}{\mathcal{N}}$ # $\newcommand{\Hh}{\mathcal{H}}$ # $\newcommand{\Bb}{\mathcal{B}}$ # $\newcommand{\Ee}{\mathcal{E}}$ # $\newcommand{\Cc}{\mathcal{C}}$ # $\newcommand{\Gg}{\mathcal{G}}$ # $\newcommand{\Ss}{\mathcal{S}}$ # $\newcommand{\Pp}{\mathcal{P}}$ # $\newcommand{\Ff}{\mathcal{F}}$ # $\newcommand{\Xx}{\mathcal{X}}$ # $\newcommand{\Mm}{\mathcal{M}}$ # $\newcommand{\Ii}{\mathcal{I}}$ # $\newcommand{\Dd}{\mathcal{D}}$ # $\newcommand{\Ll}{\mathcal{L}}$ # $\newcommand{\Tt}{\mathcal{T}}$ # $\newcommand{\si}{\sigma}$ # $\newcommand{\al}{\alpha}$ # $\newcommand{\la}{\lambda}$ # $\newcommand{\ga}{\gamma}$ # $\newcommand{\Ga}{\Gamma}$ # $\newcommand{\La}{\Lambda}$ # $\newcommand{\si}{\sigma}$ # $\newcommand{\Si}{\Sigma}$ # $\newcommand{\be}{\beta}$ # $\newcommand{\de}{\delta}$ # $\newcommand{\De}{\Delta}$ # $\newcommand{\phi}{\varphi}$ # $\newcommand{\th}{\theta}$ # $\newcommand{\om}{\omega}$ # $\newcommand{\Om}{\Omega}$ # This numerical tour uses several orthogonal bases to perform non-linear image # approximation. # In[1]: from __future__ import division import numpy as np import scipy as scp import pylab as pyl import matplotlib.pyplot as plt from nt_toolbox.general import * from nt_toolbox.signal import * import warnings warnings.filterwarnings('ignore') get_ipython().run_line_magic('matplotlib', 'inline') get_ipython().run_line_magic('load_ext', 'autoreload') get_ipython().run_line_magic('autoreload', '2') # Best $M$-terms Non-linear Approximation # ----------------------------------------- # This tours makes use of an orthogonal base $ \Bb = \{ \psi_m \}_{m=0}^{N-1} $ # of the space $\RR^N$ of the images with $N$ pixels. # # # The best $M$-term approximation of $f$ is obtained by a non-linear # thresholding # # $$ f_M = \sum_{ \abs{\dotp{f}{\psi_m}}>T } \dotp{f}{\psi_m} \psi_m, $$ # where the value of $T>0$ should be carefully selected so that only # $M$ coefficients are not thresholded, i.e. # # $$ \abs{ \enscond{m}{ \abs{\dotp{f}{\psi_m}}>T } } = M. $$ # # # The goal is to use an ortho-basis $ \Bb $ so that the error $ \norm{f-f_M} $ # decays as fast as possible when $M$ increases, for a large class of # images. # # # This tour studies several different orthogonal bases: Fourier, wavelets (which is at the heart of JPEG-2000), # cosine, local cosine (which is at the heart of JPEG). # # # # First we load an image of $ N = n \times n $ pixels. # In[2]: n = 512 f = rescale(load_image("nt_toolbox/data/hibiscus.bmp", n)) # Display it. # In[3]: plt.figure(figsize = (5,5)) imageplot(f) # Fourier Approximation # --------------------- # The discrete 2-D Fourier atoms are defined as: # $$ \psi_m(x) = \frac{1}{\sqrt{N}} e^{ \frac{2i\pi}{n} ( x_1 m_1 + x_2 m_2 ) }, $$ # where $ 0 \leq m_1,m_2 < n $ indexes the frequency. # # # The set of inner products $ \{ \dotp{f}{\psi_m} \}_m $ is computed in # $O(N \log(N))$ operations with the 2-D Fast Fourier Transform (FFT) # algorithm (the pylab function is fft2). # # # Compute the Fourier transform using the FFT algorithm. # Note the normalization by $1/\sqrt{N}$ to ensure orthogonality (energy # conservation) of the transform. # In[4]: fF = pyl.fft2(f)/n # Display its magnitude (in log scale). # We use the pylab function fftshift to put the low frequency in the center. # In[5]: plt.figure(figsize = (5,5)) imageplot(np.log(1e-5 + abs(pyl.fftshift(fF)))) # An image is recovered from a set of coefficients $c_m$ using the inverse # Fourier Transform (pylab function ifft2)) that implements the formula # # $$ f_M = \sum_m c_m \psi_m. $$ # # Perform a thresholding. # In[6]: T = .3 c = np.multiply(fF,(abs(fF) > T)) # Inverse the Fourier transform. # In[7]: fM = np.real(pyl.ifft2(c)*n) # Display the approximation. # In[8]: plt.figure(figsize = (5,5)) imageplot(clamp(fM)) # __Exercise 1__ # # Compute a best $M$-term approximation in the Fourier basis of $f$, for # $M \in \{N/100, N/20\}$. Compute the approximation using # a well chosen hard threshold value $T$. # In[9]: run -i nt_solutions/coding_1_approximation/exo1 # In[10]: ## Insert your code here. # The best $M$-term approximation error is computed using the # conservation of energy as # # $$ \epsilon[M]^2 = \norm{f-f_M}^2 = \sum_{ \abs{\dotp{f}{\psi_m}} \leq T } \abs{\dotp{f}{\psi_m}}^2. $$ # # # If one denotes by $ \{ c_R[k] \}_{k=0}^{N-1} $ the set of # coefficients magnitudes $ \abs{\dotp{f}{\psi_m}} $ ordered by *decaying* # magnitudes, then this error is easily computed as # $$ \epsilon[M]^2 = \sum_{k=M}^{N-1} c_R[k]^2 # = \norm{f}^2 - \sum_{k=0}^{M-1} c_R[k]^2. $$ # This means that $\epsilon^2$ is equal to # $\norm{f}^2$ minus the discrete primitive of $ c_R^2 $. # __Exercise 2__ # # Compute and display in log scales the ordered coefficients $c_R$. # _Hint:_ a discrete primitive can be computed using the numpy function cumsum. # In[11]: run -i nt_solutions/coding_1_approximation/exo2 # In[12]: ## Insert your code here. # __Exercise 3__ # # Compute and display in log-scale the non-linear approximation # error $\epsilon[M]^2$. # Store the values of $\epsilon[M]^2$ in a vector $err\_fft$. # In[13]: run -i nt_solutions/coding_1_approximation/exo3 # In[14]: ## Insert your code here. # Wavelet Approximation # --------------------- # The Wavelet basis of continuous 2-D functions is defined by # by scaling and translating three mother # atoms $ \{\psi^H,\psi^V,\psi^D\} $: # $$ \psi_{j,n}^k(x) = \frac{1}{2^j}\psi^k\pa{\frac{x-2^j n}{2^j}} $$ # # # Non-linear wavelet approximation is a the heart of the JPEG-2000 # compression standard. # # # The set of inner products $ \{ \dotp{f}{\psi_m} \}_m $ is computed in # $O(N)$ operations with the 2-D Fast Wavelet Transform # algorithm. # # # Perform a wavelet transform. # Here we use a daubechies wavelet transform. # In[16]: from numpy import linalg from nt_toolbox.compute_wavelet_filter import * Jmin = 1 h = compute_wavelet_filter("Daubechies",10) fW = perform_wavortho_transf(f, Jmin, + 1, h) # Display the coefficients. # In[17]: plt.figure(figsize = (8,8)) plot_wavelet(fW, Jmin) plt.title('Wavelet coefficients') plt.show() # __Exercise 4__ # # Compute a best $M$-term approximation in the wavelet basis of $f$, for # $M \in \{N/100, N/20\}$. Compute the approximation using # a well chosen hard threshold value $T$. # Note that the inverse wavelet transform is obtained by replacing the +1 # by a -1 in the definition of the transform. # In[18]: run -i nt_solutions/coding_1_approximation/exo4 # In[19]: ## Insert your code here. # __Exercise 5__ # # Compute and display in log-scale the non-linear approximation # error $\epsilon[M]^2$. # Compares the Fourier and wavelets approximations. # Store the values of $\epsilon[M]^2$ in a vector $err\_wav$. # In[20]: run -i nt_solutions/coding_1_approximation/exo5 # In[21]: ## Insert your code here # Cosine Approximation # -------------------- # The discrete cosine approximation (DCT) is similar to the Fourier # approximation, excepted that it used symmetric boundary condition instead # of periodic boundary condition, and is thus more useful to approximate # image. # # # A 1-D cosine atom of $n$ sample is defined as # $$ \bar\psi_m(x) = \frac{1}{\sqrt{N}} \cos\pa{ \frac{2\pi}{N} (x-1/2) m } $$ # A 2-D cosine atom is obtained by tensor product of 1-D atoms # $$ \psi_{m_1,m_2}(x_1,x_2) = \bar\psi_{m_1}(x_1) \bar\psi_{m_2}(x_2). $$ # On the contrary to the Fourier 2-D atoms, these 2-D DCT atoms are not # oriented (they contains 4 Fourier frequencies). # # # The set of inner products $ \{ \dotp{f}{\psi_m} \}_m $ is computed in # $O(N \log(N))$ operations with the 2-D Fast Cosine Transform # algorithm. # In[22]: from scipy import fftpack def dct2(f): return np.transpose(fftpack.dct(np.transpose(fftpack.dct(f, norm = "ortho")), norm = "ortho")) def idct2(f): return np.transpose(fftpack.idct(np.transpose(fftpack.idct(f, norm = "ortho")), norm = "ortho")) fC = dct2(f) # Display the magnitude of the DCT coefficients. # Note that the low frequencies are in the upper-left corner. # In[23]: plt.figure(figsize = (5,5)) imageplot(np.log(1e-5 + abs(fC))) # __Exercise 6__ # # Compute a best $M$-term approximation in the wavelet basis of $f$, for # $M \in \{N/100, N/20\}$. Compute the approximation using # a well chosen hard threshold value $T$. Note that the inverse DCT # transform is obtained with the function idct. # In[24]: run -i nt_solutions/coding_1_approximation/exo6 # In[25]: ## Insert your code here. # __Exercise 7__ # # Compute and display in log-scale the non-linear approximation # error $\epsilon[M]^2$. # Compares the Fourier and DCT approximations. # Store the values of $\epsilon[M]^2$ in a vector $err\_dct$. # In[26]: run -i nt_solutions/coding_1_approximation/exo7 # In[27]: ## Insert your code here. # Local Cosine Approximation # -------------------------- # To improve the global DCT approximation, one can approximate # independantly small patches in the image. This corresponds to a # decomposition in a local cosine basis, which is at the heart # of the JPEG image compression standard. # # # The only parameter of the transform is the size of the square. # In[28]: w = 16 # Initialize at zero the transformed image in the local DCT basis. # In[29]: fL = np.zeros([n, n]) # Example of patch index. # In[30]: i = 5 j = 7 # For a given path index $(i,j)$, we extract a $(w,w)$ patch. # In[31]: P = f[(i-1)*w: i*w, (j-1)*w: j*w] # Compute the Cosine transform of the patch using the fast DCT algorithm. # In[32]: fL[(i-1)*w: i*w, (j-1)*w: j*w] = dct2(P) # Display the patch and its coefficients. We removed the low frequency of # $P$ for display purpose only. # In[33]: plt.figure(figsize = (8,8)) imageplot(P, 'Patch', [1, 2, 1]) imageplot(dct2(P-np.mean(P)), 'DCT', [1, 2, 2]) # __Exercise 8__ # # Compute the local DCT transform $f_L$ by transforming each patch. # In[34]: run -i nt_solutions/coding_1_approximation/exo8 # In[35]: ## Insert your code here. # Display the coefficients. # In[36]: plt.figure(figsize = (5,5)) imageplot(np.clip(abs(fL),0,.005*w*w)) # __Exercise 9__ # # Compute the inverse local DCT transform of the coefficients $f_L$ by inverse # transforming each patch using the function idct2. # In[37]: run -i nt_solutions/coding_1_approximation/exo9 # In[38]: ## Insert your code here. # __Exercise 10__ # # Compute a few best $M$-term approximations in the Local DCT basis of # $f$. # In[39]: run -i nt_solutions/coding_1_approximation/exo10 # In[40]: ## Insert your code here. # __Exercise 11__ # # Compute and display in log-scale the non-linear approximation # error $\epsilon[M]^2$. # Store the values of $\epsilon[M]^2$ in a vector |err_ldct|. # Compares the Fourier, Wavelets, DCT and local-DCT approximations. # In[41]: run -i nt_solutions/coding_1_approximation/exo11 # In[42]: ## Insert your code here. # Comparison of Wavelet Approximations of Several Images # ------------------------------------------------------ # An image is more complicated than an other one for a given orthogonal # basis if its approximation error decays more slowly. # # # First load several high resolution images. # In[43]: n = 512 fList = np.zeros([n,n,4]) fList[: , : , 0] = rescale(load_image("nt_toolbox/data/regular3.bmp", n)) fList[: , : , 1] = rescale(load_image("nt_toolbox/data/phantom.bmp", n)) fList[: , : , 2] = rescale(load_image("nt_toolbox/data/lena.bmp", n)) fList[: , : , 3] = rescale(load_image("nt_toolbox/data/mandrill.bmp", n)) # Display them. # In[44]: plt.figure(figsize = (7,7)) for i in range(4): imageplot(fList[: , : , i], '', [2,2,i+1]) # __Exercise 12__ # # Compare the approximation error decay for those images. # Display $ \log_{10}(\norm{f-f_M}) $ as a function of $\log_{10}(M)$. # In[45]: run -i nt_solutions/coding_1_approximation/exo12 # In[46]: ## Insert your code here.