#!/usr/bin/env python
# coding: utf-8

# # MAT201A Final Project#
# ### Hilda Chang HE ###
# ### 2016 Winter

# *Project Description:*
# This project explores the lossy image compression algorithm used in JPEG,
# which is called the Discrete Cosine Transformation (DCT).
# First, I explain how DCT works for image compression. Then I try to replace
# the DCT with the FFT that we learnt in class. After that I evaluate the two
# methods, compare their performance and try to analyse why JPEG uses DCT
# instead of FFT. Last but not least, I explore the left-out coefficients in
# DCT.

# ## Image Lossy Compression Based on Discrete Cosine Transformation

# Future statements may only be preceded by comments, blank lines and the
# module docstring, so they live here rather than inside a later cell.
from __future__ import print_function
from __future__ import division

# In[131]:

get_ipython().run_line_magic('matplotlib', 'inline')
import io
import os
import numpy as np
import matplotlib.pyplot  # as plt
import urllib2
import time


# In[134]:

# %pylab pulls imshow, colorbar, cm, shape, fft, ... into the namespace; the
# rest of this script relies on those bare names.
get_ipython().run_line_magic('pylab', 'inline')
# NOTE(review): scipy.misc.lena is believed to be unavailable in later SciPy
# releases -- confirm against the SciPy version this is run with.
from scipy.misc import lena

img = lena()
imshow(img, cmap=cm.gray)
colorbar()
img.dtype


# In[135]:

shape(img)


# In[139]:

from scipy import fftpack

# 2-D DCT built from two 1-D passes: transform along the last axis of the
# transposed image, transpose back, then transform along the last axis again.
time_buffer = time.time()
DCT = fftpack.dct(fftpack.dct(img.T, norm='ortho').T, norm='ortho')
DCT_time = time.time() - time_buffer
print("Time comsumption for performing DCT for 512 * 512 Lena is", DCT_time,"s.")


# In[5]:

print(DCT)


# In[6]:

# Inverse 2-D DCT, using the same two-pass transpose trick as the forward one.
R_DCT = fftpack.idct(fftpack.idct(DCT.T, norm='ortho').T, norm='ortho')


# In[7]:

imshow(R_DCT, cmap=cm.gray)
colorbar()


# In[8]:

diff = img - R_DCT
imshow(diff)
colorbar()
title("Distortion")


# **So I did a DCT on the Lena image, and then did an inverse DCT on the
# transformed numbers. Here I compare the original picture with the one we
# get back after these two transformations. The diff-graph shows that the two
# are clearly not identical. That is one of the minor reasons why this
# compression method is lossy.**
# NOTE(review): the residual is presumably floating-point round-off -- check
# the colorbar scale of the "Distortion" plot to confirm its magnitude.

# #### Now, let's dig into the DCT coefficients a little bit deeper:

# In[9]:

figure(figsize=(15, 9))
scatter(
    range(DCT.ravel().size),
    np.abs(DCT.T.ravel()),
    c='#00F6B2',
    alpha=.3,
)
title('DCT Coefficient Amplitude vs. Order of Coefficient')
xlabel('Order of DCT Coefficients')
ylabel('DCT Coefficients Amplitude')


# **This graph clearly shows that only the coefficients at the very beginning
# contain significant information about this image.**

# In[10]:

# Log scale, because the plotted magnitudes span many orders of magnitude.
imshow(np.log10(np.abs(DCT)))
colorbar()
gcf().set_figwidth(24)
gcf().set_figheight(6)


# **Therefore, we can explain the effect of DCT in this way: it collects the
# information of the original image and concentrates it in the top-left
# corner of the coefficient matrix. This is exactly what we want for lossy
# compression: using fewer numbers to represent more information.**

# ### Let's compress the Lena image with different lossy compression rates:

# What we are going to do is remove some coefficients from the DCT matrix.
# How many of the original numbers we replace with 0s determines the
# compression rate. Because we store 0 instead of numbers with many digits,
# it takes less space to store them. This is the reason the image size gets
# smaller.

# In[11]:

from scipy import fftpack

gcf().set_figwidth(16)
gcf().set_figheight(12)
# Keep only the top-left i x i corner of the coefficients and reconstruct.
# A step of 26 over 512 columns gives 20 cut-offs, one per cell of the 4x5 grid.
for j, i in enumerate(range(0, len(img[0]), 26), start=1):
    dct_copy = DCT.copy()
    dct_copy[i:, :] = 0
    dct_copy[:, i:] = 0
    subplot(4, 5, j)
    imshow(
        fftpack.idct(fftpack.idct(dct_copy.T, norm='ortho').T, norm='ortho'),
        cmap=cm.gray,
    )


# It's actually hard to tell the difference between the images in the last
# 2 rows.
# ## Explore 1 : Replace DCT with FFT

# In[138]:

# Row-wise 1-D real FFT. Iterating over the rows themselves (instead of
# indexing rows with a column-count bound) keeps this correct for non-square
# images too.
time_buffer = time.time()
rfft_img = [fft.rfft(row) for row in img]
RFFT_time = time.time() - time_buffer
print("Time comsumption for performing rFFT for 512 * 512 Lena is", RFFT_time,"s.")
imshow(np.log10(np.absolute(rfft_img)))
colorbar()
gcf().set_figheight(5)


# **We can see that the 1-dimensional transformation uses less time than the
# 2-dimensional transformation. In exchange, the compression ratio will not
# be as good as with the 2D method.**

# In[87]:

gcf().set_figwidth(16)
gcf().set_figheight(12)
rfft_img = np.asarray(rfft_img)
# Keep only the first t frequency bins of every row, then invert each row.
for j, t in enumerate(range(0, len(rfft_img[0]), 13), start=1):
    rfft_copy = rfft_img.copy()
    rfft_copy[:, t:] = 0
    # One call along axis 1 inverts every row at once.
    irfft_img = fft.irfft(rfft_copy, axis=1)
    subplot(4, 5, j)
    imshow(irfft_img, cmap=cm.gray)


# But this is a way to perform image compression when we need fast
# processing speed and have enough storage.

# **Now let's perform the 2D FFT**

# In[28]:

# Time only the transform itself so the figure is comparable with the DCT
# timing, which does not include any post-processing either.
time_buffer = time.time()
fft2_coeffs = fft.fft2(img.astype(double))
FFT2D_time = time.time() - time_buffer
# Log-magnitude spectrum, used for display only.
FFT2D = log(abs(fft2_coeffs))


# In[142]:

gcf().set_figwidth(8)
subplot(121)
imshow(FFT2D)
subplot(122)
imshow(fftshift(FFT2D))
print("Time comsumption for performing 2D FFT for 512 * 512 Lena is", FFT2D_time,"s.")
colorbar()


# **So, we can clearly see that the 2D FFT has 2 important drawbacks for
# image compression:**
#
# **1. The processing time is 3 to 4 times slower than DCT. If we take a
# picture with a high-resolution cellphone, the storing time will be
# unacceptable.**
#
# **2. The 2D FFT concentrates the image information around the center of
# the (shifted) spectrum. This makes the truncation method complicated when
# the user needs different compression rates.**
# NOTE(review): the timing ratios quoted in these notes were recorded with
# the original cells; re-run to confirm them.

# Let's use a big picture with high resolution to prove my reasoning,

# In[42]:

img2 = imread('goldenGate.JPG')
gcf().set_figwidth(16)
gcf().set_figheight(12)
imshow(img2, cmap=cm.gray)
print("The size of this image is",img2.size)


# In[43]:

# Benchmark on the big picture itself (not on Lena again). The two-pass
# transpose trick and fft2 both assume a 2-D array, so a colour image is
# reduced to a single plane first.
# NOTE(review): channel averaging is an assumed grayscale conversion --
# swap in a luminance formula if that matters for the comparison.
gray2 = img2.astype(double)
if gray2.ndim == 3:
    gray2 = gray2[..., :3].mean(axis=2)

time_buffer = time.time()
DCT_img2 = fftpack.dct(fftpack.dct(gray2.T, norm='ortho').T, norm='ortho')
DCT_time_img2 = time.time() - time_buffer


# In[44]:

time_buffer = time.time()
FFT = fft.fft2(gray2)
FFT_time_img2 = time.time() - time_buffer


# In[45]:

print("DCT time comsuming: ",DCT_time_img2)
print("FFT time comsuming: ", FFT_time_img2)


# **So we can see that FFT takes about 4 times longer to compress the image**

# ## Explore 2 : What about those left-outs?

# In[46]:

from scipy import fftpack

gcf().set_figwidth(16)
gcf().set_figheight(12)
# The opposite of the compression experiment: zero the first i rows and
# columns of coefficients and reconstruct from what is left.
for j, i in enumerate(range(0, len(img[0]), 26), start=1):
    dct_copy = DCT.copy()
    dct_copy[:i, :] = 0
    dct_copy[:, :i] = 0
    subplot(4, 5, j)
    imshow(
        fftpack.idct(fftpack.idct(dct_copy.T, norm='ortho').T, norm='ortho'),
        cmap=cm.gray,
    )


# In[48]:

from scipy import fftpack

gcf().set_figwidth(16)
gcf().set_figheight(12)
# Same experiment with a finer step: drop only the first 0..19 rows/columns.
for j, i in enumerate(range(0, 20), start=1):
    dct_copy = DCT.copy()
    dct_copy[:i, :] = 0
    dct_copy[:, :i] = 0
    subplot(4, 5, j)
    imshow(
        fftpack.idct(fftpack.idct(dct_copy.T, norm='ortho').T, norm='ortho'),
        cmap=cm.gray,
    )


# So this method can be used to create some artificial effects, such as
# atomization or a relief effect.
# In[115]:

gcf().set_figwidth(16)
gcf().set_figheight(12)
img3 = imread('DaeHyun.jpg')
imshow(img3)
print(shape(img3))


# In[116]:

from scipy import fftpack

# Offset subtracted before the DCT and added back after the inverse, so the
# samples are centred on zero.
# NOTE(review): assumes img3 holds 8-bit samples in 0..255 -- confirm.
LEVEL_SHIFT = 128.0


def _dct2(plane):
    """Return the orthonormal 2-D DCT of a 2-D array (two 1-D passes)."""
    return fftpack.dct(fftpack.dct(plane.T, norm='ortho').T, norm='ortho')


def _idct2(coeffs):
    """Return the orthonormal 2-D inverse DCT of a 2-D coefficient array."""
    return fftpack.idct(fftpack.idct(coeffs.T, norm='ortho').T, norm='ortho')


# Work in float: casting pixel data to int8 wraps every value above 127
# instead of centring it.
r = img3[:, :, 0].astype(np.float64) - LEVEL_SHIFT
g = img3[:, :, 1].astype(np.float64) - LEVEL_SHIFT
b = img3[:, :, 2].astype(np.float64) - LEVEL_SHIFT
r_DCT = _dct2(r)
g_DCT = _dct2(g)
b_DCT = _dct2(b)


# In[117]:

gcf().set_figwidth(16)
gcf().set_figheight(12)
# For each cut-off i, zero the first i rows and columns of every channel's
# coefficients, reconstruct the channel and reassemble the colour image.
for j, i in enumerate(range(0, 10), start=1):
    # Size the output from the data rather than a fixed 800 x 1200.
    new_img = empty(r_DCT.shape + (3,))
    for channel, coeffs in enumerate((r_DCT, g_DCT, b_DCT)):
        kept = coeffs.copy()
        kept[:i, :] = 0
        kept[:, :i] = 0
        new_img[:, :, channel] = _idct2(kept) + LEVEL_SHIFT
    # Clip, then store as 8-bit so the result is displayed as a 0..255 RGB image.
    new_img = np.clip(new_img, 0, 255).astype(np.uint8)
    subplot(2, 5, j)
    imshow(new_img)


# So simply applying DCT compression on an RGB image doesn't create the
# effect we want.
# But we can do it on gray-scale images.
# NOTE(review): that observation was made with the earlier int8-based version
# of the cell above; re-run and re-check it against the float-based pipeline.