Title: JPEG Compression Author: Thomas Breuel Institution: UniKL
from pylab import *
import cv2
from scipy.ndimage import filters
JPEG compression is the most widely used lossy image compression. It is based on a number of common steps in image compression. Roughly, the steps are:
We will look at some of these steps in details.
Let's start off with a color image.
# http://www.flickr.com/photos/32155539@N00/6049931955/sizes/l/in/photostream/
image = 1.0*imread("bearcubs-yosemite-james.jpg")
image /= amax(image)
imshow(image)
<matplotlib.image.AxesImage at 0x647411d0>
Image compression starts off with a linear color space transformation. This transformation separates out an intensity channel (Y) and two color channels (Cb, Cr). The two color channels are zero for grayscale images.
r,g,b = transpose(image,[2,0,1])
y = 0.299*r+0.587*g+0.114*b
cb = -0.168736*r-0.331264*g+0.5*b
cr = +0.5*r-0.418688*g-0.081312*b
Let's look at the three channels.
figsize(12,8); gray()
subplot(131); imshow(y)
subplot(132); imshow(cb,vmin=-.2,vmax=.2)
subplot(133); imshow(cr,vmin=-.2,vmax=.2)
<matplotlib.image.AxesImage at 0x647735d0>
# color space conversion functions
def rgb2ycbcr(rgb):
rgb = rgb/amax(rgb)
r,g,b = transpose(rgb,[2,0,1])
y = 0.299*r+0.587*g+0.114*b
cb = -0.168736*r-0.331264*g+0.5*b
cr = +0.5*r-0.418688*g-0.081312*b
return (y,cb,cr)
def ycbcr2rgb(ycbcr):
y,cb,cr = ycbcr
r = y + 1.402*cr
g = y - 0.34414*cb - 0.71414*cr
b = y + 1.772*cb
rgb = transpose(array((r,g,b),'f'),[1,2,0])
return clip(rgb,0,1.0)
# testing the color space conversion functions
Y,Cb,Cr = rgb2ycbcr(image)
rgb = ycbcr2rgb((Y,Cb,Cr))
imshow(rgb)
<matplotlib.image.AxesImage at 0x6f44b210>
# intensity channel
imshow(Y)
<matplotlib.image.AxesImage at 0x755cfad0>
# the Cb channel, keeping Y and Cr constant
imshow(ycbcr2rgb((0.5*ones(Y.shape),Cb,zeros(Cr.shape))))
<matplotlib.image.AxesImage at 0x72eb8e50>
# the Cr channel, keeping Y and Cb constant
imshow(ycbcr2rgb((0.5*ones(Y.shape),zeros(Cb.shape),Cr)))
<matplotlib.image.AxesImage at 0x6f29e990>
# color information only
imshow(ycbcr2rgb((0.3*ones(Y.shape),Cb,Cr)))
<matplotlib.image.AxesImage at 0x77e0c650>
(intensity channel resolution)
Downsampling or smoothing the intensity channel results in a blurry picture.
gf = filters.gaussian_filter
yblur = ycbcr2rgb((gf(Y,10.0),Cb,Cr))
imshow(yblur)
<matplotlib.image.AxesImage at 0x78b26e90>
# blurring the color channel
print amin(Cb),amax(Cb)
subplot(121); imshow(Cb,vmin=-0.2,vmax=0.2)
subplot(122); imshow(gf(Cb,20.0),vmin=-0.2,vmax=0.2)
-0.198481819608 0.148585788235
<matplotlib.image.AxesImage at 0x7b1ecbd0>
(blurring the color channel)
Even strong blur in the color channel is hardly noticeable.
colorblur = ycbcr2rgb((Y,gf(Cb,20.0),gf(Cr,20.0)))
imshow(colorblur)
<matplotlib.image.AxesImage at 0x789a4850>
# original vs color blur
subplot(121); imshow(image)
subplot(122); imshow(colorblur)
<matplotlib.image.AxesImage at 0x82447590>
# intensity blur vs color blur
subplot(121); imshow(yblur)
subplot(122); imshow(colorblur)
<matplotlib.image.AxesImage at 0x835bb690>
(summary)
General Observations:
JPEG Compression:
(DFT and DCT)
We looked at length at the Discrete Fourier Transform (DFT). Remember:
It turns out that there is also a basis that can be written purely in terms of cosines. The resulting transform is the Discrete Cosine Transform (DCT):
$$ X_k = \sum_{n=0}^{N-1} x_n \cos \left[\frac{\pi}{N} \left(n+\frac{1}{2}\right) k \right] \quad \quad k = 0, \dots, N-1 $$(Properties of the DCT)
Reflective boundary conditions and real-to-real make it a very good transform for image compression.
# reflective boundary conditions
axis("off"); imshow(imread("Figures/dct-boundary.png"))
<matplotlib.image.AxesImage at 0x616e6c90>
(basis functions)
We can look at the basis functions of the DCT by inverse transforming shifted impulses.
for i in range(1,6):
transformed = zeros(256)
transformed[i] = 1.0
signal = cv2.idct(transformed)
plot(signal)
# 2D inverse DCT of a shifted impulse
transformed = zeros((256,256))
transformed[3,8] = 1.0
signal = cv2.idct(transformed)
imshow(signal)
<matplotlib.image.AxesImage at 0x7e5f6690>
The "bears" image above has mostly smooth regions; let's pick an image with some edges for visualizing the results.
bridge = mean(imread("bridge.jpg"),axis=2)
bridge /= amax(bridge)
print bridge.shape
imshow(bridge)
(480, 640)
<matplotlib.image.AxesImage at 0x730f62d0>
# a patch and its DCT
patch = bridge[230:294,64:128].copy()
patch -= mean(patch)
dct = cv2.dct(patch)
subplot(121); imshow(patch,interpolation='nearest')
subplot(122); imshow(dct,interpolation='nearest')
<matplotlib.image.AxesImage at 0x7e61eb90>
Note that the coefficients are sparse: most coefficients are nearly zero.
We can set the near zero coefficients to zero without an appreciable loss in qualitiy.
from scipy import stats
threshold = stats.scoreatpercentile(abs(dct.ravel()),50)
threshold
0.027783365248010505
# removing 50 percent of the coefficients
threshold = stats.scoreatpercentile(abs(dct.ravel()),50)
thresholded = where(abs(dct)<threshold,0.0,dct)
print sum(dct!=0),sum(thresholded!=0)
subplot(121); imshow(dct,interpolation='nearest',cmap=cm.prism)
subplot(122); imshow(thresholded,interpolation='nearest',cmap=cm.prism)
4096 2048
<matplotlib.image.AxesImage at 0x7d377f90>
# original vs sparse coefficients
reverse = cv2.idct(dct)
reverse2 = cv2.idct(thresholded)
subplot(121); imshow(reverse,interpolation='nearest')
subplot(122); imshow(reverse2,interpolation='nearest')
<matplotlib.image.AxesImage at 0x79699b50>
# visualizing quality with sparse coefficients
figsize(8,8)
for i,perc in enumerate(exp(linspace(log(80),log(99.9),9))):
threshold = stats.scoreatpercentile(abs(dct.ravel()),perc)
thresholded = where(abs(dct)<threshold,0.0,dct)
subplot(3,3,i+1)
imshow(cv2.idct(thresholded),interpolation='nearest')
title("keeping %.1f%% (%d)"%(100-perc,thresholded.size-sum(thresholded==0.0)))
Real numbers don't compress well, so we need to convert the floating point coefficients to integers.
A simple way of doing this is to divide by a quantization coefficient and then round.
q = 0.05
qdct = array(floor((dct+q/2)/q),'i')
print qdct[:8,:8]
[[ 0 38 -32 6 8 -10 3 6] [ 12 17 -3 6 -8 2 -3 1] [ 14 13 5 8 -2 -6 5 -6] [ 13 4 5 -9 1 -4 8 8] [ 0 -3 -2 -8 3 3 -6 -1] [ 12 7 8 3 1 -4 3 -4] [ -2 -5 -2 -5 -2 0 3 -1] [ 2 14 12 -4 5 0 -1 -4]]
figsize(8,8)
# evaluating different quantization levels
for i,q in enumerate(exp(linspace(log(0.01),log(2.0),9))):
qdct = array(floor((dct+q/2)/q),'i')
nq = len(unique(qdct.ravel()))
rdct = qdct*q
subplot(3,3,i+1)
imshow(cv2.idct(rdct),interpolation='nearest')
title("q %.3f nq %d"%(q,nq))
# test image
h,w = Y.shape
imshow(Y)
<matplotlib.image.AxesImage at 0x77caff90>
# computing DCTs for all the patches
dcts = []
for y in range(0,h,8):
for x in range(0,w,8):
patch = Y[y:y+8,x:x+8]
dct = cv2.dct(patch)
dcts.append(dct)
(visualizing the patches)
Let's look at the DCTs. Notice how they are all very similar to each other.
# visualizing the patches
for i in range(0,9):
subplot(3,3,i+1)
imshow(abs(dcts[i*4])**.25,interpolation='nearest')
(coefficient variances)
We can see that the variances vary quite consistently, getting smaller the further we get away from the top left corner.
dcts = array(dcts)
vs = var(dcts,axis=0)
imshow(vs**.25,interpolation='nearest')
<matplotlib.image.AxesImage at 0x82466190>
(standard quantization matrices)
Based on such regularities, JPEG defines standard quantization matrices; each coefficient is divided by the corresponding quantization value, then converted to an integer (these quantization matrices are 255 times as large as in our example above).
axis("off"); imshow(imread("Figures/jpeg-quant.png"))
<matplotlib.image.AxesImage at 0x77b09b90>