In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import PchipInterpolator

raw_data = np.loadtxt('out.csv', delimiter=',')
ms = sorted(set(raw_data.T[0]))
qs = sorted(set(raw_data.T[1]))
M_interp = 16
Ms = np.linspace(min(ms), max(ms), (len(ms) - 1)*M_interp + 1)

def aggregate(data, drop=None, label=None):
    pixels = data.T[5]*data.T[6]*3//2
    for Q in qs:
        rate = []
        psnr = []
        de = []
        for M in ms:
            idx = (data.T[0] == M) & (data.T[1] == Q) 
            if drop is not None and label is not None:
                idx = idx & (label != drop)
            px_idx = pixels[idx]
            px_sum = px_idx.sum()
            rate.append(data[idx,2].sum()*8/px_sum)
            psnr.append((data[idx,3] * px_idx).sum()/px_sum)
            de.append((data[idx,4] * px_idx).sum()/px_sum)
        # Interpolate the results
        rate_ = np.poly1d(np.polyfit(ms, np.log(rate), 3))
        psnr_ = np.poly1d(np.polyfit(ms, psnr, 3))
        de_ = np.poly1d(np.polyfit(ms, de, 3))
        for M in Ms:
            yield (M, Q, np.exp(rate_(M)), psnr_(M), de_(M))

data = np.array([v for v in aggregate(raw_data)])

Quality vs bitrate (color vs brightness)

Analysis by David Michael Barr

CIEDE2000 is a perceptually uniform measure of color difference.

PSNR is an essential measure of error, in this case restricted to differences in brightness.

In the following plots, we compare the trends in quality vs bitrate for various allocations of color detail.

In [2]:
def plot_rd(D, N, C):
    X = data.T[2][data.T[0] == N]
    Y = data.T[D][data.T[0] == N]
    Y_ = np.linspace(Y.min(), Y.max(), 1000)
    X_ = np.exp(PchipInterpolator(Y[::-1], np.log(X[::-1]))(Y_))
    plt.semilogx(X_, Y_, color=C, lw=.5)

plt.figure(figsize=(16, 8))
plt.subplot(121)
plt.title('subset3')
plt.xlabel('bits per pixel')
plt.ylabel('CIEDE2000')
for N in range(40, 101):
    plot_rd(4, N, plt.cm.gray(N*3-100))
plot_rd(4, 68, 'r')
plt.xlim([data.T[2].min(), data.T[2].max()])
plt.ylim([data.T[4].min(), data.T[4].max()]);

plt.subplot(122)
plt.title('subset3')
plt.xlabel('bits per pixel')
plt.ylabel('PSNR')
for N in range(40, 101):
    plot_rd(3, N, plt.cm.gray(N*3-100))
plot_rd(3, 68, 'r')
plt.xlim([data.T[2].min(), data.T[2].max()])
plt.ylim([data.T[3].min(), data.T[3].max()]);

We found the balance of color detail that maximized perceived color matching but that ended up significantly harming details in brightness for small improvements in color accuracy. So we traded the marginal gains in color accuracy for greater losses in brightness detail by stepping back to the point where there is an even trade between the two.

In [3]:
def get_bdr(D):
    top =  min([data.T[D][data.T[0] == N].max() for N in Ms])
    bottom = max([data.T[D][data.T[0] == N].min() for N in Ms])

    def interp(N):
        Y = np.log(data.T[2][data.T[0] == N])
        X = data.T[D][data.T[0] == N]
        pchip = PchipInterpolator(X[::-1], Y[::-1], extrapolate=True)
        return pchip.integrate(bottom, top) / (top - bottom)

    base = interp(96)
    return [np.exp(interp(N) - base) - 1 for N in Ms]
    
de_bdr = get_bdr(4)
de_bdr_ = np.polyfit(x=Ms, y=de_bdr, deg=3)
de_bdr__ = np.polyder(de_bdr_)
psnr_bdr = get_bdr(3)
psnr_bdr_ = np.polyfit(x=Ms, y=psnr_bdr, deg=3)
psnr_bdr__ = np.polyder(psnr_bdr_)

best = np.roots(de_bdr__+psnr_bdr__)[0]

plt.figure(figsize=(16, 9))
plt.title('subset3')
plt.xlabel('chroma quantizer gradient Q6')
plt.ylabel('BD-rate')
# Q8 to Q6 (/4), since we sampled with a step of 4.
plt.scatter(Ms[::M_interp]/4, de_bdr[::M_interp], s=3)
plt.scatter(Ms[::M_interp]/4, psnr_bdr[::M_interp], s=3)
plt.plot(Ms/4, np.poly1d(de_bdr_)(Ms), label='CIEDE2000')
plt.plot(Ms/4, np.poly1d(psnr_bdr_)(Ms), label='PSNR')
plt.vlines(x=best/4, ymax=max(de_bdr), ymin=min(psnr_bdr), label='best - '+str(int(round(best/4))))
plt.xlim([10, 25])
plt.ylim([min(psnr_bdr), max(de_bdr)])
plt.legend();
np.poly1d(de_bdr_)(best), np.poly1d(psnr_bdr_)(best), best*16
Out[3]:
(0.028925745693066246, -0.056701091670904945, 1089.013519825164)

We repeated the analysis many times with slightly different collections of images to show the typical balance point and how much it varied according to which images were tested.

In [4]:
best = []
raw_label = raw_data.T[7].astype(int) - 1
label_map = np.arange(raw_label.max() + 1) % 4
import multiprocessing

def optimize(drop):
    data = np.array([v for v in aggregate(raw_data, drop, label)])

    def get_bdr(D):
        top =  min([data.T[D][data.T[0] == N].max() for N in Ms])
        bottom = max([data.T[D][data.T[0] == N].min() for N in Ms])

        def interp(N):
            Y = np.log(data.T[2][data.T[0] == N])
            X = data.T[D][data.T[0] == N]
            pchip = PchipInterpolator(X[::-1], Y[::-1], extrapolate=True)
            return pchip.integrate(bottom, top) / (top - bottom)

        base = interp(96)
        return [np.exp(interp(N) - base) - 1 for N in Ms]

    de_bdr = get_bdr(4)
    de_bdr_ = np.polyfit(x=Ms, y=de_bdr, deg=3)
    de_bdr__ = np.polyder(de_bdr_)
    psnr_bdr = get_bdr(3)
    psnr_bdr_ = np.polyfit(x=Ms, y=psnr_bdr, deg=3)
    psnr_bdr__ = np.polyder(psnr_bdr_)

    return np.roots(de_bdr__+psnr_bdr__)[0]

for drop_outter in range(25):
    np.random.shuffle(label_map)
    label = label_map[raw_label]
    pool = multiprocessing.Pool(4)
    best.extend(pool.map(optimize, range(4)))
    pool.close()
    pool.join()

# Q8 to Q6 (/4), since we sampled with a step of 4.
best = np.array(sorted(set(best)))/4
best
Out[4]:
array([16.78941863, 16.82228216, 16.82677867, 16.82886581, 16.84820511,
       16.85198117, 16.87583535, 16.88311918, 16.883748  , 16.88789836,
       16.89421253, 16.90329681, 16.90770884, 16.90985357, 16.91233044,
       16.91346879, 16.92122706, 16.9228993 , 16.92693143, 16.94032385,
       16.94451887, 16.95208867, 16.95286411, 16.95416093, 16.95793928,
       16.96025926, 16.96064708, 16.96136425, 16.96146212, 16.96318064,
       16.96333769, 16.96428518, 16.9669432 , 16.97506318, 16.9779565 ,
       16.97847716, 16.98119767, 16.98865472, 16.9899663 , 16.99040495,
       16.99945894, 17.00128212, 17.00203656, 17.00225844, 17.00835357,
       17.01546248, 17.01648847, 17.01915164, 17.01925646, 17.01983071,
       17.02041394, 17.02093636, 17.0236123 , 17.03076026, 17.03269324,
       17.0373965 , 17.03809949, 17.04001395, 17.04006294, 17.0417615 ,
       17.04188977, 17.04350556, 17.04393986, 17.04464277, 17.04723339,
       17.05164613, 17.05226331, 17.05237272, 17.05270346, 17.05417128,
       17.05493585, 17.06163489, 17.06455797, 17.06526704, 17.06740377,
       17.07398739, 17.07777224, 17.07804934, 17.08407846, 17.08858505,
       17.09067318, 17.09478911, 17.09811564, 17.10522728, 17.11780949,
       17.1181685 , 17.12137934, 17.12411801, 17.12609942, 17.13804293,
       17.13840306, 17.15795359, 17.18132875, 17.18293961, 17.18660995,
       17.18934696, 17.19003433, 17.19308542, 17.20618555, 17.21195041])
In [5]:
import seaborn as sns
from scipy.stats import mstats
from scipy.stats import norm
best_ = mstats.winsorize(best,(.05,.05))
plt.figure(figsize=(16, 9))
sns.distplot(best_, rug=True, hist=False, fit=norm, kde=True);
best_.mean(), best_.std()
Out[5]:
(17.016841899645836, 0.08920247608042965)