#!/usr/bin/env python
# coding: utf-8

# ## ThinkDSP
#
# This notebook contains solutions to exercises in Chapter 6: Discrete Cosine Transform
#
# Copyright 2015 Allen Downey
#
# License: [Creative Commons Attribution 4.0 International](http://creativecommons.org/licenses/by/4.0/)

# In[1]:

# Get thinkdsp.py

import os

if not os.path.exists('thinkdsp.py'):
    get_ipython().system('wget https://github.com/AllenDowney/ThinkDSP/raw/master/code/thinkdsp.py')


# In[2]:

import numpy as np
import matplotlib.pyplot as plt

from thinkdsp import decorate


# ## Exercise 1
#
# In this chapter I claim that `analyze1` takes time proportional
# to $n^3$ and `analyze2` takes time proportional to $n^2$.  To
# see if that's true, run them on a range of input sizes and time
# them.  In IPython, you can use the magic command `%timeit`.
#
# If you plot run time versus input size on a log-log scale, you
# should get a straight line with slope 3 for `analyze1` and
# slope 2 for `analyze2`.  You also might want to test `dct_iv`
# and `scipy.fftpack.dct`.
#
# I'll start with a noise signal and an array of power-of-two sizes.

# In[3]:

from thinkdsp import UncorrelatedGaussianNoise

signal = UncorrelatedGaussianNoise()
noise = signal.make_wave(duration=1.0, framerate=16384)
noise.ys.shape


# The following function takes an array of results from a timing experiment, plots the results, and fits a straight line.

# In[4]:

from scipy.stats import linregress

loglog = dict(xscale='log', yscale='log')

def plot_bests(ns, bests):
    plt.plot(ns, bests)
    decorate(**loglog)

    x = np.log(ns)
    y = np.log(bests)
    t = linregress(x, y)
    slope = t[0]

    return slope


# In[5]:

PI2 = np.pi * 2

def analyze1(ys, fs, ts):
    """Analyze a mixture of cosines and return amplitudes.

    Works for the general case where M is not orthogonal.

    ys: wave array
    fs: frequencies in Hz
    ts: times where the signal was evaluated

    returns: vector of amplitudes
    """
    args = np.outer(ts, fs)
    M = np.cos(PI2 * args)
    amps = np.linalg.solve(M, ys)
    return amps


# In[6]:

def run_speed_test(ns, func):
    results = []
    for N in ns:
        print(N)
        ts = (0.5 + np.arange(N)) / N
        freqs = (0.5 + np.arange(N)) / 2
        ys = noise.ys[:N]
        result = get_ipython().run_line_magic('timeit', '-r1 -o func(ys, freqs, ts)')
        results.append(result)

    bests = [result.best for result in results]
    return bests


# Here are the results for `analyze1`.

# In[7]:

ns = 2 ** np.arange(6, 13)
ns


# In[8]:

bests = run_speed_test(ns, analyze1)
plot_bests(ns, bests)


# The estimated slope is close to 2, not the 3 we expected.  One possibility is that the performance of `np.linalg.solve` is nearly quadratic in this range of array sizes.
#
# Here are the results for `analyze2`:

# In[9]:

def analyze2(ys, fs, ts):
    """Analyze a mixture of cosines and return amplitudes.

    Assumes that fs and ts are chosen so that M is orthogonal.

    ys: wave array
    fs: frequencies in Hz
    ts: times where the signal was evaluated

    returns: vector of amplitudes
    """
    args = np.outer(ts, fs)
    M = np.cos(PI2 * args)
    amps = np.dot(M, ys) / 2
    return amps


# In[10]:

bests2 = run_speed_test(ns, analyze2)
plot_bests(ns, bests2)


# The results for `analyze2` fall in a straight line with the estimated slope close to 2, as expected.
#
# Here are the results for `scipy.fftpack.dct`:

# In[11]:

import scipy.fftpack

def scipy_dct(ys, freqs, ts):
    return scipy.fftpack.dct(ys, type=3)


# In[12]:

bests3 = run_speed_test(ns, scipy_dct)
plot_bests(ns, bests3)


# This implementation of the DCT is even faster.  The line is curved, which means either we haven't seen the asymptotic behavior yet, or the asymptotic behavior is not a simple exponent of $n$.  In fact, as we'll see soon, the run time is proportional to $n \log n$.
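# As a rough check of that claim (this cell is my addition, not part of the original solution), we can scale an $n \log n$ reference curve so it passes through the last measured point and plot it against the `fftpack.dct` timings; if the claim holds, the two curves should roughly track each other.

# In[ ]:

# reference curve proportional to n log n, scaled to match the last measurement
ref = ns * np.log(ns)
ref = ref * bests3[-1] / ref[-1]

plt.plot(ns, bests3, label='fftpack.dct')
plt.plot(ns, ref, linestyle='dashed', label='n log n (scaled)')
decorate(xlabel='Wave length (N)',
         ylabel='Time (s)',
         **loglog)
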
# The following figure shows all three curves on the same axes.

# In[13]:

plt.plot(ns, bests, label='analyze1')
plt.plot(ns, bests2, label='analyze2')
plt.plot(ns, bests3, label='fftpack.dct')
decorate(xlabel='Wave length (N)',
         ylabel='Time (s)',
         **loglog)


# ## Exercise 2
#
# One of the major applications of the DCT is compression for both sound and images.  In its simplest form, DCT-based compression works like this:
#
# 1. Break a long signal into segments.
# 2. Compute the DCT of each segment.
# 3. Identify frequency components with amplitudes so low they are inaudible, and remove them.  Store only the frequencies and amplitudes that remain.
# 4. To play back the signal, load the frequencies and amplitudes for each segment and apply the inverse DCT.
#
# Implement a version of this algorithm and apply it to a recording of music or speech.  How many components can you eliminate before the difference is perceptible?

# `thinkdsp` provides a class, `Dct`, that is similar to a `Spectrum` but uses the DCT instead of the FFT.
#
# As an example, I'll use a recording of a saxophone:

# In[14]:

if not os.path.exists('100475__iluppai__saxophone-weep.wav'):
    get_ipython().system('wget https://github.com/AllenDowney/ThinkDSP/raw/master/code/100475__iluppai__saxophone-weep.wav')


# In[15]:

from thinkdsp import read_wave

wave = read_wave('100475__iluppai__saxophone-weep.wav')
wave.make_audio()


# Here's a short segment:

# In[16]:

segment = wave.segment(start=1.2, duration=0.5)
segment.normalize()
segment.make_audio()


# And here's the DCT of that segment:

# In[17]:

seg_dct = segment.make_dct()
seg_dct.plot(high=4000)
decorate(xlabel='Frequency (Hz)', ylabel='DCT')


# There are only a few harmonics with substantial amplitude, and many entries near zero.
#
# The following function takes a DCT and sets elements below `thresh` to 0.

# In[18]:

def compress(dct, thresh=1):
    """Zero out DCT elements whose magnitude is below `thresh`.

    Prints the number of elements removed, the total number of elements,
    and the percentage removed.
    """
    count = 0
    for i, amp in enumerate(dct.amps):
        if np.abs(amp) < thresh:
            dct.hs[i] = 0
            count += 1

    n = len(dct.amps)
    print(count, n, 100 * count / n, sep='\t')


# If we apply it to the segment, we can eliminate more than 90% of the elements:

# In[19]:

seg_dct = segment.make_dct()
compress(seg_dct, thresh=10)
seg_dct.plot(high=4000)


# And the result sounds the same (at least to me):

# In[20]:

seg2 = seg_dct.make_wave()
seg2.make_audio()


# To compress a longer segment, we can make a DCT spectrogram.  The following function is similar to `wave.make_spectrogram` except that it uses the DCT.

# In[21]:

from thinkdsp import Spectrogram

def make_dct_spectrogram(wave, seg_length):
    """Computes the DCT spectrogram of the wave.

    seg_length: number of samples in each segment

    returns: Spectrogram
    """
    window = np.hamming(seg_length)
    i, j = 0, seg_length
    step = seg_length // 2

    # map from time to Spectrum
    spec_map = {}

    while j < len(wave.ys):
        segment = wave.slice(i, j)
        segment.window(window)

        # the nominal time for this segment is the midpoint
        t = (segment.start + segment.end) / 2
        spec_map[t] = segment.make_dct()

        i += step
        j += step

    return Spectrogram(spec_map, seg_length)


# Now we can make a DCT spectrogram and apply `compress` to each segment:

# In[22]:

spectro = make_dct_spectrogram(wave, seg_length=1024)

for t, dct in sorted(spectro.spec_map.items()):
    compress(dct, thresh=0.2)


# In most segments, the compression is 75-85%.
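# For an overall figure (this cell is my addition, not part of the original solution), we can total up the zeroed coefficients across all segments.  This assumes, as above, that `spectro.spec_map` maps times to `Dct` objects whose coefficients are stored in `hs`.

# In[ ]:

# count zeroed coefficients and total coefficients across the whole spectrogram
zeroed = sum(np.sum(dct.hs == 0) for dct in spectro.spec_map.values())
total = sum(len(dct.hs) for dct in spectro.spec_map.values())
print(zeroed, total, 100 * zeroed / total, sep='\t')
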
#
# To hear what it sounds like, we can convert the spectrogram back to a wave and play it.

# In[23]:

wave2 = spectro.make_wave()
wave2.make_audio()


# And here's the original again for comparison.

# In[24]:

wave.make_audio()


# As an experiment, you might try increasing `thresh` to see when the effect of compression becomes audible (to you).
#
# Also, you might try compressing a signal with some noisy elements, like cymbals.

# In[ ]:
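# To make that experiment easier, here is a small wrapper (my sketch, not part
# of the original solution) that runs the whole pipeline for a given threshold
# and returns the reconstructed wave.  The name `compress_wave` is hypothetical;
# it just reuses `make_dct_spectrogram` and `compress` from above.

def compress_wave(wave, seg_length=1024, thresh=0.2):
    """Zero out small DCT coefficients in each segment of a wave.

    wave: Wave to compress
    seg_length: number of samples in each segment
    thresh: amplitude below which coefficients are zeroed

    returns: reconstructed Wave
    """
    spectro = make_dct_spectrogram(wave, seg_length)
    for t, dct in sorted(spectro.spec_map.items()):
        compress(dct, thresh=thresh)
    return spectro.make_wave()


# For example, a higher threshold discards more coefficients:
# wave3 = compress_wave(wave, thresh=1)
# wave3.make_audio()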