# Plot setup: through PyCall, set matplotlib's default "figure.figsize" rcParam
# to (12, 5), then load PyPlot.
# NOTE(review): the rcParam is presumably set before `using PyPlot` on purpose — confirm
# before reordering these lines.
using PyCall
matplotlib = pyimport("matplotlib")
PyDict(matplotlib["rcParams"])["figure.figsize"] = (12, 5)
using PyPlot

# https://gist.github.com/jfsantos/a39ed69a7894876f1e04#file-audiodisplay-jl
# Thanks, @jfsantos
include("AudioDisplay.jl")

using WAV
using DSP
using MelGeneralizedCepstrums # to estimate spectral envelope parameters
using SynthesisFilters

# plotting utilities
# Draw the waveform `x` in blue against its 1-based sample index.
#
# Keywords:
#   label   - legend entry for the trace
#   x_label - caption of the horizontal axis
#
# The horizontal axis is limited to the extent of `x`; the value of the final
# `legend()` call is returned.
function wavplot(x; label="a waveform", x_label="sample")
    nsamples = length(x)
    plot(1:nsamples, x, "b", label=label)
    xlim(1, nsamples)
    xlabel(x_label)
    legend()
end

# Overlay the waveform `y` (red line with `+` markers) and the reference
# waveform `x` on the same axes, both against their 1-based sample index.
#
# Keywords:
#   label   - legend entry for `y`; `x` is always labelled "original speech signal"
#   x_label - caption of the horizontal axis
#
# `y` is drawn first so that `x` ends up on top. The horizontal axis is limited
# to the extent of `x`; the value of the final `legend()` call is returned.
function wavcompare(x, y; label="synthesized waveform", x_label="sample")
    nx, ny = length(x), length(y)
    plot(1:ny, y, "r-+", label=label)
    plot(1:nx, x, label="original speech signal")
    xlim(1, nx)
    xlabel(x_label)
    legend()
end

# Read the test utterance stored in the `data` directory next to this script.
# `format="native"` is passed to wavread, so the samples are flattened, widened to
# Float64 for analysis, and turned back into Int16 only for playback.
x, fs = wavread(joinpath(dirname(@__FILE__), "data", "test16k.wav"); format="native")
x = Float64[sample for sample in vec(x)]
fs = Int(fs)
wavplot(x)
inline_audioplayer(convert(Vector{Int16}, x), fs)

# Note about excitation
# fs: 16000
# frame period: 5.0 ms
# F0 analysis: estimated by WORLD.dio and WORLD.stonemask

# Excitation signal: periodic pulse for voiced segments and Gaussian random values for unvoiced segments
# base_excitation = vec(readdlm(joinpath(dirname(@__FILE__), "data", "test16k_excitation.txt")))

# Mixed excitation generated using WORLD.jl, read as a flat vector from a text file
# in the `data` directory next to this script.
base_excitation = vec(readdlm(joinpath(dirname(@__FILE__), "data", "test16k_world_excitation.txt")))
wavplot(base_excitation, label="mixed excitation")
# Scale by the peak *magnitude* for playback. Dividing by the signed maximum would
# leave samples below -1 whenever the largest negative excursion exceeds the largest
# positive one (and would flip the sign if the maximum were negative).
# NOTE(review): presumably the player expects floating-point samples in [-1, 1] — confirm.
inline_audioplayer(base_excitation ./ maximum(abs, base_excitation), fs)

# Framing parameters
framelen = 512
hopsize = 80 # 5.0 ms for fs 16000
noverlap = framelen - hopsize

# Note that mgcep analysis basically assumes a power-normalized window, i.e. Σₙ w(n)² = 1.
# Build the Blackman window once, then scale it to unit energy.
win = DSP.blackman(framelen)
win = win ./ sqrt(sumabs2(win))
@assert isapprox(sumabs2(win), 1.0)

# Build the frame matrix: column t holds the t-th time slice of the signal
as = arraysplit(x, framelen, noverlap)
nframes = length(as)
xw = Array(Float64, framelen, nframes)
for t in 1:nframes
    xw[:, t] = as[t]
end

# Apply the window to every column
xw .*= win;
@show size(xw)

# Estimate mel-cepstrum parameters from the windowed frames and show them as an image
c = estimate(MelCepstrum(20, mcepalpha(fs)), xw)
imshow(c; origin="lower", aspect="auto")
colorbar()

# Let's see the spectral envelope estimate: the real part of mgc2sp's output
envelope_est = real(mgc2sp(c, framelen))
imshow(envelope_est; origin="lower", aspect="auto")
colorbar()

# Convert a floating-point waveform to Int16 samples for playback: values outside
# the Int16 range are saturated first, then everything is rounded.
to_int16(y) = round(Int16, clamp(y, typemin(Int16), typemax(Int16)))

# Run `synthesis` on `excitation` with the parameter set `params`, overlay the result
# on `original` with `wavcompare`, pass it to the inline audio player, and return the
# synthesized waveform.
#
# Arguments:
#   original   - reference waveform drawn alongside the synthesized one
#   excitation - excitation signal passed to `synthesis`
#   params     - parameters passed to `synthesis` (output of `estimate`, `lpc2par`, ...)
#   hopsize    - hop size passed to `synthesis`
#   fs         - sampling rate passed to the audio player
# Keywords:
#   label      - legend entry for the synthesized waveform
function resynthesize(original, excitation, params, hopsize, fs; label="synthesized waveform")
    y = synthesis(excitation, params, hopsize)
    wavcompare(original, y, label=label)
    inline_audioplayer(to_int16(y), fs)
    return y
end

# Each demo below estimates one parameterization from the same windowed frames `xw`
# and re-synthesizes speech from the same excitation signal.

# Cepstrum
c = estimate(LinearCepstrum(25), xw)
y = resynthesize(x, base_excitation, c, hopsize, fs, label="Cepstrum-based synthesized waveform")

# Mel-cepstrum
c = estimate(MelCepstrum(25, mcepalpha(fs)), xw)
y = resynthesize(x, base_excitation, c, hopsize, fs, label="Mel-cepstrum-based synthesized waveform")

# Mel-generalized cepstrum
c = estimate(MelGeneralizedCepstrum(25, mcepalpha(fs), -1/4), xw)
y = resynthesize(x, base_excitation, c, hopsize, fs,
                 label="Mel-generalized cepstrum based synthesized waveform")

# LPC (estimated with use_mgcep=true)
l = estimate(LinearPredictionCoef(25), xw, use_mgcep=true)
y = resynthesize(x, base_excitation, l, hopsize, fs, label="LPC-based synthesized waveform")

# PARCOR, converted from LPC
l = lpc2par(estimate(LinearPredictionCoef(25), xw))
y = resynthesize(x, base_excitation, l, hopsize, fs, label="PARCOR-based synthesized waveform")

# LSP, converted from LPC estimated with LinearPredictionCoef(15)
l = lpc2lsp(estimate(LinearPredictionCoef(15), xw))
y = resynthesize(x, base_excitation, l, hopsize, fs, label="LSP-based synthesized waveform")