This notebook gives a brief introduction to SPTK.jl, with a particular focus on mel-generalized cepstrum analysis.
Requirements:
- PyPlot (for plotting in this notebook)
- WAV (package for reading the example audio file)
using PyCall
matplotlib = pyimport("matplotlib")
PyDict(matplotlib["rcParams"])["figure.figsize"] = (12, 5)
using PyPlot
INFO: Loading help data... Warning: using PyPlot.matplotlib in module Main conflicts with an existing identifier.
using WAV
import SPTK
# Path to the example WAV file in the "examples" directory of the installed SPTK package
filepath = joinpath(Pkg.dir("SPTK"), "examples", "test16k.wav")
# Read the samples and the sampling rate; format="native" presumably keeps the
# file's own sample type (confirm against the WAV.jl documentation)
x, fs = wavread(filepath, format="native")
x = convert(Vector{Float64}, vec(x)) # monaural: flatten to a Float64 vector (assumes a single channel; TODO confirm)
fs = convert(Int, fs)
# Visualize the speech signal in time-domain
plot(1:endof(x), x, label="a speech signal")
xlim(1, endof(x))
xlabel("sample")
legend()
PyObject <matplotlib.legend.Legend object at 0x7f97df24f190>
# Pick a short segment
pos = 3000
fftlen = 1024
# Note that mel-generalized cepstrum analysis basically assumes the window is power-normalized.
# NOTE(review): presumably SPTK.blackman returns a power-normalized window; confirm.
# Window the fftlen samples that start right after sample `pos`.
xw = x[pos+1:pos+fftlen] .* SPTK.blackman(fftlen)
# Plot the windowed segment against 1-based sample indices
plot(1:endof(xw), xw, linewidth="2", label="a windowed speech signal")
xlim(1, endof(xw))
xlabel("sample")
legend()
PyObject <matplotlib.legend.Legend object at 0x7f97bb3ac810>
# Plotting utility for visualizing a spectral envelope estimate on top of a log spectrum.
#
# Arguments:
#   sp       - reference curve, drawn in blue (callers in this notebook pass 20log10|X(ω)|)
#   envelope - envelope estimate, drawn in red after scaling by 20/log(10)
#              (presumably converts natural-log amplitude to dB; confirm against SPTK.mgc2sp)
#   title    - legend label for the envelope curve (default "envelope")
#
# Returns the value of `legend()`.
function pplot(sp, envelope; title="envelope")
    # Give both curves explicit 1-based x values, like the other plots in this
    # notebook. Without them matplotlib uses 0-based positions, so the
    # xlim(1, length(sp)) below would clip the first bin and leave an empty
    # slot on the right.
    plot(1:length(sp), sp, "b-", linewidth=2, label="Original log spectrum 20log|X(ω)|")
    plot(1:length(envelope), 20/log(10)*(envelope), "r-", linewidth=3, label=title)
    xlim(1, length(sp))
    xlabel("frequency bin")
    ylabel("log amplitude")
    legend()
end
pplot (generic function with 1 method)
# Compute spectrum 20log|X(ω)| for a windowed signal
# (rfft keeps only the non-negative-frequency half of the FFT of the real-valued input)
sp = 20log10(abs(rfft(xw)));
# Linear Cepstrum
c = SPTK.mgcep(xw, 20, 0.0, 0.0)
pplot(sp, real(SPTK.mgc2sp(c, 0.0, 0.0, fftlen)), title="Linear frequency cepstrum based envelope")
PyObject <matplotlib.legend.Legend object at 0x7f97b87033d0>
# Mel-Cepstrum
mc = SPTK.mcep(xw, 20, 0.41)
pplot(sp, real(SPTK.mgc2sp(mc, 0.41, 0.0, fftlen)), title="Mel-cepstrum based envelope")
PyObject <matplotlib.legend.Legend object at 0x7f97b864b1d0>
# LPC Cepstrum
mgc = SPTK.mgcep(xw, 20, 0.0, -1.0)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.0, -1.0, fftlen)), title="LPC cepstrum based envelope")
PyObject <matplotlib.legend.Legend object at 0x7f97b85918d0>
# Warped LPC
mgc = SPTK.mgcep(xw, 20, 0.41, -1.0)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.41, -1.0, fftlen)), title="Warped LPC based envelope")
PyObject <matplotlib.legend.Legend object at 0x7f97b84d9fd0>
# Generalized Cepstrum
mgc = SPTK.gcep(xw, 20, -0.35)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.0, -0.35, fftlen)), title="Generalized cepstrum based envelope")
PyObject <matplotlib.legend.Legend object at 0x7f97b83af6d0>
# Mel-Generalized Cepstrum
mgc = SPTK.mgcep(xw, 20, 0.41, -0.35)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.41, -0.35, fftlen)), title="Mel-generalized cepstrum based envelope")
PyObject <matplotlib.legend.Legend object at 0x7f97b82f6d90>
Almost all of the vec2vec transforms have mat2mat (column-wise) counterparts. Demonstrations follow.
# Utilities for splitting a time sequence into overlapping frames
# Number of complete frames of `framelen` samples, spaced `hopsize` samples apart,
# that fit in `x`. Returns 0 when `x` is shorter than a single frame: `div`
# truncates toward zero, so without the guard a slightly-too-short input would
# report one frame and a much shorter one a negative count.
countframes(x::AbstractVector, framelen, hopsize) =
    length(x) < framelen ? 0 : div(length(x) - framelen, hopsize) + 1
# Split `x` into overlapping frames, returned as the columns of a framelen×N matrix.
#
# Arguments:
#   x        - input sequence
#   framelen - number of samples per frame (default 1024)
#   hopsize  - shift between the starts of consecutive frames (default: framelen>>1)
#
# Trailing samples that do not fill a complete frame are dropped. An input shorter
# than one frame yields a framelen×0 matrix.
function splitframes(x::AbstractVector, framelen=1024, hopsize=framelen>>1)
    # Guard locally so a too-short input can never produce a frame count that
    # makes the loop read past the end of `x`.
    N = length(x) < framelen ? 0 : countframes(x, framelen, hopsize)
    # `similar` allocates an uninitialized array with x's element type and is
    # available on every Julia version, unlike the removed `Array(T, dims...)`.
    frames = similar(x, eltype(x), framelen, N)
    # Bounds checks are kept on purpose: the copy is cheap, and a bad index
    # should raise an error rather than silently read invalid memory.
    for i = 1:N
        offset = (i-1)*hopsize
        frames[:,i] = x[offset+1:offset+framelen]
    end
    return frames
end
splitframes (generic function with 3 methods)
# Frame length, and a hop of half a frame (winlen>>1)
winlen = 1024
hopsize = winlen>>1
# Split into frames (one per column) and multiply each by the Blackman window
# via broadcasting; the name `xw` is reused, now holding a matrix of frames
xw = splitframes(x, winlen, hopsize) .* SPTK.blackman(winlen);
@show size(xw)
size(xw) => (1024,117)
(1024,117)
# Let's see how the spectral envelope estimate changes with the order of the mel-cepstrum
# Estimate spectral envelope by mel-cepstrum analysis where order = 20
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 20, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()
PyObject <matplotlib.colorbar.Colorbar instance at 0x7f97b81dec68>
# order = 30
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 30, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()
PyObject <matplotlib.colorbar.Colorbar instance at 0x7f97b809dc20>
# order = 40
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 40, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()
PyObject <matplotlib.colorbar.Colorbar instance at 0x7f97b8008ef0>