# Demo: spectral envelope estimation with SPTK (cepstrum, mel-cepstrum, LPC,
# generalized and mel-generalized cepstrum), visualized with PyPlot.

using PyCall
matplotlib = pyimport("matplotlib")
# Set the default figure size through matplotlib's rcParams.
PyDict(matplotlib["rcParams"])["figure.figsize"] = (12, 5)
using PyPlot

using WAV
import SPTK

# Load the example speech file shipped with the SPTK package.
filepath = joinpath(Pkg.dir("SPTK"), "examples", "test16k.wav")
x, fs = wavread(filepath, format="native")
x = convert(Vector{Float64}, vec(x)) # mono
fs = convert(Int, fs)

# Visualize the speech signal in the time domain
plot(1:endof(x), x, label="a speech signal")
xlim(1, endof(x))
xlabel("sample")
legend()

# Pick a short segment
pos = 3000
fftlen = 1024

# Note that mel-generalized cepstrum analysis basically assumes that the window
# is power-normalized.
xw = x[pos+1:pos+fftlen] .* SPTK.blackman(fftlen)

plot(1:endof(xw), xw, linewidth="2", label="a windowed speech signal")
xlim(1, endof(xw))
xlabel("sample")
legend()

"""
    pplot(sp, envelope; title="envelope")

Plot the log spectrum `sp` (blue) together with a spectral envelope estimate
(red) on the current axes. `envelope` is scaled by `20/log(10)` before plotting
(presumably it is a natural-log amplitude, as returned by `SPTK.mgc2sp`, which
this factor converts to dB -- confirm against SPTK). `title` is used as the
legend label of the envelope curve.
"""
function pplot(sp, envelope; title="envelope")
    plot(sp, "b-", linewidth="2", label="Original log spectrum 20log|X(ω)|")
    plot(20/log(10)*(envelope), "r-", linewidth="3", label=title)
    xlim(1, length(sp))
    xlabel("frequency bin")
    ylabel("log amplitude")
    legend()
end

# Compute spectrum 20log|X(ω)| for a windowed signal
sp = 20log10(abs(rfft(xw)));

# Linear Cepstrum
c = SPTK.mgcep(xw, 20, 0.0, 0.0)
pplot(sp, real(SPTK.mgc2sp(c, 0.0, 0.0, fftlen)),
      title="Linear frequency cepstrum based envelope")

# Mel-Cepstrum
mc = SPTK.mcep(xw, 20, 0.41)
pplot(sp, real(SPTK.mgc2sp(mc, 0.41, 0.0, fftlen)),
      title="Mel-cepstrum based envelope")

# LPC Cepstrum
mgc = SPTK.mgcep(xw, 20, 0.0, -1.0)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.0, -1.0, fftlen)),
      title="LPC cepstrum based envelope")

# Warped LPC
mgc = SPTK.mgcep(xw, 20, 0.41, -1.0)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.41, -1.0, fftlen)),
      title="Warped LPC based envelope")

# Generalized Cepstrum
mgc = SPTK.gcep(xw, 20, -0.35)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.0, -0.35, fftlen)),
      title="Generalized cepstrum based envelope")

# Mel-Generalized Cepstrum
mgc = SPTK.mgcep(xw, 20, 0.41, -0.35)
pplot(sp, real(SPTK.mgc2sp(mgc, 0.41, -0.35, fftlen)),
      title="Mel-generalized cepstrum based envelope")

# Utilities for splitting a time sequence into overlapping frames

"""
    countframes(x::AbstractVector, framelen, hopsize)

Return the number of complete frames of length `framelen`, taken every
`hopsize` samples, that fit in `x`. Returns 0 when `x` is shorter than one
frame.
"""
function countframes(x::AbstractVector, framelen, hopsize)
    # `div` truncates toward zero, so without this guard a signal shorter than
    # one frame would yield a count of 1 (or a negative count) instead of 0.
    length(x) < framelen && return 0
    return div(length(x) - framelen, hopsize) + 1
end

"""
    splitframes(x::AbstractVector, framelen=1024, hopsize=framelen>>1)

Split `x` into overlapping frames and return them as a `framelen`-by-`N`
matrix, one frame per column, where `N = countframes(x, framelen, hopsize)`.
Trailing samples that do not fill a complete frame are dropped.
"""
function splitframes(x::AbstractVector, framelen=1024, hopsize=framelen>>1)
    N = countframes(x, framelen, hopsize)
    frames = Array(eltype(x), framelen, N)
    # Every slice stays within `x` because `countframes` only counts frames
    # that fit completely.
    @inbounds for i = 1:N
        offset = (i-1)*hopsize
        frames[:,i] = x[offset+1:offset+framelen]
    end
    return frames
end

winlen = 1024
hopsize = winlen>>1
# Window every frame (column) with a Blackman window.
xw = splitframes(x, winlen, hopsize) .* SPTK.blackman(winlen);
@show size(xw)

# Let's see how the spectral envelope estimate changes with the order of the
# mel-cepstrum.

# Estimate spectral envelope by mel-cepstrum analysis where order = 20
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 20, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()

# order = 30
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 30, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()

# order = 40
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 40, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()