using Glob, WAV, Serialization, TextAnalysis, SparseArrays, Peaks, AMD
using Grep
using DSP, LPVSpectral
using AudioClustering, SpectralDistances

const fs = 44100      # sample rate
const minpeak = 1e-6  # the minimum power to consider in a spectrum

"""
    get_audio_words(path, load_from_disc=true)

Read all `.wav` files in `path` and turn each recording into an "audio word":
a small set of prominent mel-spectral peaks (see [`spectral_fingerprint`](@ref)).

When `load_from_disc` is `true`, previously computed fingerprints are
deserialized from the file `"audiowords"` in `path`; otherwise every file is
processed (trim leading/trailing silence, band-pass 50 Hz–18 kHz, Welch
periodogram, mel projection, peak picking) and the result is serialized.

Returns `(labels, models)` where `labels` is a vector of integer class indices
(one per file, derived from the leading `[a-z_]+` run of each filename) and
`models` is the vector of fingerprints.
"""
function get_audio_words(path, load_from_disc=true)
    cd(path)
    files = glob("*.wav")
    # Class name = leading lowercase/underscore run of each filename.
    # (Fixes garbled `..:match .|> String` syntax from the original.)
    labels0 = [String(m.match) for m in match.(r"[a-z_]+", files)]
    ulabels = unique(labels0)
    # Integer class index per file. Generalized from the original hard-coded
    # `(1:30)'` broadcast so any number of classes works; values are identical
    # for the 30-class case (index of each file's class within `ulabels`).
    labels = [findfirst(==(lab), ulabels) for lab in labels0]
    M = mel(fs, 512) # mel filterbank matching the 512-point periodogram below
    if !load_from_disc # process and save all files
        models = mapsoundfiles(files) do sound
            # Trim leading/trailing zeros (silence padding).
            sound = vec(sound[findfirst(!iszero, sound):findlast(!iszero, sound)])
            # Band-pass to the audible bird range (normalized frequencies).
            sound = SpectralDistances.bp_filter(sound, (50/fs, 18000/fs))
            P = welch_pgram(sound, 512, fs=fs)
            spectral_fingerprint(M, P)
        end
        serialize("audiowords", models)
    else
        models = deserialize("audiowords")
    end
    labels, models
end

"""
    spectral_fingerprint(M, P, l=8)

Project the power spectrum `P` onto the mel filterbank `M`, normalize by the
standard deviation, and return the (at most) `l` most prominent peaks as a
tuple `(indices, powers)`.
"""
function spectral_fingerprint(M, P, l=8)
    power = M*P.power # Transform to a Mel spectrum
    power ./= std(power)
    p, prom = peakprom(power, Maxima(), 7, minpeak)
    perm = sortperm(prom, rev=true) # most prominent peaks first
    p = p[perm[1:min(l, length(p))]]
    (p, power[p])
end

path = "/home/fredrikb/kokulbirds/test_padded_30birds/"
labels, words = get_audio_words(path, true);

n_terms = 128 # The number of terms in the dictionary is the number of mel-frequencies in the spectra
n_docs = length(words)
terms = collect(1:n_terms) # We enumerate the terms starting from 1
inner_dtm = spzeros(Int, n_docs, n_terms)
for di in 1:n_docs # Populate the document term matrix
    for (ti, tc) in zip(words[di]...)
        # Term "count" = order of magnitude of the peak power above minpeak.
        inner_dtm[di, ti] = round(Int, log10(tc/minpeak))
    end
end
dtm = DocumentTermMatrix(inner_dtm, terms)

ntopics = 60 # We choose 60 topics, even though we know there are 30 classes in the "corpus"
iteration = 1000
α = 0.1/ntopics # Dirichlet dist. hyperparameter for topic distribution per document. `α<1` yields a sparse topic mixture for each document. `α>1` yields a more uniform topic mixture for each document.
β = 0.01 # Dirichlet dist. hyperparameter for word distribution per topic. `β<1` yields a sparse word mixture for each topic. `β>1` yields a more uniform word mixture for each topic.
# `β<1` yields a sparse word mixture for each topic; `β>1` yields a more
# uniform word mixture for each topic (continuation of the β comment above).
ϕ, θ = lda(dtm, ntopics, iteration, α, β);
heatmap(θ, xlabel="Audio sample", ylabel="Topic")

"""
    corr(x::AbstractMatrix)

Normalize a covariance-like matrix `x` into a correlation matrix:
`y[i,j] = x[i,j] / sqrt(x[i,i]*x[j,j])`.
"""
function corr(x::AbstractMatrix)
    d = diag(x)
    y = copy(x)
    for i in eachindex(d), j in eachindex(d)
        y[i, j] /= sqrt(x[i, i]*x[j, j])
    end
    y
end

# Topic similarity measured two ways: by shared words (ϕ) and by co-occurrence
# in the same samples (θ).
topic_cov_by_words = Matrix(ϕ*ϕ')
topic_cov_by_sample = Matrix(θ*θ')
topic_corr_by_words = corr(topic_cov_by_words)
topic_corr_by_sample = corr(topic_cov_by_sample)
plot(
    heatmap(topic_corr_by_sample, title="Correlation based on sample similarities", titlefont=10),
    heatmap(topic_corr_by_words, title="Correlation based on word similarities", titlefont=10)
)

# Average topic-activation vector per class.
ulabels = unique(labels)
classvecs = map(ulabels) do label
    classinds = findall(labels .== label)
    vec(mean(θ[:, classinds], dims=2))
end
classvecs = reduce(hcat, classvecs) # n_topics × n_class
heatmap(classvecs', yticks=(1:length(ulabels), ulabels), xlabel="Topic",
        ylabel="Class", size=(400, 400), color=:blues)

"""
    diagonalize(C, tol=:auto; permute_y=false, doplot=false)

Zero out entries of `C` (or `C'C` when `C` is non-square) with magnitude below
`tol`, then compute an AMD fill-reducing permutation that concentrates the
remaining large entries near the diagonal. With `tol = :auto` the threshold is
chosen (ad hoc) where the cumulative sorted magnitudes pass 30 % of the total.
Returns `(C_permuted, perm, yperm)`; rows are permuted only if `permute_y`.
"""
function diagonalize(C, tol=:auto; permute_y=false, doplot=false)
    C = copy(C)
    amdmat = size(C, 1) == size(C, 2) ? copy(C) : C'C
    if tol === :auto # Find tolerance automatically (adhoc)
        y = abs.(amdmat[:])
        inds = sortperm(y)
        cum = cumsum(y[inds])
        cum ./= cum[end]
        i = findfirst(c -> c > 0.3, cum)
        tol = y[inds[i]]
    end
    doplot && (histogram(abs.(amdmat[:])) |> display)
    amdmat[abs.(amdmat) .< tol] .= 0 # sparsify before the AMD ordering
    perm = amd(sparse(amdmat))
    yperm = permute_y ? perm : 1:size(C, 1)
    C[yperm, perm], perm, yperm
end

"""
    plotcov(C, xvector, yvector; kwargs...)

Heatmap of the similarity matrix `C` using `xvector`/`yvector` as tick labels.
"""
function plotcov(C, xvector, yvector; kwargs...)
    xticks = (1:length(xvector), xvector)
    yticks = (1:length(yvector), yvector)
    heatmap(C; xticks=xticks, yticks=yticks, xrotation=50, title="Class similarity", kwargs...)
end

# Class-by-class similarity in topic space, reordered to expose block structure.
classvecs_unit = mapslices(v -> v ./ norm(v), classvecs, dims=1)
classcov = classvecs_unit'topic_corr_by_sample*classvecs_unit
C, perm, yperm = diagonalize(classcov, permute_y=true)
plotcov(C, ulabels[perm], ulabels[perm], yflip=true)