using Glob, WAV, Serialization, TextAnalysis, SparseArrays, Peaks, AMD
using Grep
using DSP, LPVSpectral
using AudioClustering, SpectralDistances

const fs = 44100      # sample rate
const minpeak = 1e-6  # the minimum power to consider in a spectrum

"""
    get_audio_words(path, load_from_disc=true)

Read all `.wav` files in `path` and turn each recording into an "audio word":
a small set of prominent mel-spectral peaks (see [`spectral_fingerprint`](@ref)).

When `load_from_disc` is `true`, previously computed fingerprints are
deserialized from the file `"audiowords"` in `path`; otherwise every file is
processed (trim leading/trailing silence, band-pass 50 Hz–18 kHz, Welch
periodogram, mel projection, peak picking) and the result is serialized.

Returns `(labels, models)` where `labels` is a vector of integer class indices
(one per file, derived from the leading `[a-z_]+` run of each filename) and
`models` is the vector of fingerprints.
"""
function get_audio_words(path, load_from_disc=true)
    cd(path)
    files = glob("*.wav")
    # Class name = leading lowercase/underscore run of each filename.
    # (Fixes garbled `..:match .|> String` syntax from the original.)
    labels0 = [String(m.match) for m in match.(r"[a-z_]+", files)]
    ulabels = unique(labels0)
    # Integer class index per file. Generalized from the original hard-coded
    # `(1:30)'` broadcast so any number of classes works; values are identical
    # for the 30-class case (index of each file's class within `ulabels`).
    labels = [findfirst(==(lab), ulabels) for lab in labels0]
    M = mel(fs, 512) # mel filterbank matching the 512-point periodogram below
    if !load_from_disc # process and save all files
        models = mapsoundfiles(files) do sound
            # Trim leading/trailing zeros (silence padding).
            sound = vec(sound[findfirst(!iszero, sound):findlast(!iszero, sound)])
            # Band-pass to the audible bird range (normalized frequencies).
            sound = SpectralDistances.bp_filter(sound, (50/fs, 18000/fs))
            P = welch_pgram(sound, 512, fs=fs)
            spectral_fingerprint(M, P)
        end
        serialize("audiowords", models)
    else
        models = deserialize("audiowords")
    end
    labels, models
end

"""
    spectral_fingerprint(M, P, l=8)

Project the power spectrum `P` onto the mel filterbank `M`, normalize by the
standard deviation, and return the (at most) `l` most prominent peaks as a
tuple `(indices, powers)`.
"""
function spectral_fingerprint(M, P, l=8)
    power = M*P.power # Transform to a Mel spectrum
    power ./= std(power)
    p, prom = peakprom(power, Maxima(), 7, minpeak)
    perm = sortperm(prom, rev=true) # most prominent peaks first
    p = p[perm[1:min(l, length(p))]]
    (p, power[p])
end

path = "/home/fredrikb/kokulbirds/test_padded_30birds/"
labels, words = get_audio_words(path, true);

n_terms = 128 # The number of terms in the dictionary is the number of mel-frequencies in the spectra
n_docs = length(words)
terms = collect(1:n_terms) # We enumerate the terms starting from 1
inner_dtm = spzeros(Int, n_docs, n_terms)
for di in 1:n_docs # Populate the document term matrix
    for (ti, tc) in zip(words[di]...)
        # Term "count" = order of magnitude of the peak power above minpeak.
        inner_dtm[di, ti] = round(Int, log10(tc/minpeak))
    end
end
dtm = DocumentTermMatrix(inner_dtm, terms)

ntopics = 60 # We choose 60 topics, even though we know there are 30 classes in the "corpus"
iteration = 1000
α = 0.1/ntopics # Dirichlet dist. hyperparameter for topic distribution per document. `α<1` yields a sparse topic mixture for each document. `α>1` yields a more uniform topic mixture for each document.
β = 0.01 # Dirichlet dist. hyperparameter for word distribution per topic. `β<1` yields a sparse word mixture for each topic. `β>1` yields a more uniform word mixture for each topic.
# `β<1` yields a sparse word mixture for each topic; `β>1` yields a more
# uniform word mixture for each topic (continuation of the β comment above).
ϕ, θ = lda(dtm, ntopics, iteration, α, β);
heatmap(θ, xlabel="Audio sample", ylabel="Topic")

"""
    corr(x::AbstractMatrix)

Normalize a covariance-like matrix `x` into a correlation matrix:
`y[i,j] = x[i,j] / sqrt(x[i,i]*x[j,j])`.
"""
function corr(x::AbstractMatrix)
    d = diag(x)
    y = copy(x)
    for i in eachindex(d), j in eachindex(d)
        y[i, j] /= sqrt(x[i, i]*x[j, j])
    end
    y
end

# Topic similarity measured two ways: by shared words (ϕ) and by co-occurrence
# in the same samples (θ).
topic_cov_by_words = Matrix(ϕ*ϕ')
topic_cov_by_sample = Matrix(θ*θ')
topic_corr_by_words = corr(topic_cov_by_words)
topic_corr_by_sample = corr(topic_cov_by_sample)
plot(
    heatmap(topic_corr_by_sample, title="Correlation based on sample similarities", titlefont=10),
    heatmap(topic_corr_by_words, title="Correlation based on word similarities", titlefont=10)
)

# Average topic-activation vector per class.
ulabels = unique(labels)
classvecs = map(ulabels) do label
    classinds = findall(labels .== label)
    vec(mean(θ[:, classinds], dims=2))
end
classvecs = reduce(hcat, classvecs) # n_topics × n_class
heatmap(classvecs', yticks=(1:length(ulabels), ulabels), xlabel="Topic",
        ylabel="Class", size=(400, 400), color=:blues)

"""
    diagonalize(C, tol=:auto; permute_y=false, doplot=false)

Zero out entries of `C` (or `C'C` when `C` is non-square) with magnitude below
`tol`, then compute an AMD fill-reducing permutation that concentrates the
remaining large entries near the diagonal. With `tol = :auto` the threshold is
chosen (ad hoc) where the cumulative sorted magnitudes pass 30 % of the total.
Returns `(C_permuted, perm, yperm)`; rows are permuted only if `permute_y`.
"""
function diagonalize(C, tol=:auto; permute_y=false, doplot=false)
    C = copy(C)
    amdmat = size(C, 1) == size(C, 2) ? copy(C) : C'C
    if tol === :auto # Find tolerance automatically (adhoc)
        y = abs.(amdmat[:])
        inds = sortperm(y)
        cum = cumsum(y[inds])
        cum ./= cum[end]
        i = findfirst(c -> c > 0.3, cum)
        tol = y[inds[i]]
    end
    doplot && (histogram(abs.(amdmat[:])) |> display)
    amdmat[abs.(amdmat) .< tol] .= 0 # sparsify before the AMD ordering
    perm = amd(sparse(amdmat))
    yperm = permute_y ? perm : 1:size(C, 1)
    C[yperm, perm], perm, yperm
end

"""
    plotcov(C, xvector, yvector; kwargs...)

Heatmap of the similarity matrix `C` using `xvector`/`yvector` as tick labels.
"""
function plotcov(C, xvector, yvector; kwargs...)
    xticks = (1:length(xvector), xvector)
    yticks = (1:length(yvector), yvector)
    heatmap(C; xticks=xticks, yticks=yticks, xrotation=50, title="Class similarity", kwargs...)
end

# Class-by-class similarity in topic space, reordered to expose block structure.
classvecs_unit = mapslices(v -> v ./ norm(v), classvecs, dims=1)
classcov = classvecs_unit'topic_corr_by_sample*classvecs_unit
C, perm, yperm = diagonalize(classcov, permute_y=true)
plotcov(C, ulabels[perm], ulabels[perm], yflip=true)