using Pkg; Pkg.activate("../."); Pkg.instantiate();
using IJulia; try IJulia.clear_output(); catch _ end

using Distributions, Plots, LaTeXStrings

N = 100
generative_dist = MvNormal([0, 1.], [0.8 0.5; 0.5 1.0])
D = rand(generative_dist, N)    # generate observations from generative_dist
scatter(D[1,:], D[2,:], marker=:x, markerstrokewidth=3, label=L"D")
x_dot = rand(generative_dist)   # generate x∙
scatter!([x_dot[1]], [x_dot[2]], label=L"x_\bullet")
plot!(range(0, 2), [1., 1., 1.], fillrange=2, alpha=0.4, color=:gray, label=L"S")

using Plots, Distributions, LaTeXStrings

d1 = Normal(0, 1) # μ=0, σ^2=1
d2 = Normal(3, 2) # μ=3, σ^2=4

# Calculate the parameters of the product d1*d2
s2_prod = (d1.σ^-2 + d2.σ^-2)^-1
m_prod  = s2_prod * ((d1.σ^-2)*d1.μ + (d2.σ^-2)*d2.μ)
d_prod  = Normal(m_prod, sqrt(s2_prod)) # note that we neglect the normalization constant

# Plot the two Gaussians, their (unnormalized) product, and the normalized product
x = range(-4, stop=8, length=100)
plot(x, pdf.(d1,x), label=L"\mathcal{N}(0,1)", fill=(0, 0.1))                                  # the first Gaussian
plot!(x, pdf.(d2,x), label=L"\mathcal{N}(3,4)", fill=(0, 0.1))                                 # the second Gaussian
plot!(x, pdf.(d1,x) .* pdf.(d2,x), label=L"\mathcal{N}(0,1) \mathcal{N}(3,4)", fill=(0, 0.1)) # the exact product
plot!(x, pdf.(d_prod,x), label=L"Z^{-1} \mathcal{N}(0,1) \mathcal{N}(3,4)", fill=(0, 0.1))    # the normalized Gaussian product
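The cell above neglects the normalization constant of the Gaussian product. It has the closed form Z = N(μ1 | μ2, σ1² + σ2²). The sanity check below is an addition (not part of the original notebook); it assumes only d1 and d2 from the cell above and compares the closed form against a plain Riemann sum.

# Sanity check (added; not in the original notebook): the normalization constant
# of N(x|μ1,σ1²)·N(x|μ2,σ2²) is Z = N(μ1|μ2, σ1²+σ2²).
Z_analytic = pdf(Normal(d2.μ, sqrt(d1.σ^2 + d2.σ^2)), d1.μ)   # closed-form Z
xs = range(-20, stop=20, length=200_001)                      # fine grid for a Riemann sum
Z_numeric = sum(pdf.(d1, xs) .* pdf.(d2, xs)) * step(xs)      # ≈ ∫ N(x|0,1) N(x|3,4) dx
println("Z (closed form) = $(Z_analytic), Z (Riemann sum) ≈ $(Z_numeric)")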
using Plots, LaTeXStrings, Distributions

# Define the joint distribution p(x,y)
μ = [1.0; 2.0]
Σ = [0.3 0.7; 0.7 2.0]
joint = MvNormal(μ, Σ)

# Define the marginal distribution p(x)
marginal_x = Normal(μ[1], sqrt(Σ[1,1]))

# Plot p(x,y)
x_range = y_range = range(-2, stop=5, length=1000)
joint_pdf = [pdf(joint, [x_range[i]; y_range[j]]) for j=1:length(y_range), i=1:length(x_range)]
plot_1 = heatmap(x_range, y_range, joint_pdf, title=L"p(x, y)")

# Plot p(x)
plot_2 = plot(x_range, pdf.(marginal_x, x_range), title=L"p(x)", label="", fill=(0, 0.1))

# Plot p(y|x = 0.1)
x = 0.1
conditional_y_m  = μ[2] + Σ[2,1] * inv(Σ[1,1]) * (x - μ[1])   # conditional mean
conditional_y_s2 = Σ[2,2] - Σ[2,1] * inv(Σ[1,1]) * Σ[1,2]     # conditional variance
conditional_y = Normal(conditional_y_m, sqrt(conditional_y_s2))
plot_3 = plot(y_range, pdf.(conditional_y, y_range), title=L"p(y|x = %$x)", label="", fill=(0, 0.1))

plot(plot_1, plot_2, plot_3, layout=(1,3), size=(1200,300))

using Plots, Distributions, LaTeXStrings

n = 100          # specify the number of observations
θ = 2.0          # true value of the parameter we would like to estimate
noise_σ2 = 0.3   # variance of the observation noise
observations = sqrt(noise_σ2) * randn(n) .+ θ   # scale the noise by its standard deviation, not its variance

function perform_kalman_step(prior::Normal, x::Float64, noise_σ2::Float64)
    prior_σ2 = var(prior)                             # Distributions.jl stores the standard deviation, so extract the variance
    K = prior_σ2 / (noise_σ2 + prior_σ2)              # compute the Kalman gain
    posterior_μ = mean(prior) + K*(x - mean(prior))   # update the posterior mean
    posterior_σ2 = prior_σ2 * (1.0 - K)               # update the posterior variance
    return Normal(posterior_μ, sqrt(posterior_σ2))    # return the posterior distribution
end

post_μ  = fill!(Vector{Float64}(undef, n+1), NaN)   # means of p(θ|D) over time
post_σ2 = fill!(Vector{Float64}(undef, n+1), NaN)   # variances of p(θ|D) over time

prior = Normal(0, 1)      # specify the prior distribution (play with its parameterization to get a feeling for how the Kalman filter converges)
post_μ[1]  = mean(prior)  # save the prior mean and variance to show them in the plot
post_σ2[1] = var(prior)

# This loop demonstrates Bayesian learning on streaming data: we update the prior
# distribution using the current observation, after which this posterior becomes
# the new prior for future observations.
for (i, x) in enumerate(observations)
    posterior = perform_kalman_step(prior, x, noise_σ2)   # compute the posterior distribution given the observation
    post_μ[i+1]  = mean(posterior)   # save the mean of the posterior distribution
    post_σ2[i+1] = var(posterior)    # save the variance of the posterior distribution
    global prior = posterior         # the posterior becomes the prior for future observations
end

obs_scale = collect(2:n+1)
scatter(obs_scale, observations, label=L"D")   # scatter the observations
post_scale = collect(1:n+1)
plot!(post_scale, post_μ, ribbon=sqrt.(post_σ2), linewidth=3, label=L"p(θ | D_t)")   # plot the estimated means of the intermediate posteriors, with a ±1 std. dev. ribbon
plot!(post_scale, θ*ones(n+1), linewidth=2, label=L"θ")                              # plot the true value of θ

using Plots, Distributions, SpecialFunctions, LaTeXStrings

X = Normal(0,1)
Y = Normal(0,1)
pdf_product_std_normals(z::Vector) = besselk.(0, abs.(z)) ./ π   # pdf of Z = X*Y for independent standard normals
range1 = collect(range(-4, stop=4, length=100))
plot(range1, pdf.(X, range1), label=L"p(X)=p(Y)=\mathcal{N}(0,1)", fill=(0, 0.1))
plot!(range1, pdf.(X, range1).*pdf.(Y, range1), label=L"p(X)*p(Y)", fill=(0, 0.1))
plot!(range1, pdf_product_std_normals(range1), label=L"p(Z=X*Y)", fill=(0, 0.1))

using HCubature, LinearAlgebra, Plots, Distributions, LaTeXStrings   # HCubature is a numerical integration package

# Maximum likelihood estimation of the 2D Gaussian from the observations D
N = size(D, 2)                          # number of observations
μ = 1/N * sum(D, dims=2)[:,1]           # sample mean
D_min_μ = D - repeat(μ, 1, N)           # center the data
Σ = Hermitian(1/N * D_min_μ*D_min_μ')   # ML (biased) estimate of the covariance
m = MvNormal(μ, convert(Matrix, Σ));

contour(range(-3, 4, length=100), range(-3, 4, length=100), (x, y) -> pdf(m, [x, y]))

# Numerical integration of p(x|m) over S:
(val, err) = hcubature((x) -> pdf(m, x), [0., 1.], [2., 2.])
println("p(x∙ ∈ S|m) ≈ $(val)")

scatter!(D[1,:], D[2,:], marker=:x, markerstrokewidth=3, label=L"D")
scatter!([x_dot[1]], [x_dot[2]], label=L"x_\bullet")
plot!(range(0, 2), [1., 1., 1.], fillrange=2, alpha=0.4, color=:gray, label=L"S")

open("../../styles/aipstyle.html") do f
    display("text/html", read(f, String))
end
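Because the Kalman cell above estimates a constant mean under Gaussian noise, the recursive posterior after all n observations should coincide with the batch conjugate Gaussian update. The check below is an addition (not in the original notebook); it assumes n, noise_σ2, observations, post_μ, and post_σ2 from the Kalman cell are still in scope, and hardcodes the prior parameters of Normal(0, 1) used there.

# Check (added; not in the original notebook): the recursive Kalman posterior must
# match the closed-form batch posterior for this conjugate Gaussian model.
prior_μ0, prior_σ20 = 0.0, 1.0             # must match the prior Normal(0, 1) used above
batch_prec = 1/prior_σ20 + n/noise_σ2      # posterior precision = prior precision + n likelihood precisions
batch_σ2 = 1/batch_prec
batch_μ  = batch_σ2 * (prior_μ0/prior_σ20 + sum(observations)/noise_σ2)
println("recursive Kalman posterior: μ = $(post_μ[end]), σ² = $(post_σ2[end])")
println("batch conjugate posterior:  μ = $(batch_μ), σ² = $(batch_σ2)")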
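As a cross-check of the hcubature result above, a simple Monte Carlo estimate of p(x∙ ∈ S|m) draws samples from the fitted model m and counts the fraction that lands in S = [0,2] × [1,2]. This sketch is an addition (not in the original notebook) and assumes m from the maximum likelihood cell is still in scope.

# Monte Carlo cross-check (added; not in the original notebook) of the quadrature above
n_samples = 1_000_000
S_samples = rand(m, n_samples)   # draw from the fitted model m; 2 × n_samples matrix
in_S = (0 .<= S_samples[1,:] .<= 2) .& (1 .<= S_samples[2,:] .<= 2)
println("p(x∙ ∈ S|m) ≈ $(sum(in_S) / n_samples) (Monte Carlo)")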