# imports and set up plots
using Distributions
using LinearAlgebra: diag
using Random
using GaussianProcesses
import Statistics
import PyPlot; plt = PyPlot
using LaTeXStrings
cbbPalette = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"];

"""
The true function we will be simulating from.
"""
function fstar(x::Float64)
    return abs(x - 5) * cos(2 * x)
end

σy = 10.0
n = 5000

Random.seed!(1) # for reproducibility
Xdistr = Beta(7, 7)
ϵdistr = Normal(0, σy)
x = rand(Xdistr, n) * 10
X = Matrix(x')
Y = fstar.(x) .+ rand(ϵdistr, n);

xx = range(0, stop=10, length=200)
plt.plot(x, Y, ".", color=cbbPalette[1], label="simulated data", alpha=0.1)
plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2)
plt.xlabel(L"X")
plt.ylabel(L"Y")
plt.title("Simulated Data")
plt.legend(loc="lower center", fontsize="small");

# Exact GP with a squared-exponential kernel and fixed hyperparameters
k = SEIso(log(0.3), log(5.0))
GPE(X, Y, MeanConst(mean(Y)), k, log(σy)) # warm-up
# time the second run (so the time doesn't include compilation):
@time gp_full = GPE(X, Y, MeanConst(mean(Y)), k, log(σy))

# extract predictions
predict_f(gp_full, xx; full_cov=true) # warm-up
@time μ_exact, Σ_exact = predict_f(gp_full, xx; full_cov=true);

xx = range(0, stop=10, length=200)
plt.plot(x, Y, ".", color="black", markeredgecolor="None", alpha=0.1, label="simulated data")
plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2)
plt.plot(xx, μ_exact, color=cbbPalette[3], label=L"$f(x) \mid Y, \hat\theta$")
plt.fill_between(xx, μ_exact .- sqrt.(diag(Σ_exact)), μ_exact .+ sqrt.(diag(Σ_exact)),
                 color=cbbPalette[3], alpha=0.5)
plt.xlabel(L"X")
plt.ylabel(L"Y")
plt.title("Exact GP")
plt.legend(loc="lower center", fontsize="small")
plt.ylim(-7.5, 7.5);

# inducing points placed at quantiles of the inputs
Xu = Matrix(quantile(x, [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.98])')

# Subset of Regressors (SoR)
gp_SOR = GaussianProcesses.SoR(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); # warm-up
@time gp_SOR = GaussianProcesses.SoR(X, Xu, Y, MeanConst(mean(Y)), k, log(σy));

predict_f(gp_SOR, xx; full_cov=true) # warm-up
@time predict_f(gp_SOR, xx; full_cov=true);

# overlay the exact GP and a sparse approximation on the same axes
function plot_approximation(gp_exact, gp_approx, approx_label)
    μ_exact, Σ_exact = predict_f(gp_exact, xx; full_cov=true)
    μapprox, Σapprox = predict_f(gp_approx, xx; full_cov=true)
    plt.plot(x, Y, ".", color="black", markeredgecolor="None", alpha=0.1, label="simulated data")
    plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2)
    plt.plot(xx, μ_exact, color=cbbPalette[3], label=L"$f(x) \mid Y, \hat\theta$")
    plt.fill_between(xx, μ_exact .- sqrt.(diag(Σ_exact)), μ_exact .+ sqrt.(diag(Σ_exact)),
                     color=cbbPalette[3], alpha=0.3)
    plt.plot(xx, μapprox, color=cbbPalette[6], label=L"$f(x) \mid Y, \hat\theta$" * approx_label)
    y_err = sqrt.(diag(Σapprox))
    plt.fill_between(xx, μapprox .- y_err, μapprox .+ y_err, color=cbbPalette[6], alpha=0.5)
    plt.xlabel(L"X")
    plt.ylabel(L"Y")
    plt.plot(vec(Xu), fill(0.0, length(Xu)) .- 5, linestyle="None", marker=6, markersize=12,
             color="black", label=L"inducing points $X_\mathbf{u}$")
    plt.legend(loc="upper center", fontsize="small")
    plt.ylim(-10, 10)
end

plot_approximation(gp_full, gp_SOR, "SoR")
plt.title("Subset of Regressors");

# Deterministic Training Conditionals (DTC)
gp_DTC = GaussianProcesses.DTC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); # warm-up
@time gp_DTC = GaussianProcesses.DTC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy));

μDTCgp, ΣDTCgp = predict_f(gp_DTC, xx; full_cov=true) # warm-up
@time predict_f(gp_DTC, xx; full_cov=true);

plot_approximation(gp_full, gp_DTC, "DTC")
plt.title("Deterministic Training Conditionals");
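# Not part of the original analysis: a minimal sketch of how one might quantify how close the
# sparse approximations are to the exact GP, assuming the objects defined above
# (gp_full, gp_SOR, gp_DTC, xx). The μcheck_* names are introduced here for illustration only.
μcheck_exact, _ = predict_f(gp_full, xx; full_cov=true)
μcheck_sor, _ = predict_f(gp_SOR, xx; full_cov=true)
μcheck_dtc, _ = predict_f(gp_DTC, xx; full_cov=true)
println("max |SoR mean - exact mean|: ", maximum(abs.(μcheck_sor .- μcheck_exact)))
println("max |DTC mean - exact mean|: ", maximum(abs.(μcheck_dtc .- μcheck_exact)))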
# Fully Independent Training Conditionals (FITC)
gp_FITC = GaussianProcesses.FITC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); # warm-up
@time gp_FITC = GaussianProcesses.FITC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy));

predict_f(gp_FITC, xx; full_cov=true) # warm-up
@time predict_f(gp_FITC, xx; full_cov=true);

plot_approximation(gp_full, gp_FITC, "FITC")
plt.title("Fully Independent Training Conditionals");

# Full Scale Approximation (FSA):
# assign each training point to the block of its nearest inducing point
inearest = [argmin(abs.(xi .- Xu[1, :])) for xi in x]
blockindices = [findall(isequal(i), inearest) for i in 1:size(Xu, 2)]

GaussianProcesses.FSA(X, Xu, blockindices, Y, MeanConst(mean(Y)), k, log(σy)); # warm-up
@time gp_FSA = GaussianProcesses.FSA(X, Xu, blockindices, Y, MeanConst(mean(Y)), k, log(σy));

# block assignments for the prediction grid
iprednearest = [argmin(abs.(xi .- Xu[1, :])) for xi in xx]
blockindpred = [findall(isequal(i), iprednearest) for i in 1:size(Xu, 2)]

predict_f(gp_FSA, xx, blockindpred; full_cov=true) # warm-up
@time predict_f(gp_FSA, xx, blockindpred; full_cov=true);

# same overlay plot, but FSA predictions also need the block assignments
function plot_approximation(gp_exact, gp_approx, approx_label, blockindpred)
    μ_exact, Σ_exact = predict_f(gp_exact, xx; full_cov=true)
    μapprox, Σapprox = predict_f(gp_approx, xx, blockindpred; full_cov=true)
    plt.plot(x, Y, ".", color="black", markeredgecolor="None", alpha=0.1, label="simulated data")
    plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2)
    plt.plot(xx, μ_exact, color=cbbPalette[3], label=L"$f(x) \mid Y, \hat\theta$")
    plt.fill_between(xx, μ_exact .- sqrt.(diag(Σ_exact)), μ_exact .+ sqrt.(diag(Σ_exact)),
                     color=cbbPalette[3], alpha=0.3)
    plt.plot(xx, μapprox, color=cbbPalette[6], label=L"$f(x) \mid Y, \hat\theta$" * approx_label)
    y_err = sqrt.(diag(Σapprox))
    plt.fill_between(xx, μapprox .- y_err, μapprox .+ y_err, color=cbbPalette[6], alpha=0.5)
    plt.xlabel(L"X")
    plt.ylabel(L"Y")
    plt.plot(vec(Xu), fill(0.0, length(Xu)) .- 5, linestyle="None", marker=6, markersize=12,
             color="black", label=L"inducing points $X_\mathbf{u}$")
    plt.legend(loc="upper left", fontsize="small")
    plt.ylim(-10, 10)
end

# from StatsBase.jl:
midpoints(v::AbstractVector) = [Statistics.middle(v[i - 1], v[i]) for i in 2:length(v)]

# mark the block boundaries (midpoints between neighbouring inducing points)
plt.axvline.(midpoints(vec(Xu)), alpha=0.9, color=cbbPalette[7], zorder=-1)
plot_approximation(gp_full, gp_FSA, "FSA", blockindpred);

# inspect the covariance strategy attached to the SoR model
gp_SOR = GaussianProcesses.SoR(X, Xu, Y, MeanConst(mean(Y)), k, log(σy));
gp_SOR.covstrat
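# Not part of the original analysis: a small sketch of a final sanity check, assuming the
# objects defined above (gp_full, gp_FSA, gp_SOR, xx, blockindpred). typeof(gp_SOR.covstrat)
# simply reports which covariance strategy the SoR constructor attached to the fitted object;
# the μcheck_* names are introduced here for illustration only.
println(typeof(gp_SOR.covstrat))
μcheck_fsa, _ = predict_f(gp_FSA, xx, blockindpred; full_cov=true)
μcheck_full, _ = predict_f(gp_full, xx; full_cov=true)
println("max |FSA mean - exact mean|: ", maximum(abs.(μcheck_fsa .- μcheck_full)))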