using AugmentedGaussianProcesses
using Distributions
using Plots
We create a toy dataset with X ∈ [-20, 20] and y = 5 * sinc(X)
N = 1000
X = reshape((sort(rand(N)) .- 0.5) * 40.0, N, 1)
σ = 0.01
function latent(x)
return 5.0 * sinc.(x)
end
Y = vec(latent(X) + σ * randn(N));
Visualization of the data :
scatter(X, Y; lab="")
In this first example we are going to look at the effect of using inducing points compared to the true Gaussian Process For simplicity we will keep all inducing points and kernel parameters fixed
Ms = [4, 8, 16, 32, 64];
Create an empty array of GPs
models = Vector{AbstractGPModel}(undef, length(Ms) + 1);
Chose a kernel
kernel = SqExponentialKernel();# + PeriodicKernel()
And Run sparse classification with an increasing number of inducing points
for (index, num_inducing) in enumerate(Ms)
@info "Training with $(num_inducing) points"
m = SVGP(
kernel, # Kernel
GaussianLikelihood(σ), # Likelihood used
AnalyticVI(), # Inference usede to solve the problem
inducingpoints(KmeansAlg(num_inducing), X); # Inducing points initialized with kmeans
optimiser=false, # Keep kernel parameters fixed
Zoptimiser=false, # Keep inducing points locations fixed
)
@time train!(m, X, Y, 100) # Train the model for 100 iterations
models[index] = m # Save the model in the array
end
[ Info: Training with 4 points 2.281944 seconds (4.92 M allocations: 277.567 MiB, 2.85% gc time, 99.28% compilation time) [ Info: Training with 8 points 0.009826 seconds (13.68 k allocations: 25.571 MiB) [ Info: Training with 16 points 0.018545 seconds (13.68 k allocations: 46.160 MiB) [ Info: Training with 32 points 0.059243 seconds (13.68 k allocations: 90.182 MiB) [ Info: Training with 64 points 0.148940 seconds (14.69 k allocations: 189.940 MiB, 26.43% gc time)
Train the model without any inducing points (no approximation)
@info "Training with full model"
mfull = GP(
X,
Y,
kernel;
noise=σ,
opt_noise=false, # Keep the noise value fixed
optimiser=false, # Keep kernel parameters fixed
)
@time train!(mfull, 5);
models[end] = mfull;
[ Info: Training with full model 0.939268 seconds (249.32 k allocations: 380.320 MiB, 7.50% gc time, 10.78% compilation time)
Create a grid and compute prediction on it
function compute_grid(model, n_grid=50)
mins = -20
maxs = 20
x_grid = range(mins, maxs; length=n_grid) # Create a grid
y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid
return y_grid, sig_y_grid, x_grid
end;
Plot the data as a scatter plot
function plotdata(X, Y)
return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab="", size=(300, 500))
end
function plot_model(model, X, Y, title=nothing)
n_grid = 100
y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)
title = if isnothing(title)
(model isa SVGP ? "M = $(dim(model[1]))" : "full")
else
title
end
p = plotdata(X, Y)
Plots.plot!(
p,
x_grid,
y_grid;
ribbon=2 * sqrt.(sig_y_grid), # Plot 2 std deviations
title=title,
color="red",
lab="",
linewidth=3.0,
)
if model isa SVGP # Plot the inducing points as well
Plots.plot!(
p,
vec(model.f[1].Z),
zeros(dim(model.f[1]));
msize=2.0,
color="black",
t=:scatter,
lab="",
)
end
return p
end;
Plots.plot(
plot_model.(models, Ref(X), Ref(Y))...; layout=(1, length(models)), size=(1000, 200)
) # Plot all models and combine the plots
We now look at using another noise than Gaussian noise. In AGP.jl you can use the Student-T likelihood, the Laplace likelihood and the Heteroscedastic likelihood
We will use the same toy dataset for our experiment
Create an array of model with different likelihoods:
likelihoods = [
StudentTLikelihood(3.0), LaplaceLikelihood(3.0), HeteroscedasticLikelihood(1.0)
]
ngmodels = Vector{AbstractGPModel}(undef, length(likelihoods) + 1)
for (i, l) in enumerate(likelihoods)
@info "Training with the $(l)" # We need to use VGP
m = VGP(
X,
Y, # First arguments are the input and output
kernel, # Kernel
l, # Likelihood used
AnalyticVI(); # Inference usede to solve the problem
optimiser=false, # Keep kernel parameters fixed
)
@time train!(m, 10) # Train the model for 100 iterations
ngmodels[i] = m # Save the model in the array
end
ngmodels[end] = models[end] # Add the Gaussian model
[ Info: Training with the Student-t likelihood (ν=3.0, σ=1.0) 2.309321 seconds (3.13 M allocations: 770.705 MiB, 6.15% gc time, 59.78% compilation time) [ Info: Training with the Laplace likelihood (β=3.0) 1.513189 seconds (1.26 M allocations: 672.434 MiB, 3.20% gc time, 46.17% compilation time) [ Info: Training with the Gaussian likelihood with heteroscedastic noise 3.318307 seconds (4.08 M allocations: 1.397 GiB, 3.92% gc time, 51.40% compilation time)
Gaussian Process with a Gaussian likelihood (σ² = 0.01) infered by Analytic Inference
We can now repeat the prediction from before :
Plots.plot(
plot_model.(
ngmodels, Ref(X), Ref(Y), ["Student-T", "Laplace", "Heteroscedastic", "Gaussian"]
)...;
layout=(2, 2),
size=(1000, 200),
) # Plot all models and combine the plots
This notebook was generated using Literate.jl.