using GaussianProcesses, RDatasets import Distributions:Normal using Random Random.seed!(113355) crabs = dataset("MASS","crabs"); # load the data crabs = crabs[shuffle(1:size(crabs)[1]), :]; # shuffle the data train = crabs[1:div(end,2),:]; y = Array{Bool}(undef,size(train)[1]); # response y[train[:,:Sp].=="B"].=0; # convert characters to booleans y[train[:,:Sp].=="O"].=1; X = convert(Matrix,train[:,4:end]); # predictors #Select mean, kernel and likelihood function mZero = MeanZero(); # Zero mean function kern = Matern(3/2,zeros(5),0.0); # Matern 3/2 ARD kernel (note that hyperparameters are on the log scale) lik = BernLik(); # Bernoulli likelihood for binary data {0,1} gp = GP(X',y,mZero,kern,lik) # Fit the Gaussian process model set_priors!(gp.kernel,[Normal(0.0,2.0) for i in 1:6]) samples = mcmc(gp; nIter=10000, burn=1000, thin=10); test = crabs[div(end,2)+1:end,:]; # select test data yTest = Array{Bool}(undef,size(test)[1]); # test response data yTest[test[:,:Sp].=="B"].=0; # convert characters to booleans yTest[test[:,:Sp].=="O"].=1; xTest = convert(Matrix,test[:,4:end]); ymean = Array{Float64}(undef,size(samples,2),size(xTest,1)); for i in 1:size(samples,2) set_params!(gp,samples[:,i]) update_target!(gp) ymean[i,:] = predict_y(gp,xTest')[1] end using Plots gr() plot(ymean',leg=false,html_output_format=:png) scatter!(yTest)