# Simulation of a "boosting" attack on a holdout leaderboard: holdout values are
# sampled from a simple synthetic distribution, and noisy predictions are then
# combined using only the (rounded) leaderboard score as feedback.

using Statistics  # provides `mean` and `std`

"""
    score(x, y)

Return the root mean squared logarithmic error between `x` and `y`,
`sqrt(mean(log((1 + x) / (1 + y))^2))`, computed element-wise over two
equally sized collections.
"""
score(x, y) = sqrt(sum(abs2, log.((1 .+ x) ./ (1 .+ y))) / length(x))

# number of test samples
N = 70943

# number of samples used for the public leaderboard (30% of N, rounded to nearest)
n = round(Int, 0.3 * N)

# score of the best constant predictor
const_benchmark = 0.486459

# best constant
const_value = 0.209179

# score of the all-zeros prediction
zeros_benchmark = 0.522226

# Moments used to fit the sampling distribution of log(1 + y) below.
first_moment = log(1 + const_value)
# NOTE(review): `score` takes a square root, so the second moment of log(1 + y)
# implied by the all-zeros score would be `zeros_benchmark^2`; the original
# uses `zeros_benchmark` itself. Kept unchanged — confirm the intent.
second_moment = zeros_benchmark

# log(1 + y) is drawn as t * U * B with U ~ Uniform(0, 1) and B ~ Bernoulli(p).
t = 3 * second_moment / (2 * first_moment)
p = 2 * first_moment / t

# sample holdout values
solution = exp.(t .* rand(n) .* (rand(n) .< p)) .- 1

"""
    combine(v, v1, v2)

Select each coordinate from `v1` where `v` is 1 and from `v2` where `v` is 0.
"""
combine(v, v1, v2) = v1 .* v .+ v2 .* (1 .- v)

"""
    boost(v1, v2, k, score)

Combine the two predictions `v1` and `v2` using `k` random 0/1 selection vectors.

Every random selection whose combined prediction scores strictly better (lower)
than the mean of `score(v1)` and `score(v2)` is kept; the final selection is the
coordinate-wise strict majority vote over the kept selections. `score` is a
one-argument function mapping a prediction to its score.

If no selection beats the mean, the vote is all zeros and `v2` is returned.
"""
function boost(v1, v2, k, score)
    m = mean([score(v1), score(v2)])
    A = rand(0:1, (length(v1), k))
    # select columns of A that give a better than mean score
    a = filter(i -> score(combine(view(A, :, i), v1, v2)) < m, 1:k)
    # take a (strict) majority vote over all selected columns
    votes = vec(sum(A[:, a]; dims=2))
    v = 2 .* votes .> length(a)
    return combine(v, v1, v2)
end

"""
    s(x)

Score prediction `x` against the sampled holdout `solution`, rounded to 5 digits.
"""
s(x) = round(score(solution, x); digits=5)

# numbers of random selection vectors to try
vals = [1, 100, 200, 300, 400, 500, 600, 700]

"""
    expmt()

Run one experiment: draw two noisy copies of `solution` and return the score of
their boosted combination for every number of selection vectors in `vals`.
"""
function expmt()
    v1 = solution .+ 1.15 .* rand(n)
    v2 = solution .+ 1.15 .* rand(n)
    return Float64[s(boost(v1, v2, i, s)) for i in vals]
end

reps = 10
S = zeros(reps, length(vals))
for i in 1:reps
    S[i, :] .= expmt()
end

# per-column statistics over the repetitions
means = [mean(view(S, :, j)) for j in axes(S, 2)]
stds = [std(view(S, :, j)) for j in axes(S, 2)]

using PyPlot
plot(vals, means)

# bare expression: shows the value when evaluated in a notebook cell or the REPL
means

# Repeated boosting: three rounds, each combining two independently boosted vectors.
scores = Float64[]
for i in 1:reps
    v1 = solution .+ 1.15 .* rand(n)
    v2 = solution .+ 1.15 .* rand(n)
    v21 = boost(v1, v2, 500, s)
    v22 = boost(v1, v2, 500, s)
    v31 = boost(v21, v22, 500, s)
    v32 = boost(v21, v22, 500, s)
    v4 = boost(v31, v32, 500, s)
    push!(scores, s(v4))
end

# bare expression: shows the value when evaluated in a notebook cell or the REPL
mean(scores)