# After installing and starting Julia run the following to install the required packages:
# Pkg.init(); Pkg.update()
# for p in ("CUDAdrv","IJulia","Knet"); Pkg.add(p); end
# Pkg.checkout("Knet","ilkarman") # make sure we have the right Knet version
# Pkg.build("Knet")
using Knet
# The shared params.py (below) is Python source; Python spells the boolean
# `True`, so alias it to Julia's `true` before including the file.
True=true # so we can read the python params
include("common/params.py");
# Print environment details so the benchmark run is reproducible.
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.installed("Knet"))
# Query the GPU name via nvidia-smi (Julia 0.6 `readstring`; requires NVIDIA driver).
println("GPU: ", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))
# Output: OS: Linux Julia: 0.6.1 Knet: 0.8.5+ GPU: Tesla K80
# define model
# Build the CNN parameters as a flat Any-vector: four conv layers followed by
# two dense layers, each stored as (weight, bias). Shapes assume 32x32x3 input
# images, so after two 2x2 pools the feature map is 8x8x100 = 6400 values.
# Keywords: `atype` device array type, `dtype` element type, `winit`/`binit`
# weight/bias initializers called as init(dtype, dims...).
function initmodel(; atype=KnetArray, dtype=Float32, winit=xavier, binit=zeros)
    weight(dims...) = atype(winit(dtype, dims...))
    bias(dims...)   = atype(binit(dtype, dims...))
    params = Any[]
    # conv block 1: 3 -> 50 -> 50 channels (3x3 kernels)
    append!(params, Any[weight(3,3,3,50),    bias(1,1,50,1)])
    append!(params, Any[weight(3,3,50,50),   bias(1,1,50,1)])
    # conv block 2: 50 -> 100 -> 100 channels
    append!(params, Any[weight(3,3,50,100),  bias(1,1,100,1)])
    append!(params, Any[weight(3,3,100,100), bias(1,1,100,1)])
    # dense head: 6400 features -> 512 hidden -> 10 classes
    append!(params, Any[weight(512,6400), bias(512,1)])
    append!(params, Any[weight(10,512),   bias(10,1)])
    return params
end;
# define loss and its gradient
# Forward pass of the CNN. `w` is the parameter list from `initmodel`,
# `x` a 4-D image batch (W,H,C,N). Returns unnormalized class scores (logits).
function predict(w, x)
    convlayer(input, wt, bs) = conv4(wt, input; padding=1) .+ bs
    dense(input, wt, bs) = wt * mat(input) .+ bs
    # conv block 1: two padded 3x3 convs, 2x2 max-pool, then dropout
    h = relu.(convlayer(x, w[1], w[2]))
    h = relu.(pool(convlayer(h, w[3], w[4])))
    h = dropout(h, 0.25)
    # conv block 2: same shape of stack with 100-channel filters
    h = relu.(convlayer(h, w[5], w[6]))
    h = relu.(pool(convlayer(h, w[7], w[8])))
    h = dropout(h, 0.25)
    # dense head: hidden layer with dropout, then the output layer
    h = relu.(dense(h, w[9], w[10]))
    h = dropout(h, 0.5)
    return dense(h, w[11], w[12])
end
# Training objective: negative log likelihood of labels `y` under the
# softmax of the model's logits.
function loss(w, x, y)
    return nll(predict(w, x), y) # nll: negative log likelihood
end
# AutoGrad-generated gradient: lossgradient(w,x,y) returns dloss/dw.
lossgradient = grad(loss);
# load data
# Pull in Knet's bundled CIFAR-10 loader (defines cifar10()).
include(Knet.dir("data","cifar.jl"))
# Download (first run only) and load: 50k train / 10k test images plus labels.
@time (xtrn,ytrn,xtst,ytst,lbls)=cifar10()
# Sanity-check array shapes and element types.
for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end
# Output: INFO: Reading cifar-10-binary.tar.gz...
# Output: 3.525842 seconds (1.27 M allocations: 1.783 GiB, 18.79% gc time) 32×32×3×50000 Array{Float32,4} 50000-element Array{UInt8,1} 32×32×3×10000 Array{Float32,4} 10000-element Array{UInt8,1}
# prepare for training
# NOTE: the nil-assignments must precede knetgc() so the old model's GPU
# buffers become unreachable and can actually be freed.
model = optim = nothing; knetgc() # Clear memory from last run
model = initmodel()
# SGD with momentum; LR and MOMENTUM come from common/params.py.
optim = optimizers(model, Momentum; lr=LR, gamma=MOMENTUM);
# Cold start: one epoch to trigger GPU kernel compilation, timed separately
# so the training benchmark below excludes compilation overhead.
@time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true,xtype=KnetArray)
grads = lossgradient(model, x, y)
update!(model, grads, optim)
end
# Output: 25.437272 seconds (4.03 M allocations: 784.659 MiB, 11.86% gc time)
# prepare for training
# Discard the warmed-up model and start from fresh weights so the timed run
# measures training from scratch (nil-out before knetgc() to free GPU memory).
model = optim = nothing; knetgc() # Clear memory from last run
model = initmodel()
optim = optimizers(model, Momentum; lr=LR, gamma=MOMENTUM);
# 159s
info("Training...")
# EPOCHS full passes; each epoch reshuffles and streams minibatches to the GPU.
@time for epoch in 1:EPOCHS
@time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true,xtype=KnetArray)
grads = lossgradient(model, x, y)
update!(model, grads, optim)
end
end
# Output: INFO: Training...
# Output: 15.909965 seconds (1.88 M allocations: 670.408 MiB, 0.41% gc time) 15.772903 seconds (1.88 M allocations: 670.285 MiB, 0.42% gc time) 15.829510 seconds (1.88 M allocations: 670.285 MiB, 0.44% gc time) 15.961176 seconds (1.88 M allocations: 670.285 MiB, 0.43% gc time) 15.869710 seconds (1.88 M allocations: 670.285 MiB, 0.46% gc time) 15.872871 seconds (1.88 M allocations: 670.353 MiB, 0.46% gc time) 15.839494 seconds (1.88 M allocations: 670.285 MiB, 0.42% gc time) 16.007868 seconds (1.88 M allocations: 670.285 MiB, 0.47% gc time) 15.859198 seconds (1.88 M allocations: 670.285 MiB, 0.47% gc time) 15.870192 seconds (1.88 M allocations: 670.285 MiB, 0.43% gc time) 158.797837 seconds (18.77 M allocations: 6.547 GiB, 0.44% gc time)
# Evaluate top-1 accuracy on the held-out test set (expected ~0.7754).
eval_batches = minibatch(xtst, ytst, BATCHSIZE; xtype=KnetArray)
@time accuracy(model, eval_batches, predict)
# Output: 2.123045 seconds (559.28 k allocations: 145.928 MiB, 1.10% gc time)
# Output: 0.7754407051282052