using QuantEcon
using DataFrames

n = 2  # Number of states
m = 2  # Number of actions

# Reward array
R = [5 10; -1 -Inf]

# Transition probability array
Q = Array{Float64}(undef, n, m, n)
Q[1, 1, :] = [0.5, 0.5]
Q[1, 2, :] = [0, 1]
Q[2, 1, :] = [0, 1]
Q[2, 2, :] = [0.5, 0.5]  # Arbitrary

# Discount factor
beta = 0.95

ddp = DiscreteDP(R, Q, beta);

# Analytical solution, used to check the numerical results below
function sigma_star(beta)
    sigma = Vector{Int64}(undef, 2)
    sigma[2] = 1
    if beta > 10/11
        sigma[1] = 1
    else
        sigma[1] = 2
    end
    return sigma
end

function v_star(beta)
    v = Vector{Float64}(undef, 2)
    v[2] = -1 / (1 - beta)
    if beta > 10/11
        v[1] = (5 - 5.5*beta) / ((1 - 0.5*beta) * (1 - beta))
    else
        v[1] = (10 - 11*beta) / (1 - beta)
    end
    return v
end;

sigma_star(beta)
v_star(beta)

# Solve by value function iteration (VFI)
epsilon = 1e-2
v_init = [0.0, 0.0]
res_vi = solve(ddp, v_init, VFI, epsilon=epsilon);

res_vi.num_iter
res_vi.v
maximum(abs, res_vi.v - v_star(beta)) < epsilon/2
res_vi.sigma

# Iterate the Bellman operator, recording the iterates, sup-norm differences, and spans
num_reps = 164
values = Matrix{Float64}(undef, num_reps, n)
diffs = Vector{Float64}(undef, num_reps)
spans = Vector{Float64}(undef, num_reps)
v = [0, 0]
values[1, :] = v
diffs[1] = NaN
spans[1] = NaN
for i in 2:num_reps
    v_new = bellman_operator(ddp, v)
    values[i, :] = v_new
    diffs[i] = maximum(abs, v_new - v)
    spans[i] = maximum(v_new - v) - minimum(v_new - v)
    v = v_new
end

col_names = map(Symbol, ["i", "v^i(1)", "v^i(2)", "‖v^i - v^(i-1)‖", "span(v^i - v^(i-1))"])
df = DataFrame([0:num_reps-1, values[:, 1], values[:, 2], diffs, spans], col_names)

# Iterates 0-9, 10, 20, ..., 160, and 161-163
display_nums = [i+1 for i in 0:9]
append!(display_nums, [10*i+1 for i in 1:16])
append!(display_nums, [160+i+1 for i in 1:3])
df[display_nums, [1, 2, 3, 4]]

# Differences and spans for iterates 1-12 and 20, 30, ..., 60
display_nums = [i+1 for i in 1:12]
append!(display_nums, [10*i+1 for i in 2:6])
df[display_nums, [1, 4, 5]]

# Span-based stopping threshold
epsilon * (1-beta) / beta
spans[12] < epsilon * (1-beta) / beta

# Modified policy iteration with k = 0
epsilon = 1e-2
v_init = [0.0, 0.0]
k = 0
res_mpi_1 = solve(ddp, v_init, MPFI, epsilon=epsilon, k=k);

res_mpi_1.num_iter
res_mpi_1.v

# Solve by policy function iteration (PFI)
v_init = [0.0, 0.0]
res_pi = solve(ddp, v_init, PFI);

res_pi.num_iter
res_pi.v
maximum(abs, res_pi.v - v_star(beta))

# Policy iteration, step by step
v = [0.0, 0.0]
sigma = [0, 0]  # Dummy
sigma_new = compute_greedy(ddp, v)
i = 0
while true
    println("Iterate $i")
    println(" value: $v")
    println(" policy: $sigma_new")
    if all(sigma_new .== sigma)
        break
    end
    copyto!(sigma, sigma_new)
    v = evaluate_policy(ddp, sigma)
    sigma_new = compute_greedy(ddp, v)
    i += 1
end
println("Terminated")

# Modified policy iteration (MPFI) with k = 6
epsilon = 1e-2
v_init = [0.0, 0.0]
k = 6
res_mpi = solve(ddp, v_init, MPFI, epsilon=epsilon, k=k);

res_mpi.num_iter
res_mpi.v
maximum(abs, res_mpi.v - v_star(beta)) < epsilon/2

# T_sigma operator
function T_sigma(ddp::DiscreteDP, sigma::Array{T}) where {T<:Integer}
    R_sigma, Q_sigma = RQ_sigma(ddp, sigma)
    return v -> R_sigma + ddp.beta * Q_sigma * v
end;

# Modified policy iteration, step by step
epsilon = 1e-2
v = [0, 0]
k = 6
i = 0
println("Iterate $i")
println(" v: $v")
sigma = Vector{Int64}(undef, n)
u = Vector{Float64}(undef, n)
while true
    i += 1
    bellman_operator!(ddp, v, u, sigma)  # u and sigma are modified in place
    diff = u - v
    span = maximum(diff) - minimum(diff)
    println("Iterate $i")
    println(" sigma: $sigma")
    println(" T_sigma(v): $u")
    println(" span: $span")
    if span < epsilon * (1-ddp.beta) / ddp.beta
        v = u .+ ((maximum(diff) + minimum(diff)) / 2) * (ddp.beta / (1 - ddp.beta))
        break
    end
    v = compute_fixed_point(T_sigma(ddp, sigma), u, err_tol=0, max_iter=k, verbose=false)
    # The above is equivalent to the following:
    # for j in 1:k
    #     v = T_sigma(ddp, sigma)(u)
    #     copyto!(u, v)
    # end
    # copyto!(v, u)
    println(" T_sigma^k+1(v): $v")
end
println("Terminated")
println(" sigma: $sigma")
println(" v: $v")
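
# A minimal summary sketch (an addition, not part of the original code): it assumes
# the result objects res_vi, res_pi, and res_mpi computed above are still in scope,
# and reports how many iterations each solver took and how far its value estimate
# is from the analytical solution v_star(beta).
for (name, res) in [("VFI", res_vi), ("PFI", res_pi), ("MPFI", res_mpi)]
    err = maximum(abs, res.v - v_star(beta))
    println("$name: $(res.num_iter) iterations, max error $err")
end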