using QuantEcon
using Plots
plotlyjs()

# The maximum capacity
maxcap = 30

# The number of states
n = maxcap + 1

# The number of actions
m = n

# Constants
a1, b1 = 14, 0.8
a2, b2 = 10, 0.4

# Reward functions
function F(x::Number)
    return a1 * x^b1
end

function G(x::Number)
    return a2 * x^b2
end

# State transition probabilities
probs = [0.1, 0.2, 0.4, 0.2, 0.1]
supp_size = length(probs)

# Discount factor
delta = 0.9

# Reward array
R = zeros(Float64, n, m)
for s in 1:n
    for x in 1:m
        # Julia array indices begin with 1, so convert to 0-based values
        xv = x - 1
        sv = s - 1
        if x <= s
            r = F(xv) + G(sv - xv)
        else
            # Infeasible action: irrigation cannot exceed the water level
            r = -Inf
        end
        R[s, x] = r
    end
end

# Transition probability array
Q = zeros(Float64, n, m, n)
for s in 1:n
    for x in 1:m
        # Skip infeasible actions (x > s)
        if x > s
            continue
        end
        for j in 1:supp_size
            Q[s, x, min(s - x + j, n)] += probs[j]
        end
    end
end

# Solve the dynamic optimization problem by policy iteration
res = solve(DiscreteDP(R, Q, delta), PFI)

# Number of iterations
res.num_iter

# Optimal policy
res.sigma'

# Optimal value function
res.v'

# Simulate the controlled Markov chain num_rep times
# and compute the average path
init = 1
nyrs = 50
ts_length = nyrs + 1
num_rep = 10^4
ave_path = zeros(ts_length)
for i in 1:num_rep
    path = simulate(res.mc, ts_length, init = init)
    # Running average of the simulated paths, updated in place
    ave_path .= ((i - 1) .* ave_path .+ path) ./ i
end
ave_path'

# Stationary distribution of the Markov chain
stationary_dist = stationary_distributions(res.mc)[1]
stationary_dist'

# Plot sigma, v, ave_path, stationary_dist
indices = 0:maxcap
p1 = scatter(indices, res.sigma .- 1;
             label="Irrigation", title="Optimal Irrigation Policy",
             ylabel="Irrigation", xlabel="Water Level")
p2 = plot(indices, res.v;
          label="Value", title="Optimal Value Function",
          ylabel="Value", xlabel="Water Level")
p3 = plot(ave_path .- 1;
          label="Water Level", title="Average Optimal State Path",
          ylabel="Water Level", xlabel="Year")
p4 = bar(indices, stationary_dist;
         label="Probability", title="Stationary Distribution",
         ylabel="Probability", xlabel="Water Level")
plot(p1, p2, p3, p4)

# Arrays of state and action indices (state-action-pairs formulation)
S = 0:n-1
X = 0:m-1
s_indices = Int64[]
a_indices = Int64[]
S_left = reshape(S, n, 1) .- reshape(S, 1, n)
for i in 1:n
    for j in 1:n
        # Keep only feasible state-action pairs (irrigation <= water level)
        if S_left[i, j] >= 0
            push!(s_indices, i)
            push!(a_indices, j)
        end
    end
end

# Reward vector
S_left_o = S_left
S_left = Vector{Int64}(undef, length(a_indices))
for i in 1:length(a_indices)
    S_left[i] = S_left_o[s_indices[i], a_indices[i]]
end
R = F.(X[a_indices]) .+ G.(S_left)

# Transition probability array
L = length(S_left)
Q = zeros(L, n)
for i in 1:L
    s_left = S_left[i]
    for j in 1:supp_size
        Q[i, min(s_left + j, n)] += probs[j]
    end
end

# Solve the dynamic optimization problem by policy iteration
res = solve(DiscreteDP(R, Q, delta, s_indices, a_indices), PFI)

# Number of iterations
res.num_iter

# Simulate the controlled Markov chain num_rep times
# and compute the average path
init = 1
nyrs = 50
ts_length = nyrs + 1
num_rep = 10^4
ave_path = zeros(ts_length)
for i in 1:num_rep
    path = simulate(res.mc, ts_length, init = init)
    ave_path .= ((i - 1) .* ave_path .+ path) ./ i
end
ave_path

# Stationary distribution of the Markov chain
stationary_dist = stationary_distributions(res.mc)[1]

# Plot sigma, v, ave_path, stationary_dist
indices = 0:maxcap
p1 = scatter(indices, res.sigma .- 1;
             label="Irrigation", title="Optimal Irrigation Policy",
             ylabel="Irrigation", xlabel="Water Level")
p2 = plot(indices, res.v;
          label="Value", title="Optimal Value Function",
          ylabel="Value", xlabel="Water Level")
p3 = plot(ave_path .- 1;
          label="Water Level", title="Average Optimal State Path",
          ylabel="Water Level", xlabel="Year")
p4 = bar(indices, stationary_dist;
         label="Probability", title="Stationary Distribution",
         ylabel="Probability", xlabel="Water Level")
plot(p1, p2, p3, p4)
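
# --- Optional sanity checks (an illustrative sketch, not part of the original
# script). Assuming the state-action-pair variables defined above (Q, R,
# s_indices, a_indices, res) are still in scope, each row of Q should be a
# probability distribution and the optimal policy should respect the
# feasibility constraint that irrigation never exceeds the current water level.
@assert all(x -> isapprox(x, 1.0), sum(Q, dims=2))          # rows of Q sum to one
@assert length(R) == length(s_indices) == length(a_indices)  # one reward per pair
@assert all(res.sigma .<= 1:n)   # optimal action index never exceeds state index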