# Shosei Yu
# Faculty of Economics, University of Tokyo
# From Miranda and Fackler, Applied Computational Economics and Finance, 2002, Section 7.6.5
# Julia translation of the Python version
using QuantEcon
using Plots
# Select the PlotlyJS backend for Plots
plotlyjs()
# NOTE(review): the lines below were pasted notebook output of `plotlyjs()`,
# not valid Julia code; kept as comments so the file parses.
# Plotly javascript loaded.
# To load again call
# init_notebook(true)
# Plots.PlotlyJSBackend()
# Model parameters, declared `const` so the globals are type-stable when
# referenced from the functions and loops below (non-const globals are
# `Any`-typed and defeat specialization).
# The maximum capacity of the reservoir
const maxcap = 30
# The number of states (water levels 0, 1, ..., maxcap)
const n = maxcap + 1
# The number of actions (units of water released)
const m = n
# Coefficients of the two payoff functions F and G
const a1, b1 = 14, 0.8
const a2, b2 = 10, 0.4

"""
    F(x)

Payoff from releasing `x` units of water: `a1 * x^b1`.
"""
function F(x::Number)
    return a1 * x^b1
end

"""
    G(x)

Payoff from the `x` units of water left in the reservoir: `a2 * x^b2`.
"""
function G(x::Number)
    return a2 * x^b2
end

# Probabilities of the random inflow (support is 0:supp_size-1 units)
const probs = [0.1, 0.2, 0.4, 0.2, 0.1]
const supp_size = length(probs)
# Discount factor
const delta = 0.9
# Reward matrix: R[s, x] is the payoff of taking action x in state s.
# Julia indices are 1-based, so state s corresponds to water level s - 1
# (and action x to a release of x - 1 units); releasing more than the stock
# (x > s) is infeasible and gets -Inf so it is never chosen.
R = [x <= s ? F(x - 1) + G(s - x) : -Inf for s in 1:n, x in 1:m]
# Transition probability array: Q[s, x, s'] = P(next state s' | state s, action x).
# After releasing x - 1 units the remaining stock is s - x (0-based); a random
# inflow with distribution `probs` is then added, truncated at full capacity
# (state index n).  Rows for infeasible actions (x > s) are left at zero.
Q = zeros(Float64, n, m, n)
for s in 1:n, x in 1:min(s, m)
    for (k, p) in enumerate(probs)
        Q[s, x, min(s - x + k, n)] += p
    end
end
# Solve the dynamic optimization problem by policy function iteration (PFI)
res = solve(DiscreteDP(R, Q, delta), PFI)
# NOTE(review): the following line was pasted REPL output in the original
# file and is not valid Julia; kept as a comment.
# QuantEcon.DPSolveResult{QuantEcon.PFI,Float64}([338.416, 361.988, ...], ..., 4, [1, 1, 1, 2, 2, ...], Discrete Markov Chain ...)
# Number of iterations until convergence
res.num_iter
# Optimal policy (1-based action index for each state)
res.sigma'
# (pasted output) 1×31 RowVector{Int64,Array{Int64,1}}: 1 1 1 2 2 2 2 2 2 2 3 3 3 ... 6 6
# Optimal value function
res.v'
# (pasted output) 1×31 RowVector{Float64,Array{Float64,1}}: 338.416 361.988 ... 651.278
# Simulate the controlled Markov chain num_rep times
# and compute the average path across simulations
init = 1
nyrs = 50
ts_length = nyrs + 1
num_rep = 10^4
ave_path = zeros(ts_length)
for _ in 1:num_rep
    path = simulate(res.mc, ts_length, init = init)
    # Accumulate in place (.+=) and divide once after the loop.  The original
    # recursion (i/(i+1))*ave + (1/(i+1))*path averaged in an extra all-zeros
    # vector, biasing every entry by num_rep/(num_rep+1) (the pasted output
    # showed 0.9999 for the fixed initial state instead of exactly 1.0).
    # In-place broadcasting also avoids rebinding a global inside a loop,
    # which is unreliable in script soft scope on Julia >= 1.0.
    ave_path .+= path
end
ave_path ./= num_rep
ave_path'
# (pasted output) 1×51 RowVector{Float64,Array{Float64,1}}: 0.9999 3.004 4.69893 ... 13.5274
# Stationary distribution of the controlled Markov chain
stationary_dist = stationary_distributions(res.mc)[1]
stationary_dist'
# (pasted output) 1×31 RowVector{Float64,Array{Float64,1}}: 0.0 0.0 1.15115e-7 ... 0.0
# Plot the optimal policy, the value function, the average simulated path and
# the stationary distribution.  States/actions are shifted by 1 so the axes
# show 0-based water levels and release amounts.
indices = 0:maxcap
p1 = plot(scatter(indices, res.sigma .- 1; label = "Irrigation");
          xlabel = "Water Level", ylabel = "Irrigation",
          title = "Optimal Irrigation Policy")
p2 = plot(plot(indices, res.v; label = "Value");
          xlabel = "Water Level", ylabel = "Value",
          title = "Optimal Value Function")
p3 = plot(plot(ave_path .- 1; label = "Water Level");
          xlabel = "Year", ylabel = "Water Level",
          title = "Average Optimal State Path")
p4 = plot(bar(indices, stationary_dist; label = "Probability");
          xlabel = "Water Level", ylabel = "Probability",
          title = "Stationary Distribution")
plot(p1, p2, p3, p4)
# State-action pair formulation: enumerate only the feasible
# (state, action) index pairs instead of using dense R and Q arrays.
S = 0:n-1
X = 0:m-1
s_indices = Int64[]
a_indices = Int64[]
# S_left[i, j] = S[i] - S[j]: water remaining when action j is taken in
# state i; the pair is feasible iff this is nonnegative.
S_left = reshape(S, n, 1) .- reshape(S, 1, n)
for i in 1:n, j in 1:n
    if S_left[i, j] >= 0
        push!(s_indices, i)
        push!(a_indices, j)
    end
end
# Remaining stock for each feasible pair.  A typed comprehension replaces
# the deprecated `Array{Int64}(len)` constructor (removed after Julia 0.6)
# and works on both old and new Julia versions.
S_left_o = S_left
S_left = [S_left_o[s_indices[i], a_indices[i]] for i in eachindex(a_indices)]
# Reward vector over the feasible pairs; fused broadcast (.+) avoids an
# intermediate array.  Note X[a] == a - 1 since X = 0:m-1.
R = F.(X[a_indices]) .+ G.(S_left)
# (pasted output) 496-element Array{Float64,1}: 0.0 10.0 14.0 13.1951 ... 217.036 212.728
# Transition probabilities for the state-action pair formulation:
# Q[i, s'] = P(next state s' | i-th feasible (state, action) pair).
L = length(S_left)
Q = zeros(L, n)
# eachindex instead of 1:length(S_left) (which also ignored the L just defined)
for i in eachindex(S_left)
    s_left = S_left[i]
    for (k, p) in enumerate(probs)
        # next state = remaining stock + inflow, truncated at capacity
        Q[i, min(s_left + k, n)] += p
    end
end
# Solve the dynamic optimization problem by policy function iteration
res = solve(DiscreteDP(R, Q, delta, s_indices, a_indices), PFI)
# (pasted output) QuantEcon.DPSolveResult{QuantEcon.PFI,Float64}([338.416, 361.988, ...], ..., 4, [1, 1, 1, 2, 2, ...], Discrete Markov Chain ...)
# Number of iterations until convergence
res.num_iter
# Simulate the controlled Markov chain num_rep times
# and compute the average path across simulations
init = 1
nyrs = 50
ts_length = nyrs + 1
num_rep = 10^4
ave_path = zeros(ts_length)
for _ in 1:num_rep
    path = simulate(res.mc, ts_length, init = init)
    # Accumulate in place (.+=) and divide once after the loop.  The original
    # recursion (i/(i+1))*ave + (1/(i+1))*path averaged in an extra all-zeros
    # vector, biasing every entry by num_rep/(num_rep+1); in-place
    # broadcasting also avoids rebinding a global inside a loop.
    ave_path .+= path
end
ave_path ./= num_rep
ave_path
# (pasted output) 51-element Array{Float64,1}: 0.9999 3.0158 4.68993 ... 13.5036
# Stationary distribution of the controlled Markov chain
stationary_dist = stationary_distributions(res.mc)[1]
# (pasted output) 31-element Array{Float64,1}: 0.0 0.0 1.15115e-7 ... 0.0
# Plot the optimal policy, the value function, the average simulated path and
# the stationary distribution for the state-action pair formulation.
indices = 0:maxcap
p1 = plot(scatter(indices, res.sigma .- 1; label = "Irrigation");
          xlabel = "Water Level", ylabel = "Irrigation",
          title = "Optimal Irrigation Policy")
p2 = plot(plot(indices, res.v; label = "Value");
          xlabel = "Water Level", ylabel = "Value",
          title = "Optimal Value Function")
p3 = plot(plot(ave_path .- 1; label = "Water Level");
          xlabel = "Year", ylabel = "Water Level",
          title = "Average Optimal State Path")
p4 = plot(bar(indices, stationary_dist; label = "Probability");
          xlabel = "Water Level", ylabel = "Probability",
          title = "Stationary Distribution")
plot(p1, p2, p3, p4)