# coding: utf-8

# # DiscreteDP Example: Water Management
#
# **Daisuke Oyama**
#
# *Faculty of Economics, University of Tokyo*

# From Miranda and Fackler, Applied Computational Economics and Finance, 2002,
# Section 7.6.5

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:

import itertools
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
from quantecon.markov import DiscreteDP


# In[3]:

maxcap = 30
n = maxcap + 1  # Number of states (water levels 0, ..., maxcap)
m = n           # Number of actions (amounts of water released)
a1, b1 = 14, 0.8
a2, b2 = 10, 0.4
F = lambda x: a1 * x**b1  # Benefit from irrigation, given release x
U = lambda c: a2 * c**b2  # Benefit from recreation, given retained water c = s - x
probs = [0.1, 0.2, 0.4, 0.2, 0.1]  # Distribution of the stochastic water inflow
supp_size = len(probs)
beta = 0.9  # Discount factor


# ## Product formulation

# In[4]:

# Reward array: R[s, x] = F(x) + U(s-x) when the release x is feasible
# (x <= s), and -inf otherwise
R = np.empty((n, m))
for s, x in itertools.product(range(n), range(m)):
    R[s, x] = F(x) + U(s-x) if x <= s else -np.inf


# In[5]:

# Transition probability array: from state s with release x, the next
# water level is min(s - x + j, n-1) with probability probs[j]
Q = np.zeros((n, m, n))
for s, x in itertools.product(range(n), range(m)):
    if x <= s:
        for j in range(supp_size):
            Q[s, x, np.minimum(s-x+j, n-1)] += probs[j]


# In[6]:

# Create a DiscreteDP
ddp = DiscreteDP(R, Q, beta)


# In[7]:

# Solve the dynamic optimization problem (by policy iteration)
res = ddp.solve()


# In[8]:

# Number of iterations
res.num_iter


# In[9]:

# Optimal policy
res.sigma


# In[10]:

# Optimal value function
res.v


# In[11]:

# Simulate the controlled Markov chain for num_rep times
# and compute the average
init = 0
nyrs = 50
ts_length = nyrs + 1
num_rep = 10**4
ave_path = np.zeros(ts_length)

for i in range(num_rep):
    path = res.mc.simulate(ts_length, init=init)
    ave_path = (i/(i+1)) * ave_path + (1/(i+1)) * path


# In[12]:

ave_path


# In[13]:

# Stationary distribution of the Markov chain
stationary_dist = res.mc.stationary_distributions[0]


# In[14]:

stationary_dist


# In[15]:

# Plot sigma, v, ave_path, stationary_dist
hspace = 0.3
fig, axes = plt.subplots(2, 2, figsize=(12, 8+hspace))
fig.subplots_adjust(hspace=hspace)

axes[0, 0].plot(res.sigma, '*')
axes[0, 0].set_xlim(-1, 31)
axes[0, 0].set_ylim(-0.5, 5.5)
axes[0, 0].set_xlabel('Water Level')
axes[0, 0].set_ylabel('Irrigation')
axes[0, 0].set_title('Optimal Irrigation Policy')

axes[0, 1].plot(res.v)
axes[0, 1].set_xlim(0, 30)
y_lb, y_ub = 300, 700
axes[0, 1].set_ylim(y_lb, y_ub)
axes[0, 1].set_yticks(np.linspace(y_lb, y_ub, 5, endpoint=True))
axes[0, 1].set_xlabel('Water Level')
axes[0, 1].set_ylabel('Value')
axes[0, 1].set_title('Optimal Value Function')

axes[1, 0].plot(ave_path)
axes[1, 0].set_xlim(0, nyrs)
y_lb, y_ub = 0, 15
axes[1, 0].set_ylim(y_lb, y_ub)
axes[1, 0].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 0].set_xlabel('Year')
axes[1, 0].set_ylabel('Water Level')
axes[1, 0].set_title('Average Optimal State Path')

axes[1, 1].bar(range(n), stationary_dist, align='center')
axes[1, 1].set_xlim(-1, n)
y_lb, y_ub = 0, 0.15
axes[1, 1].set_ylim(y_lb, y_ub+0.01)
axes[1, 1].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 1].set_xlabel('Water Level')
axes[1, 1].set_ylabel('Probability')
axes[1, 1].set_title('Stationary Distribution')

plt.show()
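
# A minimal sanity check (a sketch, assuming quantecon's documented
# `DiscreteDP` API): policy iteration terminates at an exact optimum, so the
# computed value function should be a numerical fixed point of the Bellman
# operator, and value iteration should recover the same policy, typically in
# many more iterations.

# res.v is (approximately) a fixed point of the Bellman operator
np.allclose(ddp.bellman_operator(res.v), res.v)

# Value iteration reaches the same optimal policy
res_vi = ddp.solve(method='value_iteration')
np.array_equal(res_vi.sigma, res.sigma), res_vi.num_iter
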
# ## State-action pairs formulation

# In[16]:

# Arrays of state and action indices
S = np.arange(n)
X = np.arange(m)
# S_left[s, x] = water retained when x is released at water level s;
# the feasible pairs are those with S_left >= 0
S_left = S.reshape(n, 1) - X.reshape(1, m)
s_indices, a_indices = np.where(S_left >= 0)


# In[17]:

# Reward vector (over the feasible state-action pairs only)
S_left = S_left[s_indices, a_indices]
R = F(X[a_indices]) + U(S_left)


# In[18]:

# Transition probability array, stored as a sparse matrix of shape (L, n)
L = len(S_left)
Q = sparse.lil_matrix((L, n))
for i, s_left in enumerate(S_left):
    for j in range(supp_size):
        Q[i, np.minimum(s_left+j, n-1)] += probs[j]


# In[19]:

# Create a DiscreteDP
ddp = DiscreteDP(R, Q, beta, s_indices, a_indices)


# In[20]:

# Solve the dynamic optimization problem (by policy iteration)
res = ddp.solve()


# In[21]:

# Number of iterations
res.num_iter


# In[22]:

# Simulate the controlled Markov chain for num_rep times
# and compute the average
init = 0
nyrs = 50
ts_length = nyrs + 1
num_rep = 10**4
ave_path = np.zeros(ts_length)

for i in range(num_rep):
    path = res.mc.simulate(ts_length, init=init)
    ave_path = (i/(i+1)) * ave_path + (1/(i+1)) * path


# In[23]:

# Stationary distribution of the Markov chain
stationary_dist = res.mc.stationary_distributions[0]


# In[24]:

# Plot sigma, v, ave_path, stationary_dist
hspace = 0.3
fig, axes = plt.subplots(2, 2, figsize=(12, 8+hspace))
fig.subplots_adjust(hspace=hspace)

axes[0, 0].plot(res.sigma, '*')
axes[0, 0].set_xlim(-1, 31)
axes[0, 0].set_ylim(-0.5, 5.5)
axes[0, 0].set_xlabel('Water Level')
axes[0, 0].set_ylabel('Irrigation')
axes[0, 0].set_title('Optimal Irrigation Policy')

axes[0, 1].plot(res.v)
axes[0, 1].set_xlim(0, 30)
y_lb, y_ub = 300, 700
axes[0, 1].set_ylim(y_lb, y_ub)
axes[0, 1].set_yticks(np.linspace(y_lb, y_ub, 5, endpoint=True))
axes[0, 1].set_xlabel('Water Level')
axes[0, 1].set_ylabel('Value')
axes[0, 1].set_title('Optimal Value Function')

axes[1, 0].plot(ave_path)
axes[1, 0].set_xlim(0, nyrs)
y_lb, y_ub = 0, 15
axes[1, 0].set_ylim(y_lb, y_ub)
axes[1, 0].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 0].set_xlabel('Year')
axes[1, 0].set_ylabel('Water Level')
axes[1, 0].set_title('Average Optimal State Path')

axes[1, 1].bar(range(n), stationary_dist, align='center')
axes[1, 1].set_xlim(-1, n)
y_lb, y_ub = 0, 0.15
axes[1, 1].set_ylim(y_lb, y_ub+0.01)
axes[1, 1].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 1].set_xlabel('Water Level')
axes[1, 1].set_ylabel('Probability')
axes[1, 1].set_title('Stationary Distribution')

plt.show()
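
# As a complementary check on the simulation above (a sketch using only the
# arrays already computed): the long-run average water level implied by the
# stationary distribution should match the level at which the average
# simulated path settles in the bottom-left panel.

np.dot(stationary_dist, np.arange(n))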