#!/usr/bin/env python
# coding: utf-8

# # Policy Iteration
# 
# Stachurski (2008) Chapter 10

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt


# ## The Ramsey model
# 
# Repeated from before.

# In[3]:


class Ramsey:
    """One-sector Ramsey model"""

    def __init__(self, A, α, ρ):
        self.A, self.α, self.ρ = A, α, ρ

    def f(self, x):
        """Production function"""
        return self.A * x ** self.α

    def U(self, x):
        """Utility from consumption"""
        return np.log(x)

    def u(self, x, y):
        """Reduced form utility"""
        return self.U(self.f(x) - y)

    def is_feasible(self, x, y):
        """Checks feasibility"""
        return self.f(x) >= y


# ## Analytic solution
# 
# Repeated from before.

# In[4]:


def analytic_solutions(A, α, ρ):
    c0 = (1.0 / (1.0 - α * ρ) / (1.0 - ρ) * np.log(A)
          + α * ρ * np.log(α * ρ) / (1.0 - α * ρ) / (1.0 - ρ)
          + np.log(1.0 - α * ρ) / (1.0 - ρ))
    c1 = α / (1.0 - α * ρ)

    def value_func(x):
        return c0 + c1 * np.log(x)

    def policy_func(x):
        return ρ * c1 * A * x**α / (1.0 + ρ * c1)  ## Fixed this line

    return value_func, policy_func


# ## Linear approximation
# 
# A more practical version than last time ...

# In[5]:


class PLApprox:  # Refactored this class
    """Piecewise-linear approximation on a uniform grid over [a, b]."""

    def __init__(self, a, b, N, upsample=10):
        self.a, self.b, self.N = a, b, N
        self.grids = np.linspace(a, b, upsample*N)   # fine grid used for maximization
        self.centers = np.linspace(a, b, N)          # interpolation nodes

    def proj(self, f):
        """Evaluate a function at the interpolation nodes."""
        return np.array([f(c) for c in self.centers])

    def inj(self, a):
        """Piecewise-linear interpolant through the node values a."""
        return interp1d(self.centers, a)


# # Policy Iteration

# A function $f$ satisfying $(x, f(x)) \in \mathbb{D}$ for every $x \in X$ is called a policy function.
# 
# For a policy function $f$ and a sufficiently large $T$, the value of that policy is
# 
# $$
# v_f(x) = \sum_{t=0}^\infty \rho^t u(f^t(x), f^{t+1}(x))
# \approx \sum_{t=0}^T \rho^t u(f^t(x), f^{t+1}(x))
# $$

# In[6]:


def value_of_policy(f, model, apx, T):
    """Approximate v_f on apx.centers by the truncated sum up to horizon T."""
    fc = apx.inj(f)
    X = apx.centers
    Y = fc(X)
    vf = np.zeros_like(X)
    for i in range(T):
        vf += model.ρ**i * model.u(X, Y)
        X, Y = Y, fc(Y)
    return vf


# Policy update rule: $f \to v_f \to \hat f$
# 
# $$
# \hat f (x) = \arg\max_y u(x, y) + \rho v_f(y)
# $$

# In[7]:


def greedy(vf, model, apx):
    """Policy that is greedy with respect to vf, found by grid search over apx.grids."""
    vc = apx.inj(vf)
    Y = apx.grids
    fhat = np.empty_like(vf)
    for i, x in enumerate(apx.centers):
        X = np.ones_like(Y) * x
        with np.errstate(invalid='ignore'):
            maximand = np.where(model.is_feasible(X, Y),
                                model.u(X, Y) + model.ρ*vc(Y),
                                -np.inf)
        fhat[i] = Y[np.argmax(maximand)]
    return fhat


# ### Policy Iteration Algorithm
# 
# \begin{align}
# \begin{array}{cccccccc}
# f_0 & & f_1 & & f_2 & \cdots & \to & f^* \\
# \downarrow & \nearrow & \downarrow & \nearrow & \downarrow & & & \\
# v_0 & & v_1 & & v_2 & \cdots & \to & v^* \\
# \end{array}
# \end{align}

# Experiment with the usual model.

# In[8]:


A, α, ρ = 1.1, 0.4, 0.9
ramsey = Ramsey(A, α, ρ)
apx = PLApprox(a=1e-3, b=5.0, N=500, upsample=100)


# In[9]:


df = apx.proj(lambda x: 0.5 * ramsey.f(x))   # initial policy: save half of output
fhat = df
for _ in range(10):
    vf = value_of_policy(fhat, ramsey, apx, T=100)
    fhat = greedy(vf, ramsey, apx)

for _ in range(3):
    plt.plot(apx.centers, vf)
    vf = value_of_policy(fhat, ramsey, apx, T=100)
    fhat = greedy(vf, ramsey, apx)

v, h = analytic_solutions(A, α, ρ)
plt.plot(apx.grids, v(apx.grids), ':')


# In[10]:


plt.plot(apx.centers, fhat)
plt.plot(apx.centers, h(apx.centers))


# ## Why does it converge?
# 
# For the policy update
# 
# $$
# f_{n+1}(x) = \arg\max_y u(x, y) + \rho v_n(y)
# $$
# 
# define the corresponding operator
# 
# $$
# T_{n+1} w (x) = u(x, f_{n+1}(x)) + \rho w(f_{n+1}(x)),
# $$
# 
# where $v_n = v_{f_n}$ denotes the value of the current policy $f_n$.
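# Before turning to the convergence argument, here is a quick numerical sketch of $T_{n+1}$ (an addition to the notes, not from Stachurski):
# the operator can be applied on the grid with the same `PLApprox` machinery used above.
# The helper name `policy_operator` and the check below are ours; iterating the operator from any starting guess
# should approach the value of the fixed policy, which `value_of_policy` approximates by a truncated sum along the trajectory.

# In[ ]:


# Illustration added to the notes; not part of the original notebook.
def policy_operator(w, f, model, apx):
    """One application of T_f: (T_f w)(x) = u(x, f(x)) + ρ w(f(x)).

    Both `w` and `f` are vectors on apx.centers; returns a vector on apx.centers.
    """
    wc = apx.inj(w)   # piecewise-linear interpolant of the value guess
    fc = apx.inj(f)   # piecewise-linear interpolant of the policy
    X = apx.centers
    Y = fc(X)
    return model.u(X, Y) + model.ρ * wc(Y)


# Iterating T_{n+1} from the last value estimate; the result should agree with the
# truncated-sum evaluation of the same policy up to interpolation/truncation error,
# which is small relative to the scale of v_f itself.
w = vf.copy()
for _ in range(200):
    w = policy_operator(w, fhat, ramsey, apx)
print(np.max(np.abs(w - value_of_policy(fhat, ramsey, apx, T=200))))
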
# Note three properties of $T_{n+1}$:
# 
# $$
# T_{n+1} v_{n+1} = v_{n+1},
# $$
# 
# that is, $v_{n+1}$ is the fixed point of $T_{n+1}$;
# 
# $$
# T_{n+1} v_n = Tv_n,
# $$
# 
# because $f_{n+1}$ attains the maximum defining $Tv_n$; and monotonicity,
# 
# $$
# v \le w \Longrightarrow T_{n+1}v \le T_{n+1}w.
# $$

# ### Monotonicity of the values
# 
# \begin{align}
# v_n(x) &= u(x, f_n(x)) + \rho v_n(f_n(x)) \\
# &\le u(x, f_{n+1}(x)) + \rho v_n(f_{n+1}(x)) \\
# &= T_{n+1} v_n(x) \\
# &\le T_{n+1}^2 v_n(x) \\
# &\le \cdots \\
# &\to v_{n+1}(x)
# \end{align}
# 
# The first inequality holds because $f_{n+1}$ maximizes $u(x, y) + \rho v_n(y)$;
# the remaining steps use the monotonicity of $T_{n+1}$, whose iterates converge to its fixed point $v_{n+1}$.
# In particular $v_n \le v_{n+1}$, and $v_{n+1} \le v^*$ because $v_{n+1}$ is the value of a feasible policy.

# ### Relation to the Bellman operator $T$
# 
# \begin{align}
# T^n v_0 \le v_n \le v^*
# \end{align}
# 
# #### Proof
# 
# The case $n = 0$ holds trivially.
# Suppose the claim holds up to $n$. Then
# 
# \begin{align}
# T^{n+1} v_0 = T(T^n v_0) \le Tv_n = T_{n+1} v_n \le v_{n+1},
# \end{align}
# 
# using the monotonicity of $T$ for the inequality and the chain above for the last step, while $v_{n+1} \le v^*$ as noted above.
# Since $T^n v_0 \to v^*$ by the contraction property of $T$, the policy iterates $v_n$ are squeezed up to $v^*$ as well.
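# The bound can also be checked numerically (a sketch added here, not part of the original argument).
# The Bellman operator $T$ below reuses the same feasibility mask and grid search as `greedy`,
# only taking the maximum instead of the maximizer; the helper name `bellman` and the bookkeeping below are ours.

# In[ ]:


# Numerical check added to the notes; not part of the original notebook.
def bellman(w, model, apx):
    """One step of the Bellman operator: (Tw)(x) = max_y u(x, y) + ρ w(y)."""
    wc = apx.inj(w)
    Y = apx.grids
    discounted_w = model.ρ * wc(Y)   # independent of x, so compute once
    Tw = np.empty_like(w)
    for i, x in enumerate(apx.centers):
        X = np.ones_like(Y) * x
        with np.errstate(invalid='ignore'):
            maximand = np.where(model.is_feasible(X, Y),
                                model.u(X, Y) + discounted_w,
                                -np.inf)
        Tw[i] = np.max(maximand)
    return Tw


# Run value iteration (T^n v_0) and policy iteration (v_n) from the same v_0
# and compare them on the grid.
v_pi = value_of_policy(df, ramsey, apx, T=100)   # v_0: value of the initial policy f_0
v_vi = v_pi.copy()                               # T^0 v_0
for n in range(3):
    f_next = greedy(v_pi, ramsey, apx)                   # f_{n+1}
    v_pi = value_of_policy(f_next, ramsey, apx, T=100)   # v_{n+1}
    v_vi = bellman(v_vi, ramsey, apx)                    # T^{n+1} v_0
    # should be (weakly) negative, up to truncation and interpolation error
    print(n + 1, np.max(v_vi - v_pi))


# Policy iteration typically needs far fewer updates than value iteration to get close to $v^*$,
# so the printed maxima should stay clearly negative here.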