#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# In[6]:


import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import _lib.pr_func as pr
from _lib.utility import *
from scipy.interpolate import InterpolatedUnivariateSpline
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'")
sns.set_style("whitegrid")
#import sys
#sys.path.append('..')


# # The Legendre Transform and Rate Distortion
# ## _When is it true, that dU/dI = 1/beta?_

# ### Legendre Transform
# The Legendre transform of a function $f:x\mapsto f(x)$ is given by a function $g:u\mapsto g(u)$, such that $\pm\frac{dg}{du}(u) = x(u)$. $g$ can be obtained by treating $x$ as a function of $u$ and defining
# $$
# u := \frac{df}{dx}, \quad g(u) := \pm (u\, x(u) - f(x(u)))
# $$
# since then automatically $\pm \frac{dg}{du} = x(u) + u \frac{dx}{du} - \frac{df}{dx} \frac{dx}{du} = x(u)$. Similarly, if $g$ is given, then $f$ can be reconstructed by 
# $$
# x := \pm \frac{dg}{du}, \quad f(x) := x \, u(x) \mp g(u(x)) \, .
# $$
# 
# ### Free Energy principle for one-step decision-making with fixed prior
# The Legendre transform can be applied to single-task decision-making with utility function $U$ and fixed prior $p_0$. If we consider the equilibrium Free Energy as a function of $\tau := \frac{1}{\beta}$,
# $$
# F(\tau) = \tau \log Z_\tau = \tau \log \sum_a p_0(a) e^{U(a)/\tau} \, ,
# $$
# then $F$ is the Legendre transform of the expected utility $\bar U := \mathbb E[U]$ as a function of $D_{KL}(p(a)\|p_0(a))$. This can be seen by explicitly differentiating $F$: We have
# 
# $$
# \frac{dF}{d\tau} = \log Z_\tau + \frac{\tau}{Z_\tau} \sum_a p_0(a) e^{U(a)/\tau} \frac{-U(a)}{\tau^2} = \log Z_\tau -\frac{\bar U}{\tau} = \frac{1}{\tau} ( F(\tau) - \bar U) , 
# $$
# and therefore,
# $$
# I := - \frac{dF}{d\tau} =  \frac{1}{\tau} (\bar U - F(\tau)) = \frac{\bar U}{\tau} - \log Z_\tau = D_{KL}(p(a)\|p_0(a)) \, .
# $$
# and
# $$
# f(I) = \tau I + F(\tau) = \bar U
# $$
# Hence $F:\tau \mapsto F(\tau)$ is the Legendre transform of $\bar U: I\mapsto \bar U(I)$ and it follows that $\frac{d\bar U}{dI} = \tau = \frac{1}{\beta}$.
# 
# __Note:___ The same calculation works for multi-task decision-making, i.e. when $U$ is a function of $w$ and $a$. Here, the equilibrium Free Energy is given by 
# $$
# \frac{1}{\beta} \sum_w \rho(w) \log Z(w) . 
# $$

# ### Rate distortion
# In the limit case of a perfectly optimal prior, $p_0$ is replaced by $p(a) = \sum_w p_\beta(a|w) \rho(w)$, in particular, there might be a different prior for each $\beta$. The above derivation cannot be followed since there is no closed from expression for the equilibrium Free Energy in terms of $\beta$ anymore. In particular, by taking the derivative of $F$ with respect to $\tau$ as in the derivation above, the $\tau$-dependency of the prior would be neglected.

# ## Simulations

# In[7]:


def RD(U,pw,beta):
    pa = pr.func(vars=['a'], val='unif').normalize()
    test = 0
    for i in range(0,1000):
        pagw = (pa*pr.exp(beta*U)).normalize(['a'])
        pa = pr.sum(['w'],pagw*pw)
        if np.linalg.norm(test-pagw.val)<1e-10:
            break
        test= pagw.val
    EU = pr.sum(U*pagw*pw)
    IWA = pr.sum(pw*pagw*pr.log(pagw/pa))
    return EU.val, IWA.val

def nonRD(U,pw,beta):    
    pa = pr.func(vars=['a'], val='unif').normalize()
    pagw = (pa*pr.exp(beta*U)).normalize(['a'])
    EU = pr.sum(U*pagw*pw)
    DKL = pr.sum(pw*pagw*pr.log(pagw/pa))
    return EU.val, DKL.val


# In[38]:


# setup
N,K = 20,20
pr.set_dims([('w',N),('a',K)])
U = pr.func(val=utility(N,K,6.0),vars=['w','a'])
# U = pr.func(val=np.eye(N),vars=['w','a'])
pw = pr.func(vars=['w'],val='unif').normalize()

# RD curve
EUs = []
Is = []
betas = np.linspace(0.3,5,1000)

for beta in betas:
    EU,I = RD(U,pw,beta) 
    EUs.append(EU)
    Is.append(I)
    
# nonRD curve
EUs_non = []
Is_non = []

for beta in betas:
    EU,I = nonRD(U,pw,beta) 
    EUs_non.append(EU)
    Is_non.append(I)

with plt.rc_context({"figure.figsize": (16,8)}):
    plt.title("Rate distortion curves")
    plt.plot(Is,EUs,label="optimal prior")
    plt.plot(Is_non,EUs_non, label="non-optimal prior")
    plt.legend()
    plt.show()


# In[60]:


# calc derivative of EU wrt I
f_non = InterpolatedUnivariateSpline(Is_non, EUs_non, k=5)
dfdx_non = f_non.derivative()
dUdx_non = dfdx_non(Is_non)
# compare against 1/beta
with plt.rc_context({"figure.figsize": (16,8)}):
    plt.title("Comparison of ${dU}/{dI}$ and ${1}/{\\beta}$ -- non-optimal prior")
    plt.plot(Is_non,1.0/betas,label="1/beta",linestyle="-",color="xkcd:silver")
    plt.plot(Is_non,dUdx_non,label="dU/dI",linestyle=":",color="xkcd:denim blue")
    plt.legend()
    plt.show()
    print "||dU/dI - 1/beta|| = {}".format(np.linalg.norm(dUdx_non-1.0/betas))  


# In[61]:


# calc derivative of EU wrt I
f = InterpolatedUnivariateSpline(Is, EUs, k=5)
dfdx = f.derivative()
dUdx = dfdx(Is)
# compare against 1/beta
with plt.rc_context({"figure.figsize": (16,8)}):
    plt.title("Comparison of ${dU}/{dI}$ and ${1}/{\\beta}$ -- optimal prior")
    plt.plot(Is,1.0/betas,label="1/beta",linestyle="-",color="xkcd:silver")
    plt.plot(Is,dUdx,label="dU/dI",linestyle=":",color="xkcd:denim blue")
    plt.legend()
    plt.show()
    print "||dU/dI - 1/beta|| = {}".format(np.linalg.norm(dUdx-1.0/betas))  


# In[65]:


# zommed in
with plt.rc_context({"figure.figsize": (16,8)}):
    plt.title("Comparison of ${dU}/{dI}$ and ${1}/{\\beta}$ -- optimal prior, zoomed in")
    plt.scatter(Is,1.0/betas,label="1/beta",marker='.',color="xkcd:silver")
    plt.scatter(Is,dUdx,label="dU/dI",marker='.',color="xkcd:denim blue")
    plt.xlim([0,0.01])
    plt.legend()
    plt.show()


# In[ ]: