#!/usr/bin/env python
# coding: utf-8

# # Neal's funnel
# 
# This notebook introduces a toy distribution due to Radford Neal. It is the $(d+1)$-dimensional distribution
# 
# $p(\boldsymbol{x},\nu) = \left[\prod_{i=1}^{d} \mathcal{N}(x_i|0,e^{\nu / 2})\right] \mathcal{N}(\nu|0,3),$
# 
# which has been shown to cause problems for samplers owing to the "funnel"-shaped geometry of its marginals $(x_i,\nu)$,
# 
# $p(x_i,\nu) = \mathcal{N}(x_i|0,e^{\nu / 2})\,\mathcal{N}(\nu|0,3),$
# 
# which we now plot.

# In[1]:


import pints
import pints.toy
import numpy as np
import matplotlib.pyplot as plt

# Create log pdf
log_pdf = pints.toy.NealsFunnelLogPDF()

# Plot the marginal density of (x_i, nu)
levels = np.linspace(-7, -1, 20)
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)
X, Y = np.meshgrid(x, y)
Z = [[log_pdf.marginal_log_pdf(i, j) for i in x] for j in y]
plt.contour(X, Y, Z, levels=levels)
plt.xlabel(r'$x_i$')
plt.ylabel(r'$\nu$')
plt.show()


# We can also sample independently from this toy LogPDF, and add that to the visualisation:

# In[2]:


direct = log_pdf.sample(1500)

# Plot x_1 against nu (the last column of the samples)
plt.contour(X, Y, Z, levels=levels, colors='k', alpha=0.2)
plt.scatter(direct[:, 0], direct[:, 9], alpha=0.2)
plt.xlim(-10, 10)
plt.ylim(-10, 10)
plt.show()


# We now try to sample from the distribution with MCMC:

# In[3]:


# Create an adaptive covariance MCMC routine with three chains
x0 = np.random.uniform(-25, 25, size=(3, 10))
mcmc = pints.MCMCController(log_pdf, 3, x0, method=pints.AdaptiveCovarianceMCMC)

# Stop after 3000 iterations
mcmc.set_max_iterations(3000)

# Disable logging
mcmc.set_log_to_screen(False)

# Run!
print('Running...')
chains = mcmc.run()
print('Done!')

# Discard warm-up
chains = [chain[1000:] for chain in chains]


# The adaptive covariance method fails to get into the narrow neck of the funnel.

# In[4]:


stacked = np.vstack(chains)

plt.contour(X, Y, Z, levels=levels, colors='k', alpha=0.2)
plt.scatter(stacked[:, 0], stacked[:, 9], alpha=0.2)
plt.xlim(-10, 10)
plt.ylim(-10, 10)
plt.show()


# Now check how close the sampled result is to the true distribution, using the [Kullback-Leibler divergence](https://en.wikipedia.org/wiki/Kullback–Leibler_divergence), and compare this with the result from sampling directly.

# In[5]:


print(log_pdf.kl_divergence(stacked))
print(log_pdf.kl_divergence(direct))


# Hamiltonian Monte Carlo fares much better on this curved density.

# In[28]:


# Create a Hamiltonian Monte Carlo routine with three chains
x0 = np.random.uniform(0, 10, size=(3, 10))
sigma0 = np.repeat(0.25, 10)
mcmc = pints.MCMCController(log_pdf, 3, x0, method=pints.HamiltonianMCMC, sigma0=sigma0)

# Stop after 500 iterations
mcmc.set_max_iterations(500)

# Disable logging
mcmc.set_log_to_screen(False)

# Run!
print('Running...')
chains = mcmc.run()
print('Done!')

stacked = np.vstack(chains)


# Hamiltonian Monte Carlo does better than adaptive covariance, but the fit is still not great.

# In[29]:


print(log_pdf.kl_divergence(stacked))
print(log_pdf.kl_divergence(direct))


# Visualising the path of one of the chains, with its divergent iterations marked in red, shows that the sampler struggles to explore both the neck and the outer region efficiently.

# In[30]:


# Divergent iterations reported by the first chain's sampler
divergent_transitions = mcmc.samplers()[0].divergent_iterations()

plt.contour(X, Y, Z, levels=levels, colors='k', alpha=0.2)
plt.plot(chains[0][:, 0], chains[0][:, 9], alpha=0.5)
plt.scatter(chains[0][divergent_transitions, 0],
            chains[0][divergent_transitions, 9], color='red')
plt.xlim(-10, 10)
plt.ylim(-10, 10)
plt.show()
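# A quick way to see where the Hamiltonian chains fall short is to compare the marginal distribution of $\nu$ (the last column of the samples) from MCMC against the independent samples. The cell below is a minimal sketch, assuming the `stacked` and `direct` arrays from the cells above are still in scope.

# In[31]:


# Overlay histograms of the nu marginal: a thinner left tail for the MCMC
# samples indicates that the narrow neck of the funnel is under-explored.
bins = np.linspace(-10, 10, 40)
plt.hist(direct[:, -1], bins=bins, density=True, alpha=0.5, label='direct')
plt.hist(stacked[:, -1], bins=bins, density=True, alpha=0.5, label='Hamiltonian MCMC')
plt.xlabel(r'$\nu$')
plt.ylabel('Density')
plt.legend()
plt.show()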