import numpy as np
import matplotlib.pyplot as plt
import pods
import teaching_plots as plot
import mlai

data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

offset = y.mean()
scale = np.sqrt(y.var())

xlim = (1875,2030)
ylim = (2.5, 6.5)
yhat = (y-offset)/scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.',markersize=10)
ax.set_xlabel('year', fontsize=20)
ax.set_ylabel('pace min/km', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

mlai.write_figure(figure=fig, filename='../slides/diagrams/datasets/olympic-marathon.svg', transparent=True, frameon=True)

import mlai
import numpy as np
import scipy as sp

class LML(mlai.LM):
    """Linear model with evolving loss
    :param X: input values
    :type X: numpy.ndarray
    :param y: target values
    :type y: numpy.ndarray
    :param basis: basis function 
    :param type: function
    :param beta: weight of the loss function
    :param type: float"""

    def __init__(self, X, y, basis=None, beta=1.0, lambd=1.0):
        "Initialise"
        if basis is None:
            basis = mlai.basis(mlai.polynomial, number=2)
        mlai.LM.__init__(self, X, y, basis)
        self.s = np.ones((self.num_data, 1))#np.random.rand(self.num_data, 1)>0.5
        self.update_w()
        self.sigma2 = 1/beta
        self.beta = beta
        self.name = 'LML_'+basis.function.__name__
        self.objective_name = 'Weighted Sum of Square Training Error'
        self.lambd = lambd

    def update_QR(self):
        "Perform the QR decomposition on the basis matrix."
        self.Q, self.R = np.linalg.qr(self.Phi*np.sqrt(self.s))

    def fit(self):
        """Minimize the objective function with respect to the parameters"""
        for i in range(30):
            self.update_w() # In the linear regression clas
            self.update_s()
        
    def update_w(self):
        self.update_QR()
        self.w_star = sp.linalg.solve_triangular(self.R, np.dot(self.Q.T, self.y*np.sqrt(self.s)))
        self.update_losses()

    def predict(self, X):
        """Return the result of the prediction function."""
        return np.dot(self.basis.Phi(X), self.w_star), None
        
    def update_s(self):
        """Update the weights"""
        self.s = 1/(self.lambd + self.beta*self.losses)
                                                 
    def update_losses(self):
        """Compute the loss functions for each data point."""
        self.update_f()
        self.losses = ((self.y-self.f)**2)
        self.beta = 1/(self.losses*self.s).mean()
        
    def objective(self):
        """Compute the objective function."""
        self.update_losses()
        return (self.losses*self.s).sum()

num_basis=2 
data_limits=[1890, 2020]
basis = mlai.basis(mlai.polynomial, num_basis, data_limits=data_limits)
model = LML(x, y, basis=basis, lambd=1, beta=1)
model2 = mlai.LM(x, y, basis=basis)

model.fit()
model2.fit()

import matplotlib.pyplot as plt

x_test = np.linspace(data_limits[0], data_limits[1], 130)[:, None]
f_test, f_var = model.predict(x_test)
f2_test, f2_var = model2.predict(x_test)

import teaching_plots as plot
from matplotlib import rc, rcParams
rcParams.update({'font.size': 22})
rc('text', usetex=True)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
ax.plot(x_test, f2_test, linewidth=3, color='r')
ax.plot(x, y, 'g.', markersize=10)
ax.set_xlim(data_limits[0], data_limits[1])
ax.set_xlabel('year')
ax.set_ylabel('pace min/km')
_ = ax.set_ylim(2, 6)
mlai.write_figure('../slides/diagrams/ml/olympic-loss-linear-regression000.svg', transparent=True)
ax.plot(x_test, f_test, linewidth=3, color='b')
ax.plot(x, y, 'g.', markersize=10)
ax2 = ax.twinx()
ax2.bar(x.flatten(), model.s.flatten(), width=2, color='b')
ax2.set_ylim(0, 4)
ax2.set_yticks([0, 1, 2])
ax2.set_ylabel('$\langle s_i \\rangle$')
mlai.write_figure('../slides/diagrams/ml/olympic-loss-linear-regression001.svg', transparent=True)

import pods
pods.notebook.display_plots('olympic-loss-linear-regression{number:0>3}.svg', 
                            directory='../slides/diagrams/ml', number=(0, 1))

class BLML(mlai.BLM):
    """Bayesian Linear model with evolving loss
    :param X: input values
    :type X: numpy.ndarray
    :param y: target values
    :type y: numpy.ndarray
    :param basis: basis function 
    :param type: function
    :param beta: weight of the loss function
    :param type: float"""

    def __init__(self, X, y, basis=None, alpha=1.0, beta=1.0, lambd=1.0):
        "Initialise"
        if basis is None:
            basis = mlai.basis(mlai.polynomial, number=2)
        mlai.BLM.__init__(self, X, y, basis=basis, alpha=alpha, sigma2=1/beta)
        self.s = np.ones((self.num_data, 1))#np.random.rand(self.num_data, 1)>0.5       
        self.update_w()
        self.beta = beta
        self.name = 'BLML_'+basis.function.__name__
        self.objective_name = 'Weighted Sum of Square Training Error'
        self.lambd = lambd     

    def update_QR(self):
        "Perform the QR decomposition on the basis matrix."
        self.Q, self.R = np.linalg.qr(np.vstack([self.Phi*np.sqrt(self.s), np.sqrt(self.sigma2/self.alpha)*np.eye(self.basis.number)]))

    def fit(self):
        """Minimize the objective function with respect to the parameters"""
        for i in range(30):
            self.update_w()
            self.update_s()
        
    def update_w(self):
        self.update_QR()
        self.QTy = np.dot(self.Q[:self.y.shape[0], :].T, self.y*np.sqrt(self.s))
        self.mu_w = sp.linalg.solve_triangular(self.R, self.QTy)
        self.RTinv = sp.linalg.solve_triangular(self.R, np.eye(self.R.shape[0]), trans='T')
        self.C_w = np.dot(self.RTinv, self.RTinv.T)
        self.update_losses()

    def update_s(self):
        """Update the weights"""
        self.s = 1/(self.lambd + self.beta*self.losses)
                                                 
    def update_losses(self):
        """Compute the loss functions for each data point."""
        self.update_f()
        self.losses = ((self.y-self.f_bar)**2) + self.f_cov[:, np.newaxis]
        self.beta = 1/(self.losses*self.s).mean()
        self.sigma2=1/self.beta
        

 

model = BLML(x, y, basis=basis, alpha=1000, lambd=1, beta=1)
model2 = mlai.BLM(x, y, basis=basis, alpha=1000, sigma2=1)

model.fit()
model2.fit()

x_test = np.linspace(data_limits[0], data_limits[1], 130)[:, None]
f_test, f_var = model.predict(x_test)
f2_test, f2_var = model2.predict(x_test)

import gp_tutorial

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
from matplotlib import rc, rcParams
rcParams.update({'font.size': 22})
rc('text', usetex=True)
gp_tutorial.gpplot(x_test, f2_test, f2_test - 2*np.sqrt(f2_var), f2_test + 2*np.sqrt(f2_var), ax=ax, edgecol='r', fillcol='#CC3300')
ax.plot(x, y, 'g.', markersize=10)
ax.set_xlim(data_limits[0], data_limits[1])
ax.set_xlabel('year')
ax.set_ylabel('pace min/km')
_ = ax.set_ylim(2, 6)
mlai.write_figure('../slides/diagrams/ml/olympic-loss-bayes-linear-regression000.svg', transparent=True)
gp_tutorial.gpplot(x_test, f_test, f_test - 2*np.sqrt(f_var), f_test + 2*np.sqrt(f_var), ax=ax, edgecol='b', fillcol='#0033CC')
#ax.plot(x_test, f_test, linewidth=3, color='b')
ax.plot(x, y, 'g.', markersize=10)
ax2 = ax.twinx()
ax2.bar(x.flatten(), model.s.flatten(), width=2, color='b')
ax2.set_ylim(0, 0.2)
ax2.set_yticks([0, 0.1, 0.2])
ax2.set_ylabel('$\langle s_i \\rangle$')
mlai.write_figure('../slides/diagrams/ml/olympic-loss-bayes-linear-regression001.svg', transparent=True)

import pods
pods.notebook.display_plots('olympic-loss-bayes-linear-regression{number:0>3}.svg', 
                            directory='../slides/diagrams/ml', number=(0, 1))

class GPL(mlai.GP):
    def __init__(self, X, losses, kernel, beta=1.0, mu=0.0, X_star=None, v_star=None):
        # Bring together locations
        self.kernel = kernel
        self.K = self.kernel.K(X)
        self.mu = np.ones((X.shape[0],1))*mu
        self.beta = beta
        if X_star is not None:
            kstar = kernel.K(X, X_star)
            kstarstar = kernel.K(X_star, X_star)
            kstarstarInv = np.linalg.inv(kstarstar)
            kskssInv = np.dot(kstar, kstarstarInv)
            self.K -= np.dot(kskssInv,kstar.T)
            if v_star is not None:
                self.mu = kskssInv*(v_star-self.mu)+self.mu
                Xaug = np.vstack((X, X_star))
            else:
                raise ValueError("v_star should not be None when X_star is None")

class BLMLGP(BLML):
    def __init__(self, X, y, basis=None, kernel=None, beta=1.0, mu=0.0, alpha=1.0, X_star=None, v_star=None):
        BLML.__init__(self, X, y, basis=basis, alpha=alpha, beta=beta, lambd=None)
        self.gp_model=GPL(self.X, self.losses, kernel=kernel, beta=beta, mu=mu, X_star=X_star, v_star=v_star)
    def update_s(self):
        """Update the weights"""
        self.gp_model.C = sp.linalg.inv(sp.linalg.inv(self.gp_model.K+np.eye(self.X.shape[0])*1e-6) + self.beta*np.diag(self.losses.flatten()))
        self.gp_model.diagC = np.diag(self.gp_model.C)[:, np.newaxis]
        self.gp_model.f = self.gp_model.beta*np.dot(np.dot(self.gp_model.C,np.diag(self.losses.flatten())),self.gp_model.mu) +self.gp_model.mu
        
        #f, v = self.gp_model.K self.gp_model.predict(self.X)
        self.s = self.gp_model.f*self.gp_model.f + self.gp_model.diagC # + 1.0/(self.losses*self.gp_model.beta)

model = BLMLGP(x, y, 
           basis=basis, 
           kernel=mlai.kernel(mlai.eq_cov, lengthscale=20, variance=1.0),
           mu=0.0,
           beta=1.0, 
           alpha=1000,
           X_star=np.asarray([[2020]]), 
           v_star=np.asarray([[1]]))

model.fit()

f_test, f_var = model.predict(x_test)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
ax.cla()
from matplotlib import rc, rcParams
rcParams.update({'font.size': 22})
rc('text', usetex=True)
gp_tutorial.gpplot(x_test, f2_test, f2_test - 2*np.sqrt(f2_var), f2_test + 2*np.sqrt(f2_var), ax=ax, edgecol='r', fillcol='#CC3300')
ax.plot(x, y, 'g.', markersize=10)
ax.set_xlim(data_limits[0], data_limits[1])
ax.set_xlabel('year')
ax.set_ylabel('pace min/km')
_ = ax.set_ylim(2, 6)
mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression000.svg', transparent=True)
gp_tutorial.gpplot(x_test, f_test, f_test - 2*np.sqrt(f_var), f_test + 2*np.sqrt(f_var), ax=ax, edgecol='b', fillcol='#0033CC')
#ax.plot(x_test, f_test, linewidth=3, color='b')
ax.plot(x, y, 'g.', markersize=10)
ax2 = ax.twinx()
ax2.bar(x.flatten(), model.s.flatten(), width=2, color='b')
ax2.set_ylim(0, 3)
ax2.set_yticks([0, 0.5, 1])
ax2.set_ylabel('$\langle s_i \\rangle$')
mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression001.svg', transparent=True)

import pods

pods.notebook.display_plots('olympic-gp-loss-bayes-linear-regression{number:0>3}.svg', 
                            directory='../slides/diagrams/ml', number=(0, 1))

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
num_samps=10
samps=np.random.multivariate_normal(model.gp_model.f.flatten(), model.gp_model.C, size=100).T**2
ax.plot(x, samps, '-x', markersize=10, linewidth=2)
ax.set_xlim(data_limits[0], data_limits[1])
ax.set_xlabel('year')
_ = ax.set_ylabel('$s_i$')
mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-samples.svg', transparent=True)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
ax.plot(x, y, 'r.', markersize=10)
ax.set_xlim(data_limits[0], data_limits[1])
ax.set_ylim(2, 6)
ax.set_xlabel('year')
ax.set_ylabel('pace min/km')
gp_tutorial.gpplot(x_test, f_test, f_test - 2*np.sqrt(f_var), f_test + 2*np.sqrt(f_var), ax=ax, edgecol='b', fillcol='#0033CC')
mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression-and-samples000.svg', transparent=True)
allsamps = []
for i in range(samps.shape[1]):
    model.s = samps[:, i:i+1]
    model.update_w()
    f_bar, f_cov =model.predict(x_test, full_cov=True)
    f_samp = np.random.multivariate_normal(f_bar.flatten(), f_cov, size=10).T
    ax.plot(x_test, f_samp, linewidth=0.5, color='k')
    allsamps+=list(f_samp[-1, :])
mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression-and-samples001.svg', transparent=True)

import pods
pods.notebook.display_plots('olympic-gp-loss-bayes-linear-regression-and-samples{number:0>3}.svg', 
                            directory='../slides/diagrams/ml', number=(0, 1))

fig, ax = plt.subplots(figsize=plot.big_figsize)
ax.hist(np.asarray(allsamps), bins=30, density=True)
ax.set_xlabel='pace min/kim'
mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-histogram-2020.svg', transparent=True)