import numpy as np import matplotlib.pyplot as plt import pods import teaching_plots as plot import mlai data = pods.datasets.olympic_marathon_men() x = data['X'] y = data['Y'] offset = y.mean() scale = np.sqrt(y.var()) xlim = (1875,2030) ylim = (2.5, 6.5) yhat = (y-offset)/scale fig, ax = plt.subplots(figsize=plot.big_wide_figsize) _ = ax.plot(x, y, 'r.',markersize=10) ax.set_xlabel('year', fontsize=20) ax.set_ylabel('pace min/km', fontsize=20) ax.set_xlim(xlim) ax.set_ylim(ylim) mlai.write_figure(figure=fig, filename='../slides/diagrams/datasets/olympic-marathon.svg', transparent=True, frameon=True) import mlai import numpy as np import scipy as sp class LML(mlai.LM): """Linear model with evolving loss :param X: input values :type X: numpy.ndarray :param y: target values :type y: numpy.ndarray :param basis: basis function :param type: function :param beta: weight of the loss function :param type: float""" def __init__(self, X, y, basis=None, beta=1.0, lambd=1.0): "Initialise" if basis is None: basis = mlai.basis(mlai.polynomial, number=2) mlai.LM.__init__(self, X, y, basis) self.s = np.ones((self.num_data, 1))#np.random.rand(self.num_data, 1)>0.5 self.update_w() self.sigma2 = 1/beta self.beta = beta self.name = 'LML_'+basis.function.__name__ self.objective_name = 'Weighted Sum of Square Training Error' self.lambd = lambd def update_QR(self): "Perform the QR decomposition on the basis matrix." self.Q, self.R = np.linalg.qr(self.Phi*np.sqrt(self.s)) def fit(self): """Minimize the objective function with respect to the parameters""" for i in range(30): self.update_w() # In the linear regression clas self.update_s() def update_w(self): self.update_QR() self.w_star = sp.linalg.solve_triangular(self.R, np.dot(self.Q.T, self.y*np.sqrt(self.s))) self.update_losses() def predict(self, X): """Return the result of the prediction function.""" return np.dot(self.basis.Phi(X), self.w_star), None def update_s(self): """Update the weights""" self.s = 1/(self.lambd + self.beta*self.losses) def update_losses(self): """Compute the loss functions for each data point.""" self.update_f() self.losses = ((self.y-self.f)**2) self.beta = 1/(self.losses*self.s).mean() def objective(self): """Compute the objective function.""" self.update_losses() return (self.losses*self.s).sum() num_basis=2 data_limits=[1890, 2020] basis = mlai.basis(mlai.polynomial, num_basis, data_limits=data_limits) model = LML(x, y, basis=basis, lambd=1, beta=1) model2 = mlai.LM(x, y, basis=basis) model.fit() model2.fit() import matplotlib.pyplot as plt x_test = np.linspace(data_limits[0], data_limits[1], 130)[:, None] f_test, f_var = model.predict(x_test) f2_test, f2_var = model2.predict(x_test) import teaching_plots as plot from matplotlib import rc, rcParams rcParams.update({'font.size': 22}) rc('text', usetex=True) fig, ax = plt.subplots(figsize=plot.big_wide_figsize) ax.plot(x_test, f2_test, linewidth=3, color='r') ax.plot(x, y, 'g.', markersize=10) ax.set_xlim(data_limits[0], data_limits[1]) ax.set_xlabel('year') ax.set_ylabel('pace min/km') _ = ax.set_ylim(2, 6) mlai.write_figure('../slides/diagrams/ml/olympic-loss-linear-regression000.svg', transparent=True) ax.plot(x_test, f_test, linewidth=3, color='b') ax.plot(x, y, 'g.', markersize=10) ax2 = ax.twinx() ax2.bar(x.flatten(), model.s.flatten(), width=2, color='b') ax2.set_ylim(0, 4) ax2.set_yticks([0, 1, 2]) ax2.set_ylabel('$\langle s_i \\rangle$') mlai.write_figure('../slides/diagrams/ml/olympic-loss-linear-regression001.svg', transparent=True) import pods pods.notebook.display_plots('olympic-loss-linear-regression{number:0>3}.svg', directory='../slides/diagrams/ml', number=(0, 1)) class BLML(mlai.BLM): """Bayesian Linear model with evolving loss :param X: input values :type X: numpy.ndarray :param y: target values :type y: numpy.ndarray :param basis: basis function :param type: function :param beta: weight of the loss function :param type: float""" def __init__(self, X, y, basis=None, alpha=1.0, beta=1.0, lambd=1.0): "Initialise" if basis is None: basis = mlai.basis(mlai.polynomial, number=2) mlai.BLM.__init__(self, X, y, basis=basis, alpha=alpha, sigma2=1/beta) self.s = np.ones((self.num_data, 1))#np.random.rand(self.num_data, 1)>0.5 self.update_w() self.beta = beta self.name = 'BLML_'+basis.function.__name__ self.objective_name = 'Weighted Sum of Square Training Error' self.lambd = lambd def update_QR(self): "Perform the QR decomposition on the basis matrix." self.Q, self.R = np.linalg.qr(np.vstack([self.Phi*np.sqrt(self.s), np.sqrt(self.sigma2/self.alpha)*np.eye(self.basis.number)])) def fit(self): """Minimize the objective function with respect to the parameters""" for i in range(30): self.update_w() self.update_s() def update_w(self): self.update_QR() self.QTy = np.dot(self.Q[:self.y.shape[0], :].T, self.y*np.sqrt(self.s)) self.mu_w = sp.linalg.solve_triangular(self.R, self.QTy) self.RTinv = sp.linalg.solve_triangular(self.R, np.eye(self.R.shape[0]), trans='T') self.C_w = np.dot(self.RTinv, self.RTinv.T) self.update_losses() def update_s(self): """Update the weights""" self.s = 1/(self.lambd + self.beta*self.losses) def update_losses(self): """Compute the loss functions for each data point.""" self.update_f() self.losses = ((self.y-self.f_bar)**2) + self.f_cov[:, np.newaxis] self.beta = 1/(self.losses*self.s).mean() self.sigma2=1/self.beta model = BLML(x, y, basis=basis, alpha=1000, lambd=1, beta=1) model2 = mlai.BLM(x, y, basis=basis, alpha=1000, sigma2=1) model.fit() model2.fit() x_test = np.linspace(data_limits[0], data_limits[1], 130)[:, None] f_test, f_var = model.predict(x_test) f2_test, f2_var = model2.predict(x_test) import gp_tutorial fig, ax = plt.subplots(figsize=plot.big_wide_figsize) from matplotlib import rc, rcParams rcParams.update({'font.size': 22}) rc('text', usetex=True) gp_tutorial.gpplot(x_test, f2_test, f2_test - 2*np.sqrt(f2_var), f2_test + 2*np.sqrt(f2_var), ax=ax, edgecol='r', fillcol='#CC3300') ax.plot(x, y, 'g.', markersize=10) ax.set_xlim(data_limits[0], data_limits[1]) ax.set_xlabel('year') ax.set_ylabel('pace min/km') _ = ax.set_ylim(2, 6) mlai.write_figure('../slides/diagrams/ml/olympic-loss-bayes-linear-regression000.svg', transparent=True) gp_tutorial.gpplot(x_test, f_test, f_test - 2*np.sqrt(f_var), f_test + 2*np.sqrt(f_var), ax=ax, edgecol='b', fillcol='#0033CC') #ax.plot(x_test, f_test, linewidth=3, color='b') ax.plot(x, y, 'g.', markersize=10) ax2 = ax.twinx() ax2.bar(x.flatten(), model.s.flatten(), width=2, color='b') ax2.set_ylim(0, 0.2) ax2.set_yticks([0, 0.1, 0.2]) ax2.set_ylabel('$\langle s_i \\rangle$') mlai.write_figure('../slides/diagrams/ml/olympic-loss-bayes-linear-regression001.svg', transparent=True) import pods pods.notebook.display_plots('olympic-loss-bayes-linear-regression{number:0>3}.svg', directory='../slides/diagrams/ml', number=(0, 1)) class GPL(mlai.GP): def __init__(self, X, losses, kernel, beta=1.0, mu=0.0, X_star=None, v_star=None): # Bring together locations self.kernel = kernel self.K = self.kernel.K(X) self.mu = np.ones((X.shape[0],1))*mu self.beta = beta if X_star is not None: kstar = kernel.K(X, X_star) kstarstar = kernel.K(X_star, X_star) kstarstarInv = np.linalg.inv(kstarstar) kskssInv = np.dot(kstar, kstarstarInv) self.K -= np.dot(kskssInv,kstar.T) if v_star is not None: self.mu = kskssInv*(v_star-self.mu)+self.mu Xaug = np.vstack((X, X_star)) else: raise ValueError("v_star should not be None when X_star is None") class BLMLGP(BLML): def __init__(self, X, y, basis=None, kernel=None, beta=1.0, mu=0.0, alpha=1.0, X_star=None, v_star=None): BLML.__init__(self, X, y, basis=basis, alpha=alpha, beta=beta, lambd=None) self.gp_model=GPL(self.X, self.losses, kernel=kernel, beta=beta, mu=mu, X_star=X_star, v_star=v_star) def update_s(self): """Update the weights""" self.gp_model.C = sp.linalg.inv(sp.linalg.inv(self.gp_model.K+np.eye(self.X.shape[0])*1e-6) + self.beta*np.diag(self.losses.flatten())) self.gp_model.diagC = np.diag(self.gp_model.C)[:, np.newaxis] self.gp_model.f = self.gp_model.beta*np.dot(np.dot(self.gp_model.C,np.diag(self.losses.flatten())),self.gp_model.mu) +self.gp_model.mu #f, v = self.gp_model.K self.gp_model.predict(self.X) self.s = self.gp_model.f*self.gp_model.f + self.gp_model.diagC # + 1.0/(self.losses*self.gp_model.beta) model = BLMLGP(x, y, basis=basis, kernel=mlai.kernel(mlai.eq_cov, lengthscale=20, variance=1.0), mu=0.0, beta=1.0, alpha=1000, X_star=np.asarray([[2020]]), v_star=np.asarray([[1]])) model.fit() f_test, f_var = model.predict(x_test) fig, ax = plt.subplots(figsize=plot.big_wide_figsize) ax.cla() from matplotlib import rc, rcParams rcParams.update({'font.size': 22}) rc('text', usetex=True) gp_tutorial.gpplot(x_test, f2_test, f2_test - 2*np.sqrt(f2_var), f2_test + 2*np.sqrt(f2_var), ax=ax, edgecol='r', fillcol='#CC3300') ax.plot(x, y, 'g.', markersize=10) ax.set_xlim(data_limits[0], data_limits[1]) ax.set_xlabel('year') ax.set_ylabel('pace min/km') _ = ax.set_ylim(2, 6) mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression000.svg', transparent=True) gp_tutorial.gpplot(x_test, f_test, f_test - 2*np.sqrt(f_var), f_test + 2*np.sqrt(f_var), ax=ax, edgecol='b', fillcol='#0033CC') #ax.plot(x_test, f_test, linewidth=3, color='b') ax.plot(x, y, 'g.', markersize=10) ax2 = ax.twinx() ax2.bar(x.flatten(), model.s.flatten(), width=2, color='b') ax2.set_ylim(0, 3) ax2.set_yticks([0, 0.5, 1]) ax2.set_ylabel('$\langle s_i \\rangle$') mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression001.svg', transparent=True) import pods pods.notebook.display_plots('olympic-gp-loss-bayes-linear-regression{number:0>3}.svg', directory='../slides/diagrams/ml', number=(0, 1)) fig, ax = plt.subplots(figsize=plot.big_wide_figsize) num_samps=10 samps=np.random.multivariate_normal(model.gp_model.f.flatten(), model.gp_model.C, size=100).T**2 ax.plot(x, samps, '-x', markersize=10, linewidth=2) ax.set_xlim(data_limits[0], data_limits[1]) ax.set_xlabel('year') _ = ax.set_ylabel('$s_i$') mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-samples.svg', transparent=True) fig, ax = plt.subplots(figsize=plot.big_wide_figsize) ax.plot(x, y, 'r.', markersize=10) ax.set_xlim(data_limits[0], data_limits[1]) ax.set_ylim(2, 6) ax.set_xlabel('year') ax.set_ylabel('pace min/km') gp_tutorial.gpplot(x_test, f_test, f_test - 2*np.sqrt(f_var), f_test + 2*np.sqrt(f_var), ax=ax, edgecol='b', fillcol='#0033CC') mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression-and-samples000.svg', transparent=True) allsamps = [] for i in range(samps.shape[1]): model.s = samps[:, i:i+1] model.update_w() f_bar, f_cov =model.predict(x_test, full_cov=True) f_samp = np.random.multivariate_normal(f_bar.flatten(), f_cov, size=10).T ax.plot(x_test, f_samp, linewidth=0.5, color='k') allsamps+=list(f_samp[-1, :]) mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-bayes-linear-regression-and-samples001.svg', transparent=True) import pods pods.notebook.display_plots('olympic-gp-loss-bayes-linear-regression-and-samples{number:0>3}.svg', directory='../slides/diagrams/ml', number=(0, 1)) fig, ax = plt.subplots(figsize=plot.big_figsize) ax.hist(np.asarray(allsamps), bins=30, density=True) ax.set_xlabel='pace min/kim' mlai.write_figure('../slides/diagrams/ml/olympic-gp-loss-histogram-2020.svg', transparent=True)