# Notebook export: model validation and bias-variance illustrations.
# Imports are kept next to the cell that originally introduced them.

from IPython.lib.display import YouTubeVideo
YouTubeVideo('ncwsr1Of6Cw')

import numpy as np
import pods

# Olympic men's marathon data: inputs under 'X', targets under 'Y'.
data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

# Mean and standard deviation of the targets, used to standardise them below.
offset = y.mean()
scale = np.sqrt(y.var())

import matplotlib.pyplot as plt
import teaching_plots as plot
import mlai

xlim = (1875, 2030)
ylim = (2.5, 6.5)
yhat = (y - offset) / scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
ax.set_xlabel('year', fontsize=20)
ax.set_ylabel('pace min/km', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/datasets/olympic-marathon.svg',
                  transparent=True,
                  frameon=True)

import numpy as np
from matplotlib import pyplot as plt
import mlai
import pods

basis = mlai.polynomial

data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

xlim = [1892, 2020]

basis = mlai.Basis(mlai.polynomial, number=1, data_limits=xlim)

from ipywidgets import IntSlider
pods.notebook.display_plots('olympic_LM_polynomial_number{num_basis:0>3}.svg',
                            directory='../slides/diagrams/ml',
                            num_basis=IntSlider(1, 1, 27, 1))

import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('pinball{sample:0>3}.svg',
                            '../slides/diagrams',
                            sample=IntSlider(1, 1, 2, 1))

# NOTE(review): `max_basis` is not defined anywhere in the visible part of
# this script; it must be set before the next two calls run -- TODO confirm
# where it comes from (presumably an earlier notebook cell).
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('olympic_val_extra_LM_polynomial_number{num_basis:0>3}.svg',
                            directory='../slides/diagrams/ml',
                            num_basis=IntSlider(1, 1, max_basis, 1))

import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('olympic_val_inter_LM_polynomial_number{num_basis:0>3}.svg',
                            directory='../slides/diagrams/ml',
                            num_basis=IntSlider(1, 1, max_basis, 1))

# select indices of data to 'hold out'
indices_hold_out = np.flatnonzero(x > 1980)

# Create a training set
x_train = np.delete(x, indices_hold_out, axis=0)
y_train = np.delete(y, indices_hold_out, axis=0)

# Create a hold out set
x_valid = np.take(x, indices_hold_out, axis=0)
y_valid = np.take(y, indices_hold_out, axis=0)

# Write code for your answer to Question 3 in this box
# provide the answers so that the code runs correctly otherwise you will lose marks!


def polynomial(x, degree, loc, scale):
    """Evaluate the shifted and scaled input raised to powers 0..degree.

    :param x: input values; must broadcast against a vector of length
        ``degree + 1`` (presumably a column of shape (n, 1) -- TODO confirm
        against the caller)
    :param degree: highest power to include
    :param loc: value subtracted from ``x`` before scaling
    :param scale: value the shifted ``x`` is divided by
    :returns: ``((x - loc) / scale) ** [0, 1, ..., degree]``
    """
    degrees = np.arange(degree + 1)
    return ((x - loc) / scale) ** degrees


# Write code for your answer to Question 4 in this box
# provide the answers so that the code runs correctly otherwise you will lose marks!

import numpy as np


def create_data(per_cluster=30):
    """Create a randomly sampled two-class data set.

    Three Gaussian clusters are drawn for the positive class (label 1) and
    three for the negative class (label 0). Every cluster uses the same
    isotropic covariance with variance 1/9.

    :param per_cluster: number of points in each cluster
    :returns: tuple ``(X, y)`` where ``X`` stacks all samples row-wise
        (``6 * per_cluster`` rows, 2 columns) and ``y`` is the flat vector
        of matching 0/1 labels
    """
    X = []
    y = []
    scale = 3
    # Despite the name, this value is used as the variance on the diagonal
    # of the covariance matrices below.
    prec = 1 / (scale * scale)
    pos_mean = [[-1, 0], [0, 0.5], [1, 0]]
    pos_cov = [[prec, 0.], [0., prec]]
    neg_mean = [[0, -0.5], [0, -0.5], [0, -0.5]]
    neg_cov = [[prec, 0.], [0., prec]]
    # The original body read a global named `per_class` here and ignored the
    # `per_cluster` argument; the argument is now what sets the sample size.
    for mean in pos_mean:
        X.append(np.random.multivariate_normal(mean=mean, cov=pos_cov,
                                               size=per_cluster))
        y.append(np.ones((per_cluster, 1)))
    for mean in neg_mean:
        X.append(np.random.multivariate_normal(mean=mean, cov=neg_cov,
                                               size=per_cluster))
        y.append(np.zeros((per_cluster, 1)))
    return np.vstack(X), np.vstack(y).flatten()


def plot_contours(ax, cl, xx, yy, **params):
    """Plot the decision boundaries for a classifier.

    Draws the -1, 0 and +1 level lines of the decision function and fills
    the regions on either side of the zero level.

    :param ax: matplotlib axes object
    :param cl: a classifier exposing ``decision_function``
    :param xx: meshgrid ndarray
    :param yy: meshgrid ndarray
    :param params: accepted for call compatibility but currently ignored;
        the fill colours are fixed below
    :returns: the object returned by ``ax.contourf``
    """
    Z = cl.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot decision boundary and regions
    ax.contour(xx, yy, Z,
               levels=[-1., 0., 1],
               colors='black',
               linestyles=['dashed', 'solid', 'dashed'])
    out = ax.contourf(xx, yy, Z,
                      levels=[Z.min(), 0, Z.max()],
                      colors=[[0.5, 1.0, 0.5], [1.0, 0.5, 0.5]])
    return out


import mlai
import os


def decision_boundary_plot(models, X, y, axs, filename, titles, xlim, ylim):
    """Plot a decision boundary on the given axes and save the figure.

    :param models: the fitted SVM models to plot, one per axis
    :param X: input training data (two columns are used)
    :param y: target training data, with labels 1 and 0
    :param axs: array of axes to plot on
    :param filename: path the figure is written to
    :param titles: the titles for each axis
    :param xlim: horizontal plot limits, or None to derive them from ``X``
    :param ylim: vertical plot limits, or None to derive them from ``X``
    :returns: the ``(xlim, ylim)`` actually used, so that later calls can
        reuse the same limits
    """
    flat_axes = axs.flatten()
    for ax in flat_axes:
        ax.clear()
    X0, X1 = X[:, 0], X[:, 1]
    if xlim is None:
        xlim = [X0.min() - 1, X0.max() + 1]
    if ylim is None:
        ylim = [X1.min() - 1, X1.max() + 1]
    xx, yy = np.meshgrid(np.arange(xlim[0], xlim[1], 0.02),
                         np.arange(ylim[0], ylim[1], 0.02))
    for cl, title, ax in zip(models, titles, flat_axes):
        plot_contours(ax, cl, xx, yy,
                      cmap=plt.cm.coolwarm, alpha=0.8)
        ax.plot(X0[y == 1], X1[y == 1], 'r.', markersize=10)
        ax.plot(X0[y == 0], X1[y == 0], 'g.', markersize=10)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(title)
    # Save the figure that owns the supplied axes rather than relying on a
    # module-level `fig` happening to exist and to be the right one.
    fig = flat_axes[0].figure
    mlai.write_figure(filename,
                      figure=fig,
                      transparent=True)
    return xlim, ylim


import matplotlib
font = {'family' : 'sans',
        'weight' : 'bold',
        'size'   : 22}

matplotlib.rc('font', **font)
import matplotlib.pyplot as plt

# Create an instance of SVM and fit the data.
C = 100.0  # SVM regularization parameter
gammas = [0.001, 0.01, 0.1, 1]

per_class = 30
num_samps = 20
# Set up a 1x4 grid for plotting, one panel per gamma value.
fig, ax = plt.subplots(1, 4, figsize=(10, 3))
xlim = None
ylim = None
# NOTE(review): `svm` is not imported in the visible part of this script;
# it is presumably scikit-learn's `sklearn.svm` -- TODO confirm and import.
for samp in range(num_samps):
    X, y = create_data(per_class)
    models = []
    titles = []
    for gamma in gammas:
        models.append(svm.SVC(kernel='rbf', gamma=gamma, C=C))
        # Raw string: '\g' is not a valid escape sequence in a normal literal.
        titles.append(r'$\gamma={}$'.format(gamma))
    # Fit eagerly into a list so the models can be iterated more than once.
    models = [cl.fit(X, y) for cl in models]
    # The limits from the first sample are reused for all later samples.
    xlim, ylim = decision_boundary_plot(models, X, y,
                                        axs=ax,
                                        filename='../slides/diagrams/ml/bias-variance{samp:0>3}.svg'.format(samp=samp),
                                        titles=titles,
                                        xlim=xlim,
                                        ylim=ylim)

import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('bias-variance{samp:0>3}.svg',
                            directory='../slides/diagrams/ml',
                            samp=IntSlider(0, 0, 10, 1))

from IPython.lib.display import YouTubeVideo
YouTubeVideo('py8QrZPT48s')

import mlai
import pods

import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('olympic_BLM_polynomial_number{num_basis:0>3}.svg',
                            directory='../slides/diagrams/ml/',
                            num_basis=IntSlider(1, 1, 27, 1))

import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('olympic_val_BLM_polynomial_number{num_basis:0>3}.svg',
                            directory='../slides/diagrams/ml',
                            num_basis=IntSlider(1, 1, 27, 1))

import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('olympic_5cv{part:0>2}_BLM_polynomial_number{num_basis:0>3}.svg',
                            directory='../slides/diagrams/ml',
                            part=(0, 5),
                            num_basis=IntSlider(1, 1, 27, 1))

from IPython.lib.display import YouTubeVideo
YouTubeVideo('mfqmoUN-Cuw')

from IPython.lib.display import YouTubeVideo
YouTubeVideo('OcoE7JlbXvY')