%matplotlib inline
import numpy as np
import pods
import pylab as plt
import GPy
from IPython.display import display

data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

k = GPy.kern.RBF(1)
model = GPy.models.GPRegression(x, y, k)
display(model)
model.plot()

model.rbf.lengthscale = 10.
model.plot()

model.constrain_positive('.*')  # constrain all parameters matching '.*' (i.e. everything) to be positive
model.optimize(optimizer='scg')
model.plot()
display(model)

# Exercise 5 a) answer
kern = GPy.kern.RBF(1, lengthscale=80) + GPy.kern.Matern52(1, lengthscale=10) + GPy.kern.Bias(1)
model = GPy.models.GPRegression(x, y, kern)
model.optimize()
model.plot()

# Exercise 5 d) answer
model.log_likelihood()
display(model)

# Exercise 2 answer
def contour_objective(x, y, log_length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
    '''Helper function to contour an objective function in a set-up where there
    is a kernel for signal corrupted by noise.'''
    lls = []
    num_data = y.shape[0]
    kernel = kernel_call(1, variance=1., lengthscale=1.)
    model = GPy.models.GPRegression(x, y, kernel=kernel)
    y = y - y.mean()
    for log_SNR in log_SNRs:
        SNR = 10.**log_SNR
        length_scale_lls = []
        for log_length_scale in log_length_scales:
            model['.*lengthscale'] = 10.**log_length_scale
            model.kern['.*variance'] = SNR
            # Estimate the total variance of the data under the current kernel
            # shape, then split it between signal and noise according to the SNR.
            Kinv = GPy.util.linalg.pdinv(model.kern.K(x) + np.eye(num_data))[0]
            total_var = 1./num_data*np.dot(np.dot(y.T, Kinv), y)
            noise_var = total_var
            signal_var = SNR*total_var
            model.kern['.*variance'] = signal_var
            model.Gaussian_noise = noise_var
            length_scale_lls.append(model.log_likelihood())
            print(SNR, 10.**log_length_scale)
            display(model)
        lls.append(length_scale_lls)
    return np.array(lls)

data = pods.datasets.della_gatta_TRP63_gene_expression(gene_number=937)
x = data['X']
y = data['Y']
y = y - y.mean()

kern = GPy.kern.RBF(input_dim=1)
model = GPy.models.GPRegression(x, y, kern)

resolution = 2  # kept small for speed; increase for a finer contour grid
log_lengthscales = np.linspace(1, 3.5, resolution)
log_SNRs = np.linspace(-2.5, 1., resolution)
lls = contour_objective(x, y, log_lengthscales, log_SNRs)
display(model)

plt.contour(log_lengthscales, log_SNRs, lls, 20, cmap=plt.cm.jet)
plt.plot(x, y - y.mean())
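# --- Added sketch, not part of the original lab code ---
# The contour above typically reveals two local optima of the marginal
# likelihood for this gene-expression time series: a long-lengthscale/low-SNR
# optimum that explains the data as noise, and a shorter-lengthscale optimum
# that treats it as signal. One way to see both is to re-optimize from
# initializations on either side of the saddle; the starting lengthscales
# below are illustrative assumptions, not values from the lab sheet.
for init_lengthscale in [10., 1000.]:
    model.kern.lengthscale = init_lengthscale
    model.optimize()
    print('initial lengthscale:', init_lengthscale)
    print('optimized log likelihood:', model.log_likelihood())
    display(model)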
model.kern.lengthscale = 30.
model.kern.variance = 0.5
model.Gaussian_noise = 0.01
model.kern.lengthscale.set_prior(GPy.priors.InverseGamma.from_EV(30., 100.))
model.kern.variance.set_prior(GPy.priors.InverseGamma.from_EV(0.5, 1.))  # alternative: GPy.priors.Gamma.from_EV(1., 10.)
model.Gaussian_noise.set_prior(GPy.priors.InverseGamma.from_EV(0.01, 1.))
_ = model.plot()

hmc = GPy.inference.mcmc.HMC(model, stepsize=5e-2)
s = hmc.sample(num_samples=1000)
plt.plot(s)

labels = ['kern variance', 'kern lengthscale', 'noise variance']
samples = s[300:]  # cut out the burn-in period

# Kernel density estimates of the marginal posteriors
from scipy import stats
xmin = samples.min()
xmax = samples.max()
xs = np.linspace(xmin, xmax, 100)
for i in range(samples.shape[1]-1):
    kernel = stats.gaussian_kde(samples[:, i])
    plt.plot(xs, kernel(xs), label=labels[i])
_ = plt.legend()

# Pairwise scatter plots of the posterior samples
fig = plt.figure(figsize=(14, 4))
ax = fig.add_subplot(131)
_ = ax.plot(samples[:, 0], samples[:, 1], '.')
ax.set_xlabel(labels[0]); ax.set_ylabel(labels[1])
ax = fig.add_subplot(132)
_ = ax.plot(samples[:, 1], samples[:, 2], '.')
ax.set_xlabel(labels[1]); ax.set_ylabel(labels[2])
ax = fig.add_subplot(133)
_ = ax.plot(samples[:, 0], samples[:, 2], '.')
ax.set_xlabel(labels[0]); ax.set_ylabel(labels[2])

# Definition of the Branin test function
def branin(X):
    y = (X[:, 1] - 5.1/(4*np.pi**2)*X[:, 0]**2 + 5*X[:, 0]/np.pi - 6)**2
    y += 10*(1 - 1/(8*np.pi))*np.cos(X[:, 0]) + 10
    return y

# Training set defined as a 5x5 grid:
xg1 = np.linspace(-5, 10, 5)
xg2 = np.linspace(0, 15, 5)
X = np.zeros((xg1.size * xg2.size, 2))
for i, x1 in enumerate(xg1):
    for j, x2 in enumerate(xg2):
        X[i + xg1.size*j, :] = [x1, x2]
Y = branin(X)[:, None]

# Fit a GP
# Create an exponentiated quadratic plus bias covariance function
kg = GPy.kern.RBF(input_dim=2, ARD=True)
kb = GPy.kern.Bias(input_dim=2)
k = kg + kb

# Build a GP model
model = GPy.models.GPRegression(X, Y, k)

# Fix the noise variance to something low
model.Gaussian_noise.variance = 1e-5
model.Gaussian_noise.variance.constrain_fixed()
display(model)

# Optimize the model
model.optimize()

# Plot the resulting approximation to Branin.
# Here you get a two-dimensional plot because the function has two inputs.
model.plot()
display(model.kern.rbf.lengthscale)

# Compute the mean of the model prediction on 1e5 Monte Carlo samples
Xp = np.random.uniform(size=(100000, 2))
Xp[:, 0] = Xp[:, 0]*15 - 5
Xp[:, 1] = Xp[:, 1]*15
Yp = model.predict(Xp)[0]
np.mean(Yp)

# Exercise 6 a) answer

# Exercise 6 b) answer

# Exercise 7 a) answer

# Exercise 7 b) answer

# Exercise 7 c) answer

# Exercise 8 a) answer

# Exercise 8 b) answer
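# --- Added sketch, not part of the original lab code ---
# A quick sanity check on the Branin emulator built above: compare the
# GP-based estimate of the mean of the Branin function against a direct
# Monte Carlo estimate that evaluates the true function at the same uniform
# samples. If the GP is a good surrogate, the two numbers should be close.
Yp_true = branin(Xp)
print('GP estimate of the mean:', np.mean(Yp))
print('Monte Carlo estimate using the true function:', np.mean(Yp_true))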