#!/usr/bin/env python # coding: utf-8 # In[81]: get_ipython().run_line_magic('matplotlib', 'inline') from pymc.gp import * import numpy as np import matplotlib.pylab as plt # ## Mean function # # The mean function of a GP can be interpreted as a "prior guess" at the form of the true function. # In[82]: # Generate mean def quadfun(x, a, b, c): return (a*x**2 + b*x + c) M = Mean(quadfun, a=1., b=0.5, c=2.) # In[83]: x = np.arange(-1,1,0.1) plt.plot(x, M(x), 'k-') # ## Covariance function # # The behavior of individual realizations from the GP is governed by the covariance function. The Matèrn class of functions is a flexible choice. # In[97]: from pymc.gp.cov_funs import matern import numpy as np C = Covariance(eval_fun=matern.euclidean, diff_degree=1.4, amp=0.4, scale=1, rank_limit=1000) plt.subplot(1,2,2) plt.contourf(x, x, C(x,x).view(np.ndarray), origin='lower', extent=(-1,1,-1,1), cmap=plt.cm.bone) plt.colorbar() plt.subplot(1,2,1) plt.plot(x, C(x,0).view(np.ndarray), 'k-') plt.ylabel('C(x,0)') # In[98]: # Returns the diagnonal C(x) # In[99]: np.diag(C(x,x)) # ## Drawing realizations from a GP # In[103]: # Generate realizations f_list = [Realization(M, C) for i in range(3)] # Plot mean and covariance x = np.arange(-1,1,0.01) plot_envelope(M, C, x) # Add realizations for f in f_list: plt.plot(x, f(x)) # ## Non-parametric Regression # Under a GP prior for an unknown function `f`, when the observation error is normally distributed, the posterior us another GP with new mean and covariance functions. # In[110]: M = Mean(quadfun, a=1., b=0.5, c=2.) C = Covariance(eval_fun=matern.euclidean, diff_degree=1.4, amp=0.4, scale=1, rank_limit=1000) obs_x = np.array([-.5, .5]) V = np.array([0.002, 0.002]) data = np.array([3.1, 2.9]) observe(M=M, C=C, obs_mesh=obs_x, obs_V=V, obs_vals=data) # Generate realizations from posterior f_list = [Realization(M,C) for i in range(3)] # The function `observe` informs the mean and covariance functions that values on `obs_mesh` with observation variance `V`. Making observations with no error is called `conditioning`. This is useful when, for example, forcing a rate function to be zero when a population's size is zero. # In[111]: plot_envelope(M, C, mesh=x) for f in f_list: plt.plot(x, f(x)) # ## Salmon Example # In[112]: class salmon: """ Reads and organizes data from csv files, acts as a container for mean and covariance objects, makes plots. """ def __init__(self, name, data): # Read in data self.name = name self.abundance = data[:,0].ravel() self.frye = data[:,1].ravel() # Specify priors # Function for prior mean def line(x, slope): return slope * x self.M = Mean(line, slope = np.mean(self.frye / self.abundance)) self.C = Covariance( matern.euclidean, diff_degree = 1.4, scale = 100. * self.abundance.max(), amp = 200. * self.frye.max()) observe(self.M,self.C,obs_mesh = 0, obs_vals = 0, obs_V = 0) self.xplot = np.linspace(0,1.25 * self.abundance.max(),100) def plot(self): """ Plot posterior from simple nonstochetric regression. """ plt.figure() plot_envelope(self.M, self.C, self.xplot) for i in range(3): f = Realization(self.M, self.C) plt.plot(self.xplot,f(self.xplot)) plt.plot(self.abundance, self.frye, 'k.', markersize=4) plt.xlabel('Female abundance') plt.ylabel('Frye density') plt.title(self.name) plt.axis('tight') # In[113]: sockeye_data = np.reshape([2986,9, 3424,12.39, 1631,4.5, 784,2.56, 9671,32.62, 2519,8.19, 1520,4.51, 6418,15.21, 10857,35.05, 15044,36.85, 10287,25.68, 16525,52.75, 19172,19.52, 17527,40.98, 11424,26.67, 24043,52.6, 10244,21.62, 30983,56.05, 12037,29.31, 25098,45.4, 11362,18.88, 24375,19.14, 18281,33.77, 14192,20.44, 7527,21.66, 6061,18.22, 15536,42.9, 18080,46.09, 17354,38.82, 17301,42.22, 11486,21.96, 20120,45.05, 10700,13.7, 12867,27.71,], (34,2)) # In[114]: # Instantiate salmon object with sockeye abundance sockeye = salmon('sockeye', sockeye_data) # Observe some data observe(sockeye.M, sockeye.C, obs_mesh = sockeye.abundance, obs_vals = sockeye.frye, obs_V = .25*sockeye.frye) # In[115]: sockeye.plot() # ## Multidimensional Function Approximation # # A non-linear function to approximate: # In[116]: def sinxfun(x=0,y=0): ''' for each (x,y) point, it returns d*sin(d), where d is the distance from the origin.''' d = np.sqrt(x**2+y**2) return d*np.sin(d) # In[117]: x_mesh = np.arange(-10, 10, 0.1) y_mesh = np.arange(-10, 10, 0.1) xx, yy = np.meshgrid(x_mesh, y_mesh) z_mesh = sinxfun(xx,yy) # In[118]: from mpl_toolkits.mplot3d import Axes3D fig = plt.figure() ax = fig.gca(projection='3d') ax.plot_surface(xx, yy, z_mesh, cmap=plt.cm.Blues, vmin=-15, vmax=15) # We define our GP using a mean function and a covariance function. # In[119]: #assumed mean of gassian process = 0 def meanfun(xy): return np.zeros((len(xy),1)) M = Mean(meanfun) #covariance is what was used in the example above C = Covariance(eval_fun = matern.euclidean, diff_degree = 10.4, amp = 10.4, scale = 10.) # Let's draw sample functions from the prior # In[120]: xy = np.c_[xx.ravel(), yy.ravel()] xy # In[121]: r = Realization(M, C) fig = plt.figure() ax = fig.gca(projection='3d') ax.plot_surface(xx, yy, np.reshape(r(xy), xx.shape), alpha=.3, cmap=plt.cm.Blues, vmin=-15,vmax=15) ax.set_zlim(-15,15) # In[122]: r = Realization(M, C) fig = plt.figure() ax = fig.gca(projection='3d') ax.plot_surface(xx, yy, np.reshape(r(xy), xx.shape), alpha=.3, cmap=plt.cm.Blues, vmin=-15,vmax=15) ax.set_zlim(-15,15) # Now suppose we observe some data (5 points) # In[123]: # Observational variances V = np.array([.002,.002]) xobs = np.random.uniform(-10,10, (2,5)) yobs = sinxfun(*xobs) yobs # Update prior mean and covariance functions # In[124]: observe(M, C, obs_mesh=xobs.T, obs_V=V, obs_vals=yobs) r = Realization(M,C) # In[125]: fig = plt.figure() ax = fig.gca(projection='3d') ax.plot_surface(xx, yy, np.reshape(r(xy),xx.shape), alpha=.3, cmap=plt.cm.Blues, vmin=-15,vmax=15) ax.set_zlim(-15,15) ax.plot(xobs[0], xobs[1], yobs, '.r', markersize=10) # Now observe 500 points. # In[126]: xobs = np.random.uniform(-10,10, (2,500)) yobs = sinxfun(*xobs) # In[127]: observe(M, C, obs_mesh=xobs.T, obs_V=V, obs_vals=yobs) r = Realization(M,C) # In[128]: fig = plt.figure() ax = fig.gca(projection='3d') ax.plot_surface(xx, yy, np.reshape(r(xy),xx.shape), alpha=.3, cmap=plt.cm.Blues, vmin=-15,vmax=15) ax.set_zlim(-15,15) # In[131]: M.reg_mat # In[ ]: