%pylab inline import scipy.stats as stats figsize(12.5,4) import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D fig = plt.figure() x = y = np.linspace(0,5, 100 ) X, Y = np.meshgrid(x, y) subplot(121) uni_x = stats.uniform.pdf( x,loc=0, scale = 5) uni_y = stats.uniform.pdf(x, loc=0, scale = 5) M = np.dot( uni_x[:,None], uni_y[None,:] ) im = plt.imshow(M, interpolation='none', origin='lower', cmap=cm.jet, vmax=1, vmin = -.15, extent=(0,5,0,5)) plt.xlim( 0, 5) plt.ylim( 0, 5) plt.title( "Landscape formed by Uniform priors." ) ax = fig.add_subplot(122, projection='3d') ax.plot_surface(X, Y, M, cmap=cm.jet, vmax=1, vmin = -.15 ) ax.view_init( azim = 390) plt.title( "Uniform prior landscape; alternate view"); figsize( 12.5,5 ) fig = plt.figure() subplot(121) exp_x = stats.expon.pdf( x, scale = 3) exp_y = stats.expon.pdf(x, scale = 10) M = np.dot( exp_x[:,None], exp_y[None,:] ) CS = plt.contour( X, Y, M ) im = plt.imshow(M, interpolation='none', origin='lower', cmap=cm.jet, extent=(0,5,0,5)) #plt.xlabel( "prior on $p_1$") #plt.ylabel( "prior on $p_2$") plt.title( "$Exp(3), Exp(10)$ prior landscape" ) ax = fig.add_subplot(122, projection='3d') ax.plot_surface(X, Y, M, cmap=cm.jet ) ax.view_init( azim = 390) plt.title( "$Exp(3), Exp(10)$ prior landscape; \nalternate view") ### create the observed data #sample size of data we observe N = 1 #the true parameters, but of course we do not see these values... lambda_1_true = 1 lambda_2_true = 3 #...we see the data generated, dependent on the above two values. data = np.concatenate( [ stats.poisson.rvs( lambda_1_true, size = (N,1)), stats.poisson.rvs( lambda_2_true, size = (N,1)) ], axis=1 ) print "observed (2-dimensional,sample size = %d):"%N, data #plotting details. 
# --- Likelihood-warped landscapes ---
# Compute the Poisson likelihood of the observed 2-D data on a grid and
# show how it "warps" the Uniform and Exponential prior landscapes,
# marking the true (lambda_1, lambda_2) with a black dot.

# Start at .01 (not 0): the Poisson pmf is undefined at rate 0.
x = y = np.linspace(.01, 5, 100)
# Per-axis likelihoods; .prod(axis=1) multiplies over the N observations.
likelihood_x = np.array([stats.poisson.pmf(data[:, 0], _x)
                         for _x in x]).prod(axis=1)
likelihood_y = np.array([stats.poisson.pmf(data[:, 1], _y)
                         for _y in y]).prod(axis=1)
# Outer product -> joint likelihood surface L on the grid.
L = np.dot(likelihood_x[:, None], likelihood_y[None, :])

figsize(12.5, 12)

# Top-left: Uniform prior alone.
subplot(221)
uni_x = stats.uniform.pdf(x, loc=0, scale=5)
uni_y = stats.uniform.pdf(x, loc=0, scale=5)
M = np.dot(uni_x[:, None], uni_y[None, :])
im = plt.imshow(M, interpolation='none', origin='lower',
                cmap=cm.jet, vmax=1, vmin=-.15, extent=(0, 5, 0, 5))
plt.scatter(lambda_1_true, lambda_2_true, c="k", s=50)
plt.xlim(0, 5)
plt.ylim(0, 5)
plt.title("Landscape formed by Uniform priors on $p_1, p_2$.")

# Bottom-left: Uniform prior x likelihood (the warped landscape).
# NOTE(review): X, Y come from the earlier linspace(0, 5) grid while M and
# L were computed on this .01-shifted grid — shapes match, coordinates are
# off by at most .01; presumably tolerated for display. Verify if reused.
subplot(223)
plt.contour(X, Y, M * L)
im = plt.imshow(M * L, interpolation='none', origin='lower',
                cmap=cm.jet, extent=(0, 5, 0, 5))
plt.title("Landscape warped by data observations;\n Uniform priors on $p_1, p_2$.")
plt.scatter(lambda_1_true, lambda_2_true, c="k", s=50)
plt.xlim(0, 5)
plt.ylim(0, 5)

# Top-right: Exponential prior alone.
subplot(222)
exp_x = stats.expon.pdf(x, loc=0, scale=3)
exp_y = stats.expon.pdf(x, loc=0, scale=10)
M = np.dot(exp_x[:, None], exp_y[None, :])
plt.contour(X, Y, M)
im = plt.imshow(M, interpolation='none', origin='lower',
                cmap=cm.jet, extent=(0, 5, 0, 5))
plt.scatter(lambda_1_true, lambda_2_true, c="k", s=50)
plt.xlim(0, 5)
plt.ylim(0, 5)
plt.title("Landscape formed by Exponential priors on $p_1, p_2$.")

# Bottom-right: Exponential prior x likelihood.
subplot(224)
plt.contour(X, Y, M * L)
im = plt.imshow(M * L, interpolation='none', origin='lower',
                cmap=cm.jet, extent=(0, 5, 0, 5))
plt.scatter(lambda_1_true, lambda_2_true, c="k", s=50)
plt.title("Landscape warped by data observations;\n Exponential priors on \
$p_1, p_2$.")
plt.xlim(0, 5)
plt.ylim(0, 5);

# --- Load the real mixture dataset used for the rest of the analysis. ---
figsize(12.5, 4)
data = np.loadtxt("data/mixture_data.csv", delimiter=",")
hist(data, bins=20, color="k")
plt.title("Histogram of the dataset")
# FIX: Python 3 print() — original used Python 2 print-statement syntax.
print(data[:10], "...")
# --- Two-component Gaussian mixture model in PyMC 2 ---
# p               : prior probability a point belongs to cluster 0.
# assignment      : latent cluster label (0 or 1) per data point.
# stds / taus     : per-cluster standard deviations -> precisions.
# centers         : per-cluster means (weak Normal priors near 120 / 190).
import pymc as mc

p = mc.Uniform("p", 0, 1)
assignment = mc.Categorical("assignment", [p, 1 - p], size=data.shape[0])
# FIX: Python 3 print() — original used Python 2 print statements.
print("prior assignment, with p = %.2f:" % p.value)
print(assignment.value[:10], "...")

# PyMC Normals are parameterized by precision tau = 1 / sigma^2.
taus = 1.0 / mc.Uniform("stds", 0, 100, size=2) ** 2
centers = mc.Normal("centers", [120, 190], [0.01, 0.01], size=2)

# The deterministic functions below map an assignment (0 or 1) to the
# corresponding entry of the size-2 arrays `taus` and `centers`.

@mc.deterministic
def center_i(assignment=assignment, centers=centers):
    # Fancy-index: one center per data point, chosen by its label.
    return centers[assignment]

@mc.deterministic
def tau_i(assignment=assignment, taus=taus):
    # One precision per data point, chosen by its label.
    return taus[assignment]

print("Random assignments: ", assignment.value[:4], "...")
print("Assigned center: ", center_i.value[:4], "...")
print("Assigned precision: ", tau_i.value[:4], "...")

# and to combine it with the observations:
observations = mc.Normal("obs", center_i, tau_i, value=data, observed=True)

# below we create a model class
# BUG FIX: `observations` must be listed too. mc.Model only walks *parents*
# of the supplied nodes; an observed node is a child, so omitting it drops
# the likelihood and the sampler would explore the priors only.
model = mc.Model([p, assignment, observations, taus, centers])
mcmc = mc.MCMC(model)
mcmc.sample(50000)

figsize(12.5, 9)
subplot(311)
lw = 1
center_trace = mcmc.trace("centers")[:]

# for pretty colors later in the book.
# --- MCMC diagnostics: traces, a continued run, and posterior histograms ---

# Order the two colors so cluster 0 / cluster 1 keep consistent colors
# regardless of which chain mode the centers landed in.
colors = ["#348ABD", "#A60628"] if center_trace[-1, 0] > center_trace[-1, 1] \
    else ["#A60628", "#348ABD"]

plot(center_trace[:, 0], label="trace of center 0", c=colors[0], lw=lw)
plot(center_trace[:, 1], label="trace of center 1", c=colors[1], lw=lw)
plt.title("Traces of unknown parameters")
leg = plt.legend(loc="upper right")
leg.get_frame().set_alpha(0.7)

subplot(312)
std_trace = mcmc.trace("stds")[:]
plot(std_trace[:, 0], label="trace of standard deviation of cluster 0",
     c=colors[0], lw=lw)
plot(std_trace[:, 1], label="trace of standard deviation of cluster 1",
     c=colors[1], lw=lw)
plt.legend(loc="upper left")

subplot(313)
p_trace = mcmc.trace("p")[:]
plot(p_trace, label="$p$: frequency of assignment to cluster 0",
     color="#467821", lw=1)
plt.xlabel("Steps")
plt.ylim(0, 1)
plt.legend();

# Continue sampling: chain 1 holds the new 100k samples, chain 0 the
# original 50k, so the two traces can be plotted back to back.
mcmc.sample(100000)

figsize(12.5, 4)
center_trace = mcmc.trace("centers", chain=1)[:]
prev_center_trace = mcmc.trace("centers", chain=0)[:]

x = np.arange(50000)
plot(x, prev_center_trace[:, 0], label="previous trace of center 0",
     lw=lw, alpha=0.4, c=colors[1])
plot(x, prev_center_trace[:, 1], label="previous trace of center 1",
     lw=lw, alpha=0.4, c=colors[0])

x = np.arange(50000, 150000)
plot(x, center_trace[:, 0], label="new trace of center 0",
     lw=lw, c="#348ABD")
plot(x, center_trace[:, 1], label="new trace of center 1",
     lw=lw, c="#A60628")

plt.title("Traces of unknown center parameters")
leg = plt.legend(loc="upper right")
leg.get_frame().set_alpha(0.8)
plt.xlabel("Steps");

# Posterior histograms: 2x2 grid, one row per cluster
# (left: center, right: standard deviation).
figsize(11.0, 4)
std_trace = mcmc.trace("stds")[:]

# BUG FIX: subplot indices are 1-based; the original `_i = [1, 2, 3, 0]`
# made the last panel subplot(2, 2, 0), which is invalid (errors on
# modern matplotlib). Presumably a typo for 4 — with [1, 2, 3, 4] the
# indirection is an identity map kept only to preserve the call shape.
_i = [1, 2, 3, 4]
for i in range(2):
    subplot(2, 2, _i[2 * i])
    plt.title("Posterior of center of cluster %d" % i)
    plt.hist(center_trace[:, i], color=colors[i], bins=30,
             histtype="stepfilled")

    subplot(2, 2, _i[2 * i + 1])
    plt.title("Posterior of standard deviation of cluster %d" % i)
    plt.hist(std_trace[:, i], color=colors[i], bins=30,
             histtype="stepfilled")
# plt.autoscale(tight=True)
plt.tight_layout()

# --- Posterior cluster assignments, visualized two ways ---
figsize(12.5, 4.5)
cmap = mpl.colors.ListedColormap(colors)
# Rows: every 400th posterior sample; columns: data points sorted by value.
imshow(mcmc.trace("assignment")[::400, np.argsort(data)],
       cmap=cmap, aspect=.4, alpha=.9)
xticks(np.arange(0, data.shape[0], 40),
       ["%.2f" % s for s in np.sort(data)[::40]])
plt.ylabel("posterior sample")
plt.xlabel("value of $i$th data point")
plt.title("Posterior labels of data points");

cmap = mpl.colors.LinearSegmentedColormap.from_list("BMH", colors)
assign_trace = mcmc.trace("assignment")[:]
# Mean label over the chain = posterior P(cluster 1); 1 - mean = P(cluster 0).
scatter(data, 1 - assign_trace.mean(axis=0), cmap=cmap,
        c=assign_trace.mean(axis=0), s=50)
plt.ylim(-0.05, 1.05)
plt.xlim(35, 300)
plt.title("Probability of data point belonging to cluster 0")
plt.ylabel("probability")
plt.xlabel("value of data point");

# --- Draw the two fitted Gaussians using posterior-mean parameters ---
norm = stats.norm
x = np.linspace(20, 300, 500)
posterior_center_means = center_trace.mean(axis=0)
posterior_std_means = std_trace.mean(axis=0)
posterior_p_mean = mcmc.trace("p")[:].mean()

hist(data, bins=20, histtype="step", normed=True, color="k",
     lw=2, label="histogram of data")
y = posterior_p_mean * norm.pdf(x, loc=posterior_center_means[0],
                                scale=posterior_std_means[0])
plt.plot(x, y, label="Cluster 0 (using posterior-mean parameters)", lw=3)
plt.fill_between(x, y, color=colors[1], alpha=0.3)

y = (1 - posterior_p_mean) * norm.pdf(x, loc=posterior_center_means[1],
                                      scale=posterior_std_means[1])
plt.plot(x, y, label="Cluster 1 (using posterior-mean parameters)", lw=3)
plt.fill_between(x, y, color=colors[0], alpha=0.3)

plt.legend(loc="upper left")
plt.title("Visualizing Clusters using posterior-mean parameters");

# --- Toy example: two perfectly (negatively) dependent unknowns ---
import pymc as mc

x = mc.Normal("x", 4, 10)
y = mc.Lambda("y", lambda x=x: 10 - x, trace=True)

ex_mcmc = mc.MCMC(mc.Model([x, y]))
ex_mcmc.sample(500)

plt.plot(ex_mcmc.trace("x")[:])
plt.plot(ex_mcmc.trace("y")[:])
plt.title("Displaying (extreme) case of dependence between unknowns");

# Posterior probability that the point x = 175 belongs to cluster 1:
# compare the two weighted component densities sample-by-sample and
# average the indicator over the chain.
norm_pdf = stats.norm.pdf
p_trace = mcmc.trace("p")[:]
x = 175

v = p_trace * norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \
    (1 - p_trace) * norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])
# FIX: Python 3 print() — original used Python 2 print-statement syntax.
print("Probability of belonging to cluster 1:", v.mean())

# --- Autocorrelation and thinning demo ---
figsize(12.5, 4)
import pymc as mc

# x_t: i.i.d. noise; y_t: a random walk (highly autocorrelated).
x_t = mc.rnormal(0, 1, 200)
x_t[0] = 0
y_t = np.zeros(200)
for i in range(1, 200):
    y_t[i] = mc.rnormal(y_t[i - 1], 1)

plt.plot(y_t, label="$y_t$", lw=3)
plt.plot(x_t, label="$x_t$", lw=3)
plt.xlabel("time, $t$")
plt.legend();


def autocorr(x):
    """Normalized autocorrelation of 1-D series x, for lags 0..len(x)-1."""
    # from http://tinyurl.com/afz57c4
    result = np.correlate(x, x, mode='full')
    result = result / np.max(result)
    # BUG FIX: floor division — `result.size / 2` is a float under
    # Python 3 and cannot be used as an index.
    return result[result.size // 2:]


colors = ["#348ABD", "#A60628", "#7A68A6"]

x = np.arange(1, 200)
plt.bar(x, autocorr(y_t)[1:], width=1, label="$y_t$",
        edgecolor=colors[0], color=colors[0])
plt.bar(x, autocorr(x_t)[1:], width=1, label="$x_t$",
        color=colors[1], edgecolor=colors[1])
plt.legend(title="Autocorrelation")
plt.ylabel("measured correlation \nbetween $y_t$ and $y_{t-k}$.")
plt.xlabel("k (lag)")
plt.title("Autocorrelation plot of $y_t$ and $x_t$ for differing $k$ lags.");

# Thinning: keeping every 2nd/3rd sample reduces the measured autocorrelation.
# BUG FIX: integer division — `200 / 3 + 1` is a float under Python 3
# and cannot be used as a slice bound.
max_x = 200 // 3 + 1
x = np.arange(1, max_x)
plt.bar(x, autocorr(y_t)[1:max_x], edgecolor=colors[0],
        label="no thinning", color=colors[0], width=1)
plt.bar(x, autocorr(y_t[::2])[1:max_x], edgecolor=colors[1],
        label="keeping every 2d sample", color=colors[1], width=1)
plt.bar(x, autocorr(y_t[::3])[1:max_x], width=1, edgecolor=colors[2],
        label="keeping every 3rd sample", color=colors[2])
plt.autoscale(tight=True)
plt.legend(title="Autocorrelation plot for $y_t$", loc="lower left")
plt.ylabel("measured correlation \nbetween $y_t$ and $y_{t-k}$.")
plt.xlabel("k (lag)")
plt.title("Autocorrelation of $y_t$ (no thinning vs. thinning) \
at differing $k$ lags.");

# --- PyMC's built-in diagnostic plots on a thinned, fresh run ---
from pymc.Matplot import plot as mcplot

mcmc.sample(25000, 0, 10)  # iter=25000, burn=0, thin=10
mcplot(mcmc.trace("centers", 2), common_scale=False)


# --- Notebook styling ---
from IPython.core.display import HTML


def css_styling():
    """Load the notebook's custom CSS and return it as displayable HTML."""
    # BUG FIX: context manager closes the file; the original called
    # open(...).read() and left the handle dangling.
    with open("../styles/custom.css", "r") as f:
        styles = f.read()
    return HTML(styles)


css_styling()