""" The book uses a custom matplotlibrc file, which provides the unique styles for matplotlib plots. If executing this book, and you wish to use the book's styling, provided are two options: 1. Overwrite your own matplotlibrc file with the rc-file provided in the book's styles/ dir. See http://matplotlib.org/users/customizing.html 2. Also in the styles is bmh_matplotlibrc.json file. This can be used to update the styles in only this notebook. Try running the following code: import json s = json.load( open("../styles/bmh_matplotlibrc.json") ) matplotlib.rcParams.update(s) """ #the code below can be passed over, as it is currently not important. %pylab inline figsize( 11, 9) import scipy.stats as stats dist = stats.beta n_trials = [0,1,2,3,4,5,8,15, 50, 500] data = stats.bernoulli.rvs(0.5, size = n_trials[-1] ) x = np.linspace(0,1,100) for k, N in enumerate(n_trials): sx = subplot( len(n_trials)/2, 2, k+1) plt.xlabel("$p$, probability of heads") if k in [0,len(n_trials)-1] else None plt.setp(sx.get_yticklabels(), visible=False) heads = data[:N].sum() y = dist.pdf(x, 1 + heads, 1 + N - heads ) plt.plot( x, y, label= "observe %d tosses,\n %d heads"%(N,heads) ) plt.fill_between( x, 0, y, color="#348ABD", alpha = 0.4 ) plt.vlines( 0.5, 0, 4, color = "k", linestyles = "--", lw=1 ) leg = plt.legend() leg.get_frame().set_alpha(0.4) plt.autoscale(tight = True) plt.suptitle( "Bayesian updating of posterior probabilities", y = 1.02, fontsize = 14); plt.tight_layout() figsize(12.5,4) p = np.linspace( 0,1, 50) plt.plot( p, 2*p/(1+p), color = "#348ABD", lw = 3 ) #plt.fill_between( p, 2*p/(1+p), alpha = .5, facecolor = ["#A60628"]) plt.scatter( 0.2, 2*(0.2)/1.2, s = 140, c ="#348ABD" ) plt.xlim( 0, 1) plt.ylim( 0, 1) plt.xlabel( "Prior, $P(A) = p$") plt.ylabel("Posterior, $P(A|X)$, with $P(A) = p$") plt.title( "Are there bugs in my code?"); figsize( 12.5, 4 ) colours = ["#348ABD", "#A60628"] prior = [0.20, 0.80] posterior = [1./3, 2./3] plt.bar( [0,.7], prior ,alpha = 0.70, width = 0.25, \ color = colours[0], label = "prior distribution", lw = "3", edgecolor = colours[0]) plt.bar( [0+0.25,.7+0.25], posterior ,alpha = 0.7, \ width = 0.25, color = colours[1], label = "posterior distribution", lw = "3", edgecolor = colours[1]) plt.xticks( [0.20,.95], ["Bugs Absent", "Bugs Present"] ) plt.title("Prior and Posterior probability of bugs present, prior = 0.2") plt.ylabel("Probability") plt.legend(loc="upper left"); figsize( 12.5, 4) import scipy.stats as stats a = np.arange( 16 ) poi = stats.poisson lambda_ = [1.5, 4.25 ] plt.bar( a, poi.pmf( a, lambda_[0]), color=colours[0], label = "$\lambda = %.1f$"%lambda_[0], alpha = 0.60, edgecolor = colours[0], lw = "3") plt.bar( a, poi.pmf( a, lambda_[1]), color=colours[1], label = "$\lambda = %.1f$"%lambda_[1], alpha = 0.60, edgecolor = colours[1], lw = "3") plt.xticks( a + 0.4, a ) plt.legend() plt.ylabel("probability of $k$") plt.xlabel("$k$") plt.title("Probability mass function of a Poisson random variable; differing \ $\lambda$ values"); a = np.linspace(0,4, 100) expo = stats.expon lambda_ = [0.5, 1] for l,c in zip(lambda_,colours): plt.plot( a, expo.pdf( a, scale=1./l), lw=3, color=c, label = "$\lambda = %.1f$"%l) plt.fill_between( a, expo.pdf( a, scale=1./l), color=c, alpha = .33) plt.legend() plt.ylabel("PDF at $z$") plt.xlabel("$z$") plt.title("Probability density function of an Exponential random variable;\ differing $\lambda$"); figsize( 12.5, 3.5 ) count_data = np.loadtxt("data/txtdata.csv") n_count_data = len(count_data) plt.bar( np.arange( n_count_data ), count_data, color ="#348ABD" ) plt.xlabel( "Time (days)") plt.ylabel("count of text-msgs received") plt.title("Did the user's texting habits change over time?") plt.xlim( 0, n_count_data ); import pymc as mc alpha = 1.0/count_data.mean() #recall count_data is #the variable that holds our txt counts lambda_1 = mc.Exponential( "lambda_1", alpha ) lambda_2 = mc.Exponential( "lambda_2", alpha ) tau = mc.DiscreteUniform( "tau", lower = 0, upper = n_count_data ) print "Random output:", tau.random(),tau.random(), tau.random() @mc.deterministic def lambda_( tau = tau, lambda_1 = lambda_1, lambda_2 = lambda_2 ): out = np.zeros( n_count_data ) out[:tau] = lambda_1 #lambda before tau is lambda1 out[tau:] = lambda_2 #lambda after tau is lambda2 return out observation = mc.Poisson( "obs", lambda_, value = count_data, observed = True) model = mc.Model( [observation, lambda_1, lambda_2, tau] ) ### Mysterious code to be explained in Chapter 3. mcmc = mc.MCMC(model) mcmc.sample( 40000, 10000, 1 ) lambda_1_samples = mcmc.trace( 'lambda_1' )[:] lambda_2_samples = mcmc.trace( 'lambda_2' )[:] tau_samples = mcmc.trace( 'tau' )[:] figsize(12.5, 10) #histogram of the samples: ax = plt.subplot(311) ax.set_autoscaley_on(False) plt.hist( lambda_1_samples, histtype='stepfilled', bins = 30, alpha = 0.85, label = "posterior of $\lambda_1$", color = "#A60628",normed = True ) plt.legend(loc = "upper left") plt.title(r"Posterior distributions of the variables $\lambda_1,\;\lambda_2,\;\tau$") plt.xlim([15,30]) plt.xlabel("$\lambda_2$ value") plt.ylabel("probability") ax = plt.subplot(312) ax.set_autoscaley_on(False) plt.hist( lambda_2_samples,histtype='stepfilled', bins = 30, alpha = 0.85, label = "posterior of $\lambda_2$",color="#7A68A6", normed = True ) plt.legend(loc = "upper left") plt.xlim([15,30]) plt.xlabel("$\lambda_2$ value") plt.ylabel("probability") plt.subplot(313) w = 1.0/ tau_samples.shape[0] * np.ones_like( tau_samples ) plt.hist( tau_samples, bins = n_count_data, alpha = 1, label = r"posterior of $\tau$", color="#467821", weights=w, rwidth =2. ) plt.xticks( np.arange( n_count_data ) ) plt.legend(loc = "upper left"); plt.ylim([0,.75]) plt.xlim([35, len(count_data)-20]) plt.xlabel("$\tau$ (in days)") plt.ylabel("probability"); figsize( 12.5, 5) # tau_samples, lambda_1_samples, lambda_2_samples contain # N samples from the corresponding posterior distribution N = tau_samples.shape[0] expected_texts_per_day = np.zeros(n_count_data) for day in range(0, n_count_data): # ix is a bool index of all tau samples corresponding to # the switchpoint occurring prior to value of 'day' ix = day < tau_samples # Each posterior sample corresponds to a value for tau. # for each day, that value of tau indicates whether we're "before" # (in the lambda1 "regime") or # "after" (in the lambda2 "regime") the switchpoint. # by taking the posterior sample of lambda1/2 accordingly, we can average # over all samples to get an expected value for lambda on that day. # As explained, the "message count" random variable is Poisson distributed, # and therefore lambda (the poisson parameter) is the expected value of "message count" expected_texts_per_day[day] = (lambda_1_samples[ix].sum() + lambda_2_samples[~ix].sum() ) /N plt.plot( range( n_count_data), expected_texts_per_day, lw =4, color = "#E24A33", label = "expected number of text-messages recieved") plt.xlim( 0, n_count_data ) plt.xlabel( "Day" ) plt.ylabel( "Expected # text-messages" ) plt.title( "Expected number of text-messages received") plt.ylim( 0, 50 ) plt.bar( np.arange( len(count_data) ), count_data, color ="#348ABD", alpha = 0.65, label="observed texts per day") plt.legend(loc="upper left"); #type your code here. #type your code here. #type your code here. from IPython.core.display import HTML def css_styling(): styles = open("../styles/custom.css", "r").read() return HTML(styles) css_styling()