%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

# Some initial styling
rc = {'axes.labelsize': 16.0, 'font.size': 16, 'legend.fontsize': 16.0,
      'axes.titlesize': 18, 'xtick.labelsize': 15.0, 'ytick.labelsize': 15.0}
sns.set(context='poster', style='whitegrid', rc=rc)

import numpy as np
import pandas as pd

count_data = pd.read_csv('https://raw.githubusercontent.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/Chapter1_Introduction/data/txtdata.csv',
                         header=None, names=['msgs'])
N = len(count_data)

plt.bar(np.arange(N), count_data['msgs'], color="#9b59b6", alpha=0.8)
plt.xlabel('Time (days)')
plt.ylabel('count of msgs received')
plt.title('Did texting habits change over time?')
plt.xlim(0, N)

import pymc as pm

alpha = 1.0 / count_data['msgs'].mean()  # 'count_data' holds the daily text message counts

lambda_1 = pm.Exponential('lambda_1', alpha)
lambda_2 = pm.Exponential('lambda_2', alpha)

tau = pm.DiscreteUniform('tau', lower=0, upper=N)
print('Random Output:', tau.random(), tau.random(), tau.random())

@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(N)
    out[:tau] = lambda_1  # lambda before tau is lambda_1
    out[tau:] = lambda_2  # lambda at and after tau is lambda_2
    return out

observation = pm.Poisson('obs', lambda_, value=count_data['msgs'].values, observed=True)

model = pm.Model([observation, lambda_1, lambda_2, tau])
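For reference, the generative model that those few lines encode, in the notation of *Bayesian Methods for Hackers*, is:

$$
C_i \sim \text{Poisson}(\lambda_i), \qquad
\lambda_i =
\begin{cases}
\lambda_1 & \text{if } i < \tau \\
\lambda_2 & \text{if } i \ge \tau
\end{cases}
$$

$$
\lambda_1, \lambda_2 \sim \text{Exponential}(\alpha), \qquad
\tau \sim \text{DiscreteUniform}(0, N), \qquad
\alpha = \frac{1}{\overline{C}},
$$

where $C_i$ is the number of messages received on day $i$ and $\overline{C}$ is the mean observed count.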
# Code to be explained later
mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000, 1)

lambda_1_samples = mcmc.trace('lambda_1')[:]
lambda_2_samples = mcmc.trace('lambda_2')[:]
tau_samples = mcmc.trace('tau')[:]

# Histograms of the samples
ax = plt.subplot(311)
ax.set_autoscaley_on(False)
plt.hist(lambda_1_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label=r'posterior of $\lambda_1$', color='#9b59b6', normed=True)
plt.legend(loc='upper right')
plt.title(r'Posterior distributions of the variables $\lambda_1,\ \lambda_2,\ \tau$')
plt.xlim([15, 30])
plt.xlabel(r'$\lambda_1$ value')

ax = plt.subplot(312)
ax.set_autoscaley_on(False)
plt.hist(lambda_2_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label=r'posterior of $\lambda_2$', color='#3498db', normed=True)
plt.legend(loc='upper right')
plt.xlim([15, 30])
plt.xlabel(r'$\lambda_2$ value')

plt.subplot(313)
w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
plt.hist(tau_samples, bins=N, alpha=1, label=r'posterior of $\tau$',
         color='#A60628', weights=w, rwidth=2.)
plt.xticks(np.arange(N))
plt.legend(loc='upper left')
plt.ylim([0, 0.75])
plt.xlim([35, len(count_data) - 20])
plt.xlabel(r'$\tau$ (in days)')
plt.ylabel('probability')

NN = tau_samples.shape[0]
expected_texts_per_day = np.zeros(N)
for day in range(0, N):
    # ix is a bool index of all tau samples for which the switchpoint
    # falls after 'day', i.e. 'day' is still in the lambda_1 regime.
    ix = day < tau_samples
    # Each posterior sample carries a value for tau. For a given day, that
    # value of tau tells us whether the day lies before the switchpoint
    # (the lambda_1 regime) or at/after it (the lambda_2 regime).
    # Picking the lambda_1 or lambda_2 sample accordingly and averaging over
    # all posterior samples gives the expected value of lambda for that day.
    # Since the message count is Poisson distributed, lambda (the Poisson
    # parameter) is also the expected message count.
    expected_texts_per_day[day] = (lambda_1_samples[ix].sum()
                                   + lambda_2_samples[~ix].sum()) / NN

plt.plot(range(N), expected_texts_per_day, color='#9b59b6', lw=4,
         label='Predicted number of texts')
plt.xlim(0, N)
plt.xlabel('Day')
plt.ylabel('Expected # of texts')
plt.title('Expected number of texts received')
plt.bar(np.arange(len(count_data)), count_data['msgs'], color="#3498db",
        alpha=0.65, label='observed texts per day')
plt.legend(loc='upper left')
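For a quick numerical read on the same posteriors, the trace arrays can be summarized directly with NumPy; a minimal sketch:

# A minimal sketch of a numeric posterior summary, using only the trace
# arrays produced above.
print('posterior mean of lambda_1: %.2f texts/day' % lambda_1_samples.mean())
print('posterior mean of lambda_2: %.2f texts/day' % lambda_2_samples.mean())
print('most probable switchpoint tau: day %d'
      % np.bincount(tau_samples.astype(int)).argmax())
print('P(lambda_2 > lambda_1) = %.3f' % (lambda_2_samples > lambda_1_samples).mean())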