# Notebook-export artifact cell.
# BUG FIX 1: 'social()' is defined at the very bottom of this file, so calling
# it here raises NameError when the file runs top-to-bottom as a plain script
# (in the original notebook the def cell had been executed in an earlier run).
# Guard the call so the script remains runnable.
try:
    social()
except NameError:
    pass
# BUG FIX 2: '%matplotlib inline' is IPython magic, not valid Python syntax.
# Invoke it programmatically; outside IPython, get_ipython() does not exist
# and the magic is a no-op anyway.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
from matplotlib import pyplot as plt
import seaborn as sns
# Global figure styling applied to every plot in this analysis.
_style_overrides = {
    'axes.labelsize': 16.0,
    'font.size': 16,
    'legend.fontsize': 16.0,
    'axes.titlesize': 18,
    'xtick.labelsize': 15.0,
    'ytick.labelsize': 15.0,
}
sns.set(context='poster', style='whitegrid', rc=_style_overrides)
import numpy as np
import pandas as pd
# Daily text-message counts from the "Bayesian Methods for Hackers" dataset
# (one integer count per day, no header row).
count_data = pd.read_csv('https://raw.githubusercontent.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/Chapter1_Introduction/data/txtdata.csv',
                         header=None, names=['msgs'])
N = len(count_data)  # number of observed days

# Quick look at the raw series before modelling.
days = np.arange(N)
plt.bar(days, count_data['msgs'], color="#9b59b6", alpha=0.8)
plt.title('Did the texting habits change over time?')
plt.xlabel('Time (days)')
plt.ylabel('count of msgs received')
plt.xlim(0, N)
import pymc as pm
# Hyperparameter for the exponential priors: 1 / (mean daily message count).
# BUG FIX: 'count_data' is a DataFrame here, so count_data.mean() returns a
# length-1 Series rather than a scalar; take the mean of the 'msgs' column so
# alpha (and hence both priors) is a plain float.
alpha = 1.0 / count_data['msgs'].mean()
# Poisson rates before (lambda_1) and after (lambda_2) the switchpoint.
lambda_1 = pm.Exponential('lambda_1', alpha)
lambda_2 = pm.Exponential('lambda_2', alpha)
# Day on which the texting rate changes, uniform over the observation window.
tau = pm.DiscreteUniform('tau', lower=0, upper=N)
print('Random Output:', tau.random(), tau.random(), tau.random())
@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    """Piecewise-constant Poisson rate for each day.

    Days strictly before the switchpoint tau get rate lambda_1;
    day tau and every day after it get rate lambda_2.
    """
    rates = np.zeros(N)
    rates[:tau] = lambda_1  # pre-switchpoint regime
    rates[tau:] = lambda_2  # switchpoint day onward
    return rates
# Condition the Poisson likelihood on the observed counts and sample the
# posterior with PyMC2's MCMC machinery.
observation = pm.Poisson('obs', lambda_, value=count_data, observed=True)
model = pm.Model([observation, lambda_1, lambda_2, tau])
# Sampling details are explained later in the chapter.
sampler = pm.MCMC(model)
sampler.sample(40000, 10000, 1)  # 40k iterations, 10k burn-in, thin=1 (keep all)
lambda_1_samples = sampler.trace('lambda_1')[:]
lambda_2_samples = sampler.trace('lambda_2')[:]
tau_samples = sampler.trace('tau')[:]
# --- Posterior histograms of lambda_1, lambda_2 and tau ---------------------
# BUG FIX: plt.hist's 'normed' keyword was removed in matplotlib 3.1;
# 'density=True' is its direct replacement.
# Also: label/xlabel strings containing '\lambda'/'\tau' are now raw strings,
# which has the same runtime value but avoids invalid-escape warnings.
ax = plt.subplot(311)
ax.set_autoscaley_on(False)
plt.hist(lambda_1_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label=r'posterior of $\lambda _1$', color='#9b59b6', density=True)
plt.legend(loc='upper right')
plt.title(r'''Posterior distributions of the variables $\lambda _1, \ \lambda _2, \ \tau$''')
plt.xlim([15, 30])
plt.xlabel(r'$\lambda _1$ value')

ax = plt.subplot(312)
ax.set_autoscaley_on(False)
plt.hist(lambda_2_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label=r'posterior of $\lambda _2$', color='#3498db', density=True)
plt.legend(loc='upper right')
plt.xlim([15, 30])
plt.xlabel(r'$\lambda _2$ value')

plt.subplot(313)
# Weight every tau sample by 1/n_samples so bar heights are posterior
# probabilities rather than raw counts.
w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
plt.hist(tau_samples, bins=N, alpha=1,
         label=r'posterior of $\tau$',
         color='#A60628', weights=w, rwidth=2.)
plt.xticks(np.arange(N))
plt.legend(loc='upper left')
plt.ylim([0, 0.75])
plt.xlim([35, len(count_data) - 20])
plt.xlabel(r'$\tau$ (in days)')
plt.ylabel('probability')
# --- Posterior-expected number of texts per day -----------------------------
n_samples = tau_samples.shape[0]
expected_texts_per_day = np.zeros(N)
for day in range(N):
    # Boolean mask over posterior samples: True where the sampled switchpoint
    # tau falls strictly after this day, i.e. the day lies in the lambda_1
    # regime for that sample.
    before_switch = day < tau_samples
    # Average lambda over all posterior samples, picking lambda_1 or lambda_2
    # per sample according to the mask. Because the message counts are
    # Poisson distributed, this posterior-mean lambda is exactly the expected
    # message count for the day.
    expected_texts_per_day[day] = (lambda_1_samples[before_switch].sum()
                                   + lambda_2_samples[~before_switch].sum()) / n_samples

# Overlay the posterior expectation on the observed daily counts.
plt.plot(range(N), expected_texts_per_day, color='#9b59b6', lw=4,
         label='Predicted number of texts')
plt.title('Expected number of texts received')
plt.xlabel('Day')
plt.ylabel('Expected # of texts')
plt.xlim(0, N)
plt.bar(np.arange(len(count_data)), count_data['msgs'], color="#3498db", alpha=0.65,
        label="observed texts per day")
plt.legend(loc='upper left')
from IPython.core.display import HTML

def css_styling():
    """Read the notebook's custom stylesheet and return it wrapped as HTML.

    Returns:
        IPython HTML object containing the raw CSS, which the notebook
        front-end renders to restyle the page.
    """
    # BUG FIX: the original opened the file without closing it; a context
    # manager guarantees the handle is released.
    # NOTE(review): hard-coded absolute path — will fail on any other machine.
    with open("/users/ryankelly/desktop/custom_notebook2.css", "r") as f:
        styles = f.read()
    return HTML(styles)

css_styling()
def social():
    """Return social-media button markup wrapped as HTML.

    The embed snippet is currently empty (a single newline) — presumably
    stripped when this notebook was exported.
    """
    code = """
"""
    return HTML(code)