social()
import pymc as pm
param = pm.Exponential('poisson_param', 1)
data_generator = pm.Poisson('data_generator', param)
data_plus_one = data_generator + 1
print param.children, '\n'
print data_generator.parents, '\n'
print data_generator.children, '\n'
print 'parameter.value = ', param.value
print 'data_generator.value = ', data_generator.value
print 'data_plus_one = ', data_plus_one.value
lambda_1 = pm.Exponential('lambda1', 1) # prior on first behaviour
lambda_2 = pm.Exponential('lambda2', 1) # prior on second behaviour
tau = pm.DiscreteUniform('tau', 0, 10) # prior on behaviour change)
print "lambda_1.value = %.3f" % lambda_1.value
print "lambda_2.value = %.3f" % lambda_2.value
print "tau.value = %.3f" % tau.value
# After calling random() the new value is stored in the variables
# 'value' attribute
lambda_1.random(), lambda_2.random(), tau.random()
print "lambda_1.value = %.3f" % lambda_1.value
print "lambda_2.value = %.3f" % lambda_2.value
print "tau.value = %.3f" % tau.value
type(lambda_1 + lambda_2)
import numpy as np
n_data = 5
@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
out = np.zeros(n_data)
out[:tau] = lambda_1 # lambda before tau
out[tau:] = lambda_2 # lambda after tau
return out
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(context = 'poster', style = 'whitegrid')
samples = [lambda_1.random() for i in range(20000)]
plt.hist(samples, bins=70, normed=True, histtype='stepfilled', alpha=0.8)
plt.title('Prior distribution for $\lambda _1$')
plt.xlim(0, 8)
data = np.array([10, 5])
fixed_variable = pm.Poisson('fxd', 1, value=data, observed=True)
print 'value: ', fixed_variable.value
print 'calling .random()'
fixed_variable.random()
print 'value: ', fixed_variable.value
# some fake data
data = np.array([10, 25, 15, 20, 35])
obs = pm.Poisson('obs', lambda_, value=data, observed=True)
obs.value
model = pm.Model([obs, lambda_, lambda_1, lambda_2, tau])
tau = pm.rdiscrete_uniform(0, 80)
tau
alpha = 1. / 20.
lambda_1, lambda_2 = pm.rexponential(alpha, 2)
print lambda_1, lambda_2
datau = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]
plt.bar(np.arange(80), data, color='#3498db')
plt.bar(tau - 1, data[tau - 1], color='r', label='user behaviour changed')
plt.xlabel('Time (days)')
plt.ylabel('count of texts received')
plt.title('Artificial dataset')
plt.xlim(0, 80)
plt.legend();
def plot_artificial_sms_dataset():
tau = pm.rdiscrete_uniform(0, 80)
alpha = 1. / 20.
lambda_1, lambda_2 = pm.rexponential(alpha, 2)
data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]
plt.bar(np.arange(80), data, color="#348ABD")
plt.bar(tau - 1, data[tau - 1], color="r", label="user behaviour changed")
plt.xlim(0, 80)
plt.title("More example of artificial datasets")
for i in range(4):
plt.subplot(4, 1, i)
plot_artificial_sms_dataset()
import pymc as pm
# The parameters are the bounds of the Uniform.
p = pm.Uniform('p', lower=0, upper=1)
# set constants
p_true = 0.05 # remember this is normally unknown
N = 2500
# sample N Bernoulli random variables from Ber(0.05).
# each random variable has a 0.05 chance of being a 1.
# this is the data-generation step
occurrences = pm.rbernoulli(p_true, N)
print occurrences, occurrences.sum()
# Occurrences.mean is equal to n/N
print 'What is the observed frequency in Group A? {}'.format(
occurrences.mean())
print 'Does this equal the true frequency? {}'.format(
occurrences.mean() == p_true)
# include the observations, which are Bernoulli
obs = pm.Bernoulli('obs', p, value=occurrences, observed=True)
# To be explained in later lessons
mcmc = pm.MCMC([p, obs])
mcmc.sample(18000, 1000)
plt.title('Posterior distribution of $p _A$, the true effectiveness of site A')
plt.vlines(p_true, 0, 90, linestyle='--', label='true $p_A$ (unknown)')
plt.hist(mcmc.trace('p')[:], bins=25, histtype='stepfilled', normed=True)
plt.legend()
# These two quantities are unknown to use
# We just use them to simulate test data
true_p_A = 0.05
true_p_B = 0.04
# Unequal sample sizes are no problem with Bayesian analysis
N_A = 1700
N_B = 950
# Generate observations
obs_A = pm.rbernoulli(true_p_A, N_A)
obs_B = pm.rbernoulli(true_p_B, N_B)
print 'Obs from Site A: ', obs_A[:30].astype(int), '...'
print 'Obs from Site B: ', obs_B[:30].astype(int), '...'
print obs_A.mean()
print obs_B.mean()
# Set up pymc model assuming uniform priors for p_A and p_B.
p_A = pm.Uniform('p_A', 0, 1)
p_B = pm.Uniform('p_B', 0, 1)
# Define the deterministic delta function.
# This is our unknown of interest.
@pm.deterministic
def delta(p_A=p_A, p_B=p_B):
return p_A - p_B
# Set of observations
obs_A = pm.Bernoulli('obs_A', p_A, value=obs_A, observed=True)
obs_b = pm.Bernoulli('obs_B', p_B, value=obs_B, observed=True)
# Explained later
mcmc = pm.MCMC([p_A, p_B, delta, obs_A, obs_B])
mcmc.sample(20000, 1000)
p_A_samples = mcmc.trace('p_A')[:]
p_B_samples = mcmc.trace('p_B')[:]
delta_samples = mcmc.trace('delta')[:]
ax = plt.subplot(311)
plt.xlim(0, 0.1)
plt.hist(p_A_samples, histtype='stepfilled', bins=25, alpha=0.80,
label='posterior of $p_A$', color='red', normed=True)
plt.vlines(true_p_A, 0, 80, linestyle='--', label='true $p_A$ (unknown)')
plt.legend(loc='upper right')
plt.title('Posterior distributions of $p_A$, $p_B$, and delta unknowns')
ax = plt.subplot(312)
plt.xlim(0, 0.1)
plt.hist(p_B_samples, histtype='stepfilled', bins=25, alpha=0.80,
label='posterior of $p_B$', color='#467821', normed=True)
plt.vlines(true_p_B, 0, 80, linestyle='--', label='true $p_B$ (unknown)')
plt.legend(loc='upper right')
ax = plt.subplot(313)
plt.hist(delta_samples, histtype='stepfilled', bins=30, alpha=0.80,
label='posterior of delta', color = '#7A68A6', normed=True)
plt.vlines(true_p_A - true_p_B, 0, 60, linestyle='--',
label='true delta (unknown)')
plt.vlines(0, 0, 60, color='black', alpha=0.2)
plt.legend(loc='upper right');
# Count the number of samples less than 0, i.e. the area under the curve
# before 0, represent the probability that site A is worse than site B.
print 'Probability site A is WORSE than site B: {}'.format(
(delta_samples < 0).mean())
print 'Probability site A is BETTER than site B: {}'.format(
(delta_samples > 0).mean())
from IPython.core.display import HTML
def css_styling():
styles = open("/users/ryankelly/desktop/custom_notebook2.css", "r").read()
return HTML(styles)
css_styling()
def social():
code = """
"""
return HTML(code)