social()
import scipy.stats as stats
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(context = 'notebook', style = 'whitegrid')
binomial = stats.binom
parameters = [(10, 0.4), (10, 0.9)]
colors = ["#9b59b6", "#3498db"]
for i in range(2):
N, p = parameters[i]
_x = np.arange(N + 1)
plt.bar(_x - 0.5, binomial.pmf(_x, N, p), color=colors[i],
alpha=0.6,
label='$N$: {}, $p$: {}'.format(N, p))
plt.legend(loc='upper left')
plt.xlim(0, 10.5)
plt.xlabel('$k$')
plt.ylabel('$P(X=k)$')
plt.title('Probability mass distribution of binomial random variables')
import pymc as pm
N = 100
p = pm.Uniform('freq_cheating', 0, 1)
true_answers = pm.Bernoulli('truths', p, size=N)
first_coin_flip = pm.Bernoulli('first_flip', 0.5, size=N)
first_coin_flip.value
second_coin_flips = pm.Bernoulli('second_flips', 0.5, size=N)
@pm.deterministic
def observed_proportion(t_a=true_answers,
fc=first_coin_flip,
sc=second_coin_flips):
observed = fc * t_a + (1 - fc) * sc
return observed.sum() / float(N)
observed_proportion.value
X = 35
observations = pm.Binomial('obs', N, observed_proportion, observed=True,
value=X)
model = pm.Model([p, true_answers, first_coin_flip,
second_coin_flips, observed_proportion, observations])
# Explained in later notebook
mcmc = pm.MCMC(model)
mcmc.sample(40000, 15000)
p_trace = mcmc.trace('freq_cheating')[:]
plt.hist(p_trace, histtype='stepfilled', normed=True, alpha=0.8, bins=30,
label='posterior distribution', color='#348ABD')
plt.vlines([0.05, 0.35], [0, 0], [5, 5], alpha=0.5)
plt.xlim(0, 1)
plt.legend()
p = pm.Uniform('freq_cheating', 0, 1)
@pm.deterministic
def p_skewed(p=p):
return 0.5 * p + 0.25
yes_responses =pm.Binomial('number_cheats', 100, p_skewed,
value=35, observed=True)
model = pm.Model([yes_responses, p_skewed, p])
mcmc = pm.MCMC(model)
mcmc.sample(25000, 2500)
p_trace = mcmc.trace('freq_cheating')[:]
plt.hist(p_trace, histtype='stepfilled', normed=True, alpha=0.8, bins=30,
label='posterior distribution', color='#348ABD')
plt.vlines([0.05, 0.35], [0,0], [5, 5], alpha=0.5)
plt.xlim(0, 1)
plt.legend();
yes_responses =pm.Binomial('number_cheats', 500, p_skewed,
value=300, observed=True)
model = pm.Model([yes_responses, p_skewed, p])
mcmc = pm.MCMC(model)
mcmc.sample(25000, 2500)
p_trace = mcmc.trace('freq_cheating')[:]
plt.hist(p_trace, histtype='stepfilled', normed=True, alpha=0.8, bins=30,
label='posterior distribution', color='#348ABD')
plt.vlines([p_trace.mean()+2*p_trace.std(),
p_trace.mean() -2* p_trace.std()],
[0,0], [10, 10], alpha=0.5)
plt.xlim(0, 1)
plt.ylim(0, 13)
plt.legend();
print p_trace.mean()+2*p_trace.std()
print p_trace.mean()-2*p_trace.std()
(p_trace.mean()+2*p_trace.std()) - (p_trace.mean() -2* p_trace.std())
N = 10
x = np.empty(N, dtype=object)
for i in range(0, N):
x[i] = pm.Exponential('x_%i' % i, (i + 1) ** 2)
import pandas as pd
np.set_printoptions(precision=3, suppress=True)
data = pd.read_csv('https://raw.githubusercontent.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/Chapter2_MorePyMC/data/challenger_data.csv',
header=False, names=['date', 'temperature', 'incident'])
# Other import method, something is up with the data formating: fix later
np.set_printoptions(precision=3, suppress=True)
challenger_data = np.genfromtxt("https://raw.githubusercontent.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/Chapter2_MorePyMC/data/challenger_data.csv", skip_header=1,
usecols=[1, 2], missing_values="NA",
delimiter=",")
challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]
# Drop NA values
data = data[data['incident'].notnull()]
print data
# Plot the data and remove the actual incident
plt.scatter(data['temperature'][:-1], data['incident'][:-1], s= 75, color='k',
alpha=0.5)
plt.yticks([0, 1])
plt.ylabel('Incident?')
plt.xlabel('Outside Temperature (F)')
def logistic(x, beta):
return 1.0 / (1.0 + np.exp(beta * x))
x = np.linspace(-4, 4, 100)
plt.plot(x, logistic(x, 1), label = r"$\beta = 1$")
plt.plot(x, logistic(x, 3), label = r"$\beta = 3$")
plt.plot(x, logistic(x, -5), label = r"$\beta = -5$")
plt.legend();
def logistic(x, beta, alpha=0):
return 1.0 / (1.0 + np.exp(np.dot(beta, x) + alpha))
x = np.linspace(-4, 4, 100)
plt.plot(x, logistic(x, 1), label=r"$\beta = 1$", ls="--", lw=1)
plt.plot(x, logistic(x, 3), label=r"$\beta = 3$", ls="--", lw=1)
plt.plot(x, logistic(x, -5), label=r"$\beta = -5$", ls="--", lw=1)
plt.plot(x, logistic(x, 1, 1), label=r"$\beta = 1, \alpha = 1$",
color="#348ABD")
plt.plot(x, logistic(x, 3, -2), label=r"$\beta = 3, \alpha = -2$",
color="#A60628")
plt.plot(x, logistic(x, -5, 7), label=r"$\beta = -5, \alpha = 7$",
color="#7A68A6")
plt.legend(loc="lower left");
# temp = data['temperature'][:-1].values
# D = data['incident'][:-1].values
temperature = challenger_data[:, 0]
D = challenger_data[:, 1] # defect or not?
beta = pm.Normal("beta", 0, 0.001, value=0)
alpha = pm.Normal("alpha", 0, 0.001, value=0)
@pm.deterministic
def p(t = temperature, alpha = alpha, beta = beta):
return 1.0 / (1.0 + np.exp(beta * t + alpha))
print p.value
# connect the probabilities in `p` with our observations through
# a Bernoulli random variable
obs = pm.Bernoulli('bernoulli_obs', p, value = D, observed = True)
model = pm.Model([obs, beta, alpha])
map_ = pm.MAP(model)
map_.fit()
mcmc = pm.MCMC(model)
mcmc.sample(120000, 100000, 2)
alpha_samples = mcmc.trace('alpha')[:, None] # best to make them 1d
beta_samples = mcmc.trace('beta')[:, None]
# histogram of the samples:
plt.subplot(211)
plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,
label=r"posterior of $\beta$", color="#7A68A6", normed=True)
plt.legend()
plt.subplot(212)
plt.hist(alpha_samples, histtype='stepfilled', bins=35, alpha=0.85,
label=r"posterior of $\alpha$", color="#A60628", normed=True)
plt.legend();
t = np.linspace(temperature.min() - 5, temperature.max() + 5, 50)[:, None]
p_t = logistic(t.T, beta_samples, alpha_samples)
mean_prob_t = p_t.mean(axis=0)
from IPython.core.pylabtools import figsize
# figsize(18, 5)
plt.plot(t, mean_prob_t, lw=3, label="average posterior \nprobability \
of defect")
plt.plot(t, p_t[0, :], ls="--", label="realization from posterior")
plt.plot(t, p_t[-2, :], ls="--", label="realization from posterior")
plt.scatter(temperature, D, color="k", s=50, alpha=0.5)
plt.title("Posterior expected value of probability of defect; \
plus realizations")
plt.legend(loc="lower left")
plt.ylim(-0.1, 1.1)
plt.xlim(t.min(), t.max())
plt.ylabel("probability")
plt.xlabel("temperature");
from scipy.stats.mstats import mquantiles
# vectorized bottom and top 2.5% quantiles for 'confidence interval'
qs = mquantiles(p_t, [0.025, 0.975], axis = 0)
plt.fill_between(t[:,0], *qs, alpha=0.7, color='#7A68A6')
plt.plot(t[:, 0], qs[0], label="95% CI", color="#7A68A6", alpha=0.7)
plt.plot(t, mean_prob_t, lw=1, ls="--", color="k",
label="average posterior \nprobability of defect")
plt.xlim(t.min(), t.max())
plt.ylim(-0.02, 1.02)
plt.legend(loc="lower left")
plt.scatter(temperature, D, color="k", s=50, alpha=0.5)
plt.xlabel("temp, $t$")
plt.ylabel("probability estimate")
plt.title("Posterior probability estimates given temp. $t$");
prob_31 = logistic(31, beta_samples, alpha_samples)
plt.xlim(0.995, 1)
plt.hist(prob_31, bins=10000, normed=True, histtype='stepfilled')
plt.title('Posterior distribution of a probability of a defect @ 31 degrees')
plt.xlabel('probability of a defect occurring in O-ring');
from IPython.core.display import HTML
def css_styling():
styles = open("/users/ryankelly/desktop/custom_notebook2.css", "r").read()
return HTML(styles)
css_styling()
def social():
code = """
"""
return HTML(code)