import scipy.stats as stats
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

sns.set(context='notebook', style='whitegrid')

# Probability mass functions of two binomial random variables
binomial = stats.binom
parameters = [(10, 0.4), (10, 0.9)]
colors = ["#9b59b6", "#3498db"]

for i in range(2):
    N, p = parameters[i]
    _x = np.arange(N + 1)
    plt.bar(_x - 0.5, binomial.pmf(_x, N, p), color=colors[i], alpha=0.6,
            label='$N$: {}, $p$: {}'.format(N, p))

plt.legend(loc='upper left')
plt.xlim(0, 10.5)
plt.xlabel('$k$')
plt.ylabel('$P(X=k)$')
plt.title('Probability mass functions of binomial random variables')

import pymc as pm

# Privacy-preserving "cheating students" model: each student flips a coin;
# heads -> answer truthfully, tails -> flip again and answer "yes" on heads.
N = 100
p = pm.Uniform('freq_cheating', 0, 1)
true_answers = pm.Bernoulli('truths', p, size=N)

first_coin_flip = pm.Bernoulli('first_flip', 0.5, size=N)
first_coin_flip.value  # inspect the currently sampled values
second_coin_flips = pm.Bernoulli('second_flips', 0.5, size=N)

@pm.deterministic
def observed_proportion(t_a=true_answers, fc=first_coin_flip,
                        sc=second_coin_flips):
    observed = fc * t_a + (1 - fc) * sc
    return observed.sum() / float(N)

observed_proportion.value

X = 35  # number of "yes" responses observed
observations = pm.Binomial('obs', N, observed_proportion,
                           observed=True, value=X)

model = pm.Model([p, true_answers, first_coin_flip, second_coin_flips,
                  observed_proportion, observations])

# MCMC is explained in a later notebook
mcmc = pm.MCMC(model)
mcmc.sample(40000, 15000)

p_trace = mcmc.trace('freq_cheating')[:]
plt.hist(p_trace, histtype='stepfilled', normed=True, alpha=0.8, bins=30,
         label='posterior distribution', color='#348ABD')
plt.vlines([0.05, 0.35], [0, 0], [5, 5], alpha=0.5)
plt.xlim(0, 1)
plt.legend()

# Simpler, equivalent model: P("yes") = 0.5*p + 0.25
p = pm.Uniform('freq_cheating', 0, 1)

@pm.deterministic
def p_skewed(p=p):
    return 0.5 * p + 0.25

yes_responses = pm.Binomial('number_cheats', 100, p_skewed,
                            value=35, observed=True)

model = pm.Model([yes_responses, p_skewed, p])
mcmc = pm.MCMC(model)
mcmc.sample(25000, 2500)

p_trace = mcmc.trace('freq_cheating')[:]
plt.hist(p_trace, histtype='stepfilled', normed=True, alpha=0.8, bins=30,
         label='posterior distribution', color='#348ABD')
plt.vlines([0.05, 0.35], [0, 0], [5, 5], alpha=0.5)
plt.xlim(0, 1)
plt.legend();

# Same model with more data: 300 "yes" responses out of 500
yes_responses = pm.Binomial('number_cheats', 500, p_skewed,
                            value=300, observed=True)

model = pm.Model([yes_responses, p_skewed, p])
mcmc = pm.MCMC(model)
mcmc.sample(25000, 2500)

p_trace = mcmc.trace('freq_cheating')[:]
plt.hist(p_trace, histtype='stepfilled', normed=True, alpha=0.8, bins=30,
         label='posterior distribution', color='#348ABD')
plt.vlines([p_trace.mean() + 2 * p_trace.std(),
            p_trace.mean() - 2 * p_trace.std()], [0, 0], [10, 10], alpha=0.5)
plt.xlim(0, 1)
plt.ylim(0, 13)
plt.legend();

print(p_trace.mean() + 2 * p_trace.std())
print(p_trace.mean() - 2 * p_trace.std())

# Width of the mean +/- 2 std interval
(p_trace.mean() + 2 * p_trace.std()) - (p_trace.mean() - 2 * p_trace.std())

# An array of PyMC variables: ten Exponentials with different rates
N = 10
x = np.empty(N, dtype=object)
for i in range(0, N):
    x[i] = pm.Exponential('x_%i' % i, (i + 1) ** 2)

# Challenger O-ring data
import pandas as pd

np.set_printoptions(precision=3, suppress=True)
data = pd.read_csv('https://raw.githubusercontent.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/Chapter2_MorePyMC/data/challenger_data.csv',
                   header=0, names=['date', 'temperature', 'incident'])
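# Not part of the original notebook: a minimal sketch of one way to finish
# cleaning the pandas frame, assuming a pandas version that provides
# pd.to_numeric and assuming the incident column holds 0/1 values, 'NA'
# markers, and a final 'Challenger Accident' text row. The name `cleaned`
# is introduced here for illustration only.
cleaned = data.copy()
cleaned['incident'] = pd.to_numeric(cleaned['incident'], errors='coerce')
cleaned = cleaned.dropna(subset=['incident'])  # drops 'NA' rows and the accident row
print(cleaned[['temperature', 'incident']].head())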
# Other import method: load with NumPy directly, since the pandas
# formatting needs the extra cleanup sketched above
challenger_data = np.genfromtxt("https://raw.githubusercontent.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/Chapter2_MorePyMC/data/challenger_data.csv",
                                skip_header=1, usecols=[1, 2],
                                missing_values="NA", delimiter=",")
challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]  # drop NA values

data = data[data['incident'].notnull()]
print(data)

# Plot the data, excluding the actual Challenger incident (last row)
plt.scatter(data['temperature'][:-1], data['incident'][:-1],
            s=75, color='k', alpha=0.5)
plt.yticks([0, 1])
plt.ylabel('Incident?')
plt.xlabel('Outside Temperature (F)')

# Logistic function with a single slope parameter
def logistic(x, beta):
    return 1.0 / (1.0 + np.exp(beta * x))

x = np.linspace(-4, 4, 100)
plt.plot(x, logistic(x, 1), label=r"$\beta = 1$")
plt.plot(x, logistic(x, 3), label=r"$\beta = 3$")
plt.plot(x, logistic(x, -5), label=r"$\beta = -5$")
plt.legend();

# Logistic function with an added bias term alpha
def logistic(x, beta, alpha=0):
    return 1.0 / (1.0 + np.exp(np.dot(beta, x) + alpha))

x = np.linspace(-4, 4, 100)
plt.plot(x, logistic(x, 1), label=r"$\beta = 1$", ls="--", lw=1)
plt.plot(x, logistic(x, 3), label=r"$\beta = 3$", ls="--", lw=1)
plt.plot(x, logistic(x, -5), label=r"$\beta = -5$", ls="--", lw=1)
plt.plot(x, logistic(x, 1, 1), label=r"$\beta = 1, \alpha = 1$",
         color="#348ABD")
plt.plot(x, logistic(x, 3, -2), label=r"$\beta = 3, \alpha = -2$",
         color="#A60628")
plt.plot(x, logistic(x, -5, 7), label=r"$\beta = -5, \alpha = 7$",
         color="#7A68A6")
plt.legend(loc="lower left");

# temp = data['temperature'][:-1].values
# D = data['incident'][:-1].values
temperature = challenger_data[:, 0]
D = challenger_data[:, 1]  # defect or not?

# Vague Normal priors on the logistic parameters (precision 0.001)
beta = pm.Normal("beta", 0, 0.001, value=0)
alpha = pm.Normal("alpha", 0, 0.001, value=0)

@pm.deterministic
def p(t=temperature, alpha=alpha, beta=beta):
    return 1.0 / (1.0 + np.exp(beta * t + alpha))

print(p.value)

# connect the probabilities in `p` with our observations through
# a Bernoulli random variable
obs = pm.Bernoulli('bernoulli_obs', p, value=D, observed=True)

model = pm.Model([obs, beta, alpha])
map_ = pm.MAP(model)
map_.fit()
mcmc = pm.MCMC(model)
mcmc.sample(120000, 100000, 2)

# reshape the traces to (n_samples, 1) columns so they broadcast against
# the temperature grid below
alpha_samples = mcmc.trace('alpha')[:, None]
beta_samples = mcmc.trace('beta')[:, None]

# histogram of the samples:
plt.subplot(211)
plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,
         label=r"posterior of $\beta$", color="#7A68A6", normed=True)
plt.legend()
plt.subplot(212)
plt.hist(alpha_samples, histtype='stepfilled', bins=35, alpha=0.85,
         label=r"posterior of $\alpha$", color="#A60628", normed=True)
plt.legend();

# Expected probability of defect across a temperature grid
t = np.linspace(temperature.min() - 5, temperature.max() + 5, 50)[:, None]
p_t = logistic(t.T, beta_samples, alpha_samples)
mean_prob_t = p_t.mean(axis=0)

from IPython.core.pylabtools import figsize
# figsize(18, 5)

plt.plot(t, mean_prob_t, lw=3,
         label="average posterior \nprobability of defect")
plt.plot(t, p_t[0, :], ls="--", label="realization from posterior")
plt.plot(t, p_t[-2, :], ls="--", label="realization from posterior")
plt.scatter(temperature, D, color="k", s=50, alpha=0.5)
plt.title("Posterior expected value of probability of defect; "
          "plus realizations")
plt.legend(loc="lower left")
plt.ylim(-0.1, 1.1)
plt.xlim(t.min(), t.max())
plt.ylabel("probability")
plt.xlabel("temperature");
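# Not in the original notebook: a quick numerical summary of the alpha/beta
# posteriors with plain NumPy, to complement the histograms above. Because
# beta_samples and alpha_samples are (n_samples, 1) column vectors,
# logistic(t.T, beta_samples, alpha_samples) broadcasts to an
# (n_samples, 50) array of posterior curves.
for name, samples in [('alpha', alpha_samples), ('beta', beta_samples)]:
    low, high = np.percentile(samples, [2.5, 97.5])
    print('%s: mean %.3f, 95%% interval (%.3f, %.3f)'
          % (name, samples.mean(), low, high))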
from scipy.stats.mstats import mquantiles

# vectorized bottom and top 2.5% quantiles for the 95% credible interval
qs = mquantiles(p_t, [0.025, 0.975], axis=0)
plt.fill_between(t[:, 0], *qs, alpha=0.7, color='#7A68A6')
plt.plot(t[:, 0], qs[0], label="95% CI", color="#7A68A6", alpha=0.7)
plt.plot(t, mean_prob_t, lw=1, ls="--", color="k",
         label="average posterior \nprobability of defect")
plt.xlim(t.min(), t.max())
plt.ylim(-0.02, 1.02)
plt.legend(loc="lower left")
plt.scatter(temperature, D, color="k", s=50, alpha=0.5)
plt.xlabel("temp, $t$")
plt.ylabel("probability estimate")
plt.title("Posterior probability estimates given temp. $t$");

# Posterior probability of a defect at 31 degrees F, the launch temperature
prob_31 = logistic(31, beta_samples, alpha_samples)

plt.xlim(0.995, 1)
plt.hist(prob_31, bins=10000, normed=True, histtype='stepfilled')
plt.title('Posterior distribution of the probability of a defect at 31 degrees F')
plt.xlabel('probability of a defect occurring in the O-ring');
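# Not in the original notebook: summarize the 31 F posterior numerically.
# The histogram above is crowded against 1, so report the posterior mean and
# the fraction of samples above 0.99 (the 0.99 threshold is an arbitrary
# illustration, not from the source).
print('mean P(defect | t = 31): %.4f' % prob_31.mean())
print('P(defect probability > 0.99): %.4f' % (prob_31 > 0.99).mean())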