import pymc as mc

parameter = mc.Exponential( "poisson_param", 1 )

data_generator = mc.Poisson("data_generator", parameter )

data_plus_one = data_generator + 1

print "Children of `parameter`: "
print parameter.children
print "\nParents of `data_generator`: "
print data_generator.parents
print "\nChildren of `data_generator`: "
print  data_generator.children

print "parameter.value =",parameter.value
print "data_generator.value =",data_generator.value
print "data_plus_one.value =", data_plus_one.value

lambda_1 = mc.Exponential( "lambda_1",  1 ) #prior on first behaviour
lambda_2 = mc.Exponential( "lambda_2", 1 ) #prior on second behaviour
tau = mc.DiscreteUniform( "tau", lower = 0, upper = 10 ) #prior on behaviour change

print "lambda_1.value = %.3f"%lambda_1.value
print "lambda_2.value = %.3f"%lambda_2.value
print "tau.value = %.3f"%tau.value
print

lambda_1.random(), lambda_2.random(), tau.random()

print "After calling random() on the variables..."
print "lambda_1.value = %.3f"%lambda_1.value
print "lambda_2.value = %.3f"%lambda_2.value
print "tau.value = %.3f"%tau.value


type( lambda_1 + lambda_2 )

n_data_points = 5 # in CH1 we had ~70 data points

@mc.deterministic
def lambda_( tau = tau, lambda_1 = lambda_1, lambda_2 = lambda_2 ):
    out = np.zeros(n_data_points)  
    out[:tau] = lambda_1 #lambda before tau is lambda1
    out[tau:] = lambda_2 #lambda after tau is lambda1
    return out

%pylab inline
figsize(12.5, 4)

samples = [ lambda_1.random() for i in range( 20000) ]
hist( samples, bins = 70, normed=True )
plt.title( "Prior distribution for $\lambda_1$")
plt.xlim( 0, 8);

data = np.array( [10, 5] )
fixed_variable = mc.Poisson( "fxd", 1, value = data, observed = True )
print "value: ",fixed_variable.value
print "calling .random()"
fixed_variable.random()
print "value: ",fixed_variable.value

#we're using some fake data here
data = np.array( [ 10, 25, 15, 20, 35] )
obs = mc.Poisson( "obs", lambda_, value = data, observed = True )
print obs.value

model = mc.Model( [obs, lambda_, lambda_1, lambda_2, tau] )

tau = mc.rdiscrete_uniform(0, 80)
print tau

alpha = 1./20.
lambda_1, lambda_2 = mc.rexponential( alpha, 2 )
print lambda_1, lambda_2

data = np.r_[ mc.rpoisson( lambda_1, tau ), mc.rpoisson( lambda_2,80 - tau) ]

plt.bar( np.arange( 80 ), data, color ="#348ABD"  )
plt.bar( tau-1, data[tau-1], color = "r", label = "user behaviour changed" )
plt.xlabel( "Time (days)")
plt.ylabel("count of text-msgs received")
plt.title("Artifical dataset")
plt.xlim( 0, 80 );
plt.legend();

def plot_artificial_sms_dataset():
    tau = mc.rdiscrete_uniform(0, 80)
    alpha = 1./20.
    lambda_1, lambda_2 = mc.rexponential( alpha, 2 )
    data = np.r_[ mc.rpoisson( lambda_1, tau ), mc.rpoisson( lambda_2,80- tau) ]
    plt.bar( np.arange( 80 ), data, color ="#348ABD"  )
    plt.bar( tau-1, data[tau-1], color = "r", label = "user behaviour changed" )
    plt.xlim( 0, 80 );

figsize( 12.5,5)
plt.title("More example of artificial datasets" )
for i in range(4):
    plt.subplot(4,1,i)
    plot_artificial_sms_dataset()


figsize( 12.5, 4) 

import scipy.stats as stats
binomial = stats.binom

parameters = [ (10, .4) , (10, .9) ]  
colors = ["#348ABD", "#A60628"]

for i in range(2):
    N, p = parameters[i]
    _x = np.arange( N+1 )
    plt.bar( _x - 0.5,  binomial.pmf( _x, N, p ), color = colors[i], 
             edgecolor=colors[i],
             alpha = 0.6, 
             label = "$N$: %d, $p$: %.1f"%(N,p), 
             linewidth=3)
    
plt.legend(loc="upper left")
plt.xlim(0, 10.5)
plt.xlabel("$k$")
plt.ylabel("$P(X = k)$")
plt.title("Probability mass distributions of binomial random variables");

import pymc as mc

N = 100

p = mc.Uniform( "freq_cheating", 0, 1) 

true_answers = mc.Bernoulli( "truths", p, size = N)

first_coin_flips = mc.Bernoulli( "first_flips", 0.5, size = N)
print first_coin_flips.value

second_coin_flips = mc.Bernoulli("second_flips", 0.5, size = N)

@mc.deterministic
def observed_proportion( t_a = true_answers, 
                         fc = first_coin_flips,
                         sc = second_coin_flips ):
    
    observed = fc*t_a + (1-fc)*sc 
    return observed.sum()/float(N)

observed_proportion.value

X = 35

observations = mc.Binomial("obs", N, observed_proportion, observed = True, value = X)

model = mc.Model( [p, true_answers, first_coin_flips, 
                   second_coin_flips, observed_proportion, observations] )

### To be explained in Chapter 3!
mcmc = mc.MCMC( model )
mcmc.sample( 150000, 120000,4 )

figsize(12.5, 3 )
from scipy.stats.mstats import mquantiles

p_trace = mcmc.trace("freq_cheating")[:]
quantiles =mquantiles( p_trace, prob=[0.05, 0.95] )

plt.title("Posterior distribution of frequency of cheaters")
plt.hist( p_trace, histtype="stepfilled" , normed = True, 
    alpha = 0.85, bins = 30, label = "posterior distribution",
    color = "#348ABD")
plt.vlines( quantiles, [0,0], [5,5], linestyles = "--" )
plt.xlim(0,1)
plt.legend();

p = mc.Uniform( "freq_cheating", 0, 1) 

@mc.deterministic
def p_skewed( p =  p ):
    return 0.5*p + 0.25

yes_responses = mc.Binomial( "number_cheaters", 100, p_skewed, 
                                value = 35, observed = True )

model = mc.Model( [yes_responses, p_skewed, p ] )

### To Be Explained in Chapter 3!
mcmc = mc.MCMC(model)
mcmc.sample( 12500, 2500 )

figsize(12.5, 3 )
p_trace = mcmc.trace("freq_cheating")[:]
quantiles =mquantiles( p_trace, prob=[0.05, 0.95] )

plt.title("Posterior distribution of frequency of cheaters")
plt.hist( p_trace, histtype="stepfilled" , normed = True, 
    alpha = 0.85, bins = 30, label = "posterior distribution",
    color = "#348ABD")
plt.vlines( quantiles, [0,0], [5,5], linestyles = "--" )
plt.xlim(0,1)
plt.legend();

N = 10
x = np.empty( N , dtype=object )
for i in range(0, N):
        x[i] = mc.Exponential('x_%i' % i, (i+1)**2)

figsize( 12.5, 3.5 )
np.set_printoptions(precision=3, suppress= True)
challenger_data = np.genfromtxt("data/challenger_data.csv", skip_header = 1, \
                               usecols=[1,2], missing_values="NA", delimiter=",")
#drop the NA values
challenger_data = challenger_data[ ~np.isnan(challenger_data[:,1]) ]

#plot it, as a function of tempature (the first column)
print "Temp (F), O-Ring failure?"
print challenger_data

plt.scatter( challenger_data[:,0], challenger_data[:,1], s = 75, color="k", 
 alpha = 0.5) 
plt.yticks([0,1])
plt.ylabel("Damage Incident?")
plt.xlabel("Outside temperature (Fahrenheit)" )
plt.title("Defects of the Space Shuttle O-Rings vs temperature");


figsize(12,3)

def logistic( x, beta):
    return 1.0/( 1.0 + np.exp( beta*x) )

x = np.linspace( -4, 4, 100 )
plt.plot(x, logistic( x, 1), label = r"$\beta = 1$")
plt.plot(x, logistic( x, 3), label = r"$\beta = 3$")
plt.plot(x, logistic( x, -5), label = r"$\beta = -5$")
plt.legend();

def logistic( x, beta, alpha=0):
    return 1.0/( 1.0 + np.exp( np.dot(beta, x) + alpha) )

x = np.linspace( -4, 4, 100 )

plt.plot(x, logistic( x, 1), label = r"$\beta = 1$", ls= "--", lw =1 )
plt.plot(x, logistic( x, 3), label = r"$\beta = 3$", ls= "--", lw =1)
plt.plot(x, logistic( x, -5), label = r"$\beta = -5$", ls= "--", lw =1)


plt.plot(x, logistic( x, 1,1), label = r"$\beta = 1, \alpha = 1$", 
    color = "#348ABD")
plt.plot(x, logistic( x, 3, -2), label = r"$\beta = 3, \alpha = -2$", 
    color="#A60628")
plt.plot(x, logistic( x, -5, 7), label = r"$\beta = -5, \alpha = 7$",
    color = "#7A68A6")

plt.legend(loc = "lower left");

import scipy.stats as stats

nor = stats.norm
x = np.linspace( -8, 7, 150 )
mu = (-2, 0, 3)
tau = (.7, 1, 2.8 )
colors = ["#348ABD", "#A60628", "#7A68A6"]
parameters = zip( mu, tau, colors )

for _mu, _tau, _color in parameters:
    plt.plot( x, nor.pdf( x, _mu , scale = 1./_tau ), \
        label ="$\mu = %d,\;\\tau = %.1f$"%(_mu, _tau), color = _color )
    plt.fill_between( x, nor.pdf( x, _mu, scale =1./_tau ), color = _color, \
         alpha = .33)

plt.legend(loc = "upper right")
plt.xlabel("$x$")
plt.ylabel("density function at $x$")
plt.title("Probability distribution of three different Normal random variables");

import pymc as mc

temperature = challenger_data[:,0]
D = challenger_data[:,1] #defect or not?

#notice the`value` here. We explain why below. 
beta = mc.Normal( "beta", 0, 0.001, value = 0 ) 
alpha = mc.Normal( "alpha", 0, 0.001, value = 0 )

@mc.deterministic
def p( t = temperature, alpha = alpha, beta = beta):
    return 1.0/( 1. + np.exp( beta*t + alpha) ) 


p.value

# connect the probabilities in `p` with our observations through a 
# Bernoulli random variable.
observed = mc.Bernoulli( "bernoulli_obs", p, value = D, observed=True)

model = mc.Model( [observed, beta, alpha] )

#mysterious code to be explained in Chapter 3
map_ = mc.MAP(model)
map_.fit()
mcmc = mc.MCMC( model )
mcmc.sample( 120000, 100000, 2 )

alpha_samples = mcmc.trace( 'alpha' )[:, None] #best to make them 1d
beta_samples = mcmc.trace( 'beta' )[:, None]

figsize(12.5, 6)

#histogram of the samples:
plt.subplot(211)
plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
plt.hist( beta_samples, histtype='stepfilled', bins = 35, alpha = 0.85, \
        label = r"posterior of $\beta$", color = "#7A68A6",normed = True )
plt.legend()

plt.subplot(212)
plt.hist( alpha_samples, histtype='stepfilled', bins = 35, alpha = 0.85, \
        label = r"posterior of $\alpha$", color = "#A60628",normed = True )
plt.legend();


t = np.linspace( temperature.min() - 5, temperature.max()+5, 50 )[:,None]
p_t= logistic( t.T, beta_samples, alpha_samples )

mean_prob_t = p_t.mean(axis=0)

figsize( 12.5, 4)

plt.plot( t, mean_prob_t, lw = 3, label = "average posterior \nprobability \
of defect")
plt.plot( t, p_t[0, :], ls="--",label="realization from posterior" )
plt.plot( t, p_t[-2, :], ls="--", label="realization from posterior"  )
plt.scatter( temperature, D, color = "k", s = 50, alpha = 0.5 )
plt.title("Posterior expected value of probability of defect; plus realizations")
plt.legend(loc= "lower left")
plt.ylim( -0.1, 1.1 )
plt.xlim( t.min(), t.max() )
plt.ylabel("probability")
plt.xlabel("temperature");

from scipy.stats.mstats import mquantiles

# vectorized bottom and top 5% quantiles for "confidence interval"
qs = mquantiles(p_t,[0.05,0.95],axis=0)
plt.fill_between(t[:,0],*qs,alpha = 0.7,
                color = "#7A68A6")

plt.plot(t[:,0], qs[0], label="95% CI", color = "#7A68A6", alpha =0.7)

plt.plot( t, mean_prob_t, lw = 1, ls= "--", color = "k",
        label = "average posterior \nprobability of defect")

plt.xlim( t.min(), t.max() )
plt.ylim( -0.02, 1.02 )
plt.legend(loc="lower left")
plt.scatter( temperature, D, color = "k", s = 50, alpha = 0.5 )
plt.xlabel("temp, $t$")

plt.ylabel("probability estimate" )
plt.title( "Posterior probability estimates given temp. $t$" );

figsize(12.5, 2.5)

prob_31 = logistic( 31, beta_samples, alpha_samples )

plt.xlim( 0.995, 1)
plt.hist( prob_31, bins = 1000, normed = True, histtype='stepfilled' )
plt.title( "Posterior distribution of probability of defect, given $t = 31$")
plt.xlabel( "probability of defect occuring in O-ring" );


simulated = mc.Bernoulli( "bernoulli_sim", p)
N = 10000

mcmc = mc.MCMC( [simulated, alpha, beta, observed ] )
mcmc.sample( N )

figsize(12.5, 5)

simulations = mcmc.trace("bernoulli_sim")[:]
print simulations.shape

plt.title( "Simulated dataset using posterior parameters")
figsize( 12.5, 6)
for i in range(4):
    ax = subplot( 4, 1, i+1)
    plt.scatter( temperature, simulations[1000*i,:], color = "k", 
        s = 50, alpha = 0.6 );


posterior_probability = simulations.mean(axis=0)
print "posterior prob of defect | realized defect "
for i in range( len(D) ):
    print "%.2f                     |   %d"%(posterior_probability[i], D[i])

ix = np.argsort( posterior_probability )
print "probb | defect "
for i in range( len(D) ):
    print "%.2f  |   %d"%(posterior_probability[ix[i]], D[ix[i]])

from separation_plot import separation_plot

figsize( 11., 1.5 )

separation_plot(posterior_probability, D )

figsize( 11., 1.25 )

# our temperature-dependent model
separation_plot(posterior_probability, D )
plt.title("Temperature-dependent model")

# perfect model
# i.e. the probability of defect is equal to if a defect occurred or not.
p = D
separation_plot( p, D)
plt.title("Perfect model")

# random predictions
p = np.random.rand( 23 )
separation_plot( p, D)
plt.title("Random model")

# constant model
constant_prob = 7./23*np.ones(23)
separation_plot(constant_prob, D )
plt.title("Constant-prediction model");

#type your code here.
figsize(12.5, 4 )

plt.scatter( alpha_samples, beta_samples, alpha = 0.1 )
plt.title( "Why does the plot look like this?" )
plt.xlabel( r"$\alpha$")
plt.ylabel( r"$\beta$");

from IPython.core.display import HTML
def css_styling():
    styles = open("../styles/custom.css", "r").read()
    return HTML(styles)
css_styling()