%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import datetime, time, math
from dateutil import relativedelta
from collections import OrderedDict
SEED = 42
np.random.seed(SEED)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180
sns.set()
from IPython.display import display, HTML
from IPython.display import display_html
def display_side_by_side(*args):
    # render several DataFrames (ndarrays are converted on the fly) next to each other in one output cell
    html_str = ''
    for df in args:
        if type(df) == np.ndarray:
            df = pd.DataFrame(df)
        html_str += df.to_html()
    html_str = html_str.replace('table', 'table style="display:inline"')
    # print(html_str)
    display_html(html_str, raw=True)
CSS = """
.output {
flex-direction: row;
}
"""
def display_graphs_side_by_side(*args):
    html_str = '<table><tr>'
    for g in args:
        html_str += '<td>'
        html_str += g._repr_svg_()
        html_str += '</td>'
    html_str += '</tr></table>'
    display_html(html_str, raw=True)
display(HTML("<style>.container { width:70% !important; }</style>"))
# tail-biased factory
omega1, kappa1 = 0.25, 12
# head-biased factory
omega2, kappa2 = 0.75, 12
# Flip coin 9 times and get 6 heads
samples = np.array([0,0,0,1,1,1,1,1,1])
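Comparing the two factories is a Bayes-factor model comparison: which prior, the tail-biased or the head-biased one, assigns the observed data (6 heads in 9 flips) the higher marginal likelihood $p(D)$?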
def pD(z, N, a, b):
    # return scipy.special.beta(z+a, N-z+b)/scipy.special.beta(a,b)
    return np.exp(np.log(scipy.special.beta(z+a, N-z+b)) - np.log(scipy.special.beta(a, b)))
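pD evaluates the Beta–Bernoulli marginal likelihood in closed form: for $z$ heads in $N$ flips under a $\mathrm{Beta}(a, b)$ prior, $p(D) = B(z+a,\, N-z+b)\,/\,B(a, b)$, where $B$ is the Beta function. Taking the difference of logs before exponentiating only guards against numerical over-/underflow in the Beta function.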
def ok2ab(omega, kappa):
    return omega*(kappa-2)+1, (1-omega)*(kappa-2)+1
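ok2ab converts a Beta distribution's mode $\omega$ and concentration $\kappa$ (with $\kappa > 2$) into its shape parameters via $a = \omega(\kappa-2)+1$ and $b = (1-\omega)(\kappa-2)+1$.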
a1,b1 = ok2ab(omega1,kappa1)
a2,b2 = ok2ab(omega2,kappa2)
p1 = pD(6,9,a1,b1)
p1
p2 = pD(6,9,a2,b2)
p2
def fn(a, b):
    # unnormalized integrand: Bernoulli likelihood of the observed samples times the Beta(a, b) prior density;
    # integrating it over p should recover the closed-form marginal likelihood from pD above
    return lambda p: stats.bernoulli(p).pmf(samples).prod() * stats.beta(a, b).pdf(p)
fn(1,1)(0.25)
scipy.integrate.quad(fn(a1,b1),0.0,1.0)
p1
scipy.integrate.quad(fn(a2,b2),0.0,1.0)
p2
# Flip coin 9 times and get 6 heads
priors = ((a1, b1), (a2, b2))
models = []
traces = []
for alpha, beta in priors:
    with pm.Model() as model:
        a = pm.Beta('a', alpha, beta)
        yl = pm.Bernoulli('yl', a, observed=samples)
        trace = pm.sample_smc(4000, n_steps=100, random_seed=42)
    models.append(model)
    traces.append(trace)
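pm.sample_smc estimates the model evidence as a by-product of the sequential Monte Carlo run; the following cells read it back from model.marginal_likelihood and compare it with the closed-form values p1 and p2.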
p1
models[0].marginal_likelihood
p2
models[1].marginal_likelihood
BF_smc = models[1].marginal_likelihood / models[0].marginal_likelihood
BF_smc
p2/p1
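Up to Monte Carlo error, the SMC-based Bayes factor BF_smc should agree with the analytic ratio $p_2/p_1$ of the closed-form marginal likelihoods.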
# example data drawn from a Gaussian with mu=-3.0 and sigma=0.5:
samples2 = stats.norm(loc=-3, scale=0.5).rvs(size=1000, random_state=np.random.RandomState(42))
# this is the log likelihood we expect to see from the point estimate:
stats.norm(loc=-3, scale=0.5).logpdf(samples2).sum()
with pm.Model() as mcmc_model:
    mcmc_model_m = pm.Uniform('mean', lower=-10., upper=10.)  # , transform=None
    mcmc_model_s = pm.Uniform('s', lower=0.1, upper=5.0)  # , transform=None
    mcmc_model_gs = pm.Normal('gs', mu=mcmc_model_m, sigma=mcmc_model_s, observed=samples2)
with mcmc_model:
    mcmc_model_trace = pm.sample(2000, tune=1000, init='adapt_diag')
pm.summary(mcmc_model_trace).round(3)
mcmc_model.free_RVs
mcmc_model_trace.varnames
mcmc_model_m_d = mcmc_model.deterministics[0]
mcmc_model_m_d
mcmc_model_m_d.transformation.backward(-3.0).eval()
mcmc_model_m_d.transformation.forward(-3.0).eval()
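As a quick check (a minimal sketch, assuming PyMC3's interval transform maps $x \in (lo, hi)$ to $\log\frac{x - lo}{hi - x}$ and back via the scaled logistic), the forward and backward values above can be reproduced by hand:
lo, hi = -10.0, 10.0                  # bounds of the Uniform prior on 'mean'
np.log((-3.0 - lo) / (hi - (-3.0)))   # should match ...transformation.forward(-3.0) above
lo + (hi - lo) / (1 + np.exp(3.0))    # should match ...transformation.backward(-3.0) above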
For our point estimate, the value of mcmc_model.logp() differs from the dependent random variable's mcmc_model_gs.logp() (see the next cells). The reason is that mcmc_model.logp() also adds the log-probability of the prior distributions, which makes it more negative, whereas mcmc_model_gs.logp() evaluates only the likelihood term, effectively treating the prior as an improper (flat) prior.
mcmc_model.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})
mcmc_model_gs.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})
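A minimal sketch of how the two numbers relate (assuming PyMC3's Factor API, where every free and observed RV exposes a compiled .logp): the model's joint log-probability is the sum of the prior terms on the transformed space plus the likelihood term.
point = {'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(),
         's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()}
prior_logp = sum(rv.logp(point) for rv in mcmc_model.free_RVs)   # prior terms (incl. transform Jacobians)
likelihood_logp = mcmc_model_gs.logp(point)                      # likelihood term only
prior_logp + likelihood_logp                                     # should reproduce mcmc_model.logp(point) above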
import itertools
dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs])
mcmc_model.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))
The value of the following cell differs slightly from the one above, because above we used the known $\mu=-3.0$ and $\sigma=0.5$, while in the following cell we use the estimated $\mu \approx -2.99$ and $\sigma \approx 0.49$ from the trace.
mcmc_model_gs.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))