%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3
cs224 last updated: 2020-03-07

CPython 3.6.8
IPython 7.3.0

numpy 1.16.2
xarray 0.11.3
scipy 1.2.1
pandas 0.24.2
sklearn 0.20.3
matplotlib 3.0.3
seaborn 0.9.0
pymc3 3.8
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.integrate, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import datetime, time, math
from dateutil import relativedelta
from collections import OrderedDict
SEED = 42
np.random.seed(SEED)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180
sns.set()
from IPython.display import display, HTML
from IPython.display import display_html
def display_side_by_side(*args):
    html_str = ''
    for df in args:
        if type(df) == np.ndarray:
            df = pd.DataFrame(df)
        html_str += df.to_html()
    html_str = html_str.replace('table', 'table style="display:inline"')
    # print(html_str)
    display_html(html_str, raw=True)
CSS = """
.output {
flex-direction: row;
}
"""
def display_graphs_side_by_side(*args):
    html_str = '<table><tr>'
    for g in args:
        html_str += '<td>'
        html_str += g._repr_svg_()
        html_str += '</td>'
    html_str += '</tr></table>'
    display_html(html_str, raw=True)
display(HTML("<style>.container { width:70% !important; }</style>"))
# tail-biased factory
omega1, kappa1 = 0.25, 12
# head-biased factory
omega2, kappa2 = 0.75, 12
# Flip coin 9 times and get 6 heads
samples = np.array([0,0,0,1,1,1,1,1,1])
def pD(z, N, a, b):
    # analytic evidence for z heads in N flips under a Beta(a, b) prior,
    # evaluated in log-space to avoid underflow in the Beta function
    # return scipy.special.beta(z+a, N-z+b)/scipy.special.beta(a,b)
    return np.exp(np.log(scipy.special.beta(z+a, N-z+b)) - np.log(scipy.special.beta(a,b)))
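This implements the standard analytic result for the evidence of $z$ heads in $N$ flips under a Beta$(a,b)$ prior:

$$p(D) = \int_0^1 \theta^z (1-\theta)^{N-z}\,\frac{\theta^{a-1}(1-\theta)^{b-1}}{B(a,b)}\,d\theta = \frac{B(z+a,\,N-z+b)}{B(a,b)}$$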
def ok2ab(omega, kappa):
    # convert mode omega and concentration kappa into Beta(a, b) shape parameters
    return omega*(kappa-2)+1, (1-omega)*(kappa-2)+1
a1,b1 = ok2ab(omega1,kappa1)
a2,b2 = ok2ab(omega2,kappa2)
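A quick hand check of the conversion: with $\kappa=12$ we get $a=\omega\cdot 10+1$ and $b=(1-\omega)\cdot 10+1$, so we expect $(a_1,b_1)=(3.5, 8.5)$ and $(a_2,b_2)=(8.5, 3.5)$; $\omega$ is then the mode $(a-1)/(a+b-2)$ of the Beta distribution.
(a1, b1), (a2, b2)  # ((3.5, 8.5), (8.5, 3.5))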
p1 = pD(6,9,a1,b1)
p1
0.0004993438720703131
p2 = pD(6,9,a2,b2)
p2
0.0023385561429537273
def fn(a, b):
    # integrand of the evidence: Bernoulli likelihood of the samples times the Beta(a, b) prior density
    return lambda p: stats.bernoulli(p).pmf(samples).prod() * stats.beta(a, b).pdf(p)
fn(1,1)(0.25)
0.000102996826171875
scipy.integrate.quad(fn(a1,b1),0.0,1.0)
(0.0004993438720702594, 2.322913347907592e-09)
p1
0.0004993438720703131
scipy.integrate.quad(fn(a2,b2),0.0,1.0)
(0.002338556142956198, 2.1998184481120156e-11)
p2
0.0023385561429537273
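As a further cross-check (a sketch added here, not part of the original computation), the same integral can be approximated by plain Monte Carlo: draw $\theta$ from the prior and average the Bernoulli likelihood of 6 heads in 9 flips, $\theta^6(1-\theta)^3$, over the draws.
theta = stats.beta(a1, b1).rvs(size=100000, random_state=np.random.RandomState(SEED))
(theta**6 * (1 - theta)**3).mean()  # approximates p1 ~ 0.000499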
# Flip coin 9 times and get 6 heads
priors = ((a1, b1), (a2, b2))
models = []
traces = []
for alpha, beta in priors:
    with pm.Model() as model:
        a = pm.Beta('a', alpha, beta)
        yl = pm.Bernoulli('yl', a, observed=samples)
        trace = pm.sample_smc(4000, n_steps=100, random_seed=42)
        models.append(model)
        traces.append(trace)
Sample initial stage: ...
Stage: 0 Beta: 0.656 Steps: 100 Acce: 1.000
Stage: 1 Beta: 1.000 Steps: 100 Acce: 0.704
Sample initial stage: ...
Stage: 0 Beta: 1.000 Steps: 100 Acce: 1.000
p1
0.0004993438720703131
models[0].marginal_likelihood
0.0004979784247939404
p2
0.0023385561429537273
models[1].marginal_likelihood
0.002364542043803754
BF_smc = models[1].marginal_likelihood / models[0].marginal_likelihood
BF_smc
4.748282106362706
p2/p1
4.683257918552034
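So the SMC estimate of the Bayes factor agrees with the analytic value to within about 1.4% relative error:
BF_analytic = p2 / p1
abs(BF_smc - BF_analytic) / BF_analytic  # ~ 0.014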
# as example we take data distributed as gauss(mu=-3.0, sigma=0.5):
samples2 = stats.norm(loc=-3, scale=0.5).rvs(size=1000, random_state=np.random.RandomState(42))
# this is the log likelihood we expect to see from the point estimate:
stats.norm(loc=-3, scale=0.5).logpdf(samples2).sum()
-704.9307117016448
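As a plausibility check (standard Gaussian identities, not part of the original notebook): at the true parameters the expected sum of log-densities is $-N\left(\log(\sigma\sqrt{2\pi}) + \tfrac{1}{2}\right) \approx -725.8$ with standard deviation $\sqrt{N/2} \approx 22.4$, so the observed $-704.9$ lies well within one standard deviation.
N, sigma = 1000, 0.5
-N * (np.log(sigma * np.sqrt(2 * np.pi)) + 0.5), np.sqrt(N / 2)  # ~ (-725.8, 22.4)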
with pm.Model() as mcmc_model:
    mcmc_model_m = pm.Uniform('mean', lower=-10., upper=10.)  # , transform=None
    mcmc_model_s = pm.Uniform('s', lower=0.1, upper=5.0)  # , transform=None
    mcmc_model_gs = pm.Normal('gs', mu=mcmc_model_m, sigma=mcmc_model_s, observed=samples2)
with mcmc_model:
    mcmc_model_trace = pm.sample(2000, tune=1000, init='adapt_diag')
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [s, mean]
Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:02<00:00, 4874.64draws/s]
pm.summary(mcmc_model_trace).round(3)
|      |  mean |    sd | hpd_3% | hpd_97% | mcse_mean | mcse_sd | ess_mean | ess_sd | ess_bulk | ess_tail | r_hat |
|------|-------|-------|--------|---------|-----------|---------|----------|--------|----------|----------|-------|
| mean | -2.99 | 0.016 | -3.019 |  -2.961 |       0.0 |     0.0 |   7452.0 | 7450.0 |   7447.0 |   5543.0 |   1.0 |
| s    |  0.49 | 0.011 |  0.471 |   0.512 |       0.0 |     0.0 |   7633.0 | 7627.0 |   7636.0 |   6271.0 |   1.0 |
mcmc_model.free_RVs
[mean_interval__, s_interval__]
mcmc_model_trace.varnames
['mean_interval__', 's_interval__', 'mean', 's']
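Because the trace carries both the transformed and the untransformed variables, the posterior means in the original parameter space can be read off directly (they should match the summary table above):
mcmc_model_trace['mean'].mean(), mcmc_model_trace['s'].mean()  # ~ (-2.99, 0.49)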
mcmc_model_m_d = mcmc_model.deterministics[0]
mcmc_model_m_d
mcmc_model_m_d.transformation.backward(-3.0).eval()
array(-9.05148253)
mcmc_model_m_d.transformation.forward(-3.0).eval()
array(-0.61903921)
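These two numbers are consistent with the interval transform being the logit map $y = \log\frac{x - \text{lower}}{\text{upper} - x}$ with inverse $x = \text{lower} + \frac{\text{upper} - \text{lower}}{1 + e^{-y}}$; reproducing them by hand:
np.log((-3.0 - (-10.0)) / (10.0 - (-3.0)))  # forward(-3.0) ~ -0.61903921
-10.0 + 20.0 / (1.0 + np.exp(3.0))          # backward(-3.0) ~ -9.05148253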
For our point estimate, the value of mcmc_model.logp() differs from the value of the observed random variable's mcmc_model_gs.logp() (see next cell). The reason is that mcmc_model.logp() also adds the log-likelihood of the prior distributions, which makes it more negative. mcmc_model_gs.logp(), in contrast, contains only the likelihood term, i.e. it effectively treats the prior as an improper flat prior.
mcmc_model.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})
array(-709.00200049)
mcmc_model_gs.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})
array(-704.9307117)
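The gap of about $-4.071$ between the two values is exactly the prior contribution evaluated on the transformed space: the Uniform log-densities $-\log 20$ and $-\log 4.9$ plus the log-Jacobians of the interval transforms. A hand calculation (assuming the logit transform identified above):
u_m, u_s = (-3.0 + 10.0) / 20.0, (0.5 - 0.1) / 4.9        # relative positions inside the intervals
prior_m = -np.log(20.0) + np.log(20.0 * u_m * (1 - u_m))  # prior logp + log-Jacobian for 'mean'
prior_s = -np.log(4.9) + np.log(4.9 * u_s * (1 - u_s))    # prior logp + log-Jacobian for 's'
prior_m + prior_s  # ~ -4.0713 = -709.00200 - (-704.93071)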
import itertools
dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs])
{'mean_interval__': -0.6169506292247524, 's_interval__': -2.4482681357926412}
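Mapping these transformed-space means back through the inverse transform recovers the familiar estimates:
mcmc_model_m_d.transformation.backward(-0.6169506292247524).eval()                 # ~ -2.99
mcmc_model.deterministics[1].transformation.backward(-2.4482681357926412).eval()  # ~ 0.49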
mcmc_model.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))
array(-708.38320197)
The value of the following cell differs slightly from the one above, because above we used the known $\mu=-3.0$ and $\sigma=0.5$, whereas here we use the estimated $\mu\approx-2.99$ and $\sigma\approx0.49$.
mcmc_model_gs.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))
array(-704.28913595)