In [1]:
%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3
cs224 
last updated: 2020-03-07 

CPython 3.6.8
IPython 7.3.0

numpy 1.16.2
xarray 0.11.3
scipy 1.2.1
pandas 0.24.2
sklearn 0.20.3
matplotlib 3.0.3
seaborn 0.9.0
pymc3 3.8
In [2]:
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl

import pymc3 as pm

import theano as thno
import theano.tensor as T

import datetime, time, math
from dateutil import relativedelta

from collections import OrderedDict

SEED = 42
np.random.seed(SEED)

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180

sns.set()
In [3]:
from IPython.display import display, HTML

from IPython.display import display_html
def display_side_by_side(*args):
    html_str=''
    for df in args:
        if type(df) == np.ndarray:
            df = pd.DataFrame(df)
        html_str+=df.to_html()
    html_str = html_str.replace('table','table style="display:inline"')
    # print(html_str)
    display_html(html_str,raw=True)

CSS = """
.output {
    flex-direction: row;
}
"""

def display_graphs_side_by_side(*args):
    html_str='<table><tr>'
    for g in args:
        html_str += '<td>'
        html_str += g._repr_svg_()
        html_str += '</td>'
    html_str += '</tr></table>'
    display_html(html_str,raw=True)
    

display(HTML("<style>.container { width:70% !important; }</style>"))
In [4]:
# tail-biased factory
omega1, kappa1 = 0.25, 12
In [5]:
# head-biased factory
omega2, kappa2 = 0.75, 12
In [6]:
# Flip a coin 9 times and get 6 heads
samples = np.array([0,0,0,1,1,1,1,1,1])
In [7]:
def pD(z,N,a,b):
    """Analytic marginal likelihood of z heads in N flips under a Beta(a, b) prior,
    computed via log Beta functions for better numerical behaviour."""
    #return scipy.special.beta(z+a, N-z+b)/scipy.special.beta(a,b)
    return np.exp(np.log(scipy.special.beta(z+a, N-z+b)) - np.log(scipy.special.beta(a,b)))
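
The closed-form expression implemented by pD follows from Beta/Bernoulli conjugacy: integrating the Bernoulli likelihood of $z$ heads in $N$ flips against the Beta$(a,b)$ prior gives

$$p(D) = \int_0^1 \theta^{z}(1-\theta)^{N-z}\,\frac{\theta^{a-1}(1-\theta)^{b-1}}{B(a,b)}\,d\theta = \frac{B(z+a,\,N-z+b)}{B(a,b)}$$

which is exactly the ratio of Beta functions computed above (evaluated in log space).
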
In [8]:
def ok2ab(omega, kappa):
    # convert mode omega and concentration kappa of a Beta distribution into its shape parameters (a, b)
    return omega*(kappa-2)+1,(1-omega)*(kappa-2)+1
In [9]:
a1,b1 = ok2ab(omega1,kappa1)
a2,b2 = ok2ab(omega2,kappa2)
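
As a quick consistency check (not part of the original computation), the mode of a Beta$(a,b)$ distribution with $a,b>1$ is $(a-1)/(a+b-2)$, so converting back should recover the factory biases:

# modes of the two Beta priors; these should reproduce omega1 = 0.25 and omega2 = 0.75
(a1 - 1) / (a1 + b1 - 2), (a2 - 1) / (a2 + b2 - 2)
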
In [10]:
p1 = pD(6,9,a1,b1)
p1
Out[10]:
0.0004993438720703131
In [11]:
p2 = pD(6,9,a2,b2)
p2
Out[11]:
0.0023385561429537273
In [12]:
def fn(a, b):
    # un-normalized posterior integrand: Bernoulli likelihood of the global samples times the Beta(a, b) prior density at p
    return lambda p: stats.bernoulli(p).pmf(samples).prod() * stats.beta(a,b).pdf(p)
In [13]:
fn(1,1)(0.25)  # likelihood of 6 heads in 9 flips at p=0.25; the flat Beta(1,1) prior density is 1
Out[13]:
0.000102996826171875
In [14]:
scipy.integrate.quad(fn(a1,b1),0.0,1.0)
Out[14]:
(0.0004993438720702594, 2.322913347907592e-09)
In [15]:
p1
Out[15]:
0.0004993438720703131
In [16]:
scipy.integrate.quad(fn(a2,b2),0.0,1.0)
Out[16]:
(0.002338556142956198, 2.1998184481120156e-11)
In [17]:
p2
Out[17]:
0.0023385561429537273

Via PyMC3

In [18]:
# Flip a coin 9 times and get 6 heads
priors = ((a1, b1), (a2, b2))
models = []
traces = []
for alpha, beta in priors:
    with pm.Model() as model:
        a = pm.Beta('a', alpha, beta)
        yl = pm.Bernoulli('yl', a, observed=samples)
        trace = pm.sample_smc(4000, n_steps=100, random_seed=42)
        models.append(model)
        traces.append(trace)
Sample initial stage: ...
Stage:   0 Beta: 0.656 Steps: 100 Acce: 1.000
Stage:   1 Beta: 1.000 Steps: 100 Acce: 0.704
Sample initial stage: ...
Stage:   0 Beta: 1.000 Steps: 100 Acce: 1.000
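
pm.sample_smc estimates the marginal likelihood as a by-product of its tempered sampling stages and stores it on the model as model.marginal_likelihood; the next cells compare these estimates against the analytic values p1 and p2.
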
In [19]:
p1
Out[19]:
0.0004993438720703131
In [20]:
models[0].marginal_likelihood
Out[20]:
0.0004979784247939404
In [21]:
p2
Out[21]:
0.0023385561429537273
In [22]:
models[1].marginal_likelihood
Out[22]:
0.002364542043803754
In [23]:
BF_smc = models[1].marginal_likelihood / models[0].marginal_likelihood
BF_smc
Out[23]:
4.748282106362706
In [24]:
p2/p1
Out[24]:
4.683257918552034
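
The SMC estimate of the Bayes factor agrees with the analytic ratio p2/p1 up to Monte Carlo error; a quick check of the relative deviation (not in the original notebook):

BF_analytic = p2 / p1
abs(BF_smc - BF_analytic) / BF_analytic  # about 1.4% for the run above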

PyMC3 log-likelihood at the point estimate

In [25]:
# example data drawn from a Gaussian with mu=-3.0 and sigma=0.5:
samples2 = stats.norm(loc=-3, scale=0.5).rvs(size=1000, random_state=np.random.RandomState(42))
In [26]:
# this is the log likelihood we expect to see from the point estimate:
stats.norm(loc=-3, scale=0.5).logpdf(samples2).sum()
Out[26]:
-704.9307117016448
In [27]:
with pm.Model() as mcmc_model:
    mcmc_model_m = pm.Uniform('mean', lower=-10., upper=10.) # , transform=None
    mcmc_model_s = pm.Uniform('s', lower=0.1, upper=5.0) # , transform=None
    mcmc_model_gs = pm.Normal('gs', mu=mcmc_model_m, sigma=mcmc_model_s, observed=samples2)
In [28]:
with mcmc_model:
    mcmc_model_trace = pm.sample(2000, tune=1000, init='adapt_diag')
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [s, mean]
Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:02<00:00, 4874.64draws/s]
In [29]:
pm.summary(mcmc_model_trace).round(3)
Out[29]:
       mean     sd  hpd_3%  hpd_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  ess_bulk  ess_tail  r_hat
mean  -2.99  0.016  -3.019   -2.961        0.0      0.0    7452.0  7450.0    7447.0    5543.0    1.0
s      0.49  0.011   0.471    0.512        0.0      0.0    7633.0  7627.0    7636.0    6271.0    1.0
In [30]:
mcmc_model.free_RVs
Out[30]:
[mean_interval__, s_interval__]
In [31]:
mcmc_model_trace.varnames
Out[31]:
['mean_interval__', 's_interval__', 'mean', 's']
In [32]:
mcmc_model_m_d = mcmc_model.deterministics[0]
mcmc_model_m_d
Out[32]:
$\text{mean} \sim \text{Uniform}(\mathit{lower}=-10.0,~\mathit{upper}=10.0)$
In [33]:
mcmc_model_m_d.transformation.backward(-3.0).eval()
Out[33]:
array(-9.05148253)
In [34]:
mcmc_model_m_d.transformation.forward(-3.0).eval()
Out[34]:
array(-0.61903921)
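
PyMC3's interval transform for a Uniform(lower, upper) variable is essentially a scaled logit: forward maps $x$ to $\log\frac{x-\mathit{lower}}{\mathit{upper}-x}$ and backward maps $y$ to $\mathit{lower} + (\mathit{upper}-\mathit{lower})\,\mathrm{sigmoid}(y)$. A small sketch with scipy's expit reproduces the two values above:

from scipy.special import expit  # logistic sigmoid

lower, upper = -10.0, 10.0
np.log((-3.0 - lower) / (upper - (-3.0)))  # forward(-3.0)  -> approx -0.6190
lower + (upper - lower) * expit(-3.0)      # backward(-3.0) -> approx -9.0515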

For our point estimate, mcmc_model.logp() gives a different value than the observed random variable's mcmc_model_gs.logp() (see the next two cells). The reason is that mcmc_model.logp() also adds the log-density of the prior distributions, which makes the result more negative. mcmc_model_gs.logp(), in contrast, evaluates only the likelihood of the observed data, which amounts to treating the prior as improper (flat).

In [35]:
mcmc_model.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})
Out[35]:
array(-709.00200049)
In [36]:
mcmc_model_gs.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})
Out[36]:
array(-704.9307117)
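
The gap between the two values is precisely the log-density contribution of the priors, evaluated on the transformed (interval) scale, i.e. including the Jacobian terms of the transforms. A sketch of that decomposition, assuming the free RVs' logp methods accept a point dict the same way the model's and the observed variable's do above:

point = {'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(),
         's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()}
prior_logp = sum(rv.logp(point) for rv in mcmc_model.free_RVs)
prior_logp + mcmc_model_gs.logp(point)  # should reproduce mcmc_model.logp(point), approx -709.00
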
In [37]:
import itertools
In [38]:
dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs])
Out[38]:
{'mean_interval__': -0.6169506292247524, 's_interval__': -2.4482681357926412}
In [39]:
mcmc_model.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))
Out[39]:
array(-708.38320197)

The value of the following cell differs slightly from the one above, because above we used the known $\mu=-3.0$ and $\sigma=0.5$, whereas in the following cell we use the estimated posterior means $\mu \approx -2.99$ and $\sigma \approx 0.49$.

In [40]:
mcmc_model_gs.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))
Out[40]:
array(-704.28913595)
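
The same number can be reproduced (approximately) outside of PyMC3 by mapping the posterior means of the transformed variables back to the constrained scale and evaluating the Gaussian log-likelihood with scipy; a sketch, ignoring the small difference between the mean of the transformed samples and the transform of the mean:

mean_hat = mcmc_model_m_d.transformation.backward(mcmc_model_trace['mean_interval__'].mean()).eval()
s_hat = mcmc_model.deterministics[1].transformation.backward(mcmc_model_trace['s_interval__'].mean()).eval()
stats.norm(loc=mean_hat, scale=s_hat).logpdf(samples2).sum()  # approx -704.29
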
In [ ]: