# Think Bayes¶

In [1]:
# Configure Jupyter so figures appear in the notebook
%matplotlib inline

# Configure Jupyter to display the assigned value after an assignment
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'

import numpy as np
import pandas as pd

# import classes from thinkbayes2
from thinkbayes2 import Pmf, Cdf, Suite, Joint

from thinkbayes2 import MakePoissonPmf, EvalBinomialPmf, MakeMixture

import thinkplot


## The Geiger counter problem¶

I got the idea for the following problem from Tom Campbell-Ricketts, author of the Maximum Entropy blog. And he got the idea from E. T. Jaynes, author of the classic Probability Theory: The Logic of Science:

Suppose that a radioactive source emits particles toward a Geiger counter at an average rate of r particles per second, but the counter only registers a fraction, f, of the particles that hit it. If f is 10% and the counter registers 15 particles in a one second interval, what is the posterior distribution of n, the actual number of particles that hit the counter, and r, the average rate particles are emitted?

### Grid algorithm¶

In [2]:
class Logistic(Suite, Joint):

def Likelihood(self, data, hypo):
"""

data: k, number of particles detected
hypo: r, emission rate in particles per second
"""
return 1

In [3]:
r = 160
k = 15
f = 0.1

pmf = MakePoissonPmf(r, high=500)
thinkplot.Hist(pmf)

In [4]:
total = 0
for n, p in pmf.Items():
total += p * EvalBinomialPmf(k, n, f)

total

Out[4]:
0.09921753162215896
In [5]:
def compute_likelihood(k, r, f):
pmf = MakePoissonPmf(r, high=500)
total = 0
for n, p in pmf.Items():
total += p * EvalBinomialPmf(k, n, f)


In [6]:
compute_likelihood(k, r, f)

Out[6]:
0.09921753162215896
In [7]:
likes = pd.Series([])
for kk in range(0, 40):
likes[kk] = compute_likelihood(kk, r, f)

In [8]:
likes.plot()
thinkplot.decorate(xlabel='Counter particles (n)',
ylabel='PMF')

In [9]:
# Solution

class Logistic(Suite, Joint):
f = 0.1

def Likelihood(self, data, hypo):
"""

data: k, number of particles detected
hypo: r, emission rate in particles per second

"""
k = data
r = hypo
return compute_likelihood(k, r, self.f)

In [10]:
rs = np.linspace(0, 300, 51);

In [11]:
suite = Logistic(rs);

In [12]:
suite.Update(15)

Out[12]:
0.03262565933783285
In [13]:
thinkplot.Pdf(suite)
thinkplot.decorate(xlabel='Emission rate (particles/second)',
ylabel='PMF',
title='Posterior marginal distribution')


### MCMC¶

Implement this model using MCMC. As a starting place, you can use this example from the PyMC3 docs.

As a challege, try writing the model more explicitly, rather than using the GLM module.

In [14]:
import pymc3 as pm

/home/downey/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from float to np.floating is deprecated. In future, it will be treated as np.float64 == np.dtype(float).type.
from ._conv import register_converters as _register_converters

In [15]:
# Solution

f = 0.1
model = pm.Model()

with model:
r = pm.Uniform('r', 0, 500)
n = pm.Poisson('n', r)
k = pm.Binomial('k', n, f, observed=15)
trace = pm.sample_prior_predictive(1000)

In [16]:
thinkplot.Cdf(Cdf(trace['r']));

In [17]:
thinkplot.Cdf(Cdf(trace['n']));

In [18]:
thinkplot.Cdf(Cdf(trace['k']));

In [19]:
with model:
trace = pm.sample(1000, tune=3000)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [r]
>Metropolis: [n]
Sampling 4 chains: 100%|██████████| 16000/16000 [00:04<00:00, 3852.03draws/s]
The acceptance probability does not match the target. It is 0.46421389596339574, but should be close to 0.8. Try to increase the number of tuning steps.
The estimated number of effective samples is smaller than 200 for some parameters.

In [20]:
pm.traceplot(trace);

In [21]:
n_sample = trace['n']
thinkplot.Cdf(Cdf(n_sample))

Out[21]:
{'xscale': 'linear', 'yscale': 'linear'}
In [22]:
r_sample = trace['r']
thinkplot.Cdf(Cdf(r_sample))

Out[22]:
{'xscale': 'linear', 'yscale': 'linear'}
In [23]:
thinkplot.Cdf(suite.MakeCdf())
thinkplot.Cdf(Cdf(r_sample))

Out[23]:
{'xscale': 'linear', 'yscale': 'linear'}

### Grid algorithm, version 2¶

In [24]:
# Solution

class Logistic(Suite, Joint):
f = 0.1

def Likelihood(self, data, hypo):
"""

data: k, number of particles detected
hypo: r, n
"""
k = data
r, n = hypo
return EvalBinomialPmf(k, n, self.f)

In [25]:
rs = np.linspace(0, 300, 51);

In [26]:
suite = Logistic()

for r in rs:
pmf = MakePoissonPmf(r, high=500)
for n, p in pmf.Items():
suite[r, n] += p

suite.Normalize()

Out[26]:
50.99999999999964
In [27]:
suite.Update(15)

Out[27]:
0.03262565933783306
In [28]:
pmf_r = suite.Marginal(0)
thinkplot.Pdf(pmf_r)
thinkplot.decorate(xlabel='Emission rate (particles/second)',
ylabel='PMF',
title='Posterior marginal distribution')

In [29]:
pmf_n = suite.Marginal(1)
thinkplot.Pdf(pmf_n)
thinkplot.decorate(xlabel='Number of particles (n)',
ylabel='PMF',
title='Posterior marginal distribution')


### Hierarchical version, as in the book¶

In [30]:
class Detector(Suite):

def __init__(self, r, f, high=500):
"""Initializes the suite.

r: known emission rate, r
f: fraction of particles registered
high: maximum number of particles, n
"""
pmf = MakePoissonPmf(r, high)
super().__init__(pmf)
self.r = r
self.f = f

def Likelihood(self, data, hypo):
"""Likelihood of the data given the hypothesis.

data: number of particles counted
hypo: number of particles hitting the counter, n
"""
k = data
n = hypo

return EvalBinomialPmf(k, n, self.f)

In [31]:
r = 160
k = 15
f = 0.1

suite = Detector(r, f);

In [32]:
suite.Update(15)

Out[32]:
0.09921753162215896
In [33]:
class Emitter(Suite):

def Likelihood(self, data, hypo):
"""Likelihood of the data given the hypothesis.

data: number of counted per unit time
hypo: Detector object
"""
return hypo.Update(data)

In [34]:
rs = np.linspace(0, 300, 51);

In [35]:
detectors = [Detector(r, f=0.1) for r in rs[1:]]
suite = Emitter(detectors);

In [36]:
suite.Update(15)

Out[36]:
0.03327817252458951
In [37]:
pmf_r = Pmf()
for detector, p in suite.Items():
pmf_r[detector.r] = p

thinkplot.Pdf(pmf_r)

In [38]:
mix = MakeMixture(suite);

In [39]:
thinkplot.Pdf(mix)

In [ ]: