Think Bayes

Copyright 2018 Allen B. Downey

MIT License: https://opensource.org/licenses/MIT

In [1]:
# Configure Jupyter so figures appear in the notebook
%matplotlib inline

# Configure Jupyter to display the assigned value after an assignment
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'

import numpy as np
import pandas as pd

# import classes from thinkbayes2
from thinkbayes2 import Pmf, Cdf, Suite, Joint

from thinkbayes2 import MakePoissonPmf, EvalBinomialPmf, MakeMixture

import thinkplot

The Geiger counter problem

I got the idea for the following problem from Tom Campbell-Ricketts, author of the Maximum Entropy blog. And he got the idea from E. T. Jaynes, author of the classic Probability Theory: The Logic of Science:

Suppose that a radioactive source emits particles toward a Geiger counter at an average rate of r particles per second, but the counter only registers a fraction, f, of the particles that hit it. If f is 10% and the counter registers 15 particles in a one second interval, what is the posterior distribution of n, the actual number of particles that hit the counter, and r, the average rate particles are emitted?

Grid algorithm

In [2]:
class Logistic(Suite, Joint):
    # NOTE(review): the name `Logistic` appears to be copied from another
    # notebook; this suite represents hypotheses about the Geiger counter's
    # emission rate, not a logistic model.
    
    def Likelihood(self, data, hypo):
        """Likelihood of the data under the hypothesis.

        Placeholder implementation: returning a constant leaves the prior
        unchanged.  The exercise is to replace this with the real
        likelihood (see the solution cell below).

        data: k, number of particles detected
        hypo: r, emission rate in particles per second
        """
        return 1
In [3]:
# Problem parameters: emission rate, detected count, detection fraction.
r = 160
k = 15
f = 0.1

# Prior distribution of n, the number of particles that hit the counter:
# Poisson with rate r, truncated at 500.
pmf = MakePoissonPmf(r, high=500)
thinkplot.Hist(pmf)
In [4]:
# Total probability of detecting k particles: marginalize over n,
# summing P(n particles hit) * P(k detected | n hit).
total = sum(prob * EvalBinomialPmf(k, n, f) for n, prob in pmf.Items())
total
Out[4]:
0.09921753162215896
In [5]:
def compute_likelihood(k, r, f, high=500):
    """Computes the likelihood of detecting k particles.

    Marginalizes over n, the unknown number of particles that hit the
    counter: n is Poisson(r), and given n, the detected count k is
    Binomial(n, f).

    k: int, number of particles detected
    r: float, emission rate in particles per second
    f: float, fraction of particles the counter registers
    high: int, truncation point for the Poisson distribution of n

    returns: float, probability of detecting k particles
    """
    pmf = MakePoissonPmf(r, high=high)
    return sum(p * EvalBinomialPmf(k, n, f) for n, p in pmf.Items())
In [6]:
compute_likelihood(k, r, f)
Out[6]:
0.09921753162215896
In [7]:
# Likelihood of each possible detected count k, at fixed r and f.
# Built in one shot: growing a Series element-by-element is quadratic,
# and `pd.Series([])` without a dtype is deprecated (defaults to object).
ks = range(40)
likes = pd.Series([compute_likelihood(kk, r, f) for kk in ks],
                  index=ks, dtype=float);
In [8]:
likes.plot()
# The x axis is k, the number of particles *detected*, and the values are
# likelihoods (not normalized), so the original labels were misleading.
thinkplot.decorate(xlabel='Detected particles (k)',
                   ylabel='Likelihood')
In [9]:
# Solution

class Logistic(Suite, Joint):
    # Fraction of particles the counter registers; a class attribute so it
    # is shared by all hypotheses.
    f = 0.1
    
    def Likelihood(self, data, hypo):
        """Likelihood of the data under the hypothesis.

        Marginalizes over n by delegating to compute_likelihood.

        data: k, number of particles detected
        hypo: r, emission rate in particles per second
        """
        k = data
        r = hypo
        return compute_likelihood(k, r, self.f)
In [10]:
rs = np.linspace(0, 300, 51);
In [11]:
suite = Logistic(rs);
In [12]:
suite.Update(15)
Out[12]:
0.03262565933783285
In [13]:
thinkplot.Pdf(suite)
thinkplot.decorate(xlabel='Emission rate (particles/second)',
                   ylabel='PMF',
                   title='Posterior marginal distribution')

MCMC

Implement this model using MCMC. As a starting place, you can use this example from the PyMC3 docs.

As a challenge, try writing the model more explicitly, rather than using the GLM module.

In [14]:
import pymc3 as pm
/home/downey/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
In [15]:
# Solution

# Fraction of particles the counter registers.
f = 0.1
model = pm.Model()

with model:
    # Uniform prior on the emission rate (particles per second).
    r = pm.Uniform('r', 0, 500)
    # Number of particles that hit the counter in one second.
    n = pm.Poisson('n', r)
    # Number of particles detected; the observed value is 15.
    k = pm.Binomial('k', n, f, observed=15)
    # Sample from the prior predictive distribution as a sanity check
    # before running inference.
    trace = pm.sample_prior_predictive(1000)
In [16]:
thinkplot.Cdf(Cdf(trace['r']));
In [17]:
thinkplot.Cdf(Cdf(trace['n']));
In [18]:
thinkplot.Cdf(Cdf(trace['k']));
In [19]:
with model:
    trace = pm.sample(1000, tune=3000)
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [r]
>Metropolis: [n]
Sampling 4 chains: 100%|██████████| 16000/16000 [00:04<00:00, 3852.03draws/s]
The acceptance probability does not match the target. It is 0.46421389596339574, but should be close to 0.8. Try to increase the number of tuning steps.
The estimated number of effective samples is smaller than 200 for some parameters.
In [20]:
pm.traceplot(trace);
In [21]:
n_sample = trace['n']
# Trailing semicolon suppresses the dict that thinkplot.Cdf returns,
# consistent with the prior-predictive plot cells above.
thinkplot.Cdf(Cdf(n_sample));
Out[21]:
{'xscale': 'linear', 'yscale': 'linear'}
In [22]:
r_sample = trace['r']
# Trailing semicolon suppresses the dict that thinkplot.Cdf returns,
# consistent with the prior-predictive plot cells above.
thinkplot.Cdf(Cdf(r_sample));
Out[22]:
{'xscale': 'linear', 'yscale': 'linear'}
In [23]:
# Compare the grid posterior of r with the MCMC posterior.
thinkplot.Cdf(suite.MakeCdf())
# Trailing semicolon suppresses the dict that thinkplot.Cdf returns.
thinkplot.Cdf(Cdf(r_sample));
Out[23]:
{'xscale': 'linear', 'yscale': 'linear'}

Grid algorithm, version 2

In [24]:
# Solution

class Logistic(Suite, Joint):
    # Fraction of particles the counter registers.
    f = 0.1
    
    def Likelihood(self, data, hypo):
        """Likelihood of the data under the hypothesis.

        data: k, number of particles detected
        hypo: tuple (r, n) — emission rate and number of particles
              that hit the counter
        """
        k = data
        r, n = hypo
        # Given n, the detected count is Binomial(n, f); r does not appear
        # here because it only affects the prior over (r, n).
        return EvalBinomialPmf(k, n, self.f)
In [25]:
rs = np.linspace(0, 300, 51);
In [26]:
suite = Logistic()

# Build the joint prior over (r, n): the prior on r is implicitly uniform
# over the grid, and for each r, n has a truncated Poisson(r) distribution.
for r in rs:
    pmf = MakePoissonPmf(r, high=500)
    for n, p in pmf.Items():
        suite[r, n] += p
        
suite.Normalize()
Out[26]:
50.99999999999964
In [27]:
suite.Update(15)
Out[27]:
0.03262565933783306
In [28]:
pmf_r = suite.Marginal(0)
thinkplot.Pdf(pmf_r)
thinkplot.decorate(xlabel='Emission rate (particles/second)',
                   ylabel='PMF',
                   title='Posterior marginal distribution')
In [29]:
pmf_n = suite.Marginal(1)
thinkplot.Pdf(pmf_n)
thinkplot.decorate(xlabel='Number of particles (n)',
                   ylabel='PMF',
                   title='Posterior marginal distribution')

Hierarchical version, as in the book

In [30]:
class Detector(Suite):
    """Represents hypotheses about n.

    Bottom level of the hierarchical model: for a known emission rate r,
    the prior on n is a truncated Poisson(r) distribution.
    """

    def __init__(self, r, f, high=500):
        """Initializes the suite.

        r: known emission rate, r
        f: fraction of particles registered
        high: maximum number of particles, n
        """
        # Truncated Poisson prior on n.
        pmf = MakePoissonPmf(r, high)
        super().__init__(pmf)
        self.r = r
        self.f = f

    def Likelihood(self, data, hypo):
        """Likelihood of the data given the hypothesis.

        data: number of particles counted
        hypo: number of particles hitting the counter, n
        """
        k = data
        n = hypo

        # Each of the n particles is registered independently with
        # probability f, so the count k is Binomial(n, f).
        return EvalBinomialPmf(k, n, self.f)
In [31]:
r = 160
k = 15
f = 0.1

suite = Detector(r, f);
In [32]:
suite.Update(15)
Out[32]:
0.09921753162215896
In [33]:
class Emitter(Suite):
    """Represents hypotheses about r.

    Top level of the hierarchical model: each hypothesis is a Detector,
    which encapsulates a candidate emission rate r.
    """

    def Likelihood(self, data, hypo):
        """Likelihood of the data given the hypothesis.

        data: number of particles counted per unit time
        hypo: Detector object
        """
        # Updating the nested Detector returns its normalizing constant,
        # which is exactly the likelihood of the data under that detector's
        # rate r.  Note the side effect: the detector's distribution over n
        # is updated in place at the same time.
        return hypo.Update(data)
In [34]:
rs = np.linspace(0, 300, 51);
In [35]:
detectors = [Detector(r, f=0.1) for r in rs[1:]]
suite = Emitter(detectors);
In [36]:
suite.Update(15)
Out[36]:
0.03327817252458951
In [37]:
# Extract the posterior marginal distribution of r: each hypothesis is a
# Detector object, so map its rate r to its posterior probability.
pmf_r = Pmf()
for detector, p in suite.Items():
    pmf_r[detector.r] = p
    
thinkplot.Pdf(pmf_r)
In [38]:
mix = MakeMixture(suite);
In [39]:
thinkplot.Pdf(mix)
In [ ]: