%matplotlib inline
import pymc3 as pm
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])
PrPV = 0.95
PrPM = 0.01
PrV = 0.001
PrP = PrPV * PrV + PrPM * (1 - PrV)
PrVP = PrPV * PrV / PrP
PrVP
0.08683729433272395
We are goint to use the same function we use on chapter 2 (code 2.3)
def posterior_grid_approx(grid_points=100, success=6, tosses=9):
"""
"""
# define grid
p_grid = np.linspace(0, 1, grid_points)
# define prior
prior = np.repeat(5, grid_points) # uniform
#prior = (p_grid >= 0.5).astype(int) # truncated
#prior = np.exp(- 5 * abs(p_grid - 0.5)) # double exp
# compute likelihood at each point in the grid
likelihood = stats.binom.pmf(success, tosses, p_grid)
# compute product of likelihood and prior
unstd_posterior = likelihood * prior
# standardize the posterior, so it sums to 1
posterior = unstd_posterior / unstd_posterior.sum()
return p_grid, posterior
p_grid, posterior = posterior_grid_approx(grid_points=100, success=6, tosses=9)
samples = np.random.choice(p_grid, p=posterior, size=int(1e4), replace=True)
_, (ax0, ax1) = plt.subplots(1,2, figsize=(12,6))
ax0.plot(samples, 'o', alpha=0.2)
ax0.set_xlabel('sample number', fontsize=14)
ax0.set_ylabel('proportion water (p)', fontsize=14)
sns.kdeplot(samples, ax=ax1)
ax1.set_xlabel('proportion water (p)', fontsize=14)
ax1.set_ylabel('density', fontsize=14);
sum(posterior[ p_grid < 0.5 ])
0.17183313110747478
sum( samples < 0.5 ) / 1e4
0.1747
sum((samples > 0.5) & (samples < 0.75)) / 1e4
0.6089
np.percentile(samples, 80)
0.7575757575757577
np.percentile(samples, [10, 90])
array([0.44444444, 0.80808081])
p_grid, posterior = posterior_grid_approx(success=3, tosses=3)
plt.plot(p_grid, posterior)
plt.xlabel('proportion water (p)', fontsize=14)
plt.ylabel('Density', fontsize=14);
samples = np.random.choice(p_grid, p=posterior, size=int(1e4), replace=True)
np.percentile(samples, [25, 75])
array([0.70707071, 0.93939394])
pm.hpd(samples, alpha=0.5)
array([0.82828283, 0.98989899])
p_grid[posterior == max(posterior)]
array([1.])
stats.mode(samples)[0]
array([0.98989899])
np.mean(samples), np.median(samples)
(0.8004969696969696, 0.8383838383838385)
sum(posterior * abs(0.5 - p_grid))
0.31626874808692995
loss = [sum(posterior * abs(p - p_grid)) for p in p_grid]
p_grid[loss == min(loss)]
array([0.84848485])
stats.binom.pmf(range(3), n=2, p=0.7)
array([0.09, 0.42, 0.49])
stats.binom.rvs(n=2, p=0.7, size=1)
array([1])
stats.binom.rvs(n=2, p=0.7, size=10)
array([2, 1, 1, 1, 1, 1, 1, 1, 2, 2])
dummy_w = stats.binom.rvs(n=2, p=0.7, size=int(1e5))
[(dummy_w == i).mean() for i in range(3)]
[0.09013, 0.41897, 0.4909]
dummy_w = stats.binom.rvs(n=9, p=0.7, size=int(1e5))
#dummy_w = stats.binom.rvs(n=9, p=0.6, size=int(1e4))
#dummy_w = stats.binom.rvs(n=9, p=samples)
plt.hist(dummy_w, bins=50)
plt.xlabel('dummy water count', fontsize=14)
plt.ylabel('Frequency', fontsize=14);
p_grid, posterior = posterior_grid_approx(grid_points=100, success=6, tosses=9)
np.random.seed(100)
samples = np.random.choice(p_grid, p=posterior, size=int(1e4), replace=True)
birth1 = np.array([1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0, 0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, 1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,1,1,1])
birth2 = np.array([0,1,0,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,
1,1,1,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,
0,0,0,1,1,1,0,0,0,0])
sum(birth1) + sum(birth2)
111
import sys, IPython, scipy, matplotlib, platform
print("This notebook was createad on a computer %s running %s and using:\nPython %s\nIPython %s\nPyMC3 %s\nNumPy %s\nSciPy %s\nMatplotlib %s\nSeaborn %s\n" % (platform.machine(), ' '.join(platform.linux_distribution()[:2]), sys.version[:5], IPython.__version__, pm.__version__, np.__version__, scipy.__version__, matplotlib.__version__, sns.__version__))
This notebook was createad on a computer x86_64 running and using: Python 3.5.4 IPython 6.2.1 PyMC3 3.4.1 NumPy 1.14.5 SciPy 1.0.0 Matplotlib 2.2.2 Seaborn 0.8.1