import pandas as pd
import seaborn as sns
import torch
import pyro
import pyro.distributions as dist
import pyro.ops.stats as stats
import rethinking
# Bayes' theorem "vampire test" example (R code 3.1):
# Pr(positive | vampire), Pr(positive | mortal), Pr(vampire).
PrPV = 0.95
PrPM = 0.01
PrV = 0.001
# Average probability of a positive test (law of total probability).
PrP = PrPV * PrV + PrPM * (1 - PrV)
# Posterior probability of being a vampire given a positive test.
PrVP = PrPV * PrV / PrP
print(PrVP)
# expected: 0.08683729433272395
# Grid approximation of the globe-tossing posterior: 6 waters in 9 tosses,
# flat prior over a 1000-point grid on [0, 1] (R code 3.2).
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.tensor(1.).repeat(1000)
likelihood = dist.Binomial(total_count=9,
                           probs=p_grid).log_prob(torch.tensor(6.)).exp()
posterior = likelihood * prior
# Normalize with the tensor method, consistent with the later cells
# (builtin sum() on a tensor works but iterates in Python).
posterior = posterior / posterior.sum()
# Draw 10,000 samples from the grid posterior and visualize them (R code 3.3-3.5).
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))
# seaborn >= 0.12 requires x/y as keyword arguments.
ax = sns.scatterplot(x=range(len(samples)), y=samples, s=80, alpha=0.4, edgecolor="none")
ax.set(xlabel="sample number", ylabel="proportion water (p)");
# sns.distplot was removed in seaborn 0.14; histplot(..., kde=True,
# stat="density") is the equivalent density plot.
ax = sns.histplot(samples, kde=True, stat="density")
ax.set(xlabel="proportion water (p)", ylabel="Density");
# add up posterior probability where p < 0.5
(posterior[p_grid < 0.5]).sum()
# expected: tensor(0.1719)
# Same question answered from the samples instead of the grid.
(samples < 0.5).sum().float() / 1e4
# expected (approx., RNG-dependent): tensor(0.1698)
# Posterior mass between 0.5 and 0.75.
((samples > 0.5) & (samples < 0.75)).sum().float() / 1e4
# expected (approx.): tensor(0.6057)
# 80th percentile of the posterior samples.
stats.quantile(samples, 0.8)
# expected (approx.): tensor(0.7618)
# Middle 80% interval (10th and 90th percentiles).
stats.quantile(samples, [0.1, 0.9])
# expected (approx.): tensor([0.4515, 0.8148])
# Extreme case: 3 waters in 3 tosses on the same 1000-point grid with a
# flat prior; the posterior piles up against p = 1 (R code 3.11).
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.ones(1000)
likelihood = dist.Binomial(
    total_count=3, probs=p_grid
).log_prob(torch.tensor(3.)).exp()
posterior = likelihood * prior
posterior = posterior / posterior.sum()
# Resample from the skewed 3/3 posterior and compare interval types.
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))
# 50% percentile (equal-tail) interval — misses the most probable region.
stats.pi(samples, prob=0.5)
# expected (approx.): tensor([0.7107, 0.9329])
# 50% highest posterior density interval — hugs the boundary at p = 1.
stats.hpdi(samples, prob=0.5)
# expected (approx.): tensor([0.8448, 1.0000])
# Maximum a posteriori (MAP) estimate from the grid.
p_grid[posterior.argmax()]
# expected: tensor(1.)
# MAP estimate from the samples via a Gaussian KDE (mode of the estimated
# density), mirroring R's chainmode(samples, adj=0.01).
adj = 0.01
# Silverman's rule-of-thumb bandwidth factor, shrunk by `adj`.
silverman_factor = (0.75 * samples.size(0)) ** (-0.2)
bandwidth = adj * silverman_factor * samples.std()
x = torch.linspace(samples.min(), samples.max(), 1000)
# Unnormalized KDE on the grid; normalization is irrelevant for argmax.
y = dist.Normal(samples, bandwidth).log_prob(x.unsqueeze(-1)).logsumexp(-1).exp()
x[y.argmax()]
# expected (approx., RNG-dependent): tensor(0.9880)
# Posterior mean and median point estimates.
print(samples.mean())
print(samples.median())
# expected (approx.): tensor(0.8023) tensor(0.8448)
# Expected absolute loss when guessing p = 0.5.
(posterior * (0.5 - p_grid).abs()).sum()
# expected: tensor(0.3129)
# Expected absolute loss for every candidate guess (1000 x 1000 broadcast);
# the minimizer of absolute loss is the posterior median.
loss = (posterior * (p_grid.unsqueeze(1) - p_grid).abs()).sum(1)
p_grid[loss.argmin()]
# expected: tensor(0.8408)
# Dummy-data simulation: exact binomial pmf for 2 tosses with p = 0.7.
dist.Binomial(total_count=2, probs=0.7).log_prob(torch.arange(3.)).exp()
# expected: tensor([0.0900, 0.4200, 0.4900])
# One simulated observation.
dist.Binomial(total_count=2, probs=0.7).sample().long()
# expected (random): tensor(2)
# Ten simulated observations.
dist.Binomial(total_count=2, probs=0.7).sample(torch.Size([10])).long()
# expected (random): tensor([1, 2, 1, 1, 2, 2, 0, 1, 1, 0])
# 100,000 simulated counts; empirical frequencies approach the exact pmf.
dummy_w = dist.Binomial(total_count=2, probs=0.7).sample(torch.Size([int(1e5)]))
dummy_w.long().bincount().float() / 1e5
# expected (approx., random): tensor([0.0918, 0.4171, 0.4911])
# Sampling distribution of water counts for 9 tosses at p = 0.7.
dummy_w = dist.Binomial(total_count=9, probs=0.7).sample(torch.Size([int(1e5)]))
# sns.distplot was removed in seaborn 0.14; histplot without KDE is the
# equivalent of distplot(..., kde=False) (counts on the y-axis).
ax = sns.histplot(dummy_w, kde=False)
ax.set(xlabel="dummy water count", ylabel="Frequency");
# Posterior-predictive simulation. The first draw (fixed p = 0.7) is
# illustrative and is immediately overwritten by the draw that propagates
# posterior uncertainty by using one sampled p per predictive draw.
w = dist.Binomial(total_count=9, probs=0.7).sample(torch.Size([10000]))
w = dist.Binomial(total_count=9, probs=samples).sample()
# Rebuild the 3-waters-in-3-tosses grid posterior, then redraw the
# posterior samples under a fixed seed for reproducibility.
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.ones(1000)
likelihood = dist.Binomial(
    total_count=3, probs=p_grid
).log_prob(torch.tensor(3.)).exp()
posterior = likelihood * prior
posterior = posterior / posterior.sum()
torch.manual_seed(100)
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([10000]))
# First- and second-born child sexes for 100 two-child families — the
# chapter 3 homework data. Presumably 1 = boy, 0 = girl (matches the book's
# birth data); confirm against ../data/homeworkch3.csv.
birth1 = [1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
          0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
          1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
          0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1]
birth2 = [0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
          0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
          0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]
# Alternative to the inline vectors: load the same birth data from CSV.
# NOTE(review): path is relative to this script's working directory —
# confirm ../data/homeworkch3.csv exists before running.
homeworkch3 = pd.read_csv("../data/homeworkch3.csv")
# Total count of 1s across both birth vectors.
sum(birth1) + sum(birth2)
# expected: 111