import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.style.use('ggplot')
from matplotlib import gridspec
from theano import tensor as tt
from scipy import stats
The Pearson product-moment correlation coefficient, usually denoted r, is a widely used measure of the relationship between two variables. It ranges from −1, indicating a perfect negative linear relationship, to +1, indicating a perfect positive relationship. A value of 0 indicates that there is no linear relationship. Usually the correlation r is reported as a single point estimate, perhaps together with a frequentist significance test.
But, rather than just having a single number to measure the correlation, it would be nice to have a posterior distribution for r, saying how likely each possible level of correlation was. There are frequentist confidence interval methods that try to do this, as well as various analytic Bayesian results based on asymptotic approximations (e.g., Donner & Wells, 1986).
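For reference, here is a minimal sketch of the classical point estimate with an approximate Fisher-z confidence interval (the function name is illustrative). For the 11-observation dataset used below the resulting interval is quite wide, which further motivates looking at the full posterior:
def pearson_ci(x1, x2, alpha=0.05):
    # point estimate of r with an approximate Fisher-z confidence interval
    n = len(x1)
    r = np.corrcoef(x1, x2)[0, 1]
    z = np.arctanh(r)            # Fisher z-transform of r
    se = 1 / np.sqrt(n - 3)      # approximate standard error on the z scale
    zcrit = stats.norm.ppf(1 - alpha/2)
    return r, (np.tanh(z - zcrit*se), np.tanh(z + zcrit*se))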
The Bayesian alternative treats the data as draws from a bivariate Gaussian and infers a full posterior for the correlation r, together with the means and standard deviations of the two variables:
$$ \mu_{1},\mu_{2} \sim \text{Gaussian}(0, .001) $$
$$ \sigma_{1},\sigma_{2} \sim \text{InvSqrtGamma} (.001, .001) $$
$$ r \sim \text{Uniform} (-1, 1) $$
$$ x_{i} \sim \text{MvGaussian} ((\mu_{1},\mu_{2}), \begin{bmatrix}\sigma_{1}^2 & r\sigma_{1}\sigma_{2}\\r\sigma_{1}\sigma_{2} & \sigma_{2}^2\end{bmatrix}^{-1}) $$
The observed data take the form xi = (xi1, xi2) for the ith observation, and, following the theory behind the correlation coefficient, are modeled as draws from a multivariate Gaussian distribution. The parameters of this distribution are the means μ = (μ1,μ2) and standard deviations σ = (σ1,σ2) of the two variables, and the correlation coefficient r that links them.
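Concretely, the covariance matrix is assembled from the standard deviations and r. As a sketch (with made-up parameter values, for illustration only), this is how data from the model could be simulated:
# hypothetical parameter values, not taken from the text
mu_true = np.array([1.0, 100.0])
sigma_true = np.array([0.4, 8.0])
r_true = -0.8
# diagonal entries sigma_i^2, off-diagonal entries r*sigma_1*sigma_2
cov_true = np.array([[sigma_true[0]**2, r_true*sigma_true[0]*sigma_true[1]],
                     [r_true*sigma_true[0]*sigma_true[1], sigma_true[1]**2]])
fake_data = np.random.multivariate_normal(mu_true, cov_true, size=11)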
# The datasets:
y = np.array([.8, 102, 1, 98, .5, 100, 0.9, 105, .7, 103,
              0.4, 110, 1.2, 99, 1.4, 87, 0.6, 113, 1.1, 89, 1.3, 93]).reshape((11, 2))
# Alternative dataset: the same 11 pairs repeated, to show how the
# posterior tightens with more observations.
# y = np.array([.8, 102, 1, 98, .5, 100, 0.9, 105, .7, 103,
#               0.4, 110, 1.2, 99, 1.4, 87, 0.6, 113, 1.1, 89, 1.3, 93,
#               .8, 102, 1, 98, .5, 100, 0.9, 105, .7, 103,
#               0.4, 110, 1.2, 99, 1.4, 87, 0.6, 113, 1.1, 89, 1.3, 93]).reshape((22, 2))
with pm.Model() as model:
    # r ~ Uniform(-1, 1)
    r = pm.Uniform('r', lower=-1, upper=1)
    # mu1, mu2 ~ Gaussian(0, .001)  (precision parameterization)
    mu = pm.Normal('mu', mu=0, tau=.001, shape=2)
    # sigma1, sigma2 ~ InvSqrtGamma(.001, .001), via Gamma priors on the precisions
    lambda1 = pm.Gamma('lambda1', alpha=.001, beta=.001)
    lambda2 = pm.Gamma('lambda2', alpha=.001, beta=.001)
    sigma1 = pm.Deterministic('sigma1', lambda1**-0.5)
    sigma2 = pm.Deterministic('sigma2', lambda2**-0.5)
    # covariance matrix assembled from the standard deviations and r
    cov = pm.Deterministic('cov', tt.stacklists([[lambda1**-1, r*sigma1*sigma2],
                                                 [r*sigma1*sigma2, lambda2**-1]]))
    # xi ~ MvGaussian((mu1, mu2), covariance as above)
    yd = pm.MvNormal('yd', mu=mu, cov=cov, observed=y, shape=2)

    trace2 = pm.sample(3000, njobs=2)
pm.traceplot(trace2[50:], varnames=['r', 'mu', 'sigma1', 'sigma2']);
from scipy.stats import gaussian_kde

r = trace2['r'][50:]
freqr = np.corrcoef(y[:, 0], y[:, 1])
print('The posterior mean of r is: ', np.mean(r))
print('The Pearson correlation r is: ', freqr[0, 1])

fig = plt.figure(figsize=(12, 4))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])

# left panel: posterior of r (KDE and histogram), with the frequentist
# point estimate marked by the vertical line
ax0 = plt.subplot(gs[0])
my_pdf = gaussian_kde(r)
x = np.linspace(-1, 1, 200)
ax0.plot(x, my_pdf(x), 'r')
ax0.hist(r, bins=100, density=True, alpha=.3)
ax0.plot([freqr[0, 1], freqr[0, 1]], [0, 3.5], 'k', lw=2.5)
plt.xlabel('Pearson correlation')
plt.ylabel('Posterior Density')

# right panel: the raw data
ax1 = plt.subplot(gs[1])
ax1.scatter(y[:, 0], y[:, 1]);
The posterior mean of r is:  -0.695576221086
The Pearson correlation r is:  -0.810967075636
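A payoff of having the full posterior is that interval summaries come for free; for example, using the r samples from the trace above:
# 95% equal-tailed credible interval and highest-posterior-density interval
print('95% credible interval:', np.percentile(r, [2.5, 97.5]))
print('95% HPD interval:', pm.hpd(r, alpha=0.05))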
The model extends naturally to account for measurement error: each observed pair $x_{i}$ is treated as a noisy measurement of a latent pair $y_{i}$ drawn from the bivariate Gaussian, with known measurement precision $\lambda_{j}^{e}$ for variable $j$.
$$ y_{i} \sim \text{MvGaussian} ((\mu_{1},\mu_{2}), \begin{bmatrix}\sigma_{1}^2 & r\sigma_{1}\sigma_{2}\\r\sigma_{1}\sigma_{2} & \sigma_{2}^2\end{bmatrix}^{-1}) $$
$$ x_{ij} \sim \text{Gaussian}(y_{ij},\lambda_{j}^e) $$
# The datasets:
y = np.array([.8, 102, 1, 98, .5, 100, 0.9, 105, .7, 103,
              0.4, 110, 1.2, 99, 1.4, 87, 0.6, 113, 1.1, 89, 1.3, 93]).reshape((11, 2))
# y = np.array([.8, 102, 1, 98, .5, 100, 0.9, 105, .7, 103,
#               0.4, 110, 1.2, 99, 1.4, 87, 0.6, 113, 1.1, 89, 1.3, 93,
#               .8, 102, 1, 98, .5, 100, 0.9, 105, .7, 103,
#               0.4, 110, 1.2, 99, 1.4, 87, 0.6, 113, 1.1, 89, 1.3, 93]).reshape((22, 2))
n1, n2 = np.shape(y)
# known measurement error (standard deviation) for each variable
sigmaerror = np.array([.03, 1])
with pm.Model() as model2:
    # r ~ Uniform(-1, 1)
    r = pm.Uniform('r', lower=-1, upper=1)
    # mu1, mu2 ~ Gaussian(0, .001)
    mu = pm.Normal('mu', mu=0, tau=.001, shape=n2)
    # sigma1, sigma2 ~ InvSqrtGamma(.001, .001)
    lambda1 = pm.Gamma('lambda1', alpha=.001, beta=.001)
    lambda2 = pm.Gamma('lambda2', alpha=.001, beta=.001)
    sigma1 = pm.Deterministic('sigma1', lambda1**-0.5)
    sigma2 = pm.Deterministic('sigma2', lambda2**-0.5)
    cov = pm.Deterministic('cov', tt.stacklists([[lambda1**-1, r*sigma1*sigma2],
                                                 [r*sigma1*sigma2, lambda2**-1]]))
    # latent yi ~ MvGaussian; the observed xij are noisy versions of yij
    yd = pm.MvNormal('yd', mu=mu, cov=cov, shape=(n1, n2))
    xi = pm.Normal('xi', mu=yd, sd=sigmaerror, observed=y)  # sigmaerror broadcasts over rows

    trace2 = pm.sample(1000, njobs=2)
pm.traceplot(trace2, varnames=['r', 'mu', 'sigma1', 'sigma2']);
r = trace2['r']
freqr = np.corrcoef(y[:, 0], y[:, 1])
print('The posterior mean of r is: ', np.mean(r))
print('The Pearson correlation r is: ', freqr[0, 1])

fig = plt.figure(figsize=(12, 4))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
ax0 = plt.subplot(gs[0])
my_pdf = gaussian_kde(r)
x = np.linspace(-1, 1, 200)
ax0.plot(x, my_pdf(x), 'r')  # kernel density estimate of the posterior
ax0.hist(r, bins=100, density=True, alpha=.3)
ax0.plot([freqr[0, 1], freqr[0, 1]], [0, 3.5], 'k', lw=2.5)
plt.xlabel('Pearson correlation')
plt.ylabel('Posterior Density')
ax1 = plt.subplot(gs[1])
ax1.scatter(y[:, 0], y[:, 1]);
The posterior mean of r is:  -0.70135557984
The Pearson correlation r is:  -0.810967075636
Cohen's kappa is a measure of how well one decision-making method agrees with another. When both decision-making methods make n independent assessments, the data y take the form of four counts: a observations where both methods decide "one," b observations where the objective method decides "one" but the surrogate method decides "zero," c observations where the objective method decides "zero" but the surrogate method decides "one," and d observations where both methods decide "zero," with n = a + b + c + d.
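For example, the four counts can be tabulated from two vectors of binary decisions (the decision vectors here are made up purely for illustration):
objective = np.array([1, 1, 0, 1, 0, 0, 1, 0])  # hypothetical objective decisions
surrogate = np.array([1, 0, 0, 1, 0, 1, 1, 0])  # hypothetical surrogate decisions
a = np.sum((objective == 1) & (surrogate == 1))
b = np.sum((objective == 1) & (surrogate == 0))
c = np.sum((objective == 0) & (surrogate == 1))
d = np.sum((objective == 0) & (surrogate == 0))
counts = np.array([a, b, c, d])  # the form of the y vector used below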
$$ \alpha,\beta,\gamma \sim \text{Beta} (1, 1) $$
$$ \pi_{a} = \alpha\beta $$
$$ \pi_{b} = \alpha(1-\beta) $$
$$ \pi_{c} = (1-\alpha)(1-\gamma) $$
$$ \pi_{d} = (1-\alpha)\gamma $$
$$ y \sim \text{Multinomial} ([\pi_{a},\pi_{b},\pi_{c},\pi_{d}],n) $$
$$ \xi = \alpha\beta + (1-\alpha) \gamma $$
$$ \psi = (\pi_{a}+\pi_{b})(\pi_{a}+\pi_{c})+(\pi_{b}+\pi_{d})(\pi_{c}+\pi_{d}) $$
$$ \kappa = (\xi-\psi)/(1-\psi) $$
Note that $\pi_{a}+\pi_{b}+\pi_{c}+\pi_{d} = \alpha + (1-\alpha) = 1$, so the Multinomial probabilities are properly normalized; $\xi$ is the overall agreement rate, $\psi$ the agreement expected by chance, and $\kappa$ the chance-corrected agreement.
# CHOOSE a data set:
# Influenza
y = np.array([14, 4, 5, 210])
# Hearing Loss
# y = np.array([20, 7, 103, 417])
# Rare Disease
# y = np.array([0, 0, 13, 157])
with pm.Model() as model1:
    # priors
    alpha = pm.Beta('alpha', alpha=1, beta=1)
    beta = pm.Beta('beta', alpha=1, beta=1)
    gamma = pm.Beta('gamma', alpha=1, beta=1)
    pi1 = alpha*beta
    pi2 = alpha*(1-beta)
    pi3 = (1-alpha)*(1-gamma)
    pi4 = (1-alpha)*gamma
    yd = pm.Multinomial('yd', n=y.sum(), p=[pi1, pi2, pi3, pi4], observed=y)
    # derived measures
    # rate at which the surrogate method agrees with the objective method
    xi = alpha*beta + (1-alpha)*gamma
    # rate of chance agreement
    psi = pm.Deterministic('psi', (pi1+pi2)*(pi1+pi3) + (pi2+pi4)*(pi3+pi4))
    # chance-corrected agreement
    kappa = pm.Deterministic('kappa', (xi-psi)/(1-psi))

    trace = pm.sample(1000, njobs=2)
pm.traceplot(trace, varnames=['kappa', 'psi']);
# Compare to Cohen's point estimate
n = y.sum()
p0 = (y[0]+y[3])/n  # observed agreement rate
pe = (((y[0]+y[1]) * (y[0]+y[2])) + ((y[1]+y[3]) * (y[2]+y[3]))) / n**2  # chance agreement
kappa_Cohen = (p0-pe) / (1-pe)
from scipy.stats import gaussian_kde  # continuous approximation of the posterior density

kappa = trace['kappa']
my_pdf = gaussian_kde(kappa)
x = np.linspace(0.2, 1, 200)
plt.plot(x, my_pdf(x), 'r')  # kernel density estimate of the posterior
plt.hist(kappa, bins=100, density=True, alpha=.3)
plt.plot([kappa_Cohen, kappa_Cohen], [0, 6], 'k', lw=2.5)  # Cohen's point estimate
plt.xlabel('kappa')
plt.ylabel('Posterior Density');
print("Cohen's point estimate: ", kappa_Cohen)
print("MCMC estimate: ", np.mean(kappa))
Cohen's point estimate:  0.735794380748
MCMC estimate:  0.696304224857
Detecting a single change point in a time series.
$$ \mu_{1},\mu_{2} \sim \text{Gaussian}(0, .001) $$
$$ \lambda \sim \text{Gamma} (.001, .001) $$
$$ \tau \sim \text{Uniform} (0, t_{max}) $$
$$ c_{i} \sim \begin{cases} \text{Gaussian}(\mu_{1}, \lambda), & \text{if $t_{i} \lt \tau$} \\ \text{Gaussian}(\mu_{2}, \lambda), & \text{if $t_{i} \ge \tau$} \end{cases} $$
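The code below reads the series from changepointdata.csv. If that file is not at hand, a synthetic series with the same structure can stand in; the means and change point in this sketch are loosely matched to the results reported further down, and the noise level is a guess:
rng = np.random.RandomState(42)
n_fake, tau_true = 1000, 730  # assumed series length and change point
fake = np.concatenate([rng.normal(38, 4, size=tau_true),
                       rng.normal(30, 4, size=n_fake - tau_true)])
# pd.DataFrame({'data': fake}).to_csv('changepointdata.csv', index=False)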
# pymc3
# Load data
dat = pd.read_csv('changepointdata.csv')
c = dat.data
n = np.size(c)
sample = np.arange(0, n)
with pm.Model() as model3:
    # mu1, mu2 ~ Gaussian(0, .001)
    mu = pm.Normal('mu', mu=0, tau=.001, shape=2)
    # lambda ~ Gamma(.001, .001)
    lambd = pm.Gamma('lambd', alpha=.001, beta=.001)
    # tau ~ Uniform(0, tmax)
    tau = pm.DiscreteUniform('tau', lower=0, upper=n)
    # mean mu1 before the change point (t_i < tau), mu2 from tau onwards
    muvect = pm.math.switch(tau > sample, mu[0], mu[1])
    cobs = pm.Normal('cobs', mu=muvect, tau=lambd, observed=c)

    trace1 = pm.sample(1000, njobs=2)
pm.traceplot(trace1, varnames=['mu', 'tau']);
MCtau = trace1['tau'][100:]  # discard burn-in samples
MCmu = trace1['mu'][100:]
mu1 = np.mean(MCmu, axis=0)
tau1 = int(np.mean(MCtau))

# posterior-mean step function
fitted = np.zeros(n)
fitted[:tau1] = mu1[0]
fitted[tau1:] = mu1[1]

plt.figure(1, figsize=(15, 9))
plt.plot(np.arange(n), c, color='k')
plt.plot(np.arange(n), fitted, color='r', lw=2.5)
plt.xlabel("Samples")
plt.ylabel("Values");
print(mu1)
print(tau1)
[ 37.86792845 30.57069104] 731
After 949 failed attempts, Cha Sa-Soon from Korea finally passed the written portion of her driving exam. What can we infer about theta, the probability of answering any one question correctly?
$$ \theta \sim \text{Uniform}(0.25, 1) $$
$$ z_{i} \sim \text{Binomial}(\theta, n) $$
$$ 15 \le z_{i} \le 25, \quad \text{if} \; y_{i} = 0 $$
Here $y_{i}$ indicates whether the score on attempt $i$ was observed: only the final, passing attempt ($y_{i}=1$) has a known score, while the scores of the 949 failed attempts are censored to the interval $[15, 25]$.
nattempts = 950
nfails = 949
n = 50  # number of questions per attempt
y = np.zeros(nattempts)
y[nattempts-1] = 1  # only the final, passing attempt is fully observed
z = 30              # score on the passing attempt
# scores of failed attempts are known only to lie in [15, 25]
unobsmin = 15
unobsmax = 25
unobsrange = np.arange(unobsmin, unobsmax+1)
from pymc3.distributions import dist_math

def logp(value, n, p):
    # Binomial log-pmf assembled from PyMC3's dist_math helpers
    return dist_math.bound(dist_math.binomln(n, value) +
                           dist_math.logpow(p, value) +
                           dist_math.logpow(1-p, n-value),
                           0 <= value, value <= n,
                           0 <= p, p <= 1)

def Censorlike(value=z, n=n, censorn=nfails, unobs=unobsrange):
    # `theta` is resolved at call time, after it is defined in the model block below.
    # log-likelihood of the single fully observed (passing) score
    ylikeobs = logp(value=value, n=n, p=theta)
    # each of the censorn failed attempts contributes log P(15 <= z <= 25),
    # i.e. a log-sum-exp of the binomial log-pmf over the censored range
    ylikeunobs = pm.math.logsumexp(tt.stack([logp(value=i, n=n, p=theta)
                                             for i in unobs])).sum()
    return ylikeobs + ylikeunobs*censorn
with pm.Model():
    # theta ~ Uniform(0.25, 1)
    theta = pm.Uniform('theta', lower=.25, upper=1)
    ylike = pm.DensityDist('ylike', Censorlike,
                           observed={'value': z, 'n': n,
                                     'censorn': nfails, 'unobs': unobsrange})
    trace = pm.sample(3000, njobs=2)
pm.traceplot(trace);
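As a sanity check on the censored likelihood, the same log-likelihood can be evaluated on a grid of theta values with scipy (a sketch; since the prior is flat, the maximizing theta should sit near the posterior mode from the trace):
thetas = np.linspace(0.26, 0.99, 500)
# log P(z = 30) + 949 * log P(15 <= z <= 25), for each candidate theta
loglik = (stats.binom.logpmf(z, n, thetas) +
          nfails * np.log(stats.binom.pmf(unobsrange[:, None], n, thetas).sum(axis=0)))
print('approximate posterior mode:', thetas[np.argmax(loglik)])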
The last example is a capture-recapture estimate of a population size ("how many planes?"): x planes are spotted and marked, a second sample of n spottings contains k marked planes, and we want to infer the total number of planes t. With a uniform prior over the possible population sizes, the model is
$$ t \sim \text{Categorical}(\alpha) $$
$$ k \sim \text{Hypergeometric}(n, x, t) $$
# pymc2
import matplotlib.pyplot as plt
import pymc
import numpy as np
%matplotlib inline

x = 10     # number of planes marked in the first sample
k = 4      # number of recaptures in the second sample
n = 5      # size of the second sample
tmax = 50  # maximum population size

# the population contains at least the x marked planes
# plus the n-k unmarked planes seen in the second sample
tau = pymc.DiscreteUniform("tau", lower=x + (n-k), upper=tmax)
kobs = pymc.Hypergeometric("kobs", N=tau, n=n, m=x, value=k, observed=True)

testmodel = pymc.Model([kobs, tau])
mcmc = pymc.MCMC(testmodel)
mcmc.sample(iter=20000, burn=50, thin=2)
pymc.Matplot.plot(mcmc)

tau = mcmc.trace("tau")[:]
plt.figure(figsize=(12, 6))
plt.hist(tau, bins=tmax - (x + (n-k)), density=True, alpha=.5, color='k')
plt.xlabel('Number of Planes')
plt.ylabel('Posterior Mass');
PyMC3 had no built-in Hypergeometric distribution at the time of writing, so the likelihood can be injected with the "zeros trick" familiar from BUGS/JAGS: observing a zero from a Poisson with mean $\phi$ contributes $e^{-\phi}$ to the likelihood, so setting $\phi = -\log L + C$ (with $C$ large enough to keep $\phi$ positive) reproduces an arbitrary log-likelihood $\log L$ up to a constant.
# pymc3 - Hypergeometric likelihood via the zeros trick, as in JAGS
x = 10     # number of captures
k = 4      # number of recaptures from n
n = 5      # size of second sample
tmax = 50  # maximum population size

factln = dist_math.factln  # log-factorial, from the dist_math import above

with pm.Model():
    tau = pm.DiscreteUniform("tau", lower=x + (n-k), upper=tmax)
    # hypergeometric log-pmf: log C(x,k) + log C(tau-x, n-k) - log C(tau, n);
    # the small .01 offsets appear to guard against edge cases in factln
    logterm1 = factln(x+.01) - factln(k+.01) - factln(x-k+.01)
    logterm2 = factln(tau-x+.01) - factln(n-k+.01) - factln((tau-x)-(n-k)+.01)
    logterm3 = factln(tau+.01) - factln(n+.01) - factln(tau-n+.01)
    C = 1000  # constant keeping the Poisson mean positive
    phi = -(logterm1 + logterm2 - logterm3) + C
    obs = pm.Poisson("obs", mu=phi, observed=0)
    trace = pm.sample(1000)

tau = trace['tau']
plt.figure(figsize=(12, 6))
plt.hist(tau, bins=tmax - (x + (n-k)), density=True, alpha=.5, color='k')
plt.xlabel('Number of Planes')
plt.ylabel('Posterior Mass');
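Finally, a quick check of the hand-rolled hypergeometric log-likelihood against scipy (a sketch; note that scipy's parameter order is hypergeom(M, n, N), with M the population size, n the number of marked items, and N the sample size):
from scipy.special import gammaln

def factln_np(v):
    # log-factorial via the log-gamma function
    return gammaln(v + 1)

for t in [20, 30, 40, 50]:
    # log C(x,k) + log C(t-x, n-k) - log C(t,n), mirroring the model above
    hand = (factln_np(x) - factln_np(k) - factln_np(x-k)
            + factln_np(t-x) - factln_np(n-k) - factln_np((t-x)-(n-k))
            - (factln_np(t) - factln_np(n) - factln_np(t-n)))
    ref = stats.hypergeom.logpmf(k, t, x, n)
    print(t, hand, ref)  # the two columns should agree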