Image('backbox_ml.png')
Image('openbox_pp.png')
Moreover...
stats.beta(2,2).rvs(1)
array([ 0.37833632])
import random
import numpy as np
def choose_coin():
    """Draw the bias (probability of heads) of a randomly chosen coin.

    The bias is sampled from a Beta(2, 2) prior, which is symmetric about
    0.5 and puts most mass on moderately fair coins.

    Returns:
        float: a probability in [0, 1].

    Alternatives tried in the demo (left here for reference):
    random.uniform(0, 1); np.random.normal(0, 1) (the latter can fall
    outside [0, 1], so it is not a valid coin bias).
    """
    # Bug fix: .rvs(1) returned a length-1 ndarray; downstream code treats
    # the result as a scalar probability, so return a plain float instead.
    return float(stats.beta(2, 2).rvs())
# pylab.hist([choose_coin() for dummy in range(100000)], normed=True, bins=100)
# Monte-Carlo experiment: repeatedly pick a random coin, flip it ten
# times, and whenever exactly 9 of the 10 flips come up heads, record the
# coin's bias and place a one-unit bet on tails for one extra flip.
successes = []
returns = 0.
num_trials = 10000
flips_per_trial = 10
for trial in range(num_trials):
    bias = choose_coin()
    flips = [random.uniform(0, 1) < bias for flip in range(flips_per_trial)]
    if flips.count(True) == 9:
        successes.append(bias)
        # Bet on tails: lose 1 if the extra flip is heads, win 1 otherwise.
        returns += -1 if random.uniform(0, 1) < bias else 1
# Plot the distribution of coin biases among trials that produced
# exactly 9 heads out of 10 flips.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# normed=True draws a density rather than raw counts (pre-matplotlib-2.1 API).
r = ax.hist(np.array(successes), normed=True, bins=20)
# Report how many trials qualified and the average profit per follow-up bet.
# NOTE(review): this raises ZeroDivisionError if no trial hit 9 heads —
# unlikely over 10000 trials, but worth confirming for smaller runs.
print len(successes)
print "Average return", returns / len(successes)
719 Average return -0.613351877608
Image('coin3.png')
Image('coin4.png')
$\theta$: Parameters of the model (the chance of getting heads).
Image('wantget.png')
Image('synth_data.png')
with $$ \epsilon \sim \mathcal{N}(0, \sigma^2) $$
import pymc as pm
# Bayesian linear regression, with every piece of the model written out
# by hand (priors, mean function, likelihood, inference).
with pm.Model() as model:  # PyMC3 models are built inside a with-statement
    # Weakly-informative priors on the coefficients and the noise scale.
    intercept = pm.Normal('alpha', mu=0, sd=20)
    slope = pm.Normal('beta', mu=0, sd=20)
    noise_sd = pm.Uniform('sigma', lower=0, upper=20)

    # Deterministic mean of the observations given the predictors.
    mu_y = intercept + slope * x

    # Gaussian likelihood tying the model to the observed responses y.
    obs = pm.Normal('y', mu=mu_y, sd=noise_sd, observed=y)

    # Inference: start the chain at the MAP estimate, then draw 2000
    # posterior samples with NUTS.
    start = pm.find_MAP()
    step = pm.NUTS(state=start)
    trace = pm.sample(2000, step, start=start, progressbar=False)
/Users/alexcoventry/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/theano/scan_module/scan_perform_ext.py:85: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility from scan_perform.scan_perform import *
# Same regression, but letting the glm helper construct the priors,
# linear predictor, and likelihood from a formula string.
with pm.Model() as model:
    pm.glm.glm('y ~ x', data)
    # Sample 2000 posterior draws with NUTS (no explicit starting point).
    trace = pm.sample(2000, pm.NUTS(), progressbar=False)
# Trace plot, with the true generating values marked for each parameter.
fig = pm.traceplot(trace, lines={'alpha': 1, 'beta': 2, 'sigma': .5});
Image('ppc1.png')
# Contaminate the synthetic data with five hand-picked outliers.
x_out = np.append(x, [.1, .15, .2, .25, .25])
y_out = np.append(y, [8, 6, 9, 7, 9])
data_out = {'x': x_out, 'y': y_out}

# Scatter-plot the contaminated data set.
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(
    111,
    xlabel='Value of gold',
    ylabel='Value of gold miners',
    title='Posterior predictive regression lines',
)
ppl.scatter(ax, x_out, y_out, label='data')
<matplotlib.axes.AxesSubplot at 0x118ba0150>
# Refit the GLM on the data set that now contains outliers.
with pm.Model() as model:
    # Bug fix: the helper lives in the pm namespace — the original bare
    # `glm.glm` is a NameError (and inconsistent with `pm.glm.glm` used
    # everywhere else in this file).
    pm.glm.glm('y ~ x', data_out)
    trace = pm.sample(2000, pm.NUTS(), progressbar=False)
Image('ppc2.png')
Image('t-dist.png')
# Robust regression: a Student-t likelihood has heavy tails, so the
# outliers no longer drag the fitted line toward them.
with pm.Model() as model_robust:
    t_family = pm.glm.families.T()
    pm.glm.glm('y ~ x', data_out, family=t_family)
    trace_robust = pm.sample(2000, pm.NUTS(), progressbar=False)
Image('ppc3.png')
import zipline
import pytz
from datetime import datetime
# Download GLD and GDX price histories via zipline's Yahoo loader, drop
# rows with missing values, keep at most the first 1000 rows, and plot.
fig = plt.figure(figsize=(8, 4))
cutoff = datetime(2013, 8, 1, 0, 0, 0, 0, pytz.utc)
prices = zipline.data.load_from_yahoo(stocks=['GLD', 'GDX'], end=cutoff)
prices = prices.dropna()[:1000]
prices.plot();
GLD GDX
<matplotlib.figure.Figure at 0x123e0d210>
Image('price_corr.png')
# Ordinary (fixed-coefficient) regression of gold on the miners' ETF,
# for comparison with the time-varying model below.
with pm.Model() as model_reg:
    pm.glm.glm('GLD ~ GDX', prices, family=pm.glm.families.Normal())
    trace_reg = pm.sample(2000, pm.NUTS(), progressbar=False)
Image('ppc4.png')
model_randomwalk = pm.Model()
with model_randomwalk:
    # Priors on the random-walk innovation scales. An Exponential prior
    # keeps them positive; sampling happens in log space (logtransform)
    # where the posterior geometry is friendlier.
    sigma_alpha, log_sigma_alpha = model_randomwalk.TransformedVar(
        'sigma_alpha',
        pm.Exponential.dist(1. / .02, testval=.1),
        pm.logtransform,
    )
    sigma_beta, log_sigma_beta = model_randomwalk.TransformedVar(
        'sigma_beta',
        pm.Exponential.dist(1. / .02, testval=.1),
        pm.logtransform,
    )
# To make the model simpler, we will apply the same coefficient for 50 data points at a time
subsample_alpha = 50
subsample_beta = 50
# NOTE(review): GaussianRandomWalk and repeat are used unqualified here —
# presumably brought into scope by a star import in a cell not shown.
# Confirm whether `repeat` is numpy's or theano's; the qualified forms
# would be pm.GaussianRandomWalk and np.repeat / theano.tensor.repeat.
with model_randomwalk:
# alpha/beta hold one coefficient per 50-point chunk of the price series;
# sigma**-2 is the walk's innovation precision.
alpha = GaussianRandomWalk('alpha', sigma_alpha**-2,
shape=len(prices) / subsample_alpha)
beta = GaussianRandomWalk('beta', sigma_beta**-2,
shape=len(prices) / subsample_beta)
# Make coefficients have the same length as prices
alpha_r = repeat(alpha, subsample_alpha)
beta_r = repeat(beta, subsample_beta)
with model_randomwalk:
    # Time-varying linear model: GLD's mean is a (piecewise-constant)
    # affine function of GDX at each time step.
    regression = alpha_r + beta_r * prices.GDX.values

    # Observation-noise scale and the Gaussian likelihood on GLD prices.
    sd = pm.Uniform('sd', 0, 20)
    likelihood = pm.Normal('y', mu=regression, sd=sd,
                           observed=prices.GLD.values)
from scipy import optimize
with model_randomwalk:
    # A MAP fit of just the walk variables (via L-BFGS-B) gives the
    # sampler a sensible starting point and scaling.
    map_point = pm.find_MAP(vars=[alpha, beta], fmin=optimize.fmin_l_bfgs_b)
    nuts = pm.NUTS(scaling=map_point)
    # 2000 posterior draws from the time-varying regression model.
    trace_rw = pm.sample(2000, nuts, start=map_point, progressbar=False)
Image('rwalk_alpha.png')
Image('rwalk_beta.png')
Image('ppc5.png')
Scalability
Usability