%matplotlib inline
from sklearn.datasets import load_boston
import numpy as np
import pymc as pm
import pandas as pd
boston = load_boston()
features = ['INDUS', 'NOX', 'RM', 'TAX', 'PTRATIO', 'LSTAT']
df = pd.DataFrame(boston.data, columns=boston.feature_names)
X = np.array(df.ix[:, features])
y = boston.target
gamma = pm.Binomial('gamma', 1, 0.5, size=len(features))
var = pm.Lambda('var', lambda gamma=gamma: (1-gamma)*0.001 + gamma*10)
prec = pm.Lambda('prec', lambda var=var: 1.0/var)
b = pm.Normal('b', 0, prec)
int_ = pm.Normal('int_', 0, 0.01)
taue = pm.Gamma('taue', 0.1, 0.1)
mu = int_ + X[:,0]*b[0] + X[:,1]*b[1] + X[:,2]*b[2] + X[:,3]*b[3] + X[:,4]*b[4] + X[:,5]*b[5]
observed = pm.Normal('obs', mu, taue, observed=True, value=y)
M = pm.MCMC([observed, mu, int_, b, prec, taue, var, gamma])
M.sample(10000, 500, 5)
[-----------------100%-----------------] 10000 of 10000 complete in 3.5 sec
int_.stats()
{'95% HPD interval': array([-82.23023941, 34.05807169]), 'mc error': 4.0775156170050106, 'mean': -9.3811298932046085, 'n': 1900, 'quantiles': {2.5: -82.182744891019681, 25: -43.780936763079715, 50: 7.0756300003831951, 75: 24.127800859958935, 97.5: 34.250495843107721}, 'standard deviation': 40.788646352591847}
pm.Matplot.plot(int_)
Plotting int_
gamma.stats()
{'95% HPD interval': array([[ 0., 1., 1., 0., 0., 0.], [ 1., 1., 1., 1., 1., 1.]]), 'mc error': array([ 0.02982943, 0. , 0.01438836, 0.01606565, 0.03076783, 0.02588083]), 'mean': array([ 0.38315789, 1. , 0.95157895, 0.06947368, 0.88631579, 0.79684211]), 'n': 1900, 'quantiles': {2.5: array([ 0., 1., 0., 0., 0., 0.]), 25: array([ 0., 1., 1., 0., 1., 1.]), 50: array([ 0., 1., 1., 0., 1., 1.]), 75: array([ 1., 1., 1., 0., 1., 1.]), 97.5: array([ 1., 1., 1., 1., 1., 1.])}, 'standard deviation': array([ 0.48615627, 0. , 0.21465427, 0.25425792, 0.31742733, 0.40234906])}
pm.Matplot.plot(gamma)
Plotting gamma_0 Plotting gamma_1 Plotting gamma_2 Plotting gamma_3 Plotting
pm.Matplot.plot(var)
pm.Matplot.plot(prec)
pm.Matplot.plot(b)
pm.Matplot.plot(taue)