### Exercise¶

1. Use Julia to simulate a vector of 1000 values for $\beta_{1}$ from a normal distribution with mean zero and variance 3. Plot a histogram of these values.

2. Use $\beta_{0}=1$, $\beta_{1}=2$ and $\sigma_{e}^{2}=5$, to generate a vector of observations, $\mathbf{y},$ that follows a simple linear regression model.

3. Use the Gibbs sampler to draw 10,000 samples for $\beta_{1}$ from its posterior distribution.

1. Compute the mean and variance of the sampled values.

2. Draw a histogram of the sampled values and compare it with the prior distribution.

## Simulation of Data¶

In [1]:
using Distributions
using Printf
using StatsBase

In [11]:
n = 10   # number of observations
k = 1    # number of covariates

# Covariate values drawn (with replacement) from {0, 1, 2}.
x = sample([0,1,2], (n,k))
# Design matrix: column of ones for the intercept, then the covariate.
X = [ones(n) x]

betaTrue = [1, 2]                        # true [β0, β1]
# Observations from the simple linear regression model with σ²ₑ = 5.
y = X*betaTrue .+ sqrt(5) .* randn(n);


### Calculations in Julia:¶

In [12]:
# Gibbs sampler for the simple linear regression model y = Xβ + e,
# e ~ N(0, varRes). Prior: flat on the intercept β0; β1 ~ N(0, varBeta).
niter = 10000        # number of Gibbs samples to draw
b     = [0.0, 0.0]   # current state of [β0, β1]
meanB = [0.0, 0.0]   # running sums, used for posterior-mean progress reports
a     = Float64[]    # stored samples of β1
sizehint!(a, niter)  # final length is known — pre-size the sample vector

varBeta = 3          # prior variance of β1
varRes  = 5          # residual variance (assumed known)
λ       = varRes/varBeta  # ridge/shrinkage term added for the slope's normal prior

for iter = 1:niter

    # Sample the intercept from its full conditional.
    # Flat prior on β0, so no λ term; conditional variance is xpxi*varRes.
    # Local names xcol/w avoid clobbering the global covariate matrix `x`
    # built in the simulation cell (the original loop reassigned it).
    w    = y - X[:,2]*b[2]          # residuals with the slope held fixed
    xcol = X[:,1]
    xpxi = 1/(xcol'xcol)[1,1]
    bHat = (xpxi*xcol'w)[1,1]
    b[1] = rand(Normal(bHat, sqrt(xpxi*varRes)))

    # Sample the slope from its full conditional.
    # The N(0, varBeta) prior contributes λ = varRes/varBeta to x'x.
    w    = y - X[:,1]*b[1]          # residuals with the intercept held fixed
    xcol = X[:,2]
    xpxi = 1/((xcol'xcol)[1,1] + λ)
    bHat = (xpxi*xcol'w)[1,1]
    b[2] = rand(Normal(bHat, sqrt(xpxi*varRes)))

    meanB .+= b                     # in-place: no per-iteration allocation
    push!(a, b[2])

    # Progress report every 1000 iterations: current posterior-mean estimates.
    if iter % 1000 == 0
        @printf("Intercept = %6.3f \n", meanB[1]/iter)
        @printf("Slope     = %6.3f \n", meanB[2]/iter)
    end
end

Intercept =  1.625
Slope     =  1.285
Intercept =  1.622
Slope     =  1.287
Intercept =  1.653
Slope     =  1.266
Intercept =  1.617
Slope     =  1.287
Intercept =  1.613
Slope     =  1.296
Intercept =  1.620
Slope     =  1.292
Intercept =  1.636
Slope     =  1.283
Intercept =  1.637
Slope     =  1.282
Intercept =  1.633
Slope     =  1.283
Intercept =  1.638
Slope     =  1.279

In [6]:
using Gadfly

In [7]:
# Histogram of 1000 draws from the prior β1 ~ N(0, 3):
# randn gives N(0,1) draws, so scaling by sqrt(3) yields variance 3.
plot(x=randn(1000)*sqrt(3), Geom.histogram,
Guide.title("Prior distribution of β1"),
Guide.ylabel("Frequency"),
Guide.xlabel("β1"))

Out[7]:
In [13]:
mean(a)  # Monte Carlo estimate of the posterior mean of β1 (n = 10 data)

Out[13]:
1.278652924515963
In [14]:
var(a)  # Monte Carlo estimate of the posterior variance of β1 (n = 10 data)

Out[14]:
0.6829740692336969
In [15]:
# Histogram of the 10,000 Gibbs samples of β1 (n = 10 observations).
# Compare with the prior plot: the posterior is narrower than N(0, 3).
plot(x=a, Geom.histogram,
Guide.title("Posterior distribution of β1 with n=10"),
Guide.ylabel("Frequency"),
Guide.xlabel("β1"))

Out[15]:
In [17]:
# Repeat the experiment with a much larger sample size (n = 1000) to show
# the posterior of β1 concentrating around the true value.
n = 1000 # number of observations
k = 1    # number of covariates

# Covariate values drawn from {0, 1, 2}; design matrix with intercept column.
x = sample([0,1,2],(n,k))
X = [ones(n) x]

betaTrue = [1,2]                        # true [β0, β1]
y = X*betaTrue + randn(n)*sqrt(5);      # residual variance σ²ₑ = 5

# Gibbs sampler (identical to the n = 10 cell; see comments there).
niter = 10000        # number of Gibbs samples to draw
b     = [0.0, 0.0]   # current state of [β0, β1]
meanB = [0.0, 0.0]   # running sums for posterior-mean progress reports
a     = Float64[]    # stored samples of β1
sizehint!(a, niter)

varBeta = 3          # prior variance of β1
varRes  = 5          # residual variance (assumed known)
λ       = varRes/varBeta  # shrinkage term from the slope's normal prior

for iter = 1:niter

    # Sample the intercept from its full conditional (flat prior, no λ).
    # Local name xcol avoids clobbering the global covariate matrix `x`.
    w    = y - X[:,2]*b[2]
    xcol = X[:,1]
    xpxi = 1/(xcol'xcol)[1,1]
    bHat = (xpxi*xcol'w)[1,1]
    b[1] = rand(Normal(bHat, sqrt(xpxi*varRes)))

    # Sample the slope from its full conditional (normal prior adds λ).
    w    = y - X[:,1]*b[1]
    xcol = X[:,2]
    xpxi = 1/((xcol'xcol)[1,1] + λ)
    bHat = (xpxi*xcol'w)[1,1]
    b[2] = rand(Normal(bHat, sqrt(xpxi*varRes)))

    meanB .+= b                     # in-place accumulation, no allocation
    push!(a, b[2])

    # Progress report every 1000 iterations.
    if iter % 1000 == 0
        @printf("Intercept = %6.3f \n", meanB[1]/iter)
        @printf("Slope     = %6.3f \n", meanB[2]/iter)
    end
end

Intercept =  0.904
Slope     =  2.043
Intercept =  0.893
Slope     =  2.051
Intercept =  0.894
Slope     =  2.050
Intercept =  0.894
Slope     =  2.051
Intercept =  0.892
Slope     =  2.052
Intercept =  0.891
Slope     =  2.053
Intercept =  0.891
Slope     =  2.053
Intercept =  0.893
Slope     =  2.052
Intercept =  0.894
Slope     =  2.051
Intercept =  0.894
Slope     =  2.051

In [18]:
mean(a)  # posterior mean of β1 with n = 1000 — close to the true value 2

Out[18]:
2.0506345429317
In [19]:
var(a)  # posterior variance of β1 with n = 1000 — much smaller than for n = 10

Out[19]:
0.00798380824289942
In [21]:
# Histogram of the 10,000 Gibbs samples of β1 (n = 1000 observations):
# the posterior is now tightly concentrated near the true slope, 2.
plot(x=a, Geom.histogram,
Guide.title("Posterior distribution of β1 with n=1000"),
Guide.ylabel("Frequency"),
Guide.xlabel("β1"))

Out[21]: