using(Distributions)
nObs = 100
nMarkers = 1000
X = sample([0,1,2],(nObs,nMarkers))
α = randn(nMarkers)
a = X*α
stdGen = std(a)
a = a/stdGen
y = a + randn(nObs)
saveAlpha = α
nothing
meanXCols = mean(X,1)
X = X - ones(nObs,1)*meanXCols;
seed = 10 # set the seed for the random number generator
chainLength = 2000 # number of iterations
dfEffectVar = 4 # hyper parameter (degrees of freedom) for locus effect variance
nuRes = 4 # hyper parameter (degrees of freedom) for residual variance
varGenotypic = 1 # used to derive hyper parameter (scale) for locus effect variance
varResidual = 1 # used to derive hyper parameter (scale) for locus effect variance
scaleVar = varGenotypic*(dfEffectVar-2)/dfEffectVar # scale factor for locus effects
scaleRes = varResidual*(nuRes-2)/nuRes # scale factor for residual variance
nothing
function get_column(X,nRows,j)
indx = 1 + (j-1)*nRows
ptr = pointer(X,indx)
pointer_to_array(ptr,nRows)
end
get_column (generic function with 1 method)
xpx = [(X[:,i]'X[:,i])[1]::Float64 for i=1:nMarkers]
xArray = Array(Array{Float64,1},nMarkers)
for i=1:nMarkers
xArray[i] = get_column(X,nObs,i)
end
We want to compute: $$ rhs = \mathbf{X}_j'(\mathbf{y}_{corr} + \mathbf{X}_j\mathbf{\alpha}_j) $$ This is more efficiently obtained as $$ rhs = \mathbf{X}_j'\mathbf{y}_{corr} + \mathbf{X}_j'\mathbf{X}_j\mathbf{\alpha}_j, $$ using the diagonals of $\mathbf{X}'\mathbf{X}$ that have already been computed (line 4 of the function below).
function sampleEffects!(nMarkers,xArray,xpx,yCorr,α,meanAlpha,vare,locusEffectVarl)
nObs = size(X,1)
for j=1:nMarkers
rhs::Float64 = dot(xArray[j],yCorr) + xpx[j]*α[j]
lhs::Float64 = xpx[j] + vare/locusEffectVar[j]
invLhs::Float64 = 1.0/lhs
mean::Float64 = invLhs*rhs
oldAlpha::Float64 = α[j]
α[j] = mean + randn()*sqrt(invLhs*vare)
BLAS.axpy!(oldAlpha-α[j],xArray[j],yCorr)
end
nothing
end
sampleEffects! (generic function with 1 method)
length(xpx)
1000
The intercept is sampled first and the sampleEffects! function is called to sample the marker effects.
chi1=Chisq(nObs+nuRes)
chi2=Chisq(dfEffectVar+1)
function BayesC0!(numIter,nMarkers,X,xpx,yCorr,mu,meanMu,α,meanAlpha,vare,locusEffectVar)
for i=1:numIter
# sample residula variance
vare = (dot(yCorr,yCorr)+nuRes*scaleRes)/rand(chi1)
# sample intercept
yCorr = yCorr+mu
rhs = sum(yCorr)
invLhs = 1.0/(nObs)
mean = rhs*invLhs
mu = mean + randn()*sqrt(invLhs*vare)
yCorr = yCorr - mu
meanMu = meanMu + (mu - meanMu)/i
# sample effects
sampleEffects!(nMarkers,xArray,xpx,yCorr,α,meanAlpha,vare,locusEffectVar)
meanAlpha = meanAlpha + (α - meanAlpha)/i
#sameple locus effect variance
for j=1:nMarkers
locusEffectVar[j] = (scaleVar*dfEffectVar + α[j]^2)/rand(chi2)
end
if (i%1000)==0
yhat = meanMu+X*meanAlpha
resCorr = cor(a,yhat)
println ("Correlation of between true and predicted breeding value: ", resCorr)
end
end
end
BayesC0! (generic function with 1 method)
meanMu = 0
meanAlpha = zeros(nMarkers)
#initial valus
vare = 1
varEffects = 1.0
mu = mean(y)
yCorr = y - mu
alpha = fill(0.0,nMarkers)
locusEffectVar = fill(varEffects,nMarkers)
#run it
@time BayesC0!(chainLength,nMarkers,X,xpx,yCorr,mu,meanMu,alpha,meanAlpha,vare,locusEffectVar)
Correlation of between true and predicted breeding value: 0.7886358253499116 Correlation of between true and predicted breeding value: 0.7890681933545429 elapsed time: 1.045476748 seconds (243489164 bytes allocated, 13.68% gc time)