#Pkg.add("Distributions")
This needs to be done only once.
But, to access the functions in the Distributions package the "using" command has to be invoked as:
using Distributions
nRows = 10
nCols = 5
X = sample([0;1;2],(nRows,nCols))
10x5 Array{Int64,2}: 0 0 2 0 1 2 1 1 0 0 0 0 2 2 0 0 2 1 2 0 0 2 2 1 2 2 1 2 2 2 1 2 0 1 1 1 0 2 1 2 2 0 0 2 0 2 2 2 0 2
Each element in $\mathbf{X}$ is sampled from the array [0,1,2].
methods(sample)
X = [ones(nRows,1) X]
10x6 Array{Float64,2}: 1.0 0.0 0.0 2.0 0.0 1.0 1.0 2.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 2.0 2.0 0.0 1.0 0.0 2.0 1.0 2.0 0.0 1.0 0.0 2.0 2.0 1.0 2.0 1.0 2.0 1.0 2.0 2.0 2.0 1.0 1.0 2.0 0.0 1.0 1.0 1.0 1.0 0.0 2.0 1.0 2.0 1.0 2.0 0.0 0.0 2.0 0.0 1.0 2.0 2.0 2.0 0.0 2.0
nRowsX, nColsX = size(X)
mean = 0.0
std = 0.5
b = rand(Normal(mean,std),nColsX)
6-element Array{Float64,1}: -0.784395 0.87821 0.714257 -0.439881 0.30109 -0.834222
resStd = 1.0
y = X*b + rand(Normal(0,resStd),nRowsX)
10-element Array{Float64,1}: -2.85294 1.95599 -1.92352 0.568759 -1.33026 1.00019 0.442988 -4.11003 0.512736 0.897029
using Distributions
function simDat(nObs,nLoci,bMean,bStd,resStd)
X = [ones(nObs,1) sample([0;1;2],(nObs,nLoci))]
b = rand(Normal(bMean,bStd),size(X,2))
y = X*b + rand(Normal(0.0, resStd),nObs)
return (y,X,b)
end
simDat (generic function with 1 method)
nObs = 10
nLoci = 5
bMean = 0.0
bStd = 0.5
resStd = 1.0
res = simDat(nObs,nLoci,bMean,bStd,resStd)
y = res[1]
X = res[2]
b = res[3]
6-element Array{Float64,1}: 0.347378 0.234197 -0.131814 -0.845628 0.0233665 0.123051
# installing package
# only needs to be done once
#Pkg.clone("https://github.com/reworkhow/XSim.jl.git")
using XSim
chrLength = 1.0
numChr = 1
numLoci = 2000
mutRate = 0.0
locusInt = chrLength/numLoci
mapPos = collect(0:locusInt:(chrLength-0.0001))
geneFreq = fill(0.5,numLoci)
XSim.init(numChr,numLoci,chrLength,geneFreq,mapPos,mutRate)
popSizeFounder = 500
sires = sampleFounders(popSizeFounder)
dams = sampleFounders(popSizeFounder)
nothing
Sampling 500 animals into base population. Sampling 500 animals into base population.
ngen,popSize = 20,500
sires1,dams1,gen1 = sampleRan(popSize, ngen, sires, dams);
Generation 2: sampling 250 males and 250 females Generation 3: sampling 250 males and 250 females Generation 4: sampling 250 males and 250 females Generation 5: sampling 250 males and 250 females Generation 6: sampling 250 males and 250 females Generation 7: sampling 250 males and 250 females Generation 8: sampling 250 males and 250 females Generation 9: sampling 250 males and 250 females Generation 10: sampling 250 males and 250 females Generation 11: sampling 250 males and 250 females Generation 12: sampling 250 males and 250 females Generation 13: sampling 250 males and 250 females Generation 14: sampling 250 males and 250 females Generation 15: sampling 250 males and 250 females Generation 16: sampling 250 males and 250 females Generation 17: sampling 250 males and 250 females Generation 18: sampling 250 males and 250 females Generation 19: sampling 250 males and 250 females Generation 20: sampling 250 males and 250 females Generation 21: sampling 250 males and 250 females
animals = concatCohorts(sires1,dams1)
M = getOurGenotypes(animals)
500x2000 Array{Int64,2}: 0 1 1 0 2 1 1 1 1 1 1 1 0 … 2 2 2 1 1 0 1 1 2 1 1 2 1 1 2 0 1 1 1 0 1 2 0 0 1 0 1 0 1 2 0 1 2 2 1 2 1 2 1 0 2 1 1 1 2 1 1 1 1 0 1 0 2 1 0 1 1 2 1 2 0 0 1 1 1 1 1 2 1 1 2 2 2 1 2 0 0 1 2 2 0 1 2 1 1 2 1 0 0 1 2 0 1 0 1 1 0 1 1 0 1 2 2 2 2 2 0 1 0 1 1 2 0 0 1 0 0 1 0 2 2 2 0 1 1 … 0 2 0 1 1 1 0 2 1 0 2 1 2 0 0 2 1 1 1 1 1 2 0 0 1 1 1 0 2 0 2 0 1 0 0 1 0 1 2 1 2 1 1 1 1 1 0 1 0 1 1 1 1 0 2 1 1 2 2 1 2 2 2 1 1 1 2 1 1 2 1 0 1 2 1 2 1 0 1 1 1 0 1 1 2 1 1 1 0 2 2 1 1 1 1 2 0 1 2 0 0 1 1 1 2 1 1 2 1 1 1 1 1 1 2 1 1 1 0 0 1 1 0 1 1 … 1 2 1 1 2 2 0 1 0 2 0 0 1 1 1 2 0 0 0 1 0 1 0 0 0 0 0 2 1 1 0 1 2 1 0 1 1 1 0 1 0 0 0 0 1 1 2 0 2 1 2 0 2 2 0 2 1 2 1 2 0 1 ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ 2 1 1 2 1 1 2 2 2 1 2 1 0 1 0 2 1 1 1 1 1 0 1 0 2 1 2 1 0 2 1 1 1 1 1 0 1 2 1 0 1 1 1 1 1 2 1 1 1 1 0 0 1 1 2 1 0 2 0 1 0 1 0 … 2 2 1 0 1 0 2 1 1 1 2 0 0 2 2 0 2 0 2 2 2 2 0 2 2 0 1 2 1 0 1 1 2 1 1 0 1 1 2 1 2 1 2 1 0 2 1 1 1 2 1 1 1 2 1 1 1 0 0 1 0 2 0 0 2 1 0 0 1 1 1 1 0 2 0 2 0 0 2 2 2 0 0 0 2 0 2 1 0 1 0 1 0 0 0 0 1 0 2 1 2 0 1 1 0 2 2 1 0 2 0 2 1 0 1 1 1 2 1 1 1 1 1 0 1 … 2 1 2 1 1 1 1 2 1 0 1 1 0 1 0 0 2 1 2 0 1 2 0 0 1 0 2 1 1 0 1 1 1 1 1 1 1 0 0 1 0 1 0 2 1 2 0 2 0 0 0 0 1 1 2 0 1 1 2 2 2 2 1 1 2 2 1 2 0 0 2 1 1 0 1 0 1 1 0 0 0 1 1 0 1 0 0 1 0 2 1 0 2 1 2 1 2 0 0 0 1 1 2 2 2 0 1 2 1 0 2 1
nQTL = 50
selQTL = fill(false,numLoci)
selQTL[sample(1:numLoci,nQTL,replace=false)] = true
selMrk = !selQTL;
Q = M[:,selQTL]
500x50 Array{Int64,2}: 1 2 1 0 2 0 0 2 1 0 0 0 2 … 2 1 0 2 1 1 0 0 1 1 1 2 2 1 1 1 0 2 0 1 0 1 2 0 1 2 2 1 2 1 1 2 1 1 0 1 2 1 2 2 2 0 1 1 2 1 2 0 1 1 1 0 2 1 0 0 1 0 1 1 1 0 2 2 1 2 1 1 1 2 0 1 1 0 1 1 1 2 1 2 1 1 1 0 2 1 1 0 0 1 0 1 1 2 2 1 2 1 1 1 2 1 2 2 1 0 2 0 0 2 0 1 2 2 1 1 1 1 2 2 1 2 0 1 1 … 1 1 2 2 0 0 1 0 0 0 1 1 2 1 1 2 0 0 2 2 2 2 0 1 2 1 1 0 0 1 1 0 1 1 1 1 1 0 1 0 1 1 1 1 2 0 2 2 2 2 2 1 1 2 1 2 0 1 0 1 1 2 0 1 0 2 0 1 1 2 2 2 1 0 1 0 0 0 1 0 1 2 0 1 2 1 2 0 0 0 1 1 1 0 2 2 1 1 0 0 0 2 0 1 2 0 1 0 1 1 0 2 1 2 1 1 2 2 2 1 1 1 1 2 2 … 1 0 0 1 1 2 1 2 0 1 0 1 1 1 1 2 0 0 1 2 1 1 1 1 2 2 0 0 1 1 1 2 1 1 1 2 2 2 1 2 2 1 1 1 2 0 0 1 0 1 2 0 1 2 0 0 1 1 0 2 0 1 ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ 1 2 1 1 1 1 1 2 2 2 0 0 1 1 0 2 1 2 1 1 0 1 2 1 1 1 1 2 1 1 2 1 1 2 1 2 0 1 1 2 0 2 1 1 1 0 0 1 1 2 1 1 0 2 0 2 1 1 1 2 2 0 0 … 0 1 1 0 1 2 2 0 1 2 0 0 2 0 2 1 2 2 0 1 2 0 1 1 2 2 0 2 2 2 1 0 1 1 2 1 1 1 2 0 1 2 1 1 1 1 2 2 1 0 1 1 1 1 1 1 1 1 1 0 2 1 1 0 2 1 1 0 0 2 0 2 1 0 0 1 1 2 0 1 0 1 1 2 0 2 0 1 2 2 2 1 2 0 2 1 0 1 1 2 2 2 1 0 1 2 1 2 0 2 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 … 2 0 1 2 2 2 1 0 1 2 1 2 2 2 1 0 0 1 1 2 1 1 2 0 1 2 0 2 0 2 2 2 0 0 0 2 1 0 1 2 1 0 0 0 1 0 0 2 0 2 1 1 0 1 0 1 0 0 0 1 0 1 1 2 1 1 0 0 1 1 1 1 2 1 1 1 0 2 1 2 2 2 0 1 1 1 2 2 1 0 2 0 2 1 0 1 0 2 1 1 1 0 2 2 1 1 1 0 0 2 1 2
X = M[:,selMrk]
500x1950 Array{Int64,2}: 0 1 1 0 2 1 1 1 1 1 1 0 0 … 2 2 2 1 1 0 1 1 2 1 1 2 1 1 2 0 1 1 1 0 1 0 0 1 1 0 1 0 1 2 0 1 2 2 1 2 1 2 1 0 2 1 1 1 2 1 1 1 0 1 1 0 2 1 0 1 1 2 1 2 0 0 1 1 1 1 1 2 1 1 2 2 1 2 1 0 0 1 2 2 0 1 2 1 1 2 1 0 0 1 2 0 1 0 1 1 1 1 0 1 1 2 2 2 2 2 0 1 0 1 1 2 0 0 1 0 0 1 0 2 2 0 1 1 2 … 0 2 0 1 1 1 0 2 1 0 2 1 2 0 0 2 1 1 1 1 1 0 0 1 1 1 1 0 2 0 2 0 1 0 0 1 0 1 2 1 2 1 1 1 1 1 1 0 1 2 1 1 1 0 2 1 1 2 2 1 2 2 2 1 1 1 2 1 1 2 1 1 2 1 0 2 1 0 1 1 1 0 1 1 2 1 1 1 0 2 2 1 1 1 1 2 1 2 0 0 0 1 1 1 2 1 1 2 1 1 1 1 1 1 2 1 1 1 0 0 1 0 1 1 1 … 1 2 1 1 2 2 0 1 0 2 0 0 1 1 1 2 0 0 0 1 0 0 0 0 1 0 0 2 1 1 0 1 2 1 0 1 1 1 0 1 0 0 0 0 1 1 0 2 1 1 2 0 2 2 0 2 1 2 1 2 0 1 ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ 2 1 1 2 1 1 2 2 2 2 1 0 0 1 0 2 1 1 1 1 1 0 1 0 2 1 2 1 0 2 1 1 1 1 0 1 2 2 1 0 1 1 1 1 1 2 1 1 1 1 0 0 1 1 2 1 0 2 0 0 1 0 0 … 2 2 1 0 1 0 2 1 1 1 2 0 0 2 2 0 2 0 2 2 2 0 2 2 2 0 1 2 1 0 1 1 2 1 1 0 1 1 2 1 2 1 2 1 0 2 1 1 2 1 1 1 1 2 1 1 1 0 0 1 0 2 0 0 2 1 0 0 1 1 1 0 2 0 0 2 0 0 2 2 2 0 0 0 2 0 2 1 0 1 0 1 0 0 0 0 0 2 1 1 2 0 1 1 0 2 2 1 0 2 0 2 1 0 1 1 1 2 1 1 1 1 0 1 1 … 2 1 2 1 1 1 1 2 1 0 1 1 0 1 0 0 2 1 2 0 1 0 0 1 2 0 2 1 1 0 1 1 1 1 1 1 1 0 0 1 0 1 0 2 1 2 2 0 0 1 0 0 1 1 2 0 1 1 2 2 2 2 1 1 2 2 1 2 0 0 2 1 0 1 2 0 1 1 0 0 0 1 1 0 1 0 0 1 0 2 1 0 2 1 2 1 0 0 0 1 1 1 2 2 2 0 1 2 1 0 2 1
nQTL = size(Q,2)
nObs = size(Q,1)
α = rand(Normal(0,1),nQTL)
a = Q*α
# scaling breeding values to have variance 25.0
v = var(a)
genVar = 25.0
a *= sqrt(genVar/v)
va = var(a)
# formatted printing
@printf "genetic variance = %8.2f " va
genetic variance = 25.00
resVar = 75.0
resStd = sqrt(resVar)
e = rand(Normal(0,resStd),nObs)
y = 100 + a + e
@printf "phenotypic mean = %8.2f \n" Base.mean(y)
@printf "phenotypic variance = %8.2f \n" var(y)
phenotypic mean = 108.39 phenotypic variance = 91.74
ngen,popSize = 20,500
siresA,damsA,gen1 = sampleRan(popSize, ngen, sires, dams)
siresB,damsB,gen1 = sampleRan(popSize, ngen, sires, dams)
siresAB,damsAB,gen1 = sampleRan(popSize, ngen, siresA, damsB,gen=gen1);
Generation 2: sampling 250 males and 250 females Generation 3: sampling 250 males and 250 females Generation 4: sampling 250 males and 250 females Generation 5: sampling 250 males and 250 females Generation 6: sampling 250 males and 250 females Generation 7: sampling 250 males and 250 females Generation 8: sampling 250 males and 250 females Generation 9: sampling 250 males and 250 females Generation 10: sampling 250 males and 250 females Generation 11: sampling 250 males and 250 females Generation 12: sampling 250 males and 250 females Generation 13: sampling 250 males and 250 females Generation 14: sampling 250 males and 250 females Generation 15: sampling 250 males and 250 females Generation 16: sampling 250 males and 250 females Generation 17: sampling 250 males and 250 females Generation 18: sampling 250 males and 250 females Generation 19: sampling 250 males and 250 females Generation 20: sampling 250 males and 250 females Generation 21: sampling 250 males and 250 females Generation 2: sampling 250 males and 250 females Generation 3: sampling 250 males and 250 females Generation 4: sampling 250 males and 250 females Generation 5: sampling 250 males and 250 females Generation 6: sampling 250 males and 250 females Generation 7: sampling 250 males and 250 females Generation 8: sampling 250 males and 250 females Generation 9: sampling 250 males and 250 females Generation 10: sampling 250 males and 250 females Generation 11: sampling 250 males and 250 females Generation 12: sampling 250 males and 250 females Generation 13: sampling 250 males and 250 females Generation 14: sampling 250 males and 250 females Generation 15: sampling 250 males and 250 females Generation 16: sampling 250 males and 250 females Generation 17: sampling 250 males and 250 females Generation 18: sampling 250 males and 250 females Generation 19: sampling 250 males and 250 females Generation 20: sampling 250 males and 250 females Generation 21: sampling 250 males and 250 females Generation 22: sampling 250 males and 250 females Generation 23: sampling 250 males and 250 females Generation 24: sampling 250 males and 250 females Generation 25: sampling 250 males and 250 females Generation 26: sampling 250 males and 250 females Generation 27: sampling 250 males and 250 females Generation 28: sampling 250 males and 250 females Generation 29: sampling 250 males and 250 females Generation 30: sampling 250 males and 250 females Generation 31: sampling 250 males and 250 females Generation 32: sampling 250 males and 250 females Generation 33: sampling 250 males and 250 females Generation 34: sampling 250 males and 250 females Generation 35: sampling 250 males and 250 females Generation 36: sampling 250 males and 250 females Generation 37: sampling 250 males and 250 females Generation 38: sampling 250 males and 250 females Generation 39: sampling 250 males and 250 females Generation 40: sampling 250 males and 250 females Generation 41: sampling 250 males and 250 females
freq=Base.mean(M,1)/2;
using Gadfly
plot(x=freq,Geom.histogram,
Guide.title("Distribution of Gene Frequency"),
Guide.ylabel("Frequency"),
Guide.xlabel("Gene Frequency"))
;ipython nbconvert --to slides dataSimulation.ipynb