# Analysis of blood pressure data # The bpdata.csv dataset contains diastolic and systolic blood pressure measurements for 1000 individuals, # and genotype data for 11 SNPs in a candidate gene for blood pressure. Covariates such as gender (sex) and # body mass index (bmi) are included as well. bpdata=read.csv("~/Dropbox/Teaching/2015/BMS262/Module1/Excercises/bpdata.csv",header=TRUE) head(bpdata) table(bpdata$snp3) # Exercise 1: linear regression different models # Perform a linear regression in R of systolic blood pressure (sbp) on SNP3 # using lm(). Compare the estimates, intervals and p-values you get using: # additive (linear) model # dominant model # recessive model # 2 parameter (genotypic) model # Additive model bpdata$n.minor <- (bpdata$snp3=="TC") + 2*(bpdata$snp3=="TT") lm1 = lm(sbp~n.minor, data=bpdata) # Obtain a confidence interval for the linear regression paramaters confint.default(lm1) summary(lm1) qqnorm(lm1$residuals) qqline(lm1$residuals,col="red") hist(lm1$residuals, breaks=20) # Dominant effect of T bpdata$domvar <- (bpdata$snp3=="TC") | (bpdata$snp3=="TT") with(bpdata, table(domvar, snp3)) lm2 <- lm(sbp~domvar, data=bpdata) confint.default(lm2) summary(lm2) qqnorm(lm2$residuals) qqline(lm2$residuals,col="red") hist(lm2$residuals, breaks=20) # Recessive model and two-degree of freedom model (genotypic model) bpdata$recvar <- (bpdata$snp3=="TT") with(bpdata, table(recvar, snp3)) lm3 <- lm(sbp~recvar, data=bpdata) confint.default(lm3) summary(lm3) lm4 <- lm(sbp~snp3, data=bpdata) confint.default(lm4) summary(lm4) # Exercise 2: Provide plots illustrating the relationship between # sbp and the three genotypes at SNP3 plot(sbp~n.minor, data=bpdata,col="darkgreen") plot(sbp~jitter(n.minor), data=bpdata,col="darkgreen") abline(lm1, col="red", lwd=2) boxplot(sbp~n.minor,data=bpdata, col=c("purple","red","darkblue")) abline(lm1, lwd=2) boxplot(sbp~n.minor,data=bpdata, col=c("purple","red","darkblue"),varwidth=T) abline(lm1, lwd=2) # Exercise 3 - redo the linear regression analysis of sbp from question 2 # for the additive model, but this time adjust for sex and bmi. Do the results change? lm1adj = lm(sbp~n.minor+sex+bmi, data=bpdata) #Get confidence intervals and summary data # confint.default(lm1adj) summary(lm1adj) # Exercise 4 - What proportion of the variance in sbp is explained by all of the 11 SNPs together? # Perform a linear regression for additive model using all SNPs ndf <- as.data.frame(bpdata[,c("sbp")]) colnames(ndf) <- "sbp" summary(bpdata) # Convert to minor allele counts ndf$snp1 <- (bpdata$snp1=="CT") + 2*(bpdata$snp1=="CC") ndf$snp2 <- (bpdata$snp2=="AT") + 2*(bpdata$snp2=="AA") ndf$snp3 <- (bpdata$snp3=="TC") + 2*(bpdata$snp3=="TT") ndf$snp4 <- (bpdata$snp4=="CT") + 2*(bpdata$snp4=="TT") ndf$snp5 <- (bpdata$snp5=="CT") + 2*(bpdata$snp5=="TT") ndf$snp6 <- (bpdata$snp6=="AG") + 2*(bpdata$snp6=="GG") ndf$snp7 <- (bpdata$snp7=="AT") + 2*(bpdata$snp7=="AA") ndf$snp8 <- (bpdata$snp8=="CT") + 2*(bpdata$snp8=="TT") ndf$snp9 <- (bpdata$snp9=="CT") + 2*(bpdata$snp9=="CC") ndf$snp10 <- (bpdata$snp10=="CT") + 2*(bpdata$snp10=="TT") ndf$snp11 <- (bpdata$snp11=="CT") + 2*(bpdata$snp11=="CC") summary(ndf) lmall <- lm(sbp~snp1+snp2+snp3+snp4+snp5+snp6+snp7+snp8+snp9+snp10+snp11,data=ndf) summary(lmall) lmall <- lm(sbp~.,data=ndf) summary(lmall)