In [2]:
# The Validation Set Approach

library(ISLR)
set.seed(1)
train=sample(392,196)
lm.fit=lm(mpg~horsepower,data=Auto,subset=train)
attach(Auto)
mean((mpg-predict(lm.fit,Auto))[-train]^2)
lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)
mean((mpg-predict(lm.fit2,Auto))[-train]^2)
lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)
mean((mpg-predict(lm.fit3,Auto))[-train]^2)
set.seed(2)
train=sample(392,196)
lm.fit=lm(mpg~horsepower,data=Auto,subset=train)
mean((mpg-predict(lm.fit,Auto))[-train]^2)
lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)
mean((mpg-predict(lm.fit2,Auto))[-train]^2)
lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)
mean((mpg-predict(lm.fit3,Auto))[-train]^2)
Validation-set MSE, seed 1 (linear, quadratic, cubic fits):
26.1414211520072   19.8225850408262   19.7825166856023
Validation-set MSE, seed 2 (linear, quadratic, cubic fits):
23.2955851508862   18.9012408317778   19.2573982608642
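The three fits above generalize naturally to a loop over polynomial degree. A minimal sketch, not part of the original lab (the range 1:10 and the name val.mse are my own choices; it reuses the current train split):

val.mse=sapply(1:10,function(d){
  fit=lm(mpg~poly(horsepower,d),data=Auto,subset=train)
  mean((Auto$mpg-predict(fit,Auto))[-train]^2)  # MSE on the held-out half
})
val.mse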
In [3]:
# Leave-One-Out Cross-Validation

glm.fit=glm(mpg~horsepower,data=Auto)
coef(glm.fit)
lm.fit=lm(mpg~horsepower,data=Auto)
coef(lm.fit)
library(boot)
glm.fit=glm(mpg~horsepower,data=Auto)
cv.err=cv.glm(Auto,glm.fit)   # K defaults to n, i.e. LOOCV
cv.err$delta                  # raw and bias-corrected CV estimates
cv.error=rep(0,5)
for (i in 1:5){               # LOOCV error for polynomial degrees 1-5
  glm.fit=glm(mpg~poly(horsepower,i),data=Auto)
  cv.error[i]=cv.glm(Auto,glm.fit)$delta[1]
}
cv.error
(Intercept)        horsepower
39.9358610211705   -0.157844733353654
(Intercept)        horsepower
39.9358610211705   -0.157844733353654
cv.err$delta:
24.2315135179292   24.2311440937562
cv.error (polynomial degrees 1 to 5):
24.2315135179292   19.2482131244897   19.334984064029   19.4244303104302   19.0332138547041
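For least squares, the n model refits inside cv.glm() are not actually needed: the LOOCV estimate has the closed form mean(((y - yhat)/(1 - h))^2), where h holds the leverage values (ISLR equation (5.2)). A short sketch to verify this against cv.err$delta[1] above (the function name loocv.shortcut is my own):

loocv.shortcut=function(fit){
  h=lm.influence(fit)$h             # leverage of each observation
  mean((residuals(fit)/(1-h))^2)
}
loocv.shortcut(lm(mpg~horsepower,data=Auto))  # matches 24.23151... above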
In [4]:
# k-Fold Cross-Validation

set.seed(17)
cv.error.10=rep(0,10)
for (i in 1:10){              # 10-fold CV error for polynomial degrees 1-10
  glm.fit=glm(mpg~poly(horsepower,i),data=Auto)
  cv.error.10[i]=cv.glm(Auto,glm.fit,K=10)$delta[1]
}
cv.error.10
10-fold CV error, polynomial degrees 1 to 10:
24.2051967567753   19.1892390663471   19.3066158967501   19.3379909004929   18.8791148363354
19.0210341885227   18.8960903802809   19.7120146188182   18.9514005667306   19.5019592285538
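The shape of these estimates is easier to see in a plot; a quick sketch with base graphics (axis labels are my own):

plot(1:10,cv.error.10,type="b",
     xlab="Degree of polynomial",ylab="10-fold CV error")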
In [5]:
# The Bootstrap

alpha.fn=function(data,index){
  X=data$X[index]
  Y=data$Y[index]
  # alpha that minimizes the variance of a portfolio invested in X and Y
  return((var(Y)-cov(X,Y))/(var(X)+var(Y)-2*cov(X,Y)))
}
alpha.fn(Portfolio,1:100)
set.seed(1)
alpha.fn(Portfolio,sample(100,100,replace=TRUE))
boot(Portfolio,alpha.fn,R=1000)
0.57583207459283
0.596383302006392
ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Portfolio, statistic = alpha.fn, R = 1000)


Bootstrap Statistics :
     original        bias    std. error
t1* 0.5758321 -7.315422e-05  0.08861826
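boot() hides the resampling loop, but the same standard error can be computed by hand, which makes the mechanics explicit. A minimal sketch (the name alpha.boot is my own; the result will be close to, not identical to, the 0.0886 above):

set.seed(1)
alpha.boot=replicate(1000,alpha.fn(Portfolio,sample(100,100,replace=TRUE)))
sd(alpha.boot)   # bootstrap estimate of SE(alpha)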
In [6]:
# Estimating the Accuracy of a Linear Regression Model

boot.fn=function(data,index)
  return(coef(lm(mpg~horsepower,data=data,subset=index)))
boot.fn(Auto,1:392)
set.seed(1)
boot.fn(Auto,sample(392,392,replace=TRUE))
boot.fn(Auto,sample(392,392,replace=TRUE))
boot(Auto,boot.fn,1000)
(Intercept)        horsepower
39.9358610211705   -0.157844733353654
(Intercept)        horsepower
38.7387133554397   -0.148195186363759
(Intercept)        horsepower
40.0383085722796   -0.159610359262947
ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
      original      bias    std. error
t1* 39.9358610  0.02972191 0.860007896
t2* -0.1578447 -0.00030823 0.007404467
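The printed summary can also be recovered from the boot object itself; a small sketch (boot.out is my own name for the saved result, and its numbers will be close to, not identical to, the table above):

set.seed(1)
boot.out=boot(Auto,boot.fn,1000)
boot.out$t0              # coefficient estimates on the original sample
apply(boot.out$t,2,sd)   # bootstrap standard errors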
In [7]:
summary(lm(mpg~horsepower,data=Auto))$coef
              Estimate        Std. Error    t value        Pr(>|t|)
(Intercept)   3.993586e+01    7.174987e-01  5.565984e+01   1.220362e-187
horsepower    -1.578447e-01   6.445501e-03  -2.448914e+01  7.031989e-81
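These formula-based standard errors (0.717 for the intercept, 0.0064 for the slope) are noticeably smaller than the bootstrap estimates of 0.86 and 0.0074 obtained above. The formulas assume that the linear model is correct and that the error variance is constant; since the true relationship between mpg and horsepower is clearly nonlinear, the bootstrap, which relies on neither assumption, likely gives the more accurate standard errors here.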
In [8]:
boot.fn=function(data,index)
  coefficients(lm(mpg~horsepower+I(horsepower^2),data=data,subset=index))
In [9]:
set.seed(1)
boot(Auto,boot.fn,1000)
summary(lm(mpg~horsepower+I(horsepower^2),data=Auto))$coef
ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
        original        bias     std. error
t1* 56.900099702  6.098115e-03 2.0944855842
t2* -0.466189630 -1.777108e-04 0.0334123802
t3*  0.001230536  1.324315e-06 0.0001208339
                  Estimate        Std. Error    t value        Pr(>|t|)
(Intercept)       5.690010e+01    1.800427e+00  3.160367e+01   1.740911e-109
horsepower        -4.661896e-01   3.112462e-02  -1.497816e+01  2.289429e-40
I(horsepower^2)   1.230536e-03    1.220759e-04  1.008009e+01   2.196340e-21
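Because the quadratic model provides a good fit to the data, the bootstrap standard errors now correspond much more closely to the formula-based estimates from summary().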