## Scalars: create two numbers, echo one, then add them.
a <- 4
b <- 5
a
a + b

## A numeric vector built with c(); adding the scalar `a` works elementwise.
d <- c(1, 10, 11, 12)
d
a + d

## Recycling: the length-2 vector `f` is reused to match the length-4 `d`.
f <- c(1, 2)
d + f

## The colon operator builds an integer sequence; str() reports type and length.
d <- 1:5
str(d)
int [1:5] 1 2 3 4 5
d <- 2*d
str(d)
num [1:5] 2 4 6 8 10
d
d[3]
d
d[c(1,4)]
a <- as.integer(4)
str(a) ## scalars are length 1 vectors in R
length(a)
a[1]
int 4
d
d[3] <- "howdy"
d
str(d)
chr [1:5] "2" "4" "howdy" "8" "10"
Important point: vectors are homogeneous, i.e., all elements have the same type. R does not warn when it coerces elements to a common type (above, the numbers silently became character strings). This is much like arrays in numpy.
a <- 1:4
b <- matrix(a,nrow=2,ncol=2)
b
1 | 3 |
2 | 4 |
b + 4
5 | 7 |
6 | 8 |
b + c(1,2)
2 | 4 |
4 | 6 |
d <- matrix(5:8,nrow=2,ncol=2)
d
d[1,1]
d[1,]
5 | 7 |
6 | 8 |
b
d
b*d ## elementwise
1 | 3 |
2 | 4 |
5 | 7 |
6 | 8 |
5 | 21 |
12 | 32 |
b%*%d ## matrix multiplication
23 | 31 |
34 | 46 |
str(b)
int [1:2, 1:2] 1 2 3 4
d <- array(rnorm(16),dim=c(2,2,4))
dim(d)
str(d)
d
d[1,1,4]
num [1:2, 1:2, 1:4] -0.327 -1.544 1.837 1.332 -1.315 ...
## do rowSums, colMeans, apply
Lists are like heterogeneous vectors: they are more flexible, but they take up more space and support fewer mathematical operations.
a <- list("first"=10,"second"="howdy","third"=mean)
a
function (x, ...)
UseMethod("mean")
a$first
a[[1]]
b <- rnorm(1e7)
d <- as.list(rnorm(1e7))
head(b)
head(d)
length(b)
length(d)
## huge difference
object.size(b)
object.size(d)
80000040 bytes
560000040 bytes
iris
head(iris)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|
5.1 | 3.5 | 1.4 | 0.2 | setosa |
4.9 | 3.0 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.3 | 0.2 | setosa |
4.6 | 3.1 | 1.5 | 0.2 | setosa |
5.0 | 3.6 | 1.4 | 0.2 | setosa |
5.4 | 3.9 | 1.7 | 0.4 | setosa |
4.6 | 3.4 | 1.4 | 0.3 | setosa |
5.0 | 3.4 | 1.5 | 0.2 | setosa |
4.4 | 2.9 | 1.4 | 0.2 | setosa |
4.9 | 3.1 | 1.5 | 0.1 | setosa |
5.4 | 3.7 | 1.5 | 0.2 | setosa |
4.8 | 3.4 | 1.6 | 0.2 | setosa |
4.8 | 3.0 | 1.4 | 0.1 | setosa |
4.3 | 3.0 | 1.1 | 0.1 | setosa |
5.8 | 4.0 | 1.2 | 0.2 | setosa |
5.7 | 4.4 | 1.5 | 0.4 | setosa |
5.4 | 3.9 | 1.3 | 0.4 | setosa |
5.1 | 3.5 | 1.4 | 0.3 | setosa |
5.7 | 3.8 | 1.7 | 0.3 | setosa |
5.1 | 3.8 | 1.5 | 0.3 | setosa |
5.4 | 3.4 | 1.7 | 0.2 | setosa |
5.1 | 3.7 | 1.5 | 0.4 | setosa |
4.6 | 3.6 | 1.0 | 0.2 | setosa |
5.1 | 3.3 | 1.7 | 0.5 | setosa |
4.8 | 3.4 | 1.9 | 0.2 | setosa |
5.0 | 3.0 | 1.6 | 0.2 | setosa |
5.0 | 3.4 | 1.6 | 0.4 | setosa |
5.2 | 3.5 | 1.5 | 0.2 | setosa |
5.2 | 3.4 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.6 | 0.2 | setosa |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
6.9 | 3.2 | 5.7 | 2.3 | virginica |
5.6 | 2.8 | 4.9 | 2.0 | virginica |
7.7 | 2.8 | 6.7 | 2.0 | virginica |
6.3 | 2.7 | 4.9 | 1.8 | virginica |
6.7 | 3.3 | 5.7 | 2.1 | virginica |
7.2 | 3.2 | 6.0 | 1.8 | virginica |
6.2 | 2.8 | 4.8 | 1.8 | virginica |
6.1 | 3.0 | 4.9 | 1.8 | virginica |
6.4 | 2.8 | 5.6 | 2.1 | virginica |
7.2 | 3.0 | 5.8 | 1.6 | virginica |
7.4 | 2.8 | 6.1 | 1.9 | virginica |
7.9 | 3.8 | 6.4 | 2.0 | virginica |
6.4 | 2.8 | 5.6 | 2.2 | virginica |
6.3 | 2.8 | 5.1 | 1.5 | virginica |
6.1 | 2.6 | 5.6 | 1.4 | virginica |
7.7 | 3.0 | 6.1 | 2.3 | virginica |
6.3 | 3.4 | 5.6 | 2.4 | virginica |
6.4 | 3.1 | 5.5 | 1.8 | virginica |
6.0 | 3.0 | 4.8 | 1.8 | virginica |
6.9 | 3.1 | 5.4 | 2.1 | virginica |
6.7 | 3.1 | 5.6 | 2.4 | virginica |
6.9 | 3.1 | 5.1 | 2.3 | virginica |
5.8 | 2.7 | 5.1 | 1.9 | virginica |
6.8 | 3.2 | 5.9 | 2.3 | virginica |
6.7 | 3.3 | 5.7 | 2.5 | virginica |
6.7 | 3.0 | 5.2 | 2.3 | virginica |
6.3 | 2.5 | 5.0 | 1.9 | virginica |
6.5 | 3.0 | 5.2 | 2.0 | virginica |
6.2 | 3.4 | 5.4 | 2.3 | virginica |
5.9 | 3.0 | 5.1 | 1.8 | virginica |
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|
5.1 | 3.5 | 1.4 | 0.2 | setosa |
4.9 | 3.0 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.3 | 0.2 | setosa |
4.6 | 3.1 | 1.5 | 0.2 | setosa |
5.0 | 3.6 | 1.4 | 0.2 | setosa |
5.4 | 3.9 | 1.7 | 0.4 | setosa |
str(iris)
'data.frame': 150 obs. of 5 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## data frames are a list of vectors of the same length, but possibly different types
## pandas in python is meant to replicate R data frames
head(iris[1]) ## first column
Sepal.Length |
---|
5.1 |
4.9 |
4.7 |
4.6 |
5.0 |
5.4 |
### example of lapply for computing set of functions on vector
In C, C++, and many other compiled languages, for loops are commonly used to repeat operations across vectors. In R it is more computationally efficient, and often clearer, to use built-in vectorized functions.
## Basic for loop: print each integer from 1 through 10 on its own line.
for (ii in seq_len(10)) {
  print(ii)
}
[1] 1 [1] 2 [1] 3 [1] 4 [1] 5 [1] 6 [1] 7 [1] 8 [1] 9 [1] 10
## Add 1 to every element of `a` with an explicit loop
## (shown for contrast with the vectorized form `a + 1`).
a <- 1:5
## seq_along() is safe for zero-length vectors, where 1:length(a) would
## produce c(1, 0) and run the body twice.
for (ii in seq_along(a)) {
  a[ii] <- a[ii] + 1
}
a
a + 1
### time these with large a
### use proc.time() function
## Time the element-by-element increment on a large vector.
a <- 1:1e7
tm <- proc.time()  ## timestamp taken just before the loop
## seq_along() instead of 1:length(a): identical indices here, but it does
## not iterate over c(1, 0) if the vector is ever empty.
for (ii in seq_along(a)) {
  a[ii] <- a[ii] + 1
}
proc.time() - tm  ## time used since `tm` was recorded
user system elapsed 0.672 0.012 0.687
## Same increment as the loop version, done as one vectorized addition.
a <- 1:1e7
tm <- proc.time()  ## timestamp taken just before the operation
a <- a + 1
proc.time() - tm  ## time used since `tm` was recorded
user system elapsed 0.036 0.000 0.033
1 | 501 |
2 | 502 |
3 | 503 |
4 | 504 |
5 | 505 |
6 | 506 |
### built in functions for vectorizing operations
a <- 1:4
sum(a)
mean(a)
var(a)
max(a)
min(a)
## sum rows with a for loop
## Row sums computed with an explicit loop over a 500 x 2 matrix.
a <- matrix(1:1000, nrow = 500, ncol = 2)
head(a)
## Preallocate the result so the loop fills it in place.
rs <- numeric(nrow(a))
## seq_len() handles a zero-row matrix; 1:nrow(a) would give c(1, 0)
## and index a row that does not exist.
for (ii in seq_len(nrow(a))) {
  rs[ii] <- sum(a[ii, ])
}
head(rs)
1 | 501 |
2 | 502 |
3 | 503 |
4 | 504 |
5 | 505 |
6 | 506 |
rs2 <- rowSums(a)
head(rs2)
## AddTwo: return the sum of its two arguments.
##   x, y : values that can be combined with `+`
##   value: x + y
AddTwo <- function(x, y) {
  x + y
}
AddTwo(4,9)
## scoping: where R looks up values for a symbol
## by default 1) variables created within function are destroyed after function is run
## 2) functions first look for variable within its environment
## then in calling environment
rm(a) ## remove a so not found
## a is not defined in function,
## so will look outside function for value
## this is not good programming practice
f <- function() {
  ## `a` is never assigned in this body, so R must find it outside the
  ## function; the call fails if no such `a` exists.
  print(a)
  ## the function's value is always 10
  10
}
f()
Error in print(a): object 'a' not found Traceback: 1. f() 2. print(a) # at line 13 of file <text>
a <- 13
f()
[1] 13
## a is defined inside the function,
## so the local value is used and nothing outside the function is consulted
## this is good programming practice
f <- function() {
  ## local `a`: exists only while the function runs and shadows any
  ## `a` defined outside
  a <- 6
  print(a)
  ## the function's value is always 10
  10
}
rm(a)
f()
[1] 6
## a changed within function, does not change our a
## this is good
a <- 4
f()
a
[1] 6
## a is assigned inside the function with <<-,
## so the assignment changes a outside the function
## this is not good programming practice
f <- function() {
  ## `<<-` assigns to `a` OUTSIDE the function instead of creating a local
  ## variable. ALMOST ALWAYS A BAD IDEA, similar to python global.
  a <<- 6
  print(a)
  ## the function's value is always 10
  10
}
a <- 4
a
b <- f()
a
[1] 6