The factor() function is used to create a factor. The levels() function can be useful to check the levels of a factor variable.
# To create factors in R, you make use of the function factor()
# Raw character data: one sex label per observation.
sex_vect <- c(
  "Male", "Female", "Female",
  "Male", "Male", "Female"
)

# Encode the labels as a categorical variable and display it together
# with its levels.
factor_sex_vect <- factor(x = sex_vect)
print(factor_sex_vect)
[1] Male   Female Female Male   Male   Female
Levels: Female Male
The levels of a factor can be listed with the levels() function. Use the nlevels() function to check the number of levels.
# Animals without order
# Nominal factor: no `levels` argument is given, so factor() derives the
# levels from the data itself.
animals_vector <- c("Elephant", "Dog", "Donkey", "Horse")
factor_animals_vector <- factor(animals_vector)
print(factor_animals_vector)
print(levels(factor_animals_vector))
nlevels(factor_animals_vector) # to check the number of levels

# Ordinal factor: `levels` fixes the ranking Low < Medium < High.
# The argument is spelled `ordered` in full rather than relying on partial
# matching of `order`.
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
factor_temperature_vector <- factor(
  temperature_vector,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
nlevels(factor_temperature_vector)
[1] Elephant Dog      Donkey   Horse
Levels: Dog Donkey Elephant Horse
[1] "Dog"      "Donkey"   "Elephant" "Horse"
An ordered factor can be created with the ordered() command, or by using factor() with the ordered = TRUE argument.
# Temperature with order
# Build an ordinal factor: `levels` fixes the ranking Low < Medium < High.
# The argument is spelled `ordered` in full rather than relying on partial
# matching of `order`.
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
factor_temperature_vector <- factor(
  temperature_vector,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
print(factor_temperature_vector)
# levels(factor_vector) <- c("name1", "name2",...) # syntax to assign the levels
print(levels(factor_temperature_vector))
[1] High   Low    High   Low    Medium
Levels: Low < Medium < High
[1] "Low"    "Medium" "High"
# First observation of the temperature factor (a "High" reading)
High <- factor_temperature_vector[1L]

# Second observation of the temperature factor (a "Low" reading)
Low <- factor_temperature_vector[2L]

# Is High greater than Low? The same kind of comparison applies to other
# ranked categories (heavier, larger, faster, strongly agree, ...),
# depending on the context of the data.
High > Low
Labelling the levels "low", "medium", and "high" is more descriptive than 1, 2, 3.
# Numeric codes for thirteen observations (1, 2 or 3).
data <- c(1, 2, 2, 3, 1, 2, 3, 3, 1, 2, 3, 3, 1)

# Without `labels` the levels are simply the codes themselves.
fdata <- factor(data)
print(fdata) # without labels

# `labels` renames the levels in order: 1 -> Low, 2 -> Medium, 3 -> High.
rdata <- factor(data, labels = c("Low", "Medium", "High"))
print(rdata) # with labels
 [1] 1 2 2 3 1 2 3 3 1 2 3 3 1
Levels: 1 2 3
 [1] Low    Medium Medium High   Low    Medium High   High   Low    Medium
[11] High   High   Low
Levels: Low Medium High
Generating factors (gl() function)
The gl() function generates factors by specifying the pattern of their levels.
Usage: gl(n, k, length = n*k, labels = 1:n, ordered = FALSE)
n: number of levels
k: number of replications
length: length of the result
labels: labels for the resulting factor levels
ordered: whether the result should be ordered or not
# Three levels, two replications each, labelled with colour names.
gl(n = 3, k = 2, labels = c("green", "red", "yellow"))
[1] green green red red yellow yellow
Levels: green red yellow
# Using gl() inside a data frame: a simulated trial of 100 patients.
# Columns are evaluated in the order written, so the random draws for `age`
# happen before those for `center`.
clinical.trial <- data.frame(
  patient = seq_len(100),
  age = rnorm(n = 100, mean = 60, sd = 6),
  # first 50 rows "Treatment", next 50 rows "Control"
  treatment = gl(n = 2, k = 50, labels = c("Treatment", "Control")),
  center = sample(
    x = paste("Center", LETTERS[1:5]),
    size = 100,
    replace = TRUE
  )
)

# Show the first 20 rows.
print(head(clinical.trial, n = 20))
   patient      age treatment   center
1        1 56.37911 Treatment Center B
2        2 72.05500 Treatment Center D
3        3 54.26399 Treatment Center D
4        4 59.67424 Treatment Center A
5        5 58.36585 Treatment Center C
6        6 50.42562 Treatment Center C
7        7 61.84052 Treatment Center C
8        8 57.49042 Treatment Center C
9        9 59.41043 Treatment Center C
10      10 57.12441 Treatment Center D
11      11 58.12204 Treatment Center E
12      12 59.25759 Treatment Center C
13      13 51.23221 Treatment Center E
14      14 56.24048 Treatment Center E
15      15 55.28236 Treatment Center D
16      16 62.27550 Treatment Center A
17      17 64.87295 Treatment Center C
18      18 55.22675 Treatment Center C
19      19 56.19110 Treatment Center C
20      20 67.15331 Treatment Center C
droplevels()
# Recode the numeric Month column of the built-in airquality data as a
# factor labelled with month abbreviations (positions 5 to 9 of month.abb).
aq <- transform(airquality, Month = factor(Month, labels = month.abb[5:9]))
print(levels(aq$Month))

# Removing every "Jul" row does not remove the "Jul" level itself.
aq <- subset(aq, Month != "Jul")
print(levels(aq$Month)) # still the same levels
table(aq$Month) # even though one level has 0 entries!

# droplevels() discards levels that no longer occur in the data.
table(droplevels(aq)$Month)
[1] "May" "Jun" "Jul" "Aug" "Sep"
[1] "May" "Jun" "Jul" "Aug" "Sep"

May Jun Jul Aug Sep
 31  30   0  31  30

May Jun Aug Sep
 31  30  31  30
is.factor(), is.ordered(), as.factor() and as.ordered()
# Test the plain temperature_vector: is it a factor, and is it ordered?
is.factor(temperature_vector)
is.ordered(temperature_vector)

# Run the same two tests on the factor built from it.
is.factor(factor_temperature_vector)
is.ordered(factor_temperature_vector)
strptime(), POSIXct() and POSIXlt(), as.Date()
# The strptime help page lists the available conversion format codes.
help(strptime)

# Dates written as month/day/four-digit year.
myDays <- c(
  "10/11/1945",
  "8/19/2003",
  "5/15/1964"
)

# Parse the strings into Date objects using the matching format, then
# display the result.
myDates <- as.Date(x = myDays, format = "%m/%d/%Y")
myDates