# Heights (in inches) and weights of 15 students, listed in matching order.
height <- c(58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72)
weight <- c(
  115, 117, 120, 123, 126, 129, 132, 135, 139, 142, 146, 150, 154, 159, 164
)
# Stack both vectors into a matrix, filled column by column:
# 15 rows (one per student) and 2 columns (height first, then weight).
htwtmatrix <- matrix(c(height, weight), nrow = 15, ncol = 2)
print(htwtmatrix)
dim(htwtmatrix)
# A matrix is simply a two-dimensional array, so this returns TRUE.
is.array(htwtmatrix)
# Convert the matrix into a data frame so that its columns can be labelled and
# referred to by name (as.data.frame() would work here as well).
htwtdata <- data.frame(htwtmatrix)
# Set the column labels of the data frame ...
colnames(htwtdata) <- c("height", "weight")
# ... and read them back: names() can both set and get the labels.
names(htwtdata)
# How R operates on matrices, compared with data frames.
# Arithmetic on a matrix is applied to every entry.
2 * htwtmatrix # each value doubled
htwtmatrix[, 1] / 12 # first column: height in inches converted to feet
mean(htwtmatrix[, 2]) # average of the second column (weight)
# Structural summaries of the data frame.
dim(htwtdata)
nrow(htwtdata)
str(htwtdata)
summary(htwtdata)
# weight * 703 / height^2 for every row
# (this has the form of the imperial-units BMI formula).
703 * htwtdata$weight / htwtdata$height^2
# Exercise: how would you get R to show the height and weight of the 8th
# student in the data set? And of the 8th and 10th students?
names(htwtdata)
htwtdata[8, "height"] # 8th student, height
htwtdata[8, "weight"] # 8th student, weight
htwtdata[10, "height"] # 10th student, height
htwtdata[10, "weight"] # 10th student, weight
head(htwtdata, n = 10) # first ten rows, to cross-check the values above
# A single TRUE/FALSE condition is handled by plain if/else, which returns the
# value of the branch that was taken. (ifelse() is meant for vectors; placing
# assignments inside its arguments only works through lazy evaluation and is
# easy to get wrong.)
x <- if (3 > 4) 5 else 6
print(x) # 6, because the condition is FALSE
x <- if (4 > 3) 5 else 6
print(x) # 5, because the condition is TRUE
# Column means of the height/weight data frame.
hmean <- mean(htwtdata$height)
wmean <- mean(htwtdata$weight)
?cat
# cat() separates its arguments with a space, so the line break is attached to
# the following label (avoiding a stray leading space on the second line), and
# a final "\n" ends the output line.
cat("mean height=", hmean, "\nmean weight=", wmean, "\n")
# Several scalar conditions combined with && in an ordinary if/else.
x <- if (hmean > 61 && wmean > 120) 5 else 6
print(x)
# `|` compares element by element, so ifelse() labels every student:
# "high" when height >= 70 or weight > 159, otherwise "low".
htwt_cat <- ifelse(height >= 70 | weight > 159, "high", "low")
print(htwt_cat)
is.vector(htwt_cat)
# Show the first three and the last three students next to their label.
# The label column is named explicitly; an unnamed cbind() argument would
# otherwise receive the deparsed indexing expression as its column name.
shown <- c(1:3, 13:15)
cbind(htwtdata[shown, ], htwt_cat = htwt_cat[shown])
htwt_cat[1:6]
# `||` is the scalar "or": it accepts operands of length one only. Since
# R 4.3.0, handing it a longer vector is an error (older versions silently
# looked at the first element only). The first element is therefore selected
# explicitly, which makes it visible why a single label comes back instead of
# one label per student.
htwt_cat <- ifelse(height[1] > 67 || weight[1] > 150, "high", "low")
htwt_cat
# Same check with lower cut-offs: the first student now satisfies the condition.
htwt_cat <- ifelse(height[1] > 57 || weight[1] > 110, "high", "low")
htwt_cat
# Classification can go beyond two outcomes. Example data: final scores.
final_score <- c(
  39, 51, 60, 65, 72, 78, 79, 83, 85, 85, 87, 89, 91, 95, 96, 97, 100, 100
)
# Two outcomes: anything below 60 fails, everything else passes.
passfail <- ifelse(final_score < 60, "fail", "pass")
passfail
# Five outcomes: bin the scores into letter grades. The bins are closed on the
# left (right = FALSE), so a score of exactly 60, 70, 80 or 90 falls into the
# higher grade, just like a nested chain of ifelse(final_score < cutoff, ...).
grade <- as.character(cut(
  final_score,
  breaks = c(-Inf, 60, 70, 80, 90, Inf),
  labels = c("F", "D", "C", "B", "A"),
  right = FALSE
))
grade
#if we want to resize the image we can use the below code
# The airquality data set holds daily air quality measurements for New York,
# May to September 1973; its help page has the details.
?airquality
# Goal: flag each day as a good (1) or bad (0) air quality day, where "bad"
# means an ozone level above 60 ppb.
numdays <- dim(airquality)[1]
print(numdays)
# Zero-filled numeric vector with one slot per day, to hold the flags.
goodair <- rep(0, numdays)
print(goodair)
# Would flagging work on the raw data? Look at the Ozone variable first:
# what do you notice?
airquality$Ozone
# Ozone contains missing values (NA), and many operations in R fail or return
# NA when they meet one. A simple workaround is a copy of the data frame in
# which every row with a missing value has been removed; na.omit() does that.
airqualfull <- na.omit(airquality)
dim(airqualfull)
dim(airquality)
# Repeat the setup using only the complete cases.
numdays <- nrow(airqualfull)
print(numdays)
# Flag each day: 0 when Ozone is above 60, otherwise 1. ifelse() works on the
# whole column at once, so no index loop is needed (and there is no
# `1:numdays`, which would count 1, 0 if the data frame were empty).
goodair <- ifelse(airqualfull$Ozone > 60, 0, 1)
goodair
# Which days had good air quality? which() turns a logical condition into the
# positions where it holds; those positions then pull the matching rows (and
# with them the month and day) out of the data frame.
# Note that the comparison is written with `==`, not with a single `=`.
goodindices <- which(goodair == 1)
print(goodindices)
airqualfull[goodindices, ]
# Stricter criterion: a day counts as good (1) only when Ozone is below 60 AND
# Temp is below 80; every other day is 0.
# Everything here works on the complete-case data (airqualfull). `goodair` has
# one entry per row of airqualfull, so using it to index the raw airquality
# data would select the wrong rows, and rows with a missing Ozone value would
# keep whatever flag they had before.
# `&` is the element-wise operator; `&&` accepts length-one operands only
# (longer vectors are an error since R 4.3.0).
airqualfull$Temp
goodair <- ifelse(airqualfull$Ozone < 60 & airqualfull$Temp < 80, 1, 0)
goodindices1 <- which(goodair == 1)
print(goodindices1)
airqualfull[goodindices1, ]
# Export the selected rows, comma separated, to goodquality.txt in the
# working directory.
write.table(airqualfull[goodindices1, ], "goodquality.txt", sep = ",")
# Confirm that the file now exists in the working directory.
list.files()
# Open the file to inspect the data.
file.edit("goodquality.txt")
# Attach the Biobase package; openPDF() below is presumably provided by it.
# NOTE(review): Biobase is distributed through Bioconductor and has to be
# installed beforehand - confirm it is available where this script runs.
library(Biobase)
# Open the PDF in a viewer. The path is relative, so the file is looked up in
# the current working directory.
openPDF("Data Science Process.pdf")
# while loop: keep adding 2 to z for as long as z is below 5, printing each
# new value.
z <- 0
while (z < 5) {
  z <- z + 2
  print(z)
}
# repeat is another way to loop; it runs until break is reached.
# Print i and count upwards; stop once 15 has been printed.
i <- 1
repeat {
  print(i)
  if (i < 15) {
    i <- i + 1
  } else {
    break
  }
}
# Print x, then increment it; leave the loop as soon as x reaches 6.
x <- 1
repeat {
  print(x)
  x <- x + 1
  if (x == 6) break
}
# Exercise: write a repeat loop that prints all the even numbers from 2 to 10
# by incrementing the variable i, starting from i <- 0.
i <- 0
repeat {
  i <- i + 2
  print(i)
  if (i == 10) {
    break
  }
}
# Print msg on every pass, incrementing i first; the loop ends when i is 5.
msg <- "Hello"
i <- 1
repeat {
  i <- i + 1
  print(msg)
  if (i == 5) break
}
# The same loop with the print placed before the increment.
msg <- "Hello"
i <- 1
repeat {
  print(msg)
  i <- i + 1
  if (i == 5) break
}
# Exercise: starting from i <- 1, write a while loop that prints the odd
# numbers from 1 through 7.
i <- 1
while (i < 8) {
  print(i)
  i <- i + 2
}
# Exercise: write a while loop that increments the variable i six times and
# prints msg on every iteration.
i <- 1
while (i < 7) {
  print(msg)
  i <- i + 1
}
a <- c(15, 23, 78, 45, 124, 82, 75)
# The first four elements, reached by position.
for (i in seq_len(4)) {
  print(a[i])
}
# The positions themselves, from 1 to the length of a.
for (i in seq_along(a)) {
  print(i)
}
# Every element of a, reached through its position.
for (i in seq_along(a)) {
  print(a[[i]])
}
# NOTE(review): the loop below runs exactly once, over the single value 2; it
# does not walk through the characters of `a`. Confirm that this is intended.
a <- "Last Checkpoint: 16 hours ago (unsaved changes)"
for (letter in 2) {
  print(letter)
}
# Vector of 30 random draws from the standard normal distribution.
u1 <- rnorm(30)
print("This loop calculates the square of the first 10 elements of vector u1")
# Preallocate `usq` at its final length instead of starting from a single 0
# and letting every new index grow the vector.
usq <- numeric(10)
for (i in seq_len(10)) {
  # square of the i-th element of `u1` goes into the i-th position of `usq`
  usq[i] <- u1[i]^2
  print(usq[i])
}
# The loop variable keeps its last value after the loop has finished.
print(i)