# Height/weight example: build a matrix from two vectors, convert it to a
# data frame, and compare how R operates on each structure.
height <- c(58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72)
weight <- c(115, 117, 120, 123, 126, 129, 132, 135, 139, 142, 146, 150, 154,
            159, 164)

# What do 15 and 2 refer to? They are the number of rows and columns;
# matrix() fills the values column by column, so column 1 is height and
# column 2 is weight.
htwtmatrix <- matrix(c(height, weight), nrow = 15, ncol = 2)
print(htwtmatrix)
dim(htwtmatrix)
# A matrix is a two-dimensional array, so is.array() returns TRUE for it.
is.array(htwtmatrix)

# To assign a name to each column, first convert the matrix into a data frame
# (as.data.frame() works equally well here).
htwtdata <- data.frame(htwtmatrix)
names(htwtdata) <- c("height", "weight")  # names() sets the column names ...
names(htwtdata)                           # ... and also retrieves them

# Let us see how R operates on matrices, and how that compares to data frames.
htwtmatrix * 2         # multiplies every element by 2
htwtmatrix[, 1] / 12   # convert height in inches to feet
mean(htwtmatrix[, 2])  # find mean of weight

dim(htwtdata)
nrow(htwtdata)
str(htwtdata)
summary(htwtdata)
# weight * 703 / height^2, computed element-wise for every row (this is the
# imperial BMI formula, assuming weight is recorded in pounds).
htwtdata[, 2] * 703 / htwtdata[, 1]^2

# How would you get R to give you the height and weight of the 8th student in
# the data set? The 8th and 10th student?
# Looking up individual students, then scalar vs. element-wise conditions
# with ifelse(). Uses `height`, `weight` and `htwtdata` defined earlier in
# the script.
names(htwtdata)
htwtdata[8, 1]      # 8th student height
htwtdata[8, 2]      # 8th student weight
htwtdata[10, 1]     # 10th student height
htwtdata[10, 2]     # 10th student weight
head(htwtdata, 10)  # check with head command

# With a single TRUE/FALSE test, ifelse() evaluates only the chosen branch,
# so exactly one of the two assignments to `x` is carried out.
ifelse(3 > 4, x <- 5, x <- 6)
print(x)
ifelse(4 > 3, x <- 5, x <- 6)
print(x)

hmean <- mean(htwtdata$height)
wmean <- mean(htwtdata$weight)
?cat
cat("mean height=", hmean, "\n", "mean weight=", wmean)

# Multiple conditions in one test: && combines two length-one conditions.
ifelse(hmean > 61 && wmean > 120, x <- 5, x <- 6)

# Apply element-wise to vectors: the vectorised operator | yields one result
# per student.
htwt_cat <- ifelse(height >= 70 | weight > 159, "high", "low")
print(htwt_cat)
is.vector(htwt_cat)
# htwtdata1 <- head(htwtdata, 6)
cbind(htwtdata[c(1:3, c(13:15)), ], htwt_cat[c(1:3, c(13:15))])
# print(htwtdata1[1:3])
htwt_cat[1:6]

# || is the scalar "or" and needs length-one operands. Older versions of R
# silently used only the first element of each vector; R >= 4.3.0 raises an
# error instead. Indexing the first element makes that single comparison
# explicit, so htwt_cat holds one value rather than one per student.
htwt_cat <- ifelse(height[1] > 67 || weight[1] > 150, "high", "low")
htwt_cat
# Notice that in the above ifelse statement only the first element in the
# series was computed.
htwt_cat <- ifelse(height[1] > 57 || weight[1] > 110, "high", "low")
htwt_cat

# This can also be extended to include multiple conditions. Suppose we have
# the following data:
final_score <- c(39, 51, 60, 65, 72, 78, 79, 83, 85, 85, 87, 89, 91, 95, 96,
                 97, 100, 100)
passfail <- ifelse(final_score >= 60, "pass", "fail")
passfail
# Nested ifelse() calls: the first cutoff a score falls below decides the
# letter; anything from 90 upwards is an "A".
grade <- ifelse(final_score < 60, "F",
  ifelse(final_score < 70, "D",
    ifelse(final_score < 80, "C",
      ifelse(final_score < 90, "B", "A")
    )
  )
)
grade

# NOTE(review): the original script had a comment here about resizing an
# image, but no image-related code follows it.

# Let's take the airquality dataset, which is the daily air quality
# measurements in New York, May to September 1973. For the details use
# ?airquality
# We want to figure out which days were good air quality days (1) or bad air
# quality (0), based on a cutoff of ozone levels above 60ppb.
numdays <- nrow(airquality)
print(numdays)
# creates an object which will store the vector
goodair <- numeric(numdays)
print(goodair)
# Does the command above work? Why/why not?
# Let's check the Ozone variable. What do you notice below?
# Classify air-quality days with for loops on the complete cases of
# `airquality`, export the result, then demonstrate while and repeat loops.
airquality$Ozone
# When there are missing values, many operations in R fail. One way to get
# around this is to create a new data frame that deletes all the rows
# corresponding to observations with missing values. This can be done by
# means of the command 'na.omit'.
airqualfull <- na.omit(airquality)
dim(airqualfull)
dim(airquality)

# Now let's try doing this again with the data with the complete cases.
numdays <- nrow(airqualfull)
numdays
print(numdays)
goodair <- numeric(numdays)  # initialize the vector
# seq_len() is used instead of 1:numdays so the loop is skipped entirely if
# there are no rows.
for (i in seq_len(numdays)) {
  if (airqualfull$Ozone[i] > 60) {
    goodair[i] <- 0
  } else {
    goodair[i] <- 1
  }
}
goodair

# At this point we might be interested in which days were the ones with good
# air quality. The 'which' command returns a set of indices corresponding to
# the condition specified. We can then use the indices to find the day of the
# month this corresponds to.
which(goodair == 1)  ## notice the double "=" signs!
goodindices <- which(goodair == 1)
airqualfull[goodindices, ]

# Second classification: a day is good (1) only if Ozone < 60 and Temp < 80.
# `numdays` and `goodair` are sized for `airqualfull`, so the loop reads that
# data frame too, and it compares the i-th temperature (a single value)
# because && needs length-one operands.
airqualfull$Temp
for (i in seq_len(numdays)) {
  if (airqualfull$Ozone[i] < 60 && airqualfull$Temp[i] < 80) {
    goodair[i] <- 1
  } else {
    goodair[i] <- 0
  }
}
goodindices1 <- which(goodair == 1)
# airqualfull[goodindices1, ]
print(goodindices1)
airqualfull[goodindices1, ]

# export and save the result into working directory with file name as
# goodquality.txt
write.table(airqualfull[goodindices1, ], "goodquality.txt", sep = ",")
# check whether the file exported into working directory
list.files()
# open the file and check the data (an editor only makes sense in an
# interactive session)
if (interactive()) {
  file.edit("goodquality.txt")
}

# Open the accompanying PDF with Biobase::openPDF(). This is optional: it is
# skipped, with a message, when the session is not interactive, the file is
# missing, or the Biobase package is not installed.
pdf_file <- "Data Science Process.pdf"
if (interactive() && file.exists(pdf_file) &&
      requireNamespace("Biobase", quietly = TRUE)) {
  Biobase::openPDF(pdf_file)
} else {
  message("Skipping openPDF() for '", pdf_file, "'.")
}

z <- 0
while (z < 5) {
  z <- z + 2
  print(z)
}

# Another option for looping is the repeat function. An example follows:
i <- 1
repeat {
  print(i)
  if (i == 15) break
  i <- i + 1
}

x <- 1
repeat {
  print(x)
  x <- x + 1
  if (x == 6) {
    break
  }
}

# For the first exercise, write a repeat{} loop that prints all the even
# numbers from 2 to 10, via incrementing the variable, i <- 0.
# Loop exercises: repeat, while and for.

# Prints the even numbers from 2 to 10.
i <- 0
repeat {
  i <- i + 2
  print(i)
  if (i == 10) break
}

# Increment first, then print: `msg` is printed for i = 2, 3, 4 and 5.
msg <- c("Hello")
i <- 1
repeat {
  i <- i + 1
  print(msg)
  if (i == 5) {
    break
  }
}

# Print first, then increment: `msg` is printed four times before i reaches 5.
msg <- c("Hello")
i <- 1
repeat {
  print(msg)
  i <- i + 1
  if (i == 5) break
}

# With i <- 1, write a while() loop that prints the odd numbers from 1
# through 7.
i <- 1
while (i <= 7) {
  print(i)
  i <- i + 2
}

# Write a while() loop that increments the variable "i" 6 times, and prints
# "msg" at every iteration.
i <- 1
while (i <= 6) {
  print(msg)
  i <- i + 1
}

a <- c(15, 23, 78, 45, 124, 82, 75)
# Print the first four elements of `a`.
for (i in 1:4) {
  print(a[i])
}
# Print the positions 1..length(a). seq_along() is used because seq(a) would
# mean 1:a if `a` ever had length one.
for (i in seq_along(a)) {
  print(i)
}
# Print every element of `a`.
for (i in seq_along(a)) {
  print(a[i])
}

a <- "Last Checkpoint: 16 hours ago (unsaved changes)"
# NOTE(review): this iterates over the single value 2, so it prints 2 and
# never reads `a` - confirm what was intended here.
for (letter in 2) {
  print(letter)
}

# Create a vector filled with random normal values
u1 <- rnorm(30)
print("This loop calculates the square of the first 10 elements of vector u1")
# Initialize `usq` at its final length so the loop fills it in place instead
# of growing it one element at a time.
usq <- numeric(10)
for (i in 1:10) {
  # i-th element of `u1` squared into `i`-th position of `usq`
  usq[i] <- u1[i] * u1[i]
  print(usq[i])
}
# The loop variable keeps its last value after the loop ends.
print(i)