### Find square root, cube root, fifth root, and so on

In [1]:
#square root of 16
sqrt(16)

#Cube root of a number
4096^(1/3)

#fifth root of a number
59049^(1/5)

4
16
9
In [1]:
my.name <- readline(prompt="Enter name: ")

Enter name: sumendar

In [6]:
my.age <- readline(prompt="Enter age: ")

Enter age: 35

In [7]:
class(my.age)
my.age <- as.integer(my.age)
class(my.age)

'character'
'integer'
In [8]:
print(paste("Hi,", my.name, "next year you will be", my.age+1, "years old."))

[1] "Hi, sumendar next year you will be 36 years old."

In [14]:
my.name <- scan()

In [12]:


[1] "10,52,65,98,78,"

In [9]:
my.age <- readline(prompt="Enter age: ")

# convert character into integer
my.age <- as.integer(my.age)

print(paste("Hi,", my.name, "next year you will be", my.age+1, "years old."))

Enter name: sumendar
Enter age: 35
[1] "Hi, sumendar next year you will be 36 years old."

In [5]:
# One example of each basic R container, collected into a list at the end so
# the following cells can inspect their classes.

# Naming a length-10 vector "a" makes R expand the names to a1, a2, ..., a10.
v <- c(a = seq_len(10))

# 10 x 2 integer matrix, filled column by column with 1..20.
m <- matrix(1:20, nrow = 10, ncol = 2)

# 4 x 4 x 3 array; the 24 values are recycled to fill all 48 cells.
a <- array(1:24, dim = c(4, 4, 3))

# Data frame with one numeric, one text and one logical column.
d <- data.frame(
  col1 = c(1, 2, 3, 4),
  col2 = c("ram", "raheem", "bheem", "Jai"),
  col3 = c(TRUE, FALSE, FALSE, TRUE)
)

# Unnamed list holding the four objects above, in creation order.
l <- list(v, m, a, d)

In [15]:
# NOTE: apply() converts the data frame to a matrix before iterating, so the
# mixed-type columns are all coerced to character and every class is reported
# as "character" (see the output below). Use lapply()/sapply() directly on the
# data frame to get the real per-column classes.
print(apply(d, 2, class))

       col1        col2        col3
"character" "character" "character"

In [12]:
print(lapply(d, class))
print(lapply(l, class))

$col1 [1] "numeric"$col2
[1] "factor"

$col3 [1] "logical" [[1]] [1] "integer" [[2]] [1] "matrix" [[3]] [1] "array" [[4]] [1] "data.frame"  In [2]: lapply(myVect, mean) $a1
1
$a2 2$a3
3
$a4 4$a5
5
$a6 6$a7
7
$a8 8$a9
9
$a10 10 #### beautiful LATEX, HTML and ASCII tables from R statistical output¶ In [7]: library(stargazer) mydata <- mtcars stargazer(mydata, type = "text", title="Descriptive statistics", digits=1, out="table1.txt")  Please cite as: Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables. R package version 5.2. http://CRAN.R-project.org/package=stargazer  Descriptive statistics ====================================== Statistic N Mean St. Dev. Min Max -------------------------------------- mpg 32 20.1 6.0 10.4 33.9 cyl 32 6.2 1.8 4 8 disp 32 230.7 123.9 71.1 472.0 hp 32 146.7 68.6 52 335 drat 32 3.6 0.5 2.8 4.9 wt 32 3.2 1.0 1.5 5.4 qsec 32 17.8 1.8 14.5 22.9 vs 32 0.4 0.5 0 1 am 32 0.4 0.5 0 1 gear 32 3.7 0.7 3 5 carb 32 2.8 1.6 1 8 --------------------------------------  In [1]: mean(c(70,72,74,76,80,114))  81 In [2]: median(c(70,72,74,76,80,114))  75 In [5]: library(DescTools) Mode(c(70,72,74,76,80,114))  1. 70 2. 72 3. 74 4. 76 5. 80 6. 114 In [3]: hist(c(70,72,74,76,80,114))  In [1]: mySD <- c( 5, 4, 7, 6, 12, 45, 8, 345, 23, 45, 122, 221, 200 )  In [10]: hist(mySD) abline(v=mean(mySD), col="blue") abline(v=sd(mySD), col="red")  In [5]: mean(mySD) sd(mySD)  80.2307692307692 109.537781797084 ### Mean & SD¶ In [11]: head(mtcars)  mpgcyldisphpdratwtqsecvsamgearcarb Mazda RX421.0 6 160 110 3.90 2.62016.460 1 4 4 Mazda RX4 Wag21.0 6 160 110 3.90 2.87517.020 1 4 4 Datsun 71022.8 4 108 93 3.85 2.32018.611 1 4 1 Hornet 4 Drive21.4 6 258 110 3.08 3.21519.441 0 3 1 Hornet Sportabout18.7 8 360 175 3.15 3.44017.020 0 3 2 Valiant18.1 6 225 105 2.76 3.46020.221 0 3 1 In [19]: round(mean(mtcars$disp))

231
In [20]:
round(sd(mtcars$disp))  124 In [14]: mySum<- sum(mtcars$disp)
mySum

7383.1
In [15]:
myAVG<-mySum/nrow(mtcars)
myAVG

230.721875
In [ ]:
round(sqrt(round(mean((round(mean(mtcars$disp)) - round(mtcars$disp))^2))))

In [31]:
data.entry(mtcars$disp)  In [35]: vi(mtcars$disp)

Error in .External2(C_edit, name, file, title, editor): unable to run editor 'vi'
Traceback:

1. vi(mtcars$disp) 2. edit.default(name, file, editor = "vi") In [34]: edit(mtcars$disp)

Error in edit(mtcars$disp): 'edit()' not yet supported in the Jupyter R kernel Traceback: 1. edit(mtcars$disp)
2. stop(sQuote("edit()"), " not yet supported in the Jupyter R kernel")
In [1]:
mtcars1 <- mtcars

In [2]:
mtcars1$RoundDisp <- round(mtcars1$disp)

In [4]:
mean(mtcars1$RoundDisp)  230.78125 In [1]: library(lattice)  In [2]: attach(mtcars)  In [5]: # create factors with value labels gear.f<-factor(gear,levels=c(3,4,5), labels=c("3gears","4gears","5gears")) cyl.f <-factor(cyl,levels=c(4,6,8), labels=c("4cyl","6cyl","8cyl"))  In [7]: # kernel density plot densityplot(~mpg, main="Density Plot", xlab="Miles per Gallon")  In [8]: # kernel density plots by factor level densityplot(~mpg|cyl.f, main="Density Plot by Number of Cylinders", xlab="Miles per Gallon")  In [10]: # boxplots for each combination of two factors bwplot(cyl.f~mpg|gear.f, ylab="Cylinders", xlab="Miles per Gallon", main="Mileage by Cylinders and Gears", layout=(c(1,3)))  In [14]: install.packages("mlmRev",repos = "https://cran.cnr.berkeley.edu/")  Installing package into 'C:/Users/Suman/Documents/R/win-library/3.4' (as 'lib' is unspecified) also installing the dependencies 'minqa', 'nloptr', 'RcppEigen', 'lme4'  package 'minqa' successfully unpacked and MD5 sums checked package 'nloptr' successfully unpacked and MD5 sums checked package 'RcppEigen' successfully unpacked and MD5 sums checked package 'lme4' successfully unpacked and MD5 sums checked package 'mlmRev' successfully unpacked and MD5 sums checked The downloaded binary packages are in C:\Users\Suman\AppData\Local\Temp\RtmpMZkfzy\downloaded_packages  In [15]: data(Chem97, package = "mlmRev")  In [16]: dim(Chem97)  1. 31022 2. 
8 In [17]: head(Chem97)  leaschoolstudentscoregenderagegcsescoregcsecnt 1 1 1 4 F 3 6.625 0.3393157 1 1 2 10 F -3 7.625 1.3393157 1 1 3 10 F -4 7.250 0.9643157 1 1 4 10 F -2 7.500 1.2143157 1 1 5 8 F -1 6.444 0.1583157 1 1 6 10 F 4 7.750 1.4643157 In [18]: head(Chem97[c("score", "gender", "gcsescore")])  scoregendergcsescore 4 F 6.625 10 F 7.625 10 F 7.250 10 F 7.500 8 F 6.444 10 F 7.750 In [19]: mtcars  mpgcyldisphpdratwtqsecvsamgearcarb Mazda RX421.0 6 160.0110 3.90 2.62016.460 1 4 4 Mazda RX4 Wag21.0 6 160.0110 3.90 2.87517.020 1 4 4 Datsun 71022.8 4 108.0 93 3.85 2.32018.611 1 4 1 Hornet 4 Drive21.4 6 258.0110 3.08 3.21519.441 0 3 1 Hornet Sportabout18.7 8 360.0175 3.15 3.44017.020 0 3 2 Valiant18.1 6 225.0105 2.76 3.46020.221 0 3 1 Duster 36014.3 8 360.0245 3.21 3.57015.840 0 3 4 Merc 240D24.4 4 146.7 62 3.69 3.19020.001 0 4 2 Merc 23022.8 4 140.8 95 3.92 3.15022.901 0 4 2 Merc 28019.2 6 167.6123 3.92 3.44018.301 0 4 4 Merc 280C17.8 6 167.6123 3.92 3.44018.901 0 4 4 Merc 450SE16.4 8 275.8180 3.07 4.07017.400 0 3 3 Merc 450SL17.3 8 275.8180 3.07 3.73017.600 0 3 3 Merc 450SLC15.2 8 275.8180 3.07 3.78018.000 0 3 3 Cadillac Fleetwood10.4 8 472.0205 2.93 5.25017.980 0 3 4 Lincoln Continental10.4 8 460.0215 3.00 5.42417.820 0 3 4 Chrysler Imperial14.7 8 440.0230 3.23 5.34517.420 0 3 4 Fiat 12832.4 4 78.7 66 4.08 2.20019.471 1 4 1 Honda Civic30.4 4 75.7 52 4.93 1.61518.521 1 4 2 Toyota Corolla33.9 4 71.1 65 4.22 1.83519.901 1 4 1 Toyota Corona21.5 4 120.1 97 3.70 2.46520.011 0 3 1 Dodge Challenger15.5 8 318.0150 2.76 3.52016.870 0 3 2 AMC Javelin15.2 8 304.0150 3.15 3.43517.300 0 3 2 Camaro Z2813.3 8 350.0245 3.73 3.84015.410 0 3 4 Pontiac Firebird19.2 8 400.0175 3.08 3.84517.050 0 3 2 Fiat X1-927.3 4 79.0 66 4.08 1.93518.901 1 4 1 Porsche 914-226.0 4 120.3 91 4.43 2.14016.700 1 5 2 Lotus Europa30.4 4 95.1113 3.77 1.51316.901 1 5 2 Ford Pantera L15.8 8 351.0264 4.22 3.17014.500 1 5 4 Ferrari Dino19.7 6 145.0175 3.62 2.77015.500 1 5 6 Maserati Bora15.0 8 301.0335 3.54 
3.57014.600 1 5 8 Volvo 142E21.4 4 121.0109 4.11 2.78018.601 1 4 2 In [ ]: mtcars[c("mpg","disp")]  In [ ]: mtcars["mpg"]  In [23]: mtcars$mpg

1. 21
2. 21
3. 22.8
4. 21.4
5. 18.7
6. 18.1
7. 14.3
8. 24.4
9. 22.8
10. 19.2
11. 17.8
12. 16.4
13. 17.3
14. 15.2
15. 10.4
16. 10.4
17. 14.7
18. 32.4
19. 30.4
20. 33.9
21. 21.5
22. 15.5
23. 15.2
24. 13.3
25. 19.2
26. 27.3
27. 26
28. 30.4
29. 15.8
30. 19.7
31. 15
32. 21.4
In [ ]:
#########################
#Libraries/Dependencies#
########################
#Install each dependency only when it is missing, so re-running the script
#does not hit the network and reinstall every package each time.
invisible(lapply(c("magrittr", "dplyr", "checkpoint"), function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}))
library("magrittr")
library("dplyr")
library("checkpoint")
#Use the package snapshot of this date so results are reproducible.
#NOTE(review): checkpoint() is normally called before the other packages are
#attached, so that they are loaded from the snapshot library -- confirm whether
#the order here is intentional.
checkpoint("2016-03-29")
#########################
#'%>%' The Pipe Operator#
#########################

#Multiply two values; `*` is vectorised, so vectors are multiplied element-wise
mult = function(x, y) {
  product = x * y
  product
}
#Nested-call method for 2 * 6 * 5 * 10: evaluated from the innermost call outwards
mult(10, mult(5, mult(6, 2)))
#Piped Method
#output into first parameter of next function
2 %>%
mult(6) %>%
mult(5) %>%
mult(10)

###################
#Data In & Rename#
##################

sep=",")
m311
#Fix Formatting
m311 <- m311 %>% tbl_df()
#Summary of Data Set
summary(m311$Case.Owner)
summary(m311)

#Renaming Part 1: Observations with Look Up Table (LUT)
#Names are the raw Case.Owner values, values are the display labels; several
#raw values deliberately map onto the same label (e.g. "Traffic Engineering").
caseOwnerLUT = c(
  "Animal_Services" = "Animal Services",
  "Public_Works_Construction-6-60" = "Construction",
  "Public_Works_Mosquito_Control-8-60" = "Mosquito Control",
  "Public_Works_Traffic_Engineering-10-60" = "Traffic Engineering",
  "Public_Works_Waste_Management" = "Waste Management",
  "Regulatory_and_Economic_Resources" = "Regulatory and Economic Resources",
  "Community_Information_and_Outreach" = "Information and Outreach",
  "Public_Works_Hwy_Engineering-5-60" = "Traffic Engineering",
  "Public_Works_Road_And_Bridges-16-60" = "Road and Bridges",
  "Public_Works_traffic_Signals_And_Signs-15-60" = "Traffic Engineering",
  "RAAM-27-93" = "RAAM",
  "Waste_Management" = "Waste Management"
)
#Look up by name: as.character() is required because indexing with a factor
#would use its integer level codes (i.e. LUT positions) instead of its labels.
#Values missing from the LUT become NA.
m311$Case.Owner = caseOwnerLUT[as.character(m311$Case.Owner)] %>% factor()
summary(m311$Case.Owner)
labels(m311)
#Renaming Part 2: Features
m311 = m311 %>%
rename(CreatedDate = Ticket.Created.Date...Time) %>%
rename(LastUpdatedDate = Ticket.Last.Updated.Date...Time) %>%
rename(ClosedDate = Ticket.Closed.Date...Time) %>%
rename(District = Neighborhood...District...Ward...etc.) %>%
rename(Zip = Zip...Postal.Code) %>%
rename(State = State...Province)

#Sampling
m311f = m311 %>% sample_frac(.1)
m311n = m311 %>% sample_n(1000)

#######################
#Feature Manipulation#
######################

#Select a few columns
m311 %>%
select(Case.Owner, CreatedDate)

#Exclude a column
m311 %>%
select(-ClosedDate)

#Select with helper
x = m311 %>%
select(contains("Issue"), Case.Owner)
x
distinct(x)

#Mutate
x = m311 %>%
mutate(
GoalKpi = (Goal.Days - Actual.Completed.Days) / Goal.Days
)
summary(x$GoalKpi)
#obviously some issues with this calculation, like divide by zero perhaps?

###########################
#Observation Manipulation#
##########################

#Keep only the columns needed to look into the goal / completion figures
goals = m311 %>%
  select(Case.Owner, Issue.Type, Goal.Days, Actual.Completed.Days, Ticket.Status)

#Tickets whose goal is zero or negative days, i.e. the rows where the GoalKpi
#formula divides by zero or by a negative number
zeroGoal = goals %>%
  filter(Goal.Days <= 0)

#how many are there?
nrow(zeroGoal)
summary(zeroGoal$Ticket.Status)
#Lets reduce our factors a bit here.
zeroGoal$Issue.Type = zeroGoal$Issue.Type %>% factor()
zeroGoal$Ticket.Status = factor(zeroGoal$Ticket.Status)

zeroGoal %>%
arrange(desc(Actual.Completed.Days)) %>%
glimpse()

#Go through as.character() first: if the column is a factor, as.numeric() alone
#would return the internal level codes rather than the recorded day counts.
#Entries that are not numbers become NA (with a warning).
zeroGoal$Actual.Completed.Days = as.numeric(as.character(zeroGoal$Actual.Completed.Days))

#Check Out Summaries
zeroGoal %>%
summarise(
mean = mean(Actual.Completed.Days, na.rm = TRUE), #easy way
sd = sd(Actual.Completed.Days, na.rm = TRUE),
median = median(Actual.Completed.Days[!is.na(Actual.Completed.Days)]), #hard way
observations = n(),
NasInActualCompleted = sum(is.na(Actual.Completed.Days))
)

#####################
#Group Manipulation#
####################

x = zeroGoal %>%
group_by(Case.Owner)
class(x)
x
#looks like a dataframe

#but you can do grouped operations
zeroGoal %>%
group_by(Case.Owner) %>%
summarise(
obs = n(),
avg.comp.days = mean(Actual.Completed.Days, na.rm = TRUE),
max.comp.days = max(Actual.Completed.Days, na.rm = TRUE),
min.comp.days = min(Actual.Completed.Days, na.rm = TRUE)
) %>%
arrange(avg.comp.days)

#############################
#Practice Exercises - Part 1#
#############################
# For Each Year, What were #
# the top 3 Issues and #
# average resolution time #
############################

#Hints
install.packages("lubridate")
library("lubridate")
summary(m311$CreatedDate)
#Parse CreatedDate as month/day/year on the 10% sample (m311f), then pull out
#the year with lubridate
m311f$CreatedDate2 = as.Date(m311f$CreatedDate, format="%m/%d/%Y")
year(m311f$CreatedDate2)

m311 %>%
select(Ticket.ID, Issue.Type, Actual.Completed.Days) %>%
top_n(3, Actual.Completed.Days) %>%
arrange(desc(Actual.Completed.Days))

##################
#################
m311 %>%
mutate(
Year = year(as.Date(CreatedDate, format="%m/%d/%Y"))
) %>%
group_by(Year, Issue.Type) %>%
summarise(
obs = n(),
avgResTime = mean(Actual.Completed.Days, na.rm = TRUE)
) %>%
top_n(3, obs) %>%
arrange(desc(obs))

#############################
#Practice Exercises - Part 2#
#############################
# What are the top 3 methods #
# by which we receive gps #
#############################

##################
#################

#Count the tickets that have both a Latitude and a Longitude recorded.
#NOTE(review): there is no group_by() before summarise(), so the result is a
#single row and top_n(3, ...) has nothing to rank. Presumably this was meant to
#be grouped by the column holding the request method -- confirm the column name.
m311 %>%
summarise(
gpsObs = sum((!is.na(Latitude) & !is.na(Longitude)))
) %>%
top_n(3, gpsObs) %>%
arrange(desc(gpsObs))

##############################
#Practice Exercises - Part 3 #
##############################
# Which Districts service #
# the most requests for each #
# department and what are #
# the top 3 issues #
##############################

##################
##################

#Count tickets per department / district / issue type, roll the counts up to
#department / district, and keep the three busiest districts per department.
m311 %>%
  group_by(Case.Owner, District, Issue.Type) %>%
  summarise(
    issueObs = n()
  ) %>%
  #most frequent issue types first, so the leading rows of each group are its
  #top issues
  arrange(desc(issueObs)) %>%
  summarise(
    totalObs = sum(issueObs),
    #head() returns at most three entries; Issue.Type[1:3] would pad groups
    #that have fewer than three issue types with NA
    top_issues = toString(head(Issue.Type, 3))
  ) %>%
  top_n(3, totalObs) %>%
  arrange(desc(totalObs)) %>%
  glimpse()

x[6,] %>% glimpse()