Find the square root, cube root, fifth root, and so on

In [1]:
# Square root of 16, using the dedicated base function
sqrt(16)

# Cube root: raise the number to the power 1/3
4096^(1 / 3)

# Fifth root: raise the number to the power 1/5
59049^(1 / 5)
4
16
9
In [1]:
my.name <- readline(prompt="Enter name: ")
Enter name: sumendar
In [6]:
my.age <- readline(prompt="Enter age: ")
Enter age: 35
In [7]:
class(my.age)
my.age <- as.integer(my.age)
class(my.age)
'character'
'integer'
In [8]:
print(paste("Hi,", my.name, "next year you will be", my.age+1, "years old."))
[1] "Hi, sumendar next year you will be 36 years old."
In [14]:
my.name <- scan()
In [12]:

[1] "10,52,65,98,78,"
In [9]:
# readline() always returns text, so the answer is converted to an integer
# straight away; the arithmetic below needs a number.
my.age <- as.integer(readline(prompt = "Enter age: "))

print(
  paste("Hi,", my.name, "next year you will be", my.age + 1, "years old.")
)
Enter name: sumendar
Enter age: 35
[1] "Hi, sumendar next year you will be 36 years old."
In [5]:
# One example object for each basic container type.
# Named integer vector with names a1 ... a10
v <- setNames(1:10, paste0("a", 1:10))
# 10 x 2 integer matrix, filled column by column
m <- matrix(1:20, nrow = 10, ncol = 2)
# 4 x 4 x 3 array; 1:24 is recycled to fill all 48 cells
a <- array(1:24, dim = c(4, 4, 3))
# Data frame mixing numeric, text and logical columns
d <- data.frame(
  col1 = c(1, 2, 3, 4),
  col2 = c("ram", "raheem", "bheem", "Jai"),
  col3 = c(TRUE, FALSE, FALSE, TRUE)
)
# Unnamed list holding all four objects
l <- list(v, m, a, d)
In [15]:
# apply() converts the data frame to a matrix before iterating; a matrix has
# a single type, so the mixed columns all become character and every column
# is reported as "character". Use lapply() to see the real column classes.
print(apply(X = d, MARGIN = 2, FUN = class))
       col1        col2        col3 
"character" "character" "character" 
In [12]:
print(lapply(d, class))
print(lapply(l, class))
$col1
[1] "numeric"

$col2
[1] "factor"

$col3
[1] "logical"

[[1]]
[1] "integer"

[[2]]
[1] "matrix"

[[3]]
[1] "array"

[[4]]
[1] "data.frame"

In [2]:
lapply(myVect, mean)
$a1
1
$a2
2
$a3
3
$a4
4
$a5
5
$a6
6
$a7
7
$a8
8
$a9
9
$a10
10

Beautiful LaTeX, HTML, and ASCII tables from R statistical output

In [7]:
library(stargazer)
mydata <- mtcars
# Plain-text descriptive statistics table, one decimal place; the same table
# is also written to table1.txt via `out`.
stargazer(
  mydata,
  type = "text",
  title = "Descriptive statistics",
  digits = 1,
  out = "table1.txt"
)
Please cite as: 

 Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2. http://CRAN.R-project.org/package=stargazer 

Descriptive statistics
======================================
Statistic N  Mean  St. Dev. Min   Max 
--------------------------------------
mpg       32 20.1    6.0    10.4 33.9 
cyl       32  6.2    1.8     4     8  
disp      32 230.7  123.9   71.1 472.0
hp        32 146.7   68.6    52   335 
drat      32  3.6    0.5    2.8   4.9 
wt        32  3.2    1.0    1.5   5.4 
qsec      32 17.8    1.8    14.5 22.9 
vs        32  0.4    0.5     0     1  
am        32  0.4    0.5     0     1  
gear      32  3.7    0.7     3     5  
carb      32  2.8    1.6     1     8  
--------------------------------------
In [1]:
mean(c(70,72,74,76,80,114))
81
In [2]:
median(c(70,72,74,76,80,114))
75
In [5]:
library(DescTools)
Mode(c(70,72,74,76,80,114))
  1. 70
  2. 72
  3. 74
  4. 76
  5. 80
  6. 114
In [3]:
hist(c(70,72,74,76,80,114))
In [1]:
mySD <-  c( 5, 4, 7, 6, 12, 45, 8, 345, 23, 45, 122, 221, 200 )
In [10]:
hist(mySD)
# Blue line: the mean of the data.
abline(v = mean(mySD), col = "blue")
# Red lines: one standard deviation either side of the mean. The SD is a
# distance from the mean, not a position on the x axis, so it is drawn as an
# offset from the mean rather than at x = sd(mySD).
abline(v = mean(mySD) + c(-1, 1) * sd(mySD), col = "red")
In [5]:
mean(mySD)
sd(mySD)
80.2307692307692
109.537781797084

Mean & SD

In [11]:
head(mtcars)
mpgcyldisphpdratwtqsecvsamgearcarb
Mazda RX421.0 6 160 110 3.90 2.62016.460 1 4 4
Mazda RX4 Wag21.0 6 160 110 3.90 2.87517.020 1 4 4
Datsun 71022.8 4 108 93 3.85 2.32018.611 1 4 1
Hornet 4 Drive21.4 6 258 110 3.08 3.21519.441 0 3 1
Hornet Sportabout18.7 8 360 175 3.15 3.44017.020 0 3 2
Valiant18.1 6 225 105 2.76 3.46020.221 0 3 1
In [19]:
round(mean(mtcars$disp))
231
In [20]:
round(sd(mtcars$disp))
124
In [14]:
mySum<- sum(mtcars$disp)
mySum
7383.1
In [15]:
myAVG<-mySum/nrow(mtcars)
myAVG
230.721875
In [ ]:
# Standard deviation of mtcars$disp computed by hand: the square root of the
# summed squared deviations from the mean, divided by (n - 1).
# Rounding is applied once, at the end; rounding the intermediate values and
# dividing by n instead of (n - 1) makes the result disagree with sd().
round(
  sqrt(
    sum((mtcars$disp - mean(mtcars$disp))^2) / (length(mtcars$disp) - 1)
  )
)
In [31]:
data.entry(mtcars$disp)
In [35]:
vi(mtcars$disp)
Error in .External2(C_edit, name, file, title, editor): unable to run editor 'vi'
Traceback:

1. vi(mtcars$disp)
2. edit.default(name, file, editor = "vi")
In [34]:
edit(mtcars$disp)
Error in edit(mtcars$disp): 'edit()' not yet supported in the Jupyter R kernel
Traceback:

1. edit(mtcars$disp)
2. stop(sQuote("edit()"), " not yet supported in the Jupyter R kernel")
In [1]:
mtcars1 <- mtcars
In [2]:
mtcars1$RoundDisp <- round(mtcars1$disp)
In [4]:
mean(mtcars1$RoundDisp)
230.78125
In [1]:
library(lattice)
In [2]:
# Puts the columns of mtcars (mpg, cyl, gear, ...) on the search path so the
# following cells can refer to them by bare name.
# NOTE(review): attach() makes name lookup depend on session state and can be
# masked by same-named variables; prefer mtcars$col or a `data =` argument.
attach(mtcars)
In [5]:
# Create factors with readable value labels for the number of gears and the
# number of cylinders. The columns are taken from mtcars explicitly, so this
# works whether or not mtcars has been attached.
gear.f <- factor(
  mtcars$gear,
  levels = c(3, 4, 5),
  labels = c("3gears", "4gears", "5gears")
)
cyl.f <- factor(
  mtcars$cyl,
  levels = c(4, 6, 8),
  labels = c("4cyl", "6cyl", "8cyl")
)
In [7]:
# Kernel density plot of mpg. `data = mtcars` tells lattice where to find
# mpg, so the plot does not depend on mtcars being attached.
densityplot(
  ~ mpg,
  data = mtcars,
  main = "Density Plot",
  xlab = "Miles per Gallon"
)
In [8]:
# Kernel density plots of mpg, one panel per level of cyl.f.
# mpg is taken from mtcars via `data`; cyl.f is not a column of mtcars and is
# looked up in the workspace, where it was created as a factor.
densityplot(
  ~ mpg | cyl.f,
  data = mtcars,
  main = "Density Plot by Number of Cylinders",
  xlab = "Miles per Gallon"
)
In [10]:
# Boxplots of mpg for each combination of the two factors: one box per
# cylinder level, one panel per gear level, panels stacked in one column.
# mpg is taken from mtcars via `data`; cyl.f and gear.f are workspace factors.
bwplot(
  cyl.f ~ mpg | gear.f,
  data = mtcars,
  ylab = "Cylinders",
  xlab = "Miles per Gallon",
  main = "Mileage by Cylinders and Gears",
  layout = c(1, 3)
)
In [14]:
# Install mlmRev only when it is not already available, so re-running the
# cell does not download and reinstall the package and its dependencies.
if (!requireNamespace("mlmRev", quietly = TRUE)) {
  install.packages("mlmRev", repos = "https://cran.cnr.berkeley.edu/")
}
Installing package into 'C:/Users/Suman/Documents/R/win-library/3.4'
(as 'lib' is unspecified)
also installing the dependencies 'minqa', 'nloptr', 'RcppEigen', 'lme4'

package 'minqa' successfully unpacked and MD5 sums checked
package 'nloptr' successfully unpacked and MD5 sums checked
package 'RcppEigen' successfully unpacked and MD5 sums checked
package 'lme4' successfully unpacked and MD5 sums checked
package 'mlmRev' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\Suman\AppData\Local\Temp\RtmpMZkfzy\downloaded_packages
In [15]:
data(Chem97, package = "mlmRev")
In [16]:
dim(Chem97)
  1. 31022
  2. 8
In [17]:
head(Chem97)
leaschoolstudentscoregenderagegcsescoregcsecnt
1 1 1 4 F 3 6.625 0.3393157
1 1 2 10 F -3 7.625 1.3393157
1 1 3 10 F -4 7.250 0.9643157
1 1 4 10 F -2 7.500 1.2143157
1 1 5 8 F -1 6.444 0.1583157
1 1 6 10 F 4 7.750 1.4643157
In [18]:
head(Chem97[c("score", "gender", "gcsescore")])
scoregendergcsescore
4 F 6.625
10 F 7.625
10 F 7.250
10 F 7.500
8 F 6.444
10 F 7.750
In [19]:
mtcars
mpgcyldisphpdratwtqsecvsamgearcarb
Mazda RX421.0 6 160.0110 3.90 2.62016.460 1 4 4
Mazda RX4 Wag21.0 6 160.0110 3.90 2.87517.020 1 4 4
Datsun 71022.8 4 108.0 93 3.85 2.32018.611 1 4 1
Hornet 4 Drive21.4 6 258.0110 3.08 3.21519.441 0 3 1
Hornet Sportabout18.7 8 360.0175 3.15 3.44017.020 0 3 2
Valiant18.1 6 225.0105 2.76 3.46020.221 0 3 1
Duster 36014.3 8 360.0245 3.21 3.57015.840 0 3 4
Merc 240D24.4 4 146.7 62 3.69 3.19020.001 0 4 2
Merc 23022.8 4 140.8 95 3.92 3.15022.901 0 4 2
Merc 28019.2 6 167.6123 3.92 3.44018.301 0 4 4
Merc 280C17.8 6 167.6123 3.92 3.44018.901 0 4 4
Merc 450SE16.4 8 275.8180 3.07 4.07017.400 0 3 3
Merc 450SL17.3 8 275.8180 3.07 3.73017.600 0 3 3
Merc 450SLC15.2 8 275.8180 3.07 3.78018.000 0 3 3
Cadillac Fleetwood10.4 8 472.0205 2.93 5.25017.980 0 3 4
Lincoln Continental10.4 8 460.0215 3.00 5.42417.820 0 3 4
Chrysler Imperial14.7 8 440.0230 3.23 5.34517.420 0 3 4
Fiat 12832.4 4 78.7 66 4.08 2.20019.471 1 4 1
Honda Civic30.4 4 75.7 52 4.93 1.61518.521 1 4 2
Toyota Corolla33.9 4 71.1 65 4.22 1.83519.901 1 4 1
Toyota Corona21.5 4 120.1 97 3.70 2.46520.011 0 3 1
Dodge Challenger15.5 8 318.0150 2.76 3.52016.870 0 3 2
AMC Javelin15.2 8 304.0150 3.15 3.43517.300 0 3 2
Camaro Z2813.3 8 350.0245 3.73 3.84015.410 0 3 4
Pontiac Firebird19.2 8 400.0175 3.08 3.84517.050 0 3 2
Fiat X1-927.3 4 79.0 66 4.08 1.93518.901 1 4 1
Porsche 914-226.0 4 120.3 91 4.43 2.14016.700 1 5 2
Lotus Europa30.4 4 95.1113 3.77 1.51316.901 1 5 2
Ford Pantera L15.8 8 351.0264 4.22 3.17014.500 1 5 4
Ferrari Dino19.7 6 145.0175 3.62 2.77015.500 1 5 6
Maserati Bora15.0 8 301.0335 3.54 3.57014.600 1 5 8
Volvo 142E21.4 4 121.0109 4.11 2.78018.601 1 4 2
In [ ]:
mtcars[c("mpg","disp")]
In [ ]:
mtcars["mpg"]
In [23]:
mtcars$mpg
  1. 21
  2. 21
  3. 22.8
  4. 21.4
  5. 18.7
  6. 18.1
  7. 14.3
  8. 24.4
  9. 22.8
  10. 19.2
  11. 17.8
  12. 16.4
  13. 17.3
  14. 15.2
  15. 10.4
  16. 10.4
  17. 14.7
  18. 32.4
  19. 30.4
  20. 33.9
  21. 21.5
  22. 15.5
  23. 15.2
  24. 13.3
  25. 19.2
  26. 27.3
  27. 26
  28. 30.4
  29. 15.8
  30. 19.7
  31. 15
  32. 21.4
In [ ]:
#########################
#Libraries/Dependencies#
########################
# Install packages only when they are missing, so re-running the script does
# not re-download (or fail offline on) packages that are already present.
install_if_missing <- function(pkgs) {
  for (pkg in pkgs) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg)
    }
  }
  invisible(pkgs)
}

# checkpoint is activated before any other package is loaded: it switches the
# library path to the dated snapshot, and packages that are already attached
# are not replaced afterwards.
install_if_missing("checkpoint")
library("checkpoint")
checkpoint("2016-03-29")

install_if_missing(c("magrittr", "dplyr"))
#Adds %>% forward pipe operator
library("magrittr")
#adds grouping and manipulations
library("dplyr")
#########################
#'%>%' The Pipe Operator#
#########################

# Multiply two values (vectorised, like `*` itself).
mult <- function(x, y) {
  x * y
}
# Goal: 2 * 6 * 5 * 10
# Nested calls: must be read from the inside out
mult(10, mult(5, mult(6, 2)))
# Piped: each result is passed as the first argument of the next call
2 %>% mult(6) %>% mult(5) %>% mult(10)


###################
#Data In & Rename#
##################

# Load the Miami 311 export. read.csv() already defaults to a header row and
# a comma separator, so neither needs to be spelled out.
m311 <- read.csv("C:\\data\\Miami_311.csv")
# Print the plain data frame to show the unwieldy default display
m311
# Wrap it as a dplyr tbl for compact printing
m311 <- tbl_df(m311)
# Summaries: first a single column, then the whole table
summary(m311$Case.Owner)
summary(m311)
#Renaming Part 1: Observations with Look Up Table (LUT)
# Names are the raw Case.Owner values to match; values are the display labels.
# Several raw values map to the same label (three of them map to
# "Traffic Engineering").
caseOwnerLUT <- c(
  "Animal_Services" = "Animal Services",
  "Public_Works_Construction-6-60" = "Construction",
  "Public_Works_Mosquito_Control-8-60" = "Mosquito Control",
  "Public_Works_Traffic_Engineering-10-60" = "Traffic Engineering",
  "Public_Works_Waste_Management" = "Waste Management",
  "Regulatory_and_Economic_Resources" = "Regulatory and Economic Resources",
  "Community_Information_and_Outreach" = "Information and Outreach",
  "Public_Works_Hwy_Engineering-5-60" = "Traffic Engineering",
  "Public_Works_Road_And_Bridges-16-60" = "Road and Bridges",
  "Public_Works_traffic_Signals_And_Signs-15-60" = "Traffic Engineering",
  "RAAM-27-93" = "RAAM",
  "Waste_Management" = "Waste Management"
)
# Look each owner up by its text. If Case.Owner is a factor, indexing with it
# directly would use the integer level codes (position in the LUT) rather than
# the names, silently assigning the wrong label; as.character() forces a
# lookup by name. Values missing from the LUT become NA.
m311$Case.Owner <- caseOwnerLUT[as.character(m311$Case.Owner)] %>%
  unname() %>%
  factor()
summary(m311$Case.Owner)
labels(m311)
#Renaming Part 2: Features
# A single rename() call with all `new = old` pairs, replacing the
# auto-generated column names with short ones.
m311 <- m311 %>%
  rename(
    CreatedDate = Ticket.Created.Date...Time,
    LastUpdatedDate = Ticket.Last.Updated.Date...Time,
    ClosedDate = Ticket.Closed.Date...Time,
    Address = Location.Geo..Coded,
    District = Neighborhood...District...Ward...etc.,
    Zip = Zip...Postal.Code,
    State = State...Province
  )

# Sampling: a random 10% of the rows, then a fixed-size draw of 1000 rows
m311f <- sample_frac(m311, .1)
m311n <- sample_n(m311, 1000)


#######################
#Feature Manipulation#
######################

# Keep only two columns
select(m311, Case.Owner, CreatedDate)

# Keep everything except one column
select(m311, -ClosedDate)

# Select with a helper: every column whose name contains "Issue", plus the
# owner; then show the table and its distinct rows
x <- select(m311, contains("Issue"), Case.Owner)
x
distinct(x)

# Mutate: add the share of the goal that was left over (negative when the
# ticket took longer than the goal)
x <- mutate(
  m311,
  GoalKpi = (Goal.Days - Actual.Completed.Days) / Goal.Days
)
summary(x$GoalKpi) #obviously some issues with this calculation, like divide by zero perhaps?


###########################
#Observation Manipulation#
##########################

# Columns needed to study goal vs. actual completion time
goals <- select(
  m311,
  Case.Owner, Issue.Type, Goal.Days, Actual.Completed.Days, Ticket.Status
)

# Tickets whose goal is zero days or less
zeroGoal <- filter(goals, Goal.Days <= 0)
# How many are there, and in which status?
nrow(zeroGoal)
summary(zeroGoal$Ticket.Status)
# Re-create the factors so they keep only the levels still present
zeroGoal$Issue.Type <- factor(zeroGoal$Issue.Type)
zeroGoal$Ticket.Status <- factor(zeroGoal$Ticket.Status)

# Longest-running tickets first
zeroGoal %>%
  arrange(desc(Actual.Completed.Days)) %>%
  glimpse()

zeroGoal$Actual.Completed.Days <- as.numeric(zeroGoal$Actual.Completed.Days)

# Summary statistics, showing two ways of ignoring missing values
summarise(
  zeroGoal,
  mean = mean(Actual.Completed.Days, na.rm = TRUE), # easy way: na.rm
  sd = sd(Actual.Completed.Days, na.rm = TRUE),
  median = median(Actual.Completed.Days[!is.na(Actual.Completed.Days)]), # hard way: filter by hand
  observations = n(),
  NasInActualCompleted = sum(is.na(Actual.Completed.Days))
)

#####################
#Group Manipulation#
####################

# Grouping only tags the table with its grouping columns
x <- group_by(zeroGoal, Case.Owner)
class(x)
x
# ...it still prints like a data frame

# ...but summaries are now computed once per group
zeroGoal %>%
  group_by(Case.Owner) %>%
  summarise(
    obs = n(),
    avg.comp.days = mean(Actual.Completed.Days, na.rm = TRUE),
    max.comp.days = max(Actual.Completed.Days, na.rm = TRUE),
    min.comp.days = min(Actual.Completed.Days, na.rm = TRUE)
  ) %>%
  arrange(avg.comp.days)


#############################
#Practice Exercises - Part 1#
#############################
# For Each Year, What were #
# the top 3 Issues and #
# average resolution time #
############################

#Hints
# Install lubridate only when it is missing, so re-running the script does
# not reinstall it every time.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
library("lubridate")
summary(m311$CreatedDate)

# Parse the month/day/year part of the creation timestamp into a Date
m311f$CreatedDate2 <- as.Date(m311f$CreatedDate, format = "%m/%d/%Y")

# Extract the calendar year from the parsed dates
year(m311f$CreatedDate2)

# The three tickets that took longest to complete (ties included)
m311 %>%
  select(Ticket.ID, Issue.Type, Actual.Completed.Days) %>%
  top_n(3, Actual.Completed.Days) %>%
  arrange(desc(Actual.Completed.Days))

##################
#Part 1 - Answer#
#################
# Per creation year and issue type: the number of tickets and the mean
# completion time; then keep the three most frequent issue types of each
# year, most frequent first.
m311 %>%
  mutate(Year = year(as.Date(CreatedDate, format = "%m/%d/%Y"))) %>%
  group_by(Year, Issue.Type) %>%
  summarise(
    obs = n(),
    avgResTime = mean(Actual.Completed.Days, na.rm = TRUE)
  ) %>%
  top_n(3, obs) %>%
  arrange(desc(obs))

#############################
#Practice Exercises - Part 2#
#############################
# What are the top 3 methods #
# by which we receive GPS #
#############################


##################
#Part 2 - Answer#
#################

# Per intake method: count the tickets that have both a latitude and a
# longitude; then keep the three methods with the most such tickets.
m311 %>%
  group_by(Method.Received) %>%
  summarise(gpsObs = sum((!is.na(Latitude) & !is.na(Longitude)))) %>%
  top_n(3, gpsObs) %>%
  arrange(desc(gpsObs))

##############################
#Practice Exercises - Part 3 #
##############################
# Which Districts service #
# the most requests for each #
# department and what are #
# the top 3 issues #
##############################

##################
#Part 3 - Answer #
##################

# Step 1: count tickets per department, district and issue type, and sort so
#         the most frequent issue types come first.
# Step 2: collapse to one row per department and district, with the total
#         ticket count and its most frequent issue types.
# Step 3: keep the three busiest districts of each department.
m311 %>%
  group_by(Case.Owner, District, Issue.Type) %>%
  summarise(issueObs = n()) %>%
  arrange(desc(issueObs)) %>%
  summarise(
    totalObs = sum(issueObs),
    # head() returns at most three entries; indexing with [1:3] would pad
    # groups that have fewer than three issue types with NA, producing
    # labels such as "Pothole, NA, NA".
    top_issues = toString(head(as.character(Issue.Type), 3))
  ) %>%
  top_n(3, totalObs) %>%
  arrange(desc(totalObs)) %>%
  glimpse()

# Inspect the sixth row of x, one column per line
x[6, ] %>%
  glimpse()