library(tidyverse)
Loading tidyverse: ggplot2 Loading tidyverse: tibble Loading tidyverse: tidyr Loading tidyverse: readr Loading tidyverse: purrr Loading tidyverse: dplyr Warning message: "package 'tidyr' was built under R version 3.4.2"Warning message: "package 'purrr' was built under R version 3.4.2"Warning message: "package 'dplyr' was built under R version 3.4.2"Conflicts with tidy packages --------------------------------------------------- filter(): dplyr, stats lag(): dplyr, stats
# Location of the splits file; it is parsed as XML below.
splitsfile <- "~/Downloads/RSA 2018 - DDSS - Presentation (5).lss"
# Parse the document and convert it straight into nested R lists.
splits <- XML::xmlToList(XML::xmlParse(splitsfile))
# Inspect the type of the `ended` attribute of the first attempt entry.
class(splits[["AttemptHistory"]][1][["Attempt"]][[".attrs"]][["ended"]])
# Build `chunk2`: one row per entry of the AttemptHistory list, holding the
# attempt id, its start/end timestamps (still strings at this point) and the
# reported RealTime string (NA when the attempt has no RealTime element).
#
# Iterating over the list itself (instead of 1:length(...)) keeps this safe
# when the history is empty, and bind_rows() replaces do.call(rbind, ...).
chunk2 <- dplyr::bind_rows(
  lapply(unname(splits[["AttemptHistory"]]), function(attempt) {
    # When an attempt carries a `.attrs` entry its attributes live there;
    # otherwise the attempt is indexed directly by attribute name. Testing for
    # `.attrs` itself (rather than for RealTime/PauseTime) also covers attempts
    # whose only children are other elements.
    # NOTE(review): relies on XML::xmlToList() returning the bare attribute
    # vector for child-less nodes -- confirm against the XML package docs.
    attrs <- if (".attrs" %in% names(attempt)) attempt[[".attrs"]] else attempt
    has_runtime <- "RealTime" %in% names(attempt)
    tibble(
      attemptID = attrs[["id"]],
      start = attrs[["started"]],
      end = attrs[["ended"]],
      # Typed NA keeps the column character in every row.
      runtime = if (has_runtime) attempt[["RealTime"]] else NA_character_
    )
  })
)
# Turn each "H:M:S" runtime string into a number of seconds and store the
# result as a difftime measured in seconds (NA strings stay NA).
chunk2$runtime <- as.difftime(
  vapply(chunk2$runtime, function(hms) {
    parts <- as.double(unlist(stringr::str_split(hms, ":")))
    parts[1] * 60 * 60 + parts[2] * 60 + parts[3]
  }, numeric(1), USE.NAMES = FALSE),
  units = "secs"
)
# Parse the month/day/year timestamps into date-times.
chunk2 <- mutate(
  chunk2,
  start = lubridate::mdy_hms(start),
  end = lubridate::mdy_hms(end)
)
# Build `chunk`: one row per (segment, attempt) pair that has a RealTime
# entry in the segment's history, with the segment name attached.
chunk <- do.call(rbind, lapply(splits[["Segments"]], function(segments) {
  history_rows <- lapply(segments[["SegmentHistory"]], function(segment) {
    # History entries without a RealTime child are skipped (NULL is ignored
    # by rbind).
    if ("RealTime" %in% names(segment)) {
      # `[[` extracts the bare id so the attribute name does not leak into
      # the row names. The former `time = segment$S` argument is dropped: it
      # depended on `$` partial matching and the recorded glimpse of `chunk`
      # shows no `time` column, i.e. it evaluated to NULL.
      # NOTE(review): confirm no history entry has a child starting with "S".
      data.frame(
        attemptID = segment[[".attrs"]][["id"]],
        RealTime = segment[["RealTime"]]
      )
    }
  })
  segments.df <- do.call(rbind, history_rows)
  # A segment with no usable history yields NULL here; returning it lets the
  # outer rbind skip the segment instead of failing on nrow(NULL).
  if (is.null(segments.df)) {
    return(NULL)
  }
  segments.df$name <- rep(segments[["Name"]], nrow(segments.df))
  segments.df
}))
# Turn each "H:M:S" RealTime string into a difftime.
# The arithmetic below produces SECONDS, so the difftime must be tagged
# "secs". Tagging it "mins" (as before) mislabels the values and turns the
# later `units(chunk$RealTime) <- "mins"` conversion into a no-op, so the
# "Time in minutes" plots would show seconds.
chunk$RealTime <- as.difftime(
  # as.character() guards against the column having been read as a factor.
  vapply(as.character(chunk$RealTime), function(hms) {
    parts <- as.double(unlist(stringr::str_split(hms, ":")))
    parts[1] * 60 * 60 + parts[2] * 60 + parts[3]
  }, numeric(1), USE.NAMES = FALSE),
  units = "secs"
)
# Fix the section order to order of first appearance instead of alphabetical.
chunk$name <- factor(chunk$name, levels = unique(chunk$name))
# Attach, to every row, the number of recorded rows for its section.
chunk <- ungroup(mutate(group_by(chunk, name), n = n()))
# Lookup from section name to a "<name> - <n> attempts" caption.
labels <- setNames(
  unique(paste0(chunk$name, " - ", chunk$n, " attempts")),
  unique(chunk$name)
)
# Express the section times in minutes.
units(chunk$RealTime) <- "mins"
Error in as.difftime(chunk2$runtime): need explicit units for numeric conversion Traceback: 1. as.difftime(chunk2$runtime) 2. stop("need explicit units for numeric conversion")
# Print a compact column overview of both derived tables.
purrr::walk(list(chunk, chunk2), glimpse)
Observations: 81 Variables: 4 $ attemptID <fctr> 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, -... $ RealTime <time> 0.075366667 mins, 0.006003333 mins, 8.620370000 mins, 7.... $ name <fctr> Introduction VMOS and Swot, Introduction VMOS and Swot, ... $ n <int> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 1... Observations: 20 Variables: 4 $ attemptID <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", ... $ start <dttm> 2017-12-05 19:36:09, 2017-12-05 19:36:12, 2017-12-07 18:... $ end <dttm> 2017-12-05 19:36:10, 2017-12-05 19:36:13, 2017-12-07 19:... $ runtime <time> NA mins, 0.77743 mins, 2321.29524 mins, NA mins, NA mins...
# Line chart of every section's time per run, one coloured line per section.
ggplot(chunk, aes(x = attemptID, y = RealTime, group = name, color = name)) +
  geom_line() +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  ) +
  labs(title = "Presentation section times", x = "Run", y = "Time in minutes") +
  theme_classic() +
  theme(
    legend.position = "bottom",
    axis.ticks = element_blank(),
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    )
  )
# Per-section scatter plot with a smoothed trend, one facet per section.
chunk %>%
  filter(
    # keep only runs 7-20; earlier runs are incomplete
    attemptID %in% 7:20,
    # drop the outlier runs of the introduction section
    name != "Introduction VMOS and Swot" | !(attemptID %in% c("9", "14", "18"))
  ) %>%
  ggplot(aes(x = attemptID, y = RealTime, group = name, color = name)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~name) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  ) +
  labs(title = "Presentation section times", x = "Run", y = "Time in minutes") +
  theme_classic() +
  theme(
    legend.position = "bottom",
    axis.ticks = element_blank(),
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    )
  )
`geom_smooth()` using method = 'loess'
# Stacked line chart of section times per run; the grouping order is reversed
# so the sections stack in the opposite order to the factor levels.
chunk %>%
  filter(
    # keep only runs 7-20; earlier runs are incomplete
    attemptID %in% 7:20,
    # drop the outlier runs of the introduction section
    name != "Introduction VMOS and Swot" | !(attemptID %in% c("9", "14", "18"))
  ) %>%
  ggplot(
    aes(x = attemptID, y = RealTime, group = forcats::fct_rev(name), color = name)
  ) +
  geom_line(position = "stack") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  ) +
  labs(
    title = "Stacked presentation section times",
    x = "Run",
    y = "Time in minutes"
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    axis.ticks = element_blank(),
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    )
  )
# Overall reported run time per attempt number.
chunk2 %>%
  # Convert the seconds-based difftime to minutes. Dividing by 60 and calling
  # as.difftime(..., units = "mins") on a difftime does not change its unit
  # label: the recorded glimpse shows minute values labelled "secs". Going
  # through as.numeric(units = "mins") converts value and label together.
  mutate(
    runtime = as.difftime(as.numeric(runtime, units = "mins"), units = "mins")
  ) %>%
  # Attempts without a reported runtime are dropped.
  filter(!is.na(runtime)) %>%
  # Numeric ids so the x axis is continuous.
  mutate(attemptID = as.integer(attemptID)) %>%
  # filter(runtime > 20) %>%
  glimpse() %>%
  ggplot() +
  geom_line(aes(x = attemptID, y = runtime, group = 1)) +
  # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
  labs(x = "Run", y = "Time in minutes", title = "Overall run time") +
  scale_y_continuous(expand = c(0, 0)) + # , limits=c(0,NA)) +
  # The lower x limit of 3 excludes attempts before run 3 from the panel.
  scale_x_continuous(expand = c(0, 0), limits = c(3, NA)) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    ),
    axis.ticks = element_blank()
  )
Observations: 12 Variables: 4 $ attemptID <int> 2, 3, 7, 8, 10, 12, 13, 15, 16, 17, 19, 20 $ start <dttm> 2017-12-05 19:36:12, 2017-12-07 18:56:29, 2018-03-05 20:... $ end <dttm> 2017-12-05 19:36:13, 2017-12-07 19:35:10, 2018-03-05 20:... $ runtime <time> 0.01295717 secs, 38.68825392 secs, 44.53068833 secs, 45....
Warning message: "Removed 1 rows containing missing values (geom_path)."
# Compare the reported runtime with the duration derived from end - start.
## The reported runtime doesn't look accurate, and the NAs are the runs that
## weren't completed.
mutate(
  chunk2,
  runtime.derived = end - start,
  diff = runtime.derived - runtime
)
attemptID | start | end | runtime | runtime.derived | diff |
---|---|---|---|---|---|
1 | 2017-12-05 19:36:09 | 2017-12-05 19:36:10 | NA secs | 1 secs | NA secs |
2 | 2017-12-05 19:36:12 | 2017-12-05 19:36:13 | 0.77743 secs | 1 secs | 0.222570 secs |
3 | 2017-12-07 18:56:29 | 2017-12-07 19:35:10 | 2321.29524 secs | 2321 secs | -0.295235 secs |
4 | 2018-03-05 19:59:52 | 2018-03-05 19:59:57 | NA secs | 5 secs | NA secs |
5 | 2018-03-05 20:00:04 | 2018-03-05 20:00:06 | NA secs | 2 secs | NA secs |
6 | 2018-03-05 20:00:09 | 2018-03-05 20:00:10 | NA secs | 1 secs | NA secs |
7 | 2018-03-05 20:00:27 | 2018-03-05 20:44:58 | 2671.84130 secs | 2671 secs | -0.841300 secs |
8 | 2018-03-07 21:55:18 | 2018-03-07 22:40:33 | 2715.16860 secs | 2715 secs | -0.168600 secs |
9 | 2018-03-09 18:52:10 | 2018-03-09 18:53:01 | NA secs | 51 secs | NA secs |
10 | 2018-03-21 15:36:11 | 2018-03-21 16:17:16 | 2464.90460 secs | 2465 secs | 0.095400 secs |
11 | 2018-03-30 18:44:45 | 2018-03-30 18:46:22 | NA secs | 97 secs | NA secs |
12 | 2018-03-30 18:47:24 | 2018-03-30 19:29:16 | 2511.51100 secs | 2512 secs | 0.489000 secs |
13 | 2018-04-05 20:12:25 | 2018-04-05 20:54:02 | 2497.49410 secs | 2497 secs | -0.494100 secs |
14 | 2018-04-13 15:43:24 | 2018-04-13 15:43:34 | NA secs | 10 secs | NA secs |
15 | 2018-04-16 21:43:27 | 2018-04-16 22:27:03 | 2615.47180 secs | 2616 secs | 0.528200 secs |
16 | 2018-04-17 21:09:06 | 2018-04-17 21:50:40 | 2493.81660 secs | 2494 secs | 0.183400 secs |
17 | 2018-04-18 14:33:23 | 2018-04-18 15:16:22 | 2579.77310 secs | 2579 secs | -0.773100 secs |
18 | 2018-04-19 12:48:35 | 2018-04-19 12:48:41 | NA secs | 6 secs | NA secs |
19 | 2018-04-19 12:48:43 | 2018-04-19 13:29:23 | 2406.60100 secs | 2440 secs | 33.399001 secs |
20 | 2018-04-19 14:59:37 | 2018-04-19 15:39:13 | 2376.20060 secs | 2376 secs | -0.200600 secs |
# Overall reported run time against the date the attempt started.
chunk2 %>%
  # Convert the seconds-based difftime to minutes. Dividing by 60 and calling
  # as.difftime(..., units = "mins") on a difftime does not change its unit
  # label: the recorded glimpse shows minute values labelled "secs". Going
  # through as.numeric(units = "mins") converts value and label together.
  mutate(
    runtime = as.difftime(as.numeric(runtime, units = "mins"), units = "mins")
  ) %>%
  # Attempts without a reported runtime are dropped.
  filter(!is.na(runtime)) %>%
  mutate(attemptID = as.integer(attemptID)) %>%
  # filter(runtime > 20) %>%
  glimpse() %>%
  ggplot(aes(x = start, y = runtime, group = 1)) +
  geom_line() +
  geom_point() +
  # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
  labs(x = "Date", y = "Time in minutes", title = "Overall run time") +
  scale_y_continuous(expand = c(0, 0)) + # , limits=c(0,NA)) +
  scale_x_datetime(expand = c(0, 0)) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    ),
    axis.ticks = element_blank()
  )
Observations: 12 Variables: 4 $ attemptID <int> 2, 3, 7, 8, 10, 12, 13, 15, 16, 17, 19, 20 $ start <dttm> 2017-12-05 19:36:12, 2017-12-07 18:56:29, 2018-03-05 20:... $ end <dttm> 2017-12-05 19:36:13, 2017-12-07 19:35:10, 2018-03-05 20:... $ runtime <time> 0.01295717 secs, 38.68825392 secs, 44.53068833 secs, 45....
# Per-section scatter plot with a smoothed trend against the attempt's start
# date, one facet per section.
chunk %>%
  # chunk2$attemptID is character; make the key the same type up front so the
  # join does not depend on (and warn about) implicit factor coercion.
  mutate(attemptID = as.character(attemptID)) %>%
  left_join(chunk2, by = "attemptID") %>%
  # keep only runs 7-20; earlier runs are incomplete
  filter(attemptID %in% 7:20) %>%
  # drop the outlier runs of the introduction section
  filter(
    !(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))
  ) %>%
  ggplot(aes(x = start, y = RealTime, group = name, color = name)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~name) +
  labs(x = "Date", y = "Time in minutes", title = "Presentation section times") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_datetime(expand = c(0, 0)) +
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  ) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    ),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )
Warning message: "Column `attemptID` joining factor and character vector, coercing into character vector"`geom_smooth()` using method = 'loess'
# Stacked line chart of section times against the attempt's start date; the
# grouping order is reversed relative to the factor levels.
chunk %>%
  # chunk2$attemptID is character; make the key the same type up front so the
  # join does not depend on (and warn about) implicit factor coercion.
  mutate(attemptID = as.character(attemptID)) %>%
  left_join(chunk2, by = "attemptID") %>%
  # keep only runs 7-20; earlier runs are incomplete
  filter(attemptID %in% 7:20) %>%
  # drop the outlier runs of the introduction section
  filter(
    !(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))
  ) %>%
  ggplot(
    aes(x = start, y = RealTime, group = forcats::fct_rev(name), color = name)
  ) +
  geom_line(position = "stack") +
  labs(
    x = "Date",
    y = "Time in minutes",
    title = "Stacked presentation section times"
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_datetime(expand = c(0, 0)) +
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  ) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    ),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )
Warning message: "Column `attemptID` joining factor and character vector, coercing into character vector"