In [5]:
library(tidyverse)
Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Warning message:
"package 'tidyr' was built under R version 3.4.2"Warning message:
"package 'purrr' was built under R version 3.4.2"Warning message:
"package 'dplyr' was built under R version 3.4.2"Conflicts with tidy packages ---------------------------------------------------
filter(): dplyr, stats
lag():    dplyr, stats
In [1]:
splitsfile <- "~/Downloads/RSA 2018 - DDSS - Presentation (5).lss"
In [2]:
splits <- XML::xmlParse(splitsfile)
splits <- XML::xmlToList(splits)
In [97]:
class(splits[['AttemptHistory']][1][['Attempt']][['.attrs']][['ended']])
'character'
In [150]:
chunk2 <- do.call(rbind, lapply(1:length(splits[['AttemptHistory']]), function(i){
    # print(i)
    attempt <- splits[['AttemptHistory']][[i]]
    # print(names(splits[['AttemptHistory']][[i]]))
    if ('RealTime' %in% names(attempt) | 'PauseTime' %in% names(attempt)) {
        df <- tibble(attemptID=attempt[['.attrs']][['id']], 
               start=attempt[['.attrs']][['started']], 
               end=attempt[['.attrs']][['ended']])
    } else {
        df <- tibble(attemptID=attempt[['id']], 
               start=attempt[['started']], 
               end=attempt[['ended']])
    }
    if ('RealTime' %in% names(attempt)) {
        df$runtime <- attempt[['RealTime']]
    } else {
        df$runtime <- NA
    }
    df
}))

chunk2$runtime <- unlist(lapply(chunk2$runtime, function(t) { 
    dt <- unlist(stringr::str_split(t, ":"))
    as.difftime(as.double(dt[1])*60*60 + as.double(dt[2])*60 + as.double(dt[3]), units="secs")
})) %>% as.difftime(units="secs")
chunk2$start <- lubridate::mdy_hms(chunk2$start)
chunk2$end <- lubridate::mdy_hms(chunk2$end)
In [6]:
chunk <- do.call(rbind, lapply(splits[['Segments']], function(segments) {
    
    segments.df <- do.call(rbind, lapply(segments[['SegmentHistory']], function(segment) {
        if ('RealTime' %in% names(segment))
        data.frame(`attemptID` = segment$.attrs['id'], 
                   RealTime = segment$RealTime,
                   time = segment$S)
    }))
    segments.df$name <- rep(segments$Name, nrow(segments.df))
    segments.df
}))

chunk$RealTime <- unlist(lapply(chunk$RealTime, function(t) { 
    dt <- unlist(stringr::str_split(t, ":"))
    as.difftime(as.double(dt[1])*60*60 + as.double(dt[2])*60 + as.double(dt[3]), units="secs")
})) %>% as.difftime(units="mins")
    
chunk$name <- factor(chunk$name, levels=unique(chunk$name))
    
chunk <- chunk %>%
    group_by(name) %>%
    mutate(n=n()) %>%
    ungroup() 
 
labels <- unique(paste0(chunk$name, " - ",  chunk$n, " attempts"))
names(labels) <- unique(chunk$name)
units(chunk$RealTime) <- "mins"
In [146]:

Error in as.difftime(chunk2$runtime): need explicit units for numeric conversion
Traceback:

1. as.difftime(chunk2$runtime)
2. stop("need explicit units for numeric conversion")
In [148]:
glimpse(chunk)
glimpse(chunk2)
Observations: 81
Variables: 4
$ attemptID <fctr> 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, -...
$ RealTime  <time> 0.075366667 mins, 0.006003333 mins, 8.620370000 mins, 7....
$ name      <fctr> Introduction VMOS and Swot, Introduction VMOS and Swot, ...
$ n         <int> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 1...
Observations: 20
Variables: 4
$ attemptID <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", ...
$ start     <dttm> 2017-12-05 19:36:09, 2017-12-05 19:36:12, 2017-12-07 18:...
$ end       <dttm> 2017-12-05 19:36:10, 2017-12-05 19:36:13, 2017-12-07 19:...
$ runtime   <time> NA mins, 0.77743 mins, 2321.29524 mins, NA mins, NA mins...
In [66]:
ggplot(chunk) +
    geom_line(aes(x=attemptID, y=RealTime, group=name, color=name)) +
    labs(x="Run", y="Time in minutes", title="Presentation section times") +
    scale_y_continuous(expand=c(0,0)) +
    scale_x_discrete(expand=c(0,0)) +
    scale_color_discrete(guide=guide_legend(title="Section", title.position="top")) +
    theme_classic() +
    theme(
       axis.title.y = element_text(hjust=0),
       axis.title.x = element_text(hjust=0),
       panel.grid.major.y = element_line(color="gray65", size=rel(0.5), linetype="dashed"),
       panel.grid.minor.y = element_line(color="gray85", size=rel(0.5), linetype="dashed"),
       axis.ticks = element_blank(),
       legend.position="bottom"
    )
In [69]:
chunk %>%
    filter(attemptID %in% 7:20) %>% # remove columns that are incomplete
    filter(!(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))) %>% # filter outliers
  ggplot(aes(x=attemptID, y=RealTime, group=name, color=name)) +
    geom_point() +
    geom_smooth() +
    facet_wrap(~name) +
    labs(x="Run", y="Time in minutes", title="Presentation section times") +
    scale_y_continuous(expand=c(0,0)) +
    scale_x_discrete(expand=c(0,0)) +
    scale_color_discrete(guide=guide_legend(title="Section", title.position="top")) +
    theme_classic() +
    theme(
       axis.title.y = element_text(hjust=0),
       axis.title.x = element_text(hjust=0),
       panel.grid.major.y = element_line(color="gray65", size=rel(0.5), linetype="dashed"),
       panel.grid.minor.y = element_line(color="gray85", size=rel(0.5), linetype="dashed"),
       axis.ticks = element_blank(),
       legend.position="bottom"
    )
`geom_smooth()` using method = 'loess'
In [70]:
chunk %>%
    filter(attemptID %in% 7:20) %>% # remove columns that are incomplete
    filter(!(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))) %>% # filter outliers
  ggplot(aes(x=attemptID, y=RealTime, group=forcats::fct_rev(name), color=name)) +
    geom_line(position="stack") +
    labs(x="Run", y="Time in minutes", title="Stacked presentation section times") +
    scale_y_continuous(expand=c(0,0)) +
    scale_x_discrete(expand=c(0,0)) +
    scale_color_discrete(guide=guide_legend(title="Section", title.position="top")) +
    theme_classic() +
    theme(
       axis.title.y = element_text(hjust=0),
       axis.title.x = element_text(hjust=0),
       panel.grid.major.y = element_line(color="gray65", size=rel(0.5), linetype="dashed"),
       panel.grid.minor.y = element_line(color="gray85", size=rel(0.5), linetype="dashed"),
       axis.ticks = element_blank(),
       legend.position="bottom"
    )
In [162]:
chunk2 %>%
    mutate(runtime = as.difftime(runtime/60, units="mins")) %>%
    filter(!is.na(runtime)) %>%
    mutate(attemptID = as.integer(attemptID)) %>%
    # filter(runtime > 20) %>%
    glimpse() %>%
  ggplot() +
    geom_line(aes(x=attemptID, y=runtime, group=1)) +
    # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
    labs(x="Run", y="Time in minutes", title="Overall run time") +
    scale_y_continuous(expand=c(0,0)) + #, limits=c(0,NA)) +
    scale_x_continuous(expand=c(0,0), limits=c(3,NA)) +
    theme_classic() +
    theme(
       axis.title.y = element_text(hjust=0),
       axis.title.x = element_text(hjust=0),
       panel.grid.major.y = element_line(color="gray65", size=rel(0.5), linetype="dashed"),
       panel.grid.minor.y = element_line(color="gray85", size=rel(0.5), linetype="dashed"),
       axis.ticks = element_blank()
    )
Observations: 12
Variables: 4
$ attemptID <int> 2, 3, 7, 8, 10, 12, 13, 15, 16, 17, 19, 20
$ start     <dttm> 2017-12-05 19:36:12, 2017-12-07 18:56:29, 2018-03-05 20:...
$ end       <dttm> 2017-12-05 19:36:13, 2017-12-07 19:35:10, 2018-03-05 20:...
$ runtime   <time> 0.01295717 secs, 38.68825392 secs, 44.53068833 secs, 45....
Warning message:
"Removed 1 rows containing missing values (geom_path)."
In [151]:
# compare runtime to derived time
## doesn't look like runtime is accurate and the NA's are when the run wasn't completed
chunk2 %>%
    mutate(runtime.derived = end-start) %>%
    mutate(diff = runtime.derived-runtime)
attemptIDstartendruntimeruntime.deriveddiff
1 2017-12-05 19:36:092017-12-05 19:36:10 NA secs 1 secs NA secs
2 2017-12-05 19:36:122017-12-05 19:36:13 0.77743 secs 1 secs 0.222570 secs
3 2017-12-07 18:56:292017-12-07 19:35:102321.29524 secs 2321 secs -0.295235 secs
4 2018-03-05 19:59:522018-03-05 19:59:57 NA secs 5 secs NA secs
5 2018-03-05 20:00:042018-03-05 20:00:06 NA secs 2 secs NA secs
6 2018-03-05 20:00:092018-03-05 20:00:10 NA secs 1 secs NA secs
7 2018-03-05 20:00:272018-03-05 20:44:582671.84130 secs 2671 secs -0.841300 secs
8 2018-03-07 21:55:182018-03-07 22:40:332715.16860 secs 2715 secs -0.168600 secs
9 2018-03-09 18:52:102018-03-09 18:53:01 NA secs 51 secs NA secs
10 2018-03-21 15:36:112018-03-21 16:17:162464.90460 secs 2465 secs 0.095400 secs
11 2018-03-30 18:44:452018-03-30 18:46:22 NA secs 97 secs NA secs
12 2018-03-30 18:47:242018-03-30 19:29:162511.51100 secs 2512 secs 0.489000 secs
13 2018-04-05 20:12:252018-04-05 20:54:022497.49410 secs 2497 secs -0.494100 secs
14 2018-04-13 15:43:242018-04-13 15:43:34 NA secs 10 secs NA secs
15 2018-04-16 21:43:272018-04-16 22:27:032615.47180 secs 2616 secs 0.528200 secs
16 2018-04-17 21:09:062018-04-17 21:50:402493.81660 secs 2494 secs 0.183400 secs
17 2018-04-18 14:33:232018-04-18 15:16:222579.77310 secs 2579 secs -0.773100 secs
18 2018-04-19 12:48:352018-04-19 12:48:41 NA secs 6 secs NA secs
19 2018-04-19 12:48:432018-04-19 13:29:232406.60100 secs 2440 secs 33.399001 secs
20 2018-04-19 14:59:372018-04-19 15:39:132376.20060 secs 2376 secs -0.200600 secs
In [174]:
chunk2 %>%
    mutate(runtime = as.difftime(runtime/60, units="mins")) %>%
    filter(!is.na(runtime)) %>%
    mutate(attemptID = as.integer(attemptID)) %>%
    # filter(runtime > 20) %>%
    glimpse() %>%
  ggplot() +
    geom_line(aes(x=start, y=runtime, group=1)) +
    geom_point(aes(x=start, y=runtime, group=1)) +
    # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
    labs(x="Date", y="Time in minutes", title="Overall run time") +
    scale_y_continuous(expand=c(0,0)) + #, limits=c(0,NA)) +
    scale_x_datetime(expand=c(0,0)) +
    theme_classic() +
    theme(
       axis.title.y = element_text(hjust=0),
       axis.title.x = element_text(hjust=0),
       panel.grid.major.y = element_line(color="gray65", size=rel(0.5), linetype="dashed"),
       panel.grid.minor.y = element_line(color="gray85", size=rel(0.5), linetype="dashed"),
       axis.ticks = element_blank()
    )
Observations: 12
Variables: 4
$ attemptID <int> 2, 3, 7, 8, 10, 12, 13, 15, 16, 17, 19, 20
$ start     <dttm> 2017-12-05 19:36:12, 2017-12-07 18:56:29, 2018-03-05 20:...
$ end       <dttm> 2017-12-05 19:36:13, 2017-12-07 19:35:10, 2018-03-05 20:...
$ runtime   <time> 0.01295717 secs, 38.68825392 secs, 44.53068833 secs, 45....
In [180]:
chunk %>%
    left_join(chunk2, by="attemptID") %>%
    filter(attemptID %in% 7:20) %>% # remove columns that are incomplete
    filter(!(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))) %>% # filter outliers
  ggplot(aes(x=start, y=RealTime, group=name, color=name)) +
    geom_point() +
    geom_smooth() +
    facet_wrap(~name) +
    labs(x="Date", y="Time in minutes", title="Presentation section times") +
    scale_y_continuous(expand=c(0,0)) +
    scale_x_datetime(expand=c(0,0)) +
    scale_color_discrete(guide=guide_legend(title="Section", title.position="top")) +
    theme_classic() +
    theme(
       axis.title.y = element_text(hjust=0),
       axis.title.x = element_text(hjust=0),
       panel.grid.major.y = element_line(color="gray65", size=rel(0.5), linetype="dashed"),
       panel.grid.minor.y = element_line(color="gray85", size=rel(0.5), linetype="dashed"),
       axis.ticks = element_blank(),
       legend.position="bottom"
    )
Warning message:
"Column `attemptID` joining factor and character vector, coercing into character vector"`geom_smooth()` using method = 'loess'
In [177]:
chunk %>%
    left_join(chunk2, by="attemptID") %>%
    filter(attemptID %in% 7:20) %>% # remove columns that are incomplete
    filter(!(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))) %>% # filter outliers
  ggplot(aes(x=start, y=RealTime, group=forcats::fct_rev(name), color=name)) +
    geom_line(position="stack") +
    labs(x="Date", y="Time in minutes", title="Stacked presentation section times") +
    scale_y_continuous(expand=c(0,0)) +
    scale_x_datetime(expand=c(0,0)) +
    scale_color_discrete(guide=guide_legend(title="Section", title.position="top")) +
    theme_classic() +
    theme(
       axis.title.y = element_text(hjust=0),
       axis.title.x = element_text(hjust=0),
       panel.grid.major.y = element_line(color="gray65", size=rel(0.5), linetype="dashed"),
       panel.grid.minor.y = element_line(color="gray85", size=rel(0.5), linetype="dashed"),
       axis.ticks = element_blank(),
       legend.position="bottom"
    )
Warning message:
"Column `attemptID` joining factor and character vector, coercing into character vector"