library(tidyverse)

# Helpers ----

# Convert a character vector of "H:M:S" strings to numeric seconds.
# NA input yields NA output.
parse_hms_seconds <- function(x) {
  parts <- stringr::str_split(as.character(x), ":")
  vapply(
    parts,
    function(p) {
      p <- as.double(p)
      p[1] * 60 * 60 + p[2] * 60 + p[3]
    },
    numeric(1)
  )
}

# Fetch one named entry from a named character vector of XML attributes,
# returning NA instead of failing when the attribute is absent.
attr_or_na <- function(attrs, key) {
  if (key %in% names(attrs)) attrs[[key]] else NA_character_
}

# Express a difftime in minutes (converts the values, not just the label).
to_minutes <- function(x) {
  units(x) <- "mins"
  x
}

# Keep attempts 7-20 only (the others are incomplete) and drop the
# outlying "Introduction VMOS and Swot" attempts.
filter_complete_runs <- function(df) {
  df %>%
    filter(attemptID %in% 7:20) %>%
    filter(
      !(name == "Introduction VMOS and Swot" &
        attemptID %in% c("9", "14", "18"))
    )
}

# chunk2 with runtime in minutes, unfinished attempts (NA runtime) removed and
# attemptID as integer. Prints a glimpse of the result as a side effect.
overall_runs <- function(df) {
  df %>%
    mutate(runtime = to_minutes(runtime)) %>%
    filter(!is.na(runtime)) %>%
    mutate(attemptID = as.integer(attemptID)) %>%
    glimpse()
}

# Shared plot theme. `legend_position` is passed to `legend.position` when
# given; NULL leaves the theme_classic() default untouched.
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` in element_line();
# `size` is kept so the script also runs on older ggplot2 versions.
theme_runs <- function(legend_position = NULL) {
  dashed_grid <- function(colour) {
    element_line(color = colour, size = rel(0.5), linetype = "dashed")
  }
  out <- theme_classic() +
    theme(
      axis.title.y = element_text(hjust = 0),
      axis.title.x = element_text(hjust = 0),
      panel.grid.major.y = dashed_grid("gray65"),
      panel.grid.minor.y = dashed_grid("gray85"),
      axis.ticks = element_blank()
    )
  if (!is.null(legend_position)) {
    out <- out + theme(legend.position = legend_position)
  }
  out
}

# Colour scale used by every per-section plot.
section_colour_scale <- function() {
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  )
}

# Load the splits file ----

splitsfile <- "~/Downloads/RSA 2018 - DDSS - Presentation (5).lss"
splits <- XML::xmlParse(splitsfile)
splits <- XML::xmlToList(splits)

# Exploratory check, disabled: it fails when the first attempt carries no
# `.attrs` entry (the attribute-only shape handled below).
# class(splits[["AttemptHistory"]][1][["Attempt"]][[".attrs"]][["ended"]])

# Attempts: one row per attempt ----

chunk2 <- dplyr::bind_rows(lapply(splits[["AttemptHistory"]], function(attempt) {
  # An attempt is either a list holding its attributes under `.attrs`
  # (alongside entries such as RealTime / PauseTime) or the attributes
  # themselves.
  attrs <- if (".attrs" %in% names(attempt)) attempt[[".attrs"]] else attempt
  has_real_time <- "RealTime" %in% names(attempt)
  tibble(
    attemptID = attr_or_na(attrs, "id"),
    start = attr_or_na(attrs, "started"),
    end = attr_or_na(attrs, "ended"),
    runtime = if (has_real_time) attempt[["RealTime"]] else NA_character_
  )
}))

chunk2$runtime <- as.difftime(parse_hms_seconds(chunk2$runtime), units = "secs")
chunk2$start <- lubridate::mdy_hms(chunk2$start)
chunk2$end <- lubridate::mdy_hms(chunk2$end)

# Segments: one row per (segment, attempt) with a recorded RealTime ----

chunk <- dplyr::bind_rows(lapply(splits[["Segments"]], function(segments) {
  history <- lapply(segments[["SegmentHistory"]], function(segment) {
    # Entries without a RealTime are skipped (NULL is ignored by bind_rows).
    if ("RealTime" %in% names(segment)) {
      tibble(
        attemptID = segment[[".attrs"]][["id"]],
        RealTime = segment[["RealTime"]]
      )
    }
  })
  dplyr::bind_rows(history) %>%
    mutate(name = segments[["Name"]])
}))

# The parsed values are seconds: tag them as seconds first, then convert, so
# the numbers really are minutes as the axis labels below state.
chunk$RealTime <- to_minutes(
  as.difftime(parse_hms_seconds(chunk$RealTime), units = "secs")
)

# Keep sections in file order rather than alphabetical order.
chunk$name <- factor(chunk$name, levels = unique(chunk$name))

chunk <- chunk %>%
  add_count(name, name = "n")

labels <- unique(paste0(chunk$name, " - ", chunk$n, " attempts"))
names(labels) <- unique(chunk$name)

glimpse(chunk)
glimpse(chunk2)

# Section times by run ----

ggplot(chunk) +
  geom_line(aes(x = attemptID, y = RealTime, group = name, color = name)) +
  labs(x = "Run", y = "Time in minutes", title = "Presentation section times") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  section_colour_scale() +
  theme_runs(legend_position = "bottom")

chunk %>%
  filter_complete_runs() %>%
  ggplot(aes(x = attemptID, y = RealTime, group = name, color = name)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~name) +
  labs(x = "Run", y = "Time in minutes", title = "Presentation section times") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  section_colour_scale() +
  theme_runs(legend_position = "bottom")

chunk %>%
  filter_complete_runs() %>%
  ggplot(
    aes(
      x = attemptID, y = RealTime,
      group = forcats::fct_rev(name), color = name
    )
  ) +
  geom_line(position = "stack") +
  labs(
    x = "Run", y = "Time in minutes",
    title = "Stacked presentation section times"
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  section_colour_scale() +
  theme_runs(legend_position = "bottom")

# Overall run time by run ----

chunk2 %>%
  overall_runs() %>%
  # filter(runtime > 20) %>%
  ggplot() +
  geom_line(aes(x = attemptID, y = runtime, group = 1)) +
  # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
  labs(x = "Run", y = "Time in minutes", title = "Overall run time") +
  scale_y_continuous(expand = c(0, 0)) + # , limits=c(0,NA)) +
  scale_x_continuous(expand = c(0, 0), limits = c(3, NA)) +
  theme_runs()

# Compare runtime to derived time.
## Doesn't look like runtime is accurate, and the NAs are when the run wasn't
## completed.
chunk2 %>%
  mutate(runtime.derived = end - start) %>%
  mutate(diff = runtime.derived - runtime)

# Overall run time by date ----

chunk2 %>%
  overall_runs() %>%
  # filter(runtime > 20) %>%
  ggplot() +
  geom_line(aes(x = start, y = runtime, group = 1)) +
  geom_point(aes(x = start, y = runtime, group = 1)) +
  # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
  labs(x = "Date", y = "Time in minutes", title = "Overall run time") +
  scale_y_continuous(expand = c(0, 0)) + # , limits=c(0,NA)) +
  scale_x_datetime(expand = c(0, 0)) +
  theme_runs()

# Section times by date ----

chunk %>%
  left_join(chunk2, by = "attemptID") %>%
  filter_complete_runs() %>%
  ggplot(aes(x = start, y = RealTime, group = name, color = name)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~name) +
  labs(x = "Date", y = "Time in minutes", title = "Presentation section times") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_datetime(expand = c(0, 0)) +
  section_colour_scale() +
  theme_runs(legend_position = "bottom")

chunk %>%
  left_join(chunk2, by = "attemptID") %>%
  filter_complete_runs() %>%
  ggplot(
    aes(
      x = start, y = RealTime,
      group = forcats::fct_rev(name), color = name
    )
  ) +
  geom_line(position = "stack") +
  labs(
    x = "Date", y = "Time in minutes",
    title = "Stacked presentation section times"
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_datetime(expand = c(0, 0)) +
  section_colour_scale() +
  theme_runs(legend_position = "bottom")