library(IRdisplay)

# NOTE(review): the HTML payload appears to have been stripped from this
# export; only a newline is passed to display_html() -- confirm against the
# original notebook.
display_html(
'
'
)

# Evaluate `expr` while silencing package start-up messages, warnings and
# messages (used to keep library() output out of the notebook).
shhh <- function(expr) {
  suppressPackageStartupMessages(suppressWarnings(suppressMessages(expr)))
}

shhh({
  library(tidyverse)
  library(glue)
  library(lubridate)
  library(scales)
})

options(repr.plot.width = 15, repr.plot.height = 7)

# Deployment annotations to use in charts ----
vertical_lines <- as.numeric(as.Date(c("2020-07-22", "2020-07-28", "2020-08-05")))

# Collect sidebar clicks by edit count, sidebar state and wiki ----
# The query is kept multi-line: SQL `--` comments run to the end of the line.
query <- "SELECT
  date_format(dt, 'yyyy-MM-dd') AS date,
  event.token AS session,
  wiki AS wiki,
  event.isAnon AS logged_in_status,
  event.isSidebarCollapsed AS sidebar_state,
  event.editCountBucket AS user_edit_count,
  COUNT(*) as events
FROM event.desktopwebuiactionstracking
WHERE
  -- review clicks to the sidebar
  event.name = 'ui.sidebar'
  AND event.action = 'click'
  AND year = 2020
  AND ((month=07 AND day >= 22) OR month= 08 )
  -- sidebar is collapsible only on new vector skin
  AND event.skinversion = 2
  AND wiki <> 'testwiki'
  AND useragent.is_bot = false
GROUP BY
  date_format(dt, 'yyyy-MM-dd'),
  event.token,
  event.isAnon,
  event.isSidebarCollapsed,
  wiki,
  event.editCountBucket
"

sidebar_clicks <- wmfdata::query_hive(query)

sidebar_clicks$date <- as.Date(sidebar_clicks$date, format = "%Y-%m-%d")
# isSidebarCollapsed == 'false' is labelled "uncollapse", anything else "collapse".
sidebar_clicks$sidebar_state <- ifelse(
  sidebar_clicks$sidebar_state == 'false', "uncollapse", "collapse"
)
# The column holds event.isAnon, so 'false' means the user is logged in.
sidebar_clicks$logged_in_status <- ifelse(
  sidebar_clicks$logged_in_status == 'false', "logged-in", "logged-out"
)

# Number of collapse events vs uncollapse events ----
p <- sidebar_clicks %>%
  group_by(date, sidebar_state) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = date, y = total_events, color = sidebar_state)) +
  geom_line(size = 1.5) +
  geom_vline(xintercept = vertical_lines, linetype = "dashed", color = "black") +
  geom_text(
    aes(
      x = as.Date('2020-07-22'), y = 1E3,
      label = "New skin deployed on Basque Wiki, Fr wiktionary, Pt wikiversity"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  geom_text(
    aes(
      x = as.Date('2020-07-28'), y = 1E3,
      label = "New skin deployed on Persian and Hebrew Wikipedias"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  geom_text(
    aes(
      x = as.Date('2020-08-05'), y = 1E3,
      label = "New skin deployed on French Wikipedia"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  scale_x_date("Date", labels = date_format("%d %b %Y"), date_breaks = "1 week") +
  scale_y_continuous("Number of clicks per day") +
  labs(title = "Daily sidebar clicks by sidebar state") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/daily_sidebar_clicks_overall.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# Overall totals and average clicks per session
count_events_overall <- sidebar_clicks %>%
  summarize(
    num_events = sum(events),
    num_sessions = n_distinct(session),
    avg_clicks = num_events / num_sessions
  )
count_events_overall

count_events_bysidebarstatus <- sidebar_clicks %>%
  group_by(sidebar_state) %>%
  summarize(
    num_events = sum(events),
    num_sessions = n_distinct(session),
    avg_clicks = num_events / num_sessions
  )
count_events_bysidebarstatus

# Chart overall sidebar clicks over time by wiki ----
p <- sidebar_clicks %>%
  group_by(date, wiki) %>%
  summarise(
    total_clicks = sum(events),
    avg_clicks = sum(events) / n_distinct(session)
  ) %>%
  ggplot(aes(x = date, y = total_clicks, color = wiki)) +
  geom_line(size = 1.5) +
  geom_vline(xintercept = vertical_lines, linetype = "dashed", color = "black") +
  geom_text(
    aes(
      x = as.Date('2020-07-22'), y = 2.5,
      label = "New skin deployed on Basque Wiki, French wiktionary, Portuguese wikiversity"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  geom_text(
    aes(
      x = as.Date('2020-07-28'), y = 2.5,
      label = "New skin deployed on Persian and Hebrew Wikipedias"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  geom_text(
    aes(
      x = as.Date('2020-08-05'), y = 2.5,
      label = "New skin deployed on French Wikipedia"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  scale_x_date("Date", labels = date_format("%b %Y"), date_breaks = "1 week") +
  scale_y_continuous("Number of clicks per day") +
  labs(title = "Daily sidebar clicks by wiki") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/daily_sidebar_clicks_bywiki.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# average clicks per wiki
sidebar_clicks_bywiki <- sidebar_clicks %>%
  group_by(wiki) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events_persession = total_events / unique_sessions
  )
sidebar_clicks_bywiki

# Daily clicks per wiki, split by sidebar state
p <- sidebar_clicks %>%
  group_by(date, wiki, sidebar_state) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = date, y = total_events, color = sidebar_state)) +
  geom_line(size = 1.5) +
  facet_wrap(~wiki, scales = "free") +
  scale_x_date("Date", labels = date_format("%b %Y"), date_breaks = "1 week") +
  scale_y_continuous("Number of clicks per day") +
  labs(title = "Daily sidebar clicks by wiki and sidebar state") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/daily_sidebar_clicks_wiki_sidebarstate.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# Total clicks per wiki, split by sidebar state
p <- sidebar_clicks %>%
  group_by(wiki, sidebar_state) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = sidebar_state, y = total_events, fill = sidebar_state)) +
  geom_bar(stat = 'identity') +
  facet_wrap(~wiki, scales = "free") +
  scale_y_continuous("Total number of clicks") +
  labs(title = "Total sidebar clicks by wiki and sidebar state") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/total_sidebar_clicks_bywiki.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# average clicks per wiki and sidebar state
sidebar_clicks_bywiki_sidebarstatus <- sidebar_clicks %>%
  group_by(wiki, sidebar_state) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events_persession = total_events / unique_sessions
  )
sidebar_clicks_bywiki_sidebarstatus

# Daily clicks by user edit count ----
p <- sidebar_clicks %>%
  # remove anonymous users
  filter(logged_in_status == 'logged-in') %>%
  group_by(date, user_edit_count) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = date, y = total_events, color = user_edit_count)) +
  geom_line(size = 1.5) +
  scale_y_continuous("Number of clicks per day") +
  scale_x_date("Date", labels = date_format("%d %b %Y"), date_breaks = "1 week") +
  labs(title = "Daily sidebar clicks by user edit count") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/daily_sidebar_clicks_byusereditcount.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# average clicks per editcount
sidebar_clicks_byeditcount <- sidebar_clicks %>%
  # remove anonymous users
  filter(logged_in_status == 'logged-in') %>%
  group_by(user_edit_count) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events_persession = total_events / unique_sessions
  )
sidebar_clicks_byeditcount

p <- sidebar_clicks %>%
  # remove anonymous users
  filter(logged_in_status == 'logged-in') %>%
  group_by(date, user_edit_count, sidebar_state) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = date, y = total_events, color = sidebar_state)) +
  geom_line(size = 1.5) +
  facet_wrap(~user_edit_count, scales = "free") +
  scale_x_date("Date", labels = date_format("%b %Y"), date_breaks = "1 week") +
  scale_y_continuous("Number of clicks per day") +
  labs(title = "Daily sidebar clicks by user edit count and sidebar state") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/daily_sidebar_clicks_byusereditcount_sidebarstate.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

p <- sidebar_clicks %>%
  # remove anonymous users (the filter keeps logged-in sessions only)
  filter(logged_in_status == 'logged-in') %>%
  group_by(user_edit_count, sidebar_state) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = sidebar_state, y = total_events, fill = sidebar_state)) +
  geom_col() +
  facet_wrap(~user_edit_count, scales = 'free') +
  scale_y_continuous("Total number of clicks") +
  labs(title = "Total sidebar clicks by user edit count") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/total_sidebar_clicks_byusereditcount_sidebarstate.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# average clicks per editcount and sidebar state
# (overwrites the per-editcount table computed above)
sidebar_clicks_byeditcount <- sidebar_clicks %>%
  # remove anonymous users (the filter keeps logged-in sessions only)
  filter(logged_in_status == 'logged-in') %>%
  group_by(user_edit_count, sidebar_state) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events = total_events / unique_sessions
  )
sidebar_clicks_byeditcount

# Chart overall sidebar clicks over time by logged in vs logged out ----
p <- sidebar_clicks %>%
  group_by(date, logged_in_status) %>%
  summarise(
    total_clicks = sum(events),
    avg_clicks = sum(events) / n_distinct(session)
  ) %>%
  ggplot(aes(x = date, y = total_clicks, color = logged_in_status)) +
  geom_line(size = 1.5) +
  geom_vline(xintercept = vertical_lines, linetype = "dashed", color = "black") +
  geom_text(
    aes(
      x = as.Date('2020-07-22'), y = 1.5E3,
      label = "New skin deployed on Basque Wiki, Fr wiktionary, Pt wikiversity"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  geom_text(
    aes(
      x = as.Date('2020-07-28'), y = 1.5E3,
      label = "New skin deployed on Persian and Hebrew Wikipedias"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  geom_text(
    aes(
      x = as.Date('2020-08-05'), y = 1.5E3,
      label = "New skin deployed on French Wikipedia"
    ),
    size = 3.7, vjust = -1.2, angle = 90, color = "black"
  ) +
  scale_x_date("Date", labels = date_format("%b %Y"), date_breaks = "1 week") +
  scale_y_continuous("Number of clicks per day") +
  labs(title = "Daily sidebar clicks by logged in vs anonymous") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/daily_sidebar_clicks_byloggedin.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# average clicks per logged in status
sidebar_clicks_byanon <- sidebar_clicks %>%
  group_by(logged_in_status) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events = total_events / unique_sessions
  )
sidebar_clicks_byanon

p <- sidebar_clicks %>%
  group_by(date, logged_in_status, sidebar_state) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = date, y = total_events, color = sidebar_state)) +
  geom_line(size = 1.5) +
  facet_wrap(~logged_in_status, scales = "free") +
  scale_x_date("Date", labels = date_format("%b %Y"), date_breaks = "1 week") +
  scale_y_continuous("Number of clicks per day") +
  labs(title = "Daily sidebar clicks by logged in status and sidebar state") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/daily_sidebar_clicks_byloggedin_sidebarstate.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

p <- sidebar_clicks %>%
  group_by(logged_in_status, sidebar_state) %>%
  summarise(total_events = sum(events)) %>%
  ggplot(aes(x = sidebar_state, y = total_events, fill = sidebar_state)) +
  geom_bar(stat = 'identity') +
  facet_wrap(~logged_in_status, scales = "free") +
  scale_y_continuous("Total number of clicks") +
  # Title corrected: this chart is faceted by logged-in status, not by wiki.
  labs(title = "Total sidebar clicks by logged in status and sidebar state") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/total_sidebar_clicks_byloggedin_sidebarstate.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# Chart histogram showing frequency of clicks per session ----
p <- sidebar_clicks %>%
  group_by(session) %>%
  summarize(sidebar_clicks = sum(events)) %>%
  # keep only sessions with fewer than 20 clicks for visibility
  filter(sidebar_clicks < 20) %>%
  ggplot(aes(x = sidebar_clicks)) +
  geom_histogram(binwidth = 1, fill = 'turquoise3') +
  scale_y_continuous("Frequency") +
  scale_x_continuous("Number of sidebar clicks per session", breaks = seq(1, 20, 1)) +
  labs(title = "Frequency of clicks to collapse or uncollapse per session") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/sidebar_click_frequency.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# Chart histogram showing frequency of clicks per session, by wiki
p <- sidebar_clicks %>%
  group_by(session, wiki) %>%
  summarize(sidebar_clicks = sum(events)) %>%
  # keep only sessions with fewer than 20 clicks for visibility
  filter(sidebar_clicks < 20) %>%
  ggplot(aes(x = sidebar_clicks)) +
  geom_histogram(binwidth = 1, fill = 'turquoise3') +
  facet_wrap(~wiki, scales = 'free') +
  scale_y_continuous("Frequency") +
  scale_x_continuous("Number of sidebar clicks per session", breaks = seq(1, 20, 2)) +
  labs(title = "Frequency of clicks to collapse or uncollapse per session by wiki") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/sidebar_click_frequency_bywiki.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# Chart histogram showing frequency of clicks per session, by user edit count
p <- sidebar_clicks %>%
  group_by(session, user_edit_count) %>%
  summarize(sidebar_clicks = sum(events)) %>%
  # keep only sessions with fewer than 20 clicks for visibility
  filter(sidebar_clicks < 20) %>%
  ggplot(aes(x = sidebar_clicks)) +
  geom_histogram(binwidth = 1, fill = 'turquoise3') +
  facet_wrap(~user_edit_count, scales = 'free') +
  scale_y_continuous("Frequency") +
  scale_x_continuous("Number of sidebar clicks per session", breaks = seq(1, 20, 2)) +
  labs(title = "Frequency of clicks to collapse or uncollapse per session by user edit count") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/sidebar_click_frequency_byeditcount.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# Chart histogram showing frequency of clicks per session, by logged-in status
p <- sidebar_clicks %>%
  group_by(session, logged_in_status) %>%
  summarize(sidebar_clicks = sum(events)) %>%
  # keep only sessions with fewer than 20 clicks for visibility
  filter(sidebar_clicks < 20) %>%
  ggplot(aes(x = sidebar_clicks)) +
  geom_histogram(binwidth = 1, fill = 'turquoise3') +
  facet_wrap(~logged_in_status, scales = 'free') +
  scale_y_continuous("Frequency") +
  scale_x_continuous("Number of sidebar clicks per session", breaks = seq(1, 20, 2)) +
  labs(title = "Frequency of clicks to collapse or uncollapse the sidebar per session") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/sidebar_click_frequency_byloggedout.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

## Count types of click events by session, wiki and edit count ----
# Per session/wiki/status/edit bucket: sidebar clicks, all clicks, all events.
# The query is kept multi-line: SQL `--` comments run to the end of the line.
query <- "SELECT
  event.token AS session,
  wiki AS wiki,
  event.isAnon AS logged_in_status,
  event.editCountBucket AS user_edit_count,
  SUM(CAST(event.action = 'click' and event.name = 'ui.sidebar' AS INT)) as sidebar_click,
  SUM(CAST(event.action = 'click' AS INT)) as click_event,
  COUNT(*) all_events
FROM event.desktopwebuiactionstracking
WHERE
  year = 2020
  AND ((month=07 AND day >= 22) OR month= 08 )
  -- sidebar is collapsible only on new vector skin
  AND event.skinversion = 2
  AND wiki <> 'testwiki'
  AND useragent.is_bot = false
GROUP BY
  event.token,
  wiki,
  event.isAnon,
  event.editCountBucket
"

all_clicks <- wmfdata::query_hive(query)

# The column holds event.isAnon, so 'false' means the user is logged in.
all_clicks$logged_in_status <- ifelse(
  all_clicks$logged_in_status == 'false', "logged-in", "logged-out"
)

# Counts by session
count_clicks_bysession <- all_clicks %>%
  group_by(session) %>%
  summarize(
    sidebar_click = sum(sidebar_click),
    click_event = sum(click_event),
    all_events = sum(all_events)
  )
head(count_clicks_bysession)

# Percent of all sessions with at least 1 click to the sidebar
sidebar_session_prop <- count_clicks_bysession %>%
  mutate(
    w_sidebar_click = ifelse(sidebar_click >= 1, 'true', 'false'),
    w_click = ifelse(click_event >= 1, 'true', 'false')
  ) %>%
  summarize(
    num_sidebar_sessions = sum(w_sidebar_click == 'true'),
    num_allclick_sessions = sum(w_click == 'true'),
    num_all_sessions = n(),
    pct_click_sessions = num_sidebar_sessions / num_allclick_sessions * 100,
    pct_all_sessions = num_sidebar_sessions / num_all_sessions * 100
  )
head(sidebar_session_prop)

# Counts by session and wiki
count_clicks_bysessionwiki <- all_clicks %>%
  group_by(session, wiki) %>%
  summarize(
    sidebar_click = sum(sidebar_click),
    click_event = sum(click_event),
    all_events = sum(all_events)
  )

# Percent of all sessions with at least 1 click to the sidebar by wiki
sidebar_session_prop_bywiki <- count_clicks_bysessionwiki %>%
  mutate(
    w_sidebar_click = ifelse(sidebar_click >= 1, 'true', 'false'),
    w_click = ifelse(click_event >= 1, 'true', 'false')
  ) %>%
  group_by(wiki) %>%
  summarize(
    num_sessions_wsidebarclick = sum(w_sidebar_click == 'true'),
    num_sessions_wclick = sum(w_click == 'true'),
    num_all_sessions = n(),
    pct_sidebarclick_of_allclick_sessions = num_sessions_wsidebarclick / num_sessions_wclick * 100,
    pct_sidebarclick_of_all_sessions = num_sessions_wsidebarclick / num_all_sessions * 100
  )
sidebar_session_prop_bywiki

# Counts by session and user edit count
count_clicks_bysession_editcount <- all_clicks %>%
  # remove logged out users
  filter(logged_in_status == 'logged-in') %>%
  group_by(session, user_edit_count) %>%
  summarize(
    sidebar_click = sum(sidebar_click),
    click_event = sum(click_event),
    all_events = sum(all_events)
  )

# Percent of all sessions with at least 1 click to the sidebar by user edit count
sidebar_session_prop_byeditcount <- count_clicks_bysession_editcount %>%
  mutate(
    w_sidebar_click = ifelse(sidebar_click >= 1, 'true', 'false'),
    w_click = ifelse(click_event >= 1, 'true', 'false')
  ) %>%
  group_by(user_edit_count) %>%
  summarize(
    num_sessions_wsidebarclick = sum(w_sidebar_click == 'true'),
    num_sessions_wclick = sum(w_click == 'true'),
    num_all_sessions = n(),
    pct_sidebarclick_of_allclick_sessions = num_sessions_wsidebarclick / num_sessions_wclick * 100,
    pct_sidebarclick_of_all_sessions = num_sessions_wsidebarclick / num_all_sessions * 100
  )
head(sidebar_session_prop_byeditcount)

# Counts by session and logged in status
count_clicks_bysession_isanon <- all_clicks %>%
  group_by(session, logged_in_status) %>%
  summarize(
    sidebar_click = sum(sidebar_click),
    click_event = sum(click_event),
    all_events = sum(all_events)
  )

# Percent of all sessions with at least 1 click to the sidebar by logged in status
sidebar_session_prop_byloggedstate <- count_clicks_bysession_isanon %>%
  mutate(
    w_sidebar_click = ifelse(sidebar_click >= 1, 'true', 'false'),
    w_click = ifelse(click_event >= 1, 'true', 'false')
  ) %>%
  group_by(logged_in_status) %>%
  summarize(
    num_sessions_wsidebarclick = sum(w_sidebar_click == 'true'),
    num_sessions_wclick = sum(w_click == 'true'),
    num_all_sessions = n(),
    pct_sidebarclick_of_allclick_sessions = num_sessions_wsidebarclick / num_sessions_wclick * 100,
    pct_sidebarclick_of_all_sessions = num_sessions_wsidebarclick / num_all_sessions * 100
  )
head(sidebar_session_prop_byloggedstate)

# Find session length and sidebar click time for all modern vector skin sessions ----
# One row per (session, wiki, collapse click, uncollapse click) combination;
# sessions without a given click type get NULL from the LEFT JOINs.
# The query is kept multi-line: SQL `--` comments run to the end of the line.
query <- "
WITH start_session AS (
  SELECT
    event.token AS session,
    wiki,
    MIN(dt) AS session_start_ts
  FROM event.desktopwebuiactionstracking
  WHERE
    year = 2020
    AND ((month=07 AND day >= 22) OR month= 08 )
    -- sidebar is collapsible only on new vector skin
    AND event.skinversion = 2
    AND wiki <> 'testwiki'
    AND useragent.is_bot = false
  GROUP BY
    event.token,
    wiki
),
end_session AS (
  SELECT
    event.token AS session,
    wiki,
    MAX(dt) AS session_end_ts
  FROM event.desktopwebuiactionstracking
  WHERE
    year = 2020
    AND ((month=07 AND day >= 22) OR month= 08 )
    -- sidebar is collapsible only on new vector skin
    AND event.skinversion = 2
    AND wiki <> 'testwiki'
    AND useragent.is_bot = false
  GROUP BY
    event.token,
    wiki
),
uncollapse_click_event AS (
  SELECT
    event.token AS session,
    wiki,
    dt AS uncollapse_click_ts
  FROM event.desktopwebuiactionstracking
  WHERE
    -- review clicks to the sidebar
    event.name = 'ui.sidebar'
    AND event.action = 'click'
    AND event.isSidebarCollapsed = false
    AND year = 2020
    AND ((month=07 AND day >= 22) OR month= 08 )
    -- sidebar is collapsible only on new vector skin
    AND event.skinversion = 2
    AND wiki <> 'testwiki'
    AND useragent.is_bot = false
),
collapse_click_event AS (
  SELECT
    event.token AS session,
    wiki,
    dt AS collapse_click_ts
  FROM event.desktopwebuiactionstracking
  WHERE
    -- review clicks to the sidebar
    event.name = 'ui.sidebar'
    AND event.action = 'click'
    AND event.isSidebarCollapsed = true
    AND year = 2020
    AND ((month=07 AND day >= 22) OR month= 08 )
    -- sidebar is collapsible only on new vector skin
    AND event.skinversion = 2
    AND wiki <> 'testwiki'
    AND useragent.is_bot = false
)
SELECT
  start_session.session,
  start_session.wiki,
  start_session.session_start_ts,
  end_session.session_end_ts,
  collapse_click_event.collapse_click_ts,
  uncollapse_click_event.uncollapse_click_ts
FROM start_session
JOIN end_session ON
  start_session.session = end_session.session AND
  start_session.wiki = end_session.wiki
LEFT JOIN collapse_click_event ON
  start_session.session = collapse_click_event.session AND
  start_session.wiki = collapse_click_event.wiki
LEFT JOIN uncollapse_click_event ON
  start_session.session = uncollapse_click_event.session AND
  start_session.wiki = uncollapse_click_event.wiki
"

sidebar_session_info <- wmfdata::query_hive(query)

# Calculate session lengths ----
# NOTE(review): missing timestamps are tested against the string 'NULL',
# presumably how query_hive() returns SQL NULLs -- confirm.
session_lengths <- sidebar_session_info %>%
  mutate(
    session_start = ymd_hms(session_start_ts),
    session_end = ymd_hms(session_end_ts),
    collapse_click_length_time = ymd_hms(collapse_click_ts) - session_start,
    session_length = session_end - session_start,
    incl_sidebar_click = ifelse(
      collapse_click_ts != 'NULL' | uncollapse_click_ts != 'NULL', 1, 0
    )
  ) %>%
  group_by(session) %>%
  # some sessions recorded multiple times because it was collapsed and
  # uncollapsed multiple times
  mutate(total_sidebar_events = sum(incl_sidebar_click)) %>%
  ungroup()

# Count all sessions with a sidebar_click
count_sessions_sidebar_click <- session_lengths %>%
  filter(incl_sidebar_click == 1) %>%
  summarise(num_sidebar_click_sessions = n_distinct(session))
count_sessions_sidebar_click

sidebar_multiple_event_session <- session_lengths %>%
  # review sessions with multiple sidebar events. Sidebar event defined as
  # collapsing and uncollapsing multiple times.
  filter(total_sidebar_events > 1) %>%
  summarize(multiple_event_sessions = n_distinct(session))
sidebar_multiple_event_session

sidebar_collapsed_prop <- session_lengths %>%
  # review sessions where the sidebar was collapsed just once.
  filter(
    collapse_click_ts != 'NULL',
    uncollapse_click_ts == 'NULL',
    session_length != 0,
    total_sidebar_events == 1
  ) %>%
  ## if sidebar was collapsed less than halfway through session then it was
  ## collapsed the majority of the time.
  ## Both durations are converted to seconds explicitly: difftime picks its
  ## units automatically, so the raw ratio is only right when both are in secs.
  mutate(
    collapsed_time = ifelse(
      as.double(collapse_click_length_time, units = 'secs') /
        as.double(session_length, units = 'secs') <= 0.5,
      "majority collapsed",
      "majority uncollapsed"
    )
  ) %>%
  summarize(
    num_sessions = n_distinct(session),
    pct_majority_collapsed = sum(collapsed_time == 'majority collapsed') / num_sessions * 100,
    pct_majority_uncollapsed = sum(collapsed_time == 'majority uncollapsed') / num_sessions * 100
  )
head(sidebar_collapsed_prop)

sidebar_collapsed_prop_bywiki <- session_lengths %>%
  # review sessions where the sidebar was collapsed just once.
  filter(
    collapse_click_ts != 'NULL',
    uncollapse_click_ts == 'NULL',
    session_length != 0,
    total_sidebar_events == 1
  ) %>%
  ## same "collapsed before the halfway point" rule as above, per wiki
  mutate(
    collapsed_time = ifelse(
      as.double(collapse_click_length_time, units = 'secs') /
        as.double(session_length, units = 'secs') <= 0.5,
      "majority collapsed",
      "majority uncollapsed"
    )
  ) %>%
  group_by(wiki) %>%
  summarize(
    num_sessions = n_distinct(session),
    prop_majority_collapsed = sum(collapsed_time == 'majority collapsed') / num_sessions * 100,
    prop_majority_uncollapsed = sum(collapsed_time == 'majority uncollapsed') / num_sessions * 100
  )
head(sidebar_collapsed_prop_bywiki)

# count sessions where a collapse event was followed by uncollapse event
session_collapse_then_uncollapse <- session_lengths %>%
  filter(
    total_sidebar_events == 1,
    uncollapse_click_ts != 'NULL',
    collapse_click_ts != 'NULL',
    uncollapse_click_ts > collapse_click_ts
  ) %>%
  summarize(session_collapse_then_uncollapse = n_distinct(session))
head(session_collapse_then_uncollapse)

# count sessions where a uncollapse event was followed by collapse event
session_uncollapse_then_collapse <- session_lengths %>%
  filter(
    total_sidebar_events == 1,
    uncollapse_click_ts != 'NULL',
    collapse_click_ts != 'NULL',
    uncollapse_click_ts < collapse_click_ts
  ) %>%
  # output column renamed to match what is being counted
  summarize(session_uncollapse_then_collapse = n_distinct(session))
head(session_uncollapse_then_collapse)

session_collapse_only_frequency <- session_lengths %>%
  filter(
    total_sidebar_events == 1,
    uncollapse_click_ts == 'NULL',
    collapse_click_ts != 'NULL'
  ) %>%
  summarize(session_collapse_only = n_distinct(session))
head(session_collapse_only_frequency)

session_uncollapse_only_frequency <- session_lengths %>%
  filter(
    total_sidebar_events == 1,
    uncollapse_click_ts != 'NULL',
    collapse_click_ts == 'NULL'
  ) %>%
  summarize(session_uncollapse_only = n_distinct(session))
head(session_uncollapse_only_frequency)

session_collapse_uncollapse_sametime <- session_lengths %>%
  filter(
    total_sidebar_events == 1,
    uncollapse_click_ts != 'NULL',
    collapse_click_ts != 'NULL',
    uncollapse_click_ts == collapse_click_ts
  ) %>%
  summarize(session_collapse_uncollapse_sametime = n_distinct(session))
head(session_collapse_uncollapse_sametime)

# Clicks on sidebar links (event names starting with 'n-'), both skin versions ----
query <- "
SELECT
  date_format(dt, 'yyyy-MM-dd') AS date,
  event.token as session,
  event.skinversion,
  wiki AS wiki,
  event.isAnon AS logged_in_status,
  event.editCountBucket AS user_edit_count,
  event.name AS name,
  COUNT(*) AS events
FROM event.desktopwebuiactionstracking
WHERE
  year = 2020
  AND((month=07 AND day >= 22) OR month= 08 )
  AND wiki <> 'testwiki'
  --- only reviewing clicks to sidebarlinks
  AND event.action = 'click'
  AND useragent.is_bot = false
  AND event.name LIKE 'n-%'
GROUP BY
  date_format(dt, 'yyyy-MM-dd'),
  event.token,
  event.skinversion,
  wiki,
  event.isAnon,
  event.editCountBucket,
  event.name
"

sidebar_link_interactions <- wmfdata::query_hive(query)

sidebar_link_interactions$skinversion <- ifelse(
  sidebar_link_interactions$skinversion == '1', "legacy", "modern"
)
# The column holds event.isAnon, so 'false' means the user is logged in.
sidebar_link_interactions$logged_in_status <- ifelse(
  sidebar_link_interactions$logged_in_status == 'false', "logged-in", "logged-out"
)
sidebar_link_interactions$skinversion <- as.factor(sidebar_link_interactions$skinversion)

# query to find percent of sessions with a sidebar link click
# (all events on the listed wikis; sidebar_link_click counts the link clicks
# within each date/session/skin/wiki/status/edit-bucket/name group)
query <- "
SELECT
  date_format(dt, 'yyyy-MM-dd') AS date,
  event.token as session,
  event.skinversion,
  wiki AS wiki,
  event.isAnon AS logged_in_status,
  event.editCountBucket AS user_edit_count,
  event.name AS name,
  SUM(CAST(event.action = 'click' AND event.name LIKE 'n-%' AS INT)) AS sidebar_link_click
FROM event.desktopwebuiactionstracking
WHERE
  year = 2020
  AND((month=07 AND day >= 22) OR month= 08 )
  AND wiki IN ('euwiki', 'fawiki', 'frwiki', 'frwiktionary', 'hewiki', 'ptwikiversity')
  AND useragent.is_bot = false
GROUP BY
  date_format(dt, 'yyyy-MM-dd'),
  event.token,
  event.skinversion,
  wiki,
  event.isAnon,
  event.editCountBucket,
  event.name
"

sidebar_link_clicks <- wmfdata::query_hive(query)
sidebar_link_clicks$skinversion <- ifelse(
  sidebar_link_clicks$skinversion == '1', "legacy", "modern"
)
# The column holds event.isAnon, so 'false' means the user is logged in.
sidebar_link_clicks$logged_in_status <- ifelse(
  sidebar_link_clicks$logged_in_status == 'false', "logged-in", "logged-out"
)
sidebar_link_clicks$skinversion <- as.factor(sidebar_link_clicks$skinversion)

## Average sidebar link events per session by skin type ----
sidebarlink_clicks_persession <- sidebar_link_interactions %>%
  group_by(skinversion) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events_persession = total_events / unique_sessions
  ) %>%
  arrange(desc(total_events))
head(sidebarlink_clicks_persession)

## Average events per session by skin type and event name
sidebarlink_clicks_persession_byname <- sidebar_link_interactions %>%
  group_by(skinversion, name) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events_persession = total_events / unique_sessions
  ) %>%
  arrange(desc(total_events))

# Ten most-clicked links per skin version (up to 20 rows across both skins)
top_20_sidebarlinks <- sidebarlink_clicks_persession_byname %>%
  group_by(skinversion) %>%
  slice_max(total_events, n = 10)
top_20_sidebarlinks

p <- sidebarlink_clicks_persession_byname %>%
  filter(name %in% top_20_sidebarlinks$name) %>%
  # y must name the column created above (avg_events_persession)
  ggplot(aes(x = name, y = avg_events_persession, fill = skinversion)) +
  geom_col(position = 'dodge') +
  scale_y_continuous("Average clicks per session by link type") +
  # x axis shows link names; skin version is encoded by fill
  scale_x_discrete("Sidebar link") +
  labs(title = "Average sidebar link clicks per session") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
p
ggsave("Figures/average_sidebar_link_events.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

## Percent of sessions with at least one sidebar link click, by skin type
sidebar_link_click_sessions <- sidebar_link_clicks %>%
  group_by(skinversion) %>%
  summarise(
    num_sessions = n_distinct(session),
    # sidebar_link_clicks has one row per date/session/.../name, so count
    # distinct sessions rather than rows; which() drops NA comparisons
    sessions_w_linkclick = n_distinct(session[which(sidebar_link_click > 0)]),
    pct_sessions_wlinkclick = sessions_w_linkclick / num_sessions * 100
  )
sidebar_link_click_sessions

## Average events per session by wiki and skin type
sidebarlink_clicks_persession_bywiki <- sidebar_link_interactions %>%
  group_by(wiki, skinversion) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events_persession = total_events / unique_sessions
  ) %>%
  arrange(desc(wiki))
sidebarlink_clicks_persession_bywiki

p <- sidebarlink_clicks_persession_bywiki %>%
  ggplot(aes(x = skinversion, y = avg_events_persession, fill = skinversion)) +
  geom_col() +
  facet_wrap(~wiki) +
  scale_y_continuous("Average clicks per session") +
  scale_x_discrete("Skin version") +
  labs(title = "Average sidebar link clicks per session by wiki") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/average_sidebar_link_events_bywiki.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

## Percent of sessions with a sidebar link click by skin type and wiki
sidebar_link_click_sessions_wiki <- sidebar_link_clicks %>%
  group_by(skinversion, wiki) %>%
  summarise(
    num_sessions = n_distinct(session),
    # sidebar_link_clicks has one row per date/session/.../name, so count
    # distinct sessions rather than rows; which() drops NA comparisons
    sessions_w_linkclick = n_distinct(session[which(sidebar_link_click > 0)]),
    pct_click_sessions = sessions_w_linkclick / num_sessions * 100
  ) %>%
  arrange(wiki)
sidebar_link_click_sessions_wiki

## Average events per session by user edit count and skin type ----
sidebarlink_clicks_persession_byeditcount <- sidebar_link_interactions %>%
  group_by(user_edit_count, skinversion) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events = total_events / unique_sessions
  ) %>%
  arrange(desc(user_edit_count))
sidebarlink_clicks_persession_byeditcount

p <- sidebarlink_clicks_persession_byeditcount %>%
  ggplot(aes(x = skinversion, y = avg_events, fill = skinversion)) +
  geom_col() +
  facet_wrap(~user_edit_count) +
  scale_y_continuous("Average clicks per session") +
  scale_x_discrete("Skin version") +
  labs(title = "Average sidebar link clicks per session by user edit count") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/average_sidebar_link_events_byeditcount.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

sidebar_link_click_sessions_editcount <- sidebar_link_clicks %>%
  group_by(skinversion, user_edit_count) %>%
  summarise(
    num_sessions = n_distinct(session),
    # distinct sessions with a click, not rows with a click
    sessions_w_linkclick = n_distinct(session[which(sidebar_link_click > 0)]),
    pct_click_sessions = sessions_w_linkclick / num_sessions * 100
  ) %>%
  arrange(user_edit_count)
sidebar_link_click_sessions_editcount

## Average events per session by logged in status and skin type ----
sidebarlink_clicks_persession_byanon <- sidebar_link_interactions %>%
  group_by(logged_in_status, skinversion) %>%
  summarize(
    total_events = sum(events),
    unique_sessions = n_distinct(session),
    avg_events_persession = total_events / unique_sessions
  ) %>%
  arrange(desc(logged_in_status))
sidebarlink_clicks_persession_byanon

p <- sidebarlink_clicks_persession_byanon %>%
  ggplot(aes(x = skinversion, y = avg_events_persession, fill = skinversion)) +
  geom_col() +
  facet_wrap(~logged_in_status) +
  scale_y_continuous("Average clicks per session") +
  scale_x_discrete("Skin version") +
  labs(title = "Average sidebar link clicks per session by logged in status") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/average_sidebar_link_events_byanon.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

sidebar_link_click_sessions_byanon <- sidebar_link_clicks %>%
  group_by(skinversion, logged_in_status) %>%
  summarise(
    num_sessions = n_distinct(session),
    # distinct sessions with a click, not rows with a click
    sessions_w_linkclick = n_distinct(session[which(sidebar_link_click > 0)]),
    pct_click_sessions = sessions_w_linkclick / num_sessions * 100
  ) %>%
  arrange(logged_in_status)
sidebar_link_click_sessions_byanon

## Donate link ('n-sitesupport') clicks per session ----
donate_clicks_persession <- sidebar_link_interactions %>%
  filter(name == 'n-sitesupport') %>%
  group_by(skinversion) %>%
  summarize(
    total_donate_clicks = sum(events),
    unique_sessions = n_distinct(session),
    avg_dontate_clicks_persession = total_donate_clicks / unique_sessions
  ) %>%
  arrange(desc(total_donate_clicks))
head(donate_clicks_persession)

donate_clicks_persession_wiki <- sidebar_link_interactions %>%
  filter(name == 'n-sitesupport') %>%
  group_by(skinversion, wiki) %>%
  summarize(
    total_donate_clicks = sum(events),
    unique_sessions = n_distinct(session),
    avg_donate_clicks_persession = total_donate_clicks / unique_sessions
  ) %>%
  arrange(desc(wiki))
head(donate_clicks_persession_wiki)

p <- donate_clicks_persession_wiki %>%
  # y must name the column created above (avg_donate_clicks_persession)
  ggplot(aes(x = wiki, y = avg_donate_clicks_persession, fill = skinversion)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_y_continuous("Average clicks per session") +
  # x axis shows wikis; skin version is encoded by fill
  scale_x_discrete("Wiki") +
  labs(title = "Average donate link clicks per session by wiki") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/average_donate_link_events.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

donate_clicks_persession_editcount <- sidebar_link_interactions %>%
  filter(name == 'n-sitesupport', logged_in_status == 'logged-in') %>%
  group_by(skinversion, user_edit_count) %>%
  summarize(
    total_donate_clicks = sum(events),
    unique_sessions = n_distinct(session),
    # column name kept as-is (including its spelling) for anything that reads it
    avg_clicks_perssion = total_donate_clicks / unique_sessions
  ) %>%
  arrange(desc(user_edit_count))
head(donate_clicks_persession_editcount)

p <- donate_clicks_persession_editcount %>%
  # y must name the column created above (avg_clicks_perssion)
  ggplot(aes(x = user_edit_count, y = avg_clicks_perssion, fill = skinversion)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_y_continuous("Average clicks per session") +
  # x axis shows edit-count buckets; skin version is encoded by fill
  scale_x_discrete("User edit count") +
  labs(title = "Average donate link clicks per session by user edit count") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )
p
ggsave("Figures/average_donate_link_events_byeditcount.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

donate_clicks_persession_byanon <- sidebar_link_interactions %>%
  filter(name == 'n-sitesupport') %>%
  group_by(skinversion, logged_in_status) %>%
  summarize(
    total_donate_clicks = sum(events),
    unique_sessions = n_distinct(session),
    avg_clicks_persession = total_donate_clicks / unique_sessions
  ) %>%
  arrange(desc(logged_in_status))
head(donate_clicks_persession_byanon)

# Donate link clicks per session by logged-in status. The y aesthetic uses
# avg_clicks_persession, the column computed for donate_clicks_persession_byanon.
p <- donate_clicks_persession_byanon %>%
  ggplot(aes(x = skinversion, y = avg_clicks_persession, fill = skinversion)) +
  geom_col() +
  facet_wrap(~logged_in_status) +
  scale_y_continuous("Average clicks per session") +
  scale_x_discrete("Skin version") +
  labs(title = "Average donate link clicks per session by logged in status") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 18)
  )

p

# NOTE(review): "byanont" in the file name looks like a typo for "byanon";
# left unchanged in case other documents reference this path.
ggsave("Figures/average_donate_link_events_byanont.png", p,
       width = 16, height = 8, units = "in", dpi = 300)

# Pageviews to sidebar link targets, pre vs post deployment ----

# Reshape one wiki's daily sidebar-link view counts to long format and total
# the views per logged-in status before and after the deployment date.
#
# link_views:  result of one of the webrequest queries below; columns `date`
#              (character, yyyy-MM-dd), `logged_in_status`, and one count
#              column per sidebar link.
# pre_start:   first day of the pre-deployment window (yyyy-MM-dd string).
# deploy_date: deployment day (yyyy-MM-dd string); rows dated on or after it
#              are labelled "post_deploy".
#
# Every column other than `date` and `logged_in_status` is treated as a link
# count, so no link is dropped when a query returns a different number of
# link columns. Dates are compared as ISO-formatted strings.
summarise_pre_post_deploy <- function(link_views, pre_start, deploy_date) {
  link_views %>%
    gather(sidebar_link, n_views, -date, -logged_in_status) %>%
    mutate(
      deploy_status = ifelse(
        date >= pre_start & date < deploy_date,
        "pre_deploy",
        "post_deploy"
      )
    ) %>%
    group_by(logged_in_status, deploy_status) %>%
    summarize(n_views = sum(n_views))
}

# FrWiki - Deployment Date August 5th
query <- "SELECT CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out') as logged_in_status,
SUM(IF((uri_path = '/wiki/Wikip%C3%A9dia:Accueil_principal'), 1, 0)) AS main_page,
SUM(IF((uri_path = '/wiki/Sp%C3%A9cial:Page_au_hasard'), 1, 0)) AS random_page,
SUM(IF((uri_path = '/wiki/Portail:Accueil'), 1, 0)) AS contents,
SUM(IF((uri_path = '/wiki/Wikip%C3%A9dia:Contact'), 1, 0)) AS contact,
SUM(IF((uri_path = '/wiki/Aide:Accueil'), 1, 0)) AS help,
SUM(IF((uri_path = '/wiki/Wikip%C3%A9dia:Accueil_de_la_communaut%C3%A9'), 1, 0)) AS community_portal,
SUM(IF((uri_path = '/wiki/Sp%C3%A9cial:Modifications_r%C3%A9centes'), 1, 0)) AS recent_changes
FROM wmf.webrequest TABLESAMPLE(BUCKET 1 OUT OF 128 ON hostname, sequence)
WHERE year = 2020
--review two weeks before and after
AND ((month=07 AND day >= 22) OR (month= 08 and day <= 18))
AND agent_type = 'user'
AND normalized_host.project_family = 'wikipedia'
AND normalized_host.project = 'fr'
AND access_method = 'desktop'
AND referer_class = 'internal' --isolate to only views to these pages from within wikipedia.
AND webrequest_source = 'text'
GROUP BY CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')),
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out')"

frwiki_sidebar_link_views <- wmfdata::query_hive(query)

frwiki_pre_post_deploy <- summarise_pre_post_deploy(
  frwiki_sidebar_link_views,
  pre_start = '2020-07-22',
  deploy_date = '2020-08-05'
)

frwiki_pre_post_deploy

# HeWiki - Deployment Date on July 28th
query <- "SELECT CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out') as logged_in_status,
SUM(IF((uri_path = '/wiki/%D7%A2%D7%9E%D7%95%D7%93_%D7%A8%D7%90%D7%A9%D7%99'), 1, 0)) AS main_page,
SUM(IF((uri_path = '/wiki/%D7%9E%D7%99%D7%95%D7%97%D7%93:%D7%90%D7%A7%D7%A8%D7%90%D7%99'), 1, 0)) AS random_page,
SUM(IF((uri_path = '/wiki/%D7%A4%D7%95%D7%A8%D7%98%D7%9C:%D7%A4%D7%95%D7%A8%D7%98%D7%9C%D7%99%D7%9D'), 1, 0)) AS contents,
SUM(IF((uri_path = '/wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%99%D7%A6%D7%99%D7%A8%D7%AA_%D7%A7%D7%A9%D7%A8'), 1, 0)) AS contact,
SUM(IF((uri_path = '/wiki/%D7%A2%D7%96%D7%A8%D7%94:%D7%AA%D7%A4%D7%A8%D7%99%D7%98_%D7%A8%D7%90%D7%A9%D7%99'), 1, 0)) AS help,
SUM(IF((uri_path = '/wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%A9%D7%A2%D7%A8_%D7%94%D7%A7%D7%94%D7%99%D7%9C%D7%94'), 1, 0)) AS community_portal,
SUM(IF((uri_path = '/wiki/%D7%9E%D7%99%D7%95%D7%97%D7%93:%D7%A9%D7%99%D7%A0%D7%95%D7%99%D7%99%D7%9D_%D7%90%D7%97%D7%A8%D7%95%D7%A0%D7%99%D7%9D'), 1, 0)) AS recent_changes
FROM wmf.webrequest TABLESAMPLE(BUCKET 1 OUT OF 64 ON hostname, sequence)
WHERE year = 2020
AND ((month=07 AND day >= 14) OR (month= 08 and day <= 10))
AND agent_type = 'user'
AND normalized_host.project_family = 'wikipedia'
AND normalized_host.project = 'he'
AND access_method = 'desktop'
AND referer_class = 'internal' --isolate to only views to these pages from within wikipedia.
AND webrequest_source = 'text'
GROUP BY CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')),
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out')"

hewiki_sidebar_link_views <- wmfdata::query_hive(query)

hewiki_pre_post_deploy <- summarise_pre_post_deploy(
  hewiki_sidebar_link_views,
  pre_start = '2020-07-14',
  deploy_date = '2020-07-28'
)

hewiki_pre_post_deploy

# FaWiki - deployment date July 28th
query <- "SELECT CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out') as logged_in_status,
SUM(IF((uri_path ='/wiki/%D8%B5%D9%81%D8%AD%D9%87%D9%94_%D8%A7%D8%B5%D9%84%DB%8C'), 1, 0)) AS main_page,
SUM(IF((uri_path = '/wiki/%D9%88%DB%8C%DA%98%D9%87:%D8%B5%D9%81%D8%AD%D9%87%D9%94_%D8%AA%D8%B5%D8%A7%D8%AF%D9%81%DB%8C'), 1, 0)) AS random_page,
SUM(IF((uri_path = '/wiki/%D9%88%DB%8C%DA%A9%DB%8C%E2%80%8C%D9%BE%D8%AF%DB%8C%D8%A7:%D8%AA%D9%85%D8%A7%D8%B3_%D8%A8%D8%A7_%D9%85%D8%A7'), 1, 0)) AS contact,
SUM(IF((uri_path = '/wiki/%D8%B1%D8%A7%D9%87%D9%86%D9%85%D8%A7:%D9%81%D9%87%D8%B1%D8%B3%D8%AA'), 1, 0)) AS help,
SUM(IF((uri_path = '/wiki/%D9%88%DB%8C%DA%98%D9%87:%D8%AA%D8%BA%DB%8C%DB%8C%D8%B1%D8%A7%D8%AA_%D8%A7%D8%AE%DB%8C%D8%B1'), 1, 0)) AS recent_changes
FROM wmf.webrequest TABLESAMPLE(BUCKET 1 OUT OF 64 ON hostname, sequence)
WHERE year = 2020
AND ((month=07 AND day >= 14) OR (month= 08 and day <= 10))
AND agent_type = 'user'
AND normalized_host.project_family = 'wikipedia'
AND normalized_host.project = 'fa'
AND access_method = 'desktop'
AND referer_class = 'internal' --isolate to only views to these pages from within wikipedia.
AND webrequest_source = 'text'
GROUP BY CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')),
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out')"

fawiki_sidebar_link_views <- wmfdata::query_hive(query)

fawiki_pre_post_deploy <- summarise_pre_post_deploy(
  fawiki_sidebar_link_views,
  pre_start = '2020-07-14',
  deploy_date = '2020-07-28'
)

fawiki_pre_post_deploy

# EuWiki - deployment date July 22nd
query <- "SELECT CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out') as logged_in_status,
SUM(IF((uri_path = '/wiki/Azala'), 1, 0)) AS main_page,
SUM(IF((uri_path = '/wiki/Berezi:Ausazkoa'), 1, 0)) AS random_page,
SUM(IF((uri_path = '/wiki/Laguntza:Sarrera'), 1, 0)) AS help,
SUM(IF((uri_path = '/wiki/Berezi:AzkenAldaketak'), 1, 0)) AS recent_changes
FROM wmf.webrequest TABLESAMPLE(BUCKET 1 OUT OF 64 ON hostname, sequence)
WHERE year = 2020
AND ((month=07 AND day >= 08) OR (month= 08 and day <= 04))
AND agent_type = 'user'
AND normalized_host.project_family = 'wikipedia'
AND normalized_host.project = 'eu'
AND access_method = 'desktop'
AND referer_class = 'internal' --isolate to only views to these pages from within wikipedia.
AND webrequest_source = 'text'
GROUP BY CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')),
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out')"

euwiki_sidebar_link_views <- wmfdata::query_hive(query)

euwiki_pre_post_deploy <- summarise_pre_post_deploy(
  euwiki_sidebar_link_views,
  pre_start = '2020-07-08',
  deploy_date = '2020-07-22'
)

euwiki_pre_post_deploy

# FrWiktionary - deployment date July 22nd
query <- "SELECT CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out') as logged_in_status,
SUM(IF((uri_path = '/wiki/Wiktionnaire:Page_d%E2%80%99accueil'), 1, 0)) AS main_page,
SUM(IF((uri_path = '/wiki/Sp%C3%A9cial:Page_au_hasard'), 1, 0)) AS random_page,
SUM(IF((uri_path = '/wiki/Wiktionnaire:Portails'), 1, 0)) AS contents,
SUM(IF((uri_path = '/wiki/Wiktionnaire:Accueil_communautaire'), 1, 0)) AS community_portal,
SUM(IF((uri_path = '/wiki/Aide:Sommaire'), 1, 0)) AS help,
SUM(IF((uri_path = '/wiki/Sp%C3%A9cial:Modifications_r%C3%A9centes'), 1, 0)) AS recent_changes
FROM wmf.webrequest TABLESAMPLE(BUCKET 1 OUT OF 64 ON hostname, sequence)
WHERE year = 2020
--review two weeks before and after deployment date
AND ((month=07 AND day >= 08) OR (month= 08 and day <= 04))
AND agent_type = 'user'
AND normalized_host.project_family = 'wiktionary'
AND normalized_host.project = 'fr'
AND access_method = 'desktop'
AND referer_class = 'internal' --isolate to only views to these pages from within wikipedia.
AND webrequest_source = 'text'
GROUP BY CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')),
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out')"

frwiktionary_sidebar_link_views <- wmfdata::query_hive(query)

# The query returns six link columns; all of them (including recent_changes)
# are included in the totals.
frwiktionary_pre_post_deploy <- summarise_pre_post_deploy(
  frwiktionary_sidebar_link_views,
  pre_start = '2020-07-08',
  deploy_date = '2020-07-22'
)

frwiktionary_pre_post_deploy

# ptwikiversity - deployment date July 22nd
query <- "SELECT CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out') as logged_in_status,
SUM(IF((uri_path = '/wiki/P%C3%A1gina_principal'), 1, 0)) AS main_page,
SUM(IF((uri_path = '/wiki/Wikiversidade:Portal_comunit%C3%A1rio'), 1, 0)) AS community_portal,
SUM(IF((uri_path = '/wiki/Especial:Mudan%C3%A7as_recentes'), 1, 0)) AS recent_changes
-- increase sampling size due to low number of events
FROM wmf.webrequest
WHERE year = 2020
-- review two weeks before and after deployment date
AND ((month=07 AND day >= 08) OR (month= 08 and day <= 04))
AND agent_type = 'user'
AND normalized_host.project_family = 'wikiversity'
AND normalized_host.project = 'pt'
AND access_method = 'desktop'
AND referer_class = 'internal' --isolate to only views to these pages from within wikipedia.
AND webrequest_source = 'text'
GROUP BY CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')),
IF(x_analytics_map['loggedIn'] = '1', 'logged-in', 'logged-out')"

ptwikiversity_sidebar_link_views <- wmfdata::query_hive(query)

ptwikiversity_pre_post_deploy <- summarise_pre_post_deploy(
  ptwikiversity_sidebar_link_views,
  pre_start = '2020-07-08',
  deploy_date = '2020-07-22'
)

ptwikiversity_pre_post_deploy

# Year-over-year desktop pageviews on the test wikis ----

query <- "
SELECT CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
project AS wiki,
SUM(view_count) as pageviews
FROM wmf.projectview_hourly
WHERE agent_type = 'user'
AND access_method = 'desktop'
AND year >= 2019
-- only review interactions from within the site
AND referer_class = 'internal'
AND project IN ('fr.wikipedia', 'he.wikipedia', 'fa.wikipedia', 'eu.wikipedia', 'fr.wiktionary', 'pt.wikiversity')
GROUP BY CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')), project
"

desktop_pageviews <- wmfdata::query_hive(query)

desktop_pageviews$date <- as.Date(desktop_pageviews$date, format = "%Y-%m-%d")

# Review year over year changes in desktop pageviews from internal referers
pageviews_yoy <- desktop_pageviews %>%
  filter(date <= as.Date('2020-08-26')) %>%
  mutate(year = factor(year(date)))

# Put every observation on a 2020 calendar so the yearly series overlay on a
# shared x axis; the original year is kept in the `year` factor.
year(pageviews_yoy$date) <- 2020

# theme_bw() is a complete theme, so it must come before the legend tweak;
# adding it last would reset legend.position.
p <- ggplot(pageviews_yoy, aes(x = date, y = pageviews)) +
  geom_line(aes(color = year)) +
  facet_wrap(~wiki, scales = 'free') +
  scale_x_date(date_labels = "%b", date_breaks = "1 month", minor_breaks = NULL) +
  scale_y_continuous(labels = polloi::compress) +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = NULL,
    y = "Pageviews",
    color = "Year",
    title = "Year-over-year desktop pageviews from internal referers to test Wikis with Collapsible Sidebar deployed as Default"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

p