# Investigation of the user-count imbalance between the 'default-visual' and
# 'default-source' buckets recorded in event.editattemptstep.

# Evaluate `expr` with package start-up messages, warnings and messages muted.
shhh <- function(expr) {
  suppressPackageStartupMessages(suppressWarnings(suppressMessages(expr)))
}

shhh({
  library(tidyverse)
  library(lubridate)
  library(scales)
  library(data.table)
})

# Helpers ----

# Count distinct users and distinct edit attempts in `df`, with one row per
# combination of the grouping columns passed through `...`.
#
# The result is explicitly ungrouped: summarise() only drops the last grouping
# level, and leaving the data grouped by `bucket` while `bucket` is edited
# later would leave the grouping metadata out of sync with the column values.
count_users_and_attempts <- function(df, ...) {
  df %>%
    group_by(...) %>%
    summarise(
      users = n_distinct(user_id),
      attempts = n_distinct(edit_attempt_id)
    ) %>%
    ungroup()
}

# Prepare per-bucket counts for reshaping: store the counts as doubles and
# rename the two bucket values so they become syntactic column names
# (`default_source`, `default_visual`) once spread into columns.
prepare_bucket_counts <- function(counts) {
  counts %>%
    ungroup() %>%
    mutate(
      users = as.numeric(users),
      attempts = as.numeric(attempts),
      bucket = recode(
        bucket,
        "default-source" = "default_source",
        "default-visual" = "default_visual"
      )
    )
}

# Reshape prepared counts to one row per remaining dimension (browser, wiki,
# ...) with one user-count column per bucket, then compute the share of users
# that landed in the default-source bucket, largest share first.
percent_wikitext_users_by <- function(counts) {
  counts %>%
    select(-attempts) %>% # look at user counts only
    pivot_wider(names_from = bucket, values_from = users) %>%
    # Alternative metric kept from the original analysis:
    # mutate(percent_diff = round(abs((default_source - default_visual) /
    #   default_visual * 100), 3)) %>%
    mutate(
      percent_wikitext_users =
        default_source / (default_source + default_visual) * 100
    ) %>%
    arrange(desc(percent_wikitext_users))
}

# Data ----

# The pieces are joined with single spaces, so the query text sent to Hive is
# unchanged (including its leading and trailing space).
# In the date filter AND binds tighter than OR, i.e. it reads as
# (month = 7 and day >= 14) or month >= 8.
query <- paste(
  "",
  "SELECT",
  "event.editing_session_id as edit_attempt_id,",
  "wiki,",
  "event.platform as platform,",
  "useragent.browser_family as browser_family,",
  "useragent.os_family as os_family,",
  "event.editor_interface as interface,",
  "if(event.user_id != 0, concat(wiki, '-', event.user_id), event.anonymous_user_token) as user_id,",
  "event.user_id = 0 as user_is_anonymous_byid,",
  "if(event.anonymous_user_token is NULL, false, true) as user_is_anonymous_bytoken,",
  "event.user_id != 0 as user_is_registered,",
  "event.action as action,",
  "event.init_timing as init_timing,",
  "event.bucket,",
  "geocoded_data['country'] as country,",
  "event.user_editcount as user_edit_count",
  "FROM event.editattemptstep",
  "WHERE event.bucket in ('default-visual', 'default-source')",
  "and year = 2019",
  "and ( month = 7 and day >= 14 or month >= 8)",
  "",
  sep = " "
)

sessions <- wmf::query_hive(query)

# Overall ----

# Recheck the overall user count per bucket to confirm any change in the
# imbalance.
sessions_all <- count_users_and_attempts(sessions, bucket)
sessions_all

# Anonymous vs registered ----

# NOTE(review): the filters below compare against the string "true", i.e. they
# assume the boolean columns come back from Hive as text - TODO confirm.

# Break down by anonymous users to identify any discrepancy that occurs there.
sessions_anonymous <- sessions %>%
  filter(user_is_anonymous_byid == "true") %>%
  count_users_and_attempts(bucket)
sessions_anonymous

# Look at user_is_anonymous_bytoken to confirm it matches
# user_is_anonymous_byid.
sessions_anonymous_bytoken <- sessions %>%
  filter(user_is_anonymous_bytoken == "true") %>%
  count_users_and_attempts(bucket)
sessions_anonymous_bytoken

# Break down by registered users to identify any discrepancy that occurs
# there.
sessions_registered <- sessions %>%
  filter(user_is_registered == "true") %>%
  count_users_and_attempts(bucket)
sessions_registered

# By browser ----

sessions_bybrowser <- sessions %>%
  count_users_and_attempts(bucket, browser_family) %>%
  arrange(browser_family)
head(sessions_bybrowser)

sessions_bybrowser <- prepare_bucket_counts(sessions_bybrowser)

# Browsers shown in the plot; restricted to the top browsers for visibility.
top_browsers <- c(
  "Chrome Mobile", "Mobile Safari", "Samsung Internet", "Chrome",
  "Chrome Mobile WebView", "UC Browser", "Opera Mobile", "Firefox Mobile",
  "Chrome Mobile iOS", "Facebook", "Android", "Mobile Safari UI/WKWebView",
  "Edge Mobile", "Edge"
)

# Look at test bucket user counts for top browsers.
# NOTE(review): the legend is hidden, so the fill colours are not labelled on
# the plot.
sessions_bybrowser_plot <- sessions_bybrowser %>%
  filter(browser_family %in% top_browsers) %>%
  ggplot(aes(x = browser_family, y = users, fill = bucket)) +
  geom_col() +
  scale_y_continuous("user counts") +
  labs(title = "Editor test bucket user counts by browser") +
  ggthemes::theme_tufte(base_size = 10, base_family = "Gill Sans") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    panel.grid = element_line(colour = "gray70"),
    legend.position = "none"
  )
sessions_bybrowser_plot

sessions_bybrowser_percent_imbalance <-
  percent_wikitext_users_by(sessions_bybrowser)
head(sessions_bybrowser_percent_imbalance)

# By country ----

sessions_bycountry <- sessions %>%
  count_users_and_attempts(bucket, country) %>%
  arrange(country, users)
head(sessions_bycountry)

# By wiki ----

sessions_bywiki <- sessions %>%
  count_users_and_attempts(bucket, wiki) %>%
  arrange(wiki)
sessions_bywiki

sessions_bywiki <- prepare_bucket_counts(sessions_bywiki)

sessions_bywiki_percent_imbalance <-
  percent_wikitext_users_by(sessions_bywiki)
head(sessions_bywiki_percent_imbalance)

# Unable to isolate the imbalance to a particular wiki. Similar imbalance
# trends seen.

# By action ----

sessions_byaction <- sessions %>%
  count_users_and_attempts(bucket, action) %>%
  arrange(action)
sessions_byaction