options(repr.plot.width = 4, repr.plot.height = 3) ## resizing plots
options(scipen = 999) ## has scientific notation ever annoyed you?
library(tidyverse)
library(ggsci)

set.seed(76548208)

# structural equation for W_1
# takes as input a vector U_W1 and returns a vector evaluating
# f_{W,1}(U_W1)
f_W1 <- function(U_W1){
  return(U_W1)
}

# structural equation for W_2
# takes as input a vector U_W2 and returns a vector evaluating
# f_{W,2}(U_W2)
f_W2 <- function(U_W2){
  return(U_W2)
}

# structural equation for A
f_A <- function(W_1, W_2, U_A){
  return(as.numeric(plogis(W_1 - W_2 + U_A) > 0.5))
}

# structural equation for Y
f_Y <- function(W_1, W_2, A, U_Y){
  return(-W_1 + W_2 + A - U_Y)
}

sim_obs_scm <- function(n) {
  ##############################################
  # function to draw n observations from an SCM
  # n = the number of observations to draw
  # returns a tibble with named columns
  ##############################################
  # first we draw the errors
  ## draw U_{W,1}
  U_W1 <- rbinom(n, 1, 0.5)
  ## draw U_{W,2}
  U_W2 <- rbinom(n, 1, 0.5)
  ## draw U_A
  U_A <- rnorm(n, 0, 1)
  ## draw U_Y
  U_Y <- rnorm(n, 0, 1)

  # now we can evaluate the observations sequentially
  ## evaluate W_1
  W_1 <- f_W1(U_W1)
  ## evaluate W_2
  W_2 <- f_W2(U_W2)
  ## evaluate A
  A <- f_A(W_1 = W_1, W_2 = W_2, U_A = U_A)
  ## evaluate Y
  Y <- f_Y(W_1 = W_1, W_2 = W_2, A = A, U_Y = U_Y)

  # return a tibble
  out <- as_tibble(list(W_1 = W_1, W_2 = W_2, A = A, Y = Y))
  return(out)
}

# try it out
sim_test <- sim_obs_scm(n = 100)
head(sim_test)

sim_int_scm <- function(n, set_A = 1) {
  #########################################################
  # function that draws n observations from an SCM that is
  # intervened on to set A = set_A
  # n = number of observations
  # set_A = the value to set A equal to (0 or 1)
  # returns a tibble of counterfactual observations
  #########################################################
  # first we draw the errors
  ## draw U_{W,1}
  U_W1 <- rbinom(n, 1, 0.5)
  ## draw U_{W,2}
  U_W2 <- rbinom(n, 1, 0.5)
  ## draw U_A
  U_A <- rnorm(n, 0, 1)
  ## draw U_Y
  U_Y <- rnorm(n, 0, 1)

  # now we can evaluate the observations sequentially
  ## evaluate W_1
  W_1 <- f_W1(U_W1)
  ## evaluate W_2
  W_2 <- f_W2(U_W2)
  ## set A = set_A for everyone
  A <- rep(set_A, n)
  ## evaluate Y with the set values of A
  Y <- f_Y(W_1 = W_1, W_2 = W_2, A = A, U_Y = U_Y)

  # return a tibble object
  out <- as_tibble(list(W_1 = W_1, W_2 = W_2, A = A, Y = Y))
  ## let's rename the Y column to reflect the intervention
  colnames(out)[4] <- paste0("Y_", set_A)
  return(out)
}

# try it out
sim_test_a_is_1 <- sim_int_scm(n = 100, set_A = 1)
head(sim_test_a_is_1)

# and with set_A = 0
sim_test_a_is_0 <- sim_int_scm(n = 100, set_A = 0)
head(sim_test_a_is_0)

# simulate a big data set from the SCM on which we intervened to set A = 0
big_int_0 <- sim_int_scm(n = 1e5, set_A = 0)

# plot the distribution of Y_0
p_y0 <- ggplot(big_int_0, aes(x = Y_0)) +
  geom_histogram(aes(y = ..count..), fill = "blue", binwidth = 0.1, alpha = 0.8) +
  theme_bw() +
  xlab("Y_0") +
  ggtitle("Distribution of Y_0")

# simulate a big data set from the SCM on which we intervened to set A = 1
big_int_1 <- sim_int_scm(n = 1e5, set_A = 1)

# plot the distribution of Y_1
p_y1 <- ggplot(big_int_1, aes(x = Y_1)) +
  geom_histogram(aes(y = ..count..), fill = "blue", binwidth = 0.1, alpha = 0.8) +
  theme_bw() +
  xlab("Y_1") +
  ggtitle("Distribution of Y_1")

# approximate the (counterfactual) mean of Y_0
cf_mean_y0 <- mean(big_int_0$Y_0)
## let's add it to the histogram
p_y0 <- p_y0 + geom_vline(xintercept = cf_mean_y0, colour = "red")
p_y0
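## Added sketch (not part of the original workflow): a closed-form sanity
## check. From f_Y, Y_0 = -W_1 + W_2 - U_Y, and since W_1 and W_2 are
## Bernoulli(0.5) and U_Y is standard normal, E[Y_0] = -0.5 + 0.5 - 0 = 0,
## so the Monte Carlo approximation above should be near zero.
abs(cf_mean_y0 - 0) < 0.05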
# approximate the (counterfactual) mean of Y_1
cf_mean_y1 <- mean(big_int_1$Y_1)
## let's add it to the histogram
p_y1 <- p_y1 + geom_vline(xintercept = cf_mean_y1, colour = "red")
p_y1

cf_mean_y1 - cf_mean_y0

# simulate a large data set
big_obs <- sim_obs_scm(n = 1e5)

# plot a histogram of the conditional distribution of Y given A = 0
py_a_is_0 <- big_obs %>%
  dplyr::filter(A == 0) %>%
  ggplot(., aes(x = Y)) +
  geom_histogram(aes(y = ..count..), fill = "blue", binwidth = 0.1, alpha = 0.8) +
  theme_bw() +
  xlab("Y") +
  ggtitle("Conditional dist. of Y | A = 0")

# plot a histogram of the conditional distribution of Y given A = 1
py_a_is_1 <- big_obs %>%
  dplyr::filter(A == 1) %>%
  ggplot(., aes(x = Y)) +
  geom_histogram(aes(y = ..count..), fill = "blue", binwidth = 0.1, alpha = 0.8) +
  theme_bw() +
  xlab("Y") +
  ggtitle("Conditional dist. of Y | A = 1")

# approximate the conditional mean of Y | A = 0
cond_mean_Y0 <- big_obs %>%
  dplyr::filter(A == 0) %>%
  summarise(mean(Y)) %>%
  as.numeric()
## let's add it to the histogram
py_a_is_0 <- py_a_is_0 + geom_vline(xintercept = cond_mean_Y0, colour = "red")
py_a_is_0
cond_mean_Y0

# approximate the conditional mean of Y | A = 1
cond_mean_Y1 <- big_obs %>%
  dplyr::filter(A == 1) %>%
  summarise(mean(Y)) %>%
  as.numeric()
## let's add it to the histogram
py_a_is_1 <- py_a_is_1 + geom_vline(xintercept = cond_mean_Y1, colour = "red")
py_a_is_1
cond_mean_Y1

cond_mean_Y1 - cond_mean_Y0

# write a loop to get the mean of Y | A = 1, W = w minus
# the mean of Y | A = 0, W = w in each stratum of W
out <- NULL
for (w1 in c(0, 1)) {
  for (w2 in c(0, 1)) {
    ate_this_W <- with(big_obs,
      mean(Y[A == 1 & W_1 == w1 & W_2 == w2]) -
        mean(Y[A == 0 & W_1 == w1 & W_2 == w2])
    )
    p_this_W <- with(big_obs,
      sum(W_1 == w1 & W_2 == w2) / nrow(big_obs)
    )
    out <- rbind(out, c(ate_this_W, p_this_W))
  }
}
out <- as_tibble(out)
colnames(out) <- c("ate_w", "p_w")

# let's look at the tibble we just made
out

sum(out$ate_w * out$p_w)

# this uses the same functions defined in section I above
# for f_W1, f_W2, and f_A, but we need to re-define f_Y
f_Y <- function(W_1, W_2, A, U_Y) {
  W_1 + W_2 - W_1 * W_2 * A + 3 * W_2 * A - A + U_Y
}

sim_rule_int_scm <- function(n, strata_tx) {
  ################################################################
  # define a function that takes as input
  # n = the number of observations to simulate
  # strata_tx = the values of W_S to treat
  # returns a tibble drawn from the rule-specific intervened SCM
  ################################################################
  # first we draw the errors
  ## draw U_{W,1}
  U_W1 <- rbinom(n, 1, 0.5)
  ## draw U_{W,2}
  U_W2 <- rbinom(n, 1, 0.5)
  ## draw U_A
  U_A <- rnorm(n, 0, 1)
  ## draw U_Y
  U_Y <- rnorm(n, 0, 1)

  # now we can evaluate the observations sequentially
  ## evaluate W_1
  W_1 <- f_W1(U_W1)
  ## evaluate W_2
  W_2 <- f_W2(U_W2)
  ## evaluate W_S
  W_S <- as.numeric(I(W_1 == 0 & W_2 == 0) +
                    2 * I(W_1 == 0 & W_2 == 1) +
                    3 * I(W_1 == 1 & W_2 == 0) +
                    4 * I(W_1 == 1 & W_2 == 1))
  # set A according to strata_tx
  A <- as.numeric(W_S %in% strata_tx)
  # evaluate Y with the set values of A
  Y <- f_Y(W_1 = W_1, W_2 = W_2, A = A, U_Y = U_Y)

  # return a tibble object
  out <- as_tibble(list(W_1 = W_1, W_2 = W_2, W_S = W_S, A = A, Yd = Y))
  return(out)
}

# simulate data treating stratum 1
rule_only_1 <- sim_rule_int_scm(n = 100, strata_tx = 1)
# let's take a look
head(rule_only_1)

# confirm that everyone with W_S == 1 has A = 1
all(rule_only_1$A[rule_only_1$W_S == 1] == 1)
# and that everyone without W_S == 1 has A = 0
all(rule_only_1$A[rule_only_1$W_S != 1] == 0)
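## Added sketch: the static interventions from section I are special cases of
## this rule-based intervention. Treating every stratum (strata_tx = 1:4)
## sets A = 1 for everyone, and treating no stratum (an empty strata_tx)
## sets A = 0 for everyone.
rule_all <- sim_rule_int_scm(n = 100, strata_tx = 1:4)
rule_none <- sim_rule_int_scm(n = 100, strata_tx = integer(0))
all(rule_all$A == 1)
all(rule_none$A == 0)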
# first create a list that contains all possible
# subsets of strata. We do this using the combn function
strata_list <- vector(mode = "list", length = 0)
for (i in seq_len(max(rule_only_1$W_S))) {
  strata_list <- c(strata_list, combn(1:4, i, simplify = FALSE))
}
# you can look at the list to see what this command
# did (omitted here for space)
# strata_list

# write a function that computes E(Y_d)
get_EYd <- function(strata, n = 1e5) {
  # simulate data according to the rule
  dat <- sim_rule_int_scm(n = n, strata_tx = strata)
  # calculate the mean of Yd
  return(mean(dat$Yd))
}

# apply the function over strata_list
all_EYd <- unlist(lapply(strata_list, get_EYd))
# look at all the values of E(Y_d)
all_EYd

# the optimal rule is the one with the smallest value of E(Y_d)
opt_index <- which(all_EYd == min(all_EYd))
# which rule is it?
strata_list[opt_index]

sim_rule_obs_scm <- function(n) {
  # first we draw the errors
  ## draw U_{W,1}
  U_W1 <- rbinom(n, 1, 0.5)
  ## draw U_{W,2}
  U_W2 <- rbinom(n, 1, 0.5)
  ## draw U_A
  U_A <- rnorm(n, 0, 1)
  ## draw U_Y
  U_Y <- rnorm(n, 0, 1)

  # now we can evaluate the observations sequentially
  ## evaluate W_1
  W_1 <- f_W1(U_W1)
  ## evaluate W_2
  W_2 <- f_W2(U_W2)
  ## evaluate W_S
  W_S <- as.numeric(I(W_1 == 0 & W_2 == 0) +
                    2 * I(W_1 == 0 & W_2 == 1) +
                    3 * I(W_1 == 1 & W_2 == 0) +
                    4 * I(W_1 == 1 & W_2 == 1))
  # evaluate A according to f_A
  A <- f_A(W_1 = W_1, W_2 = W_2, U_A = U_A)
  # evaluate Y with the observed values of A
  Y <- f_Y(W_1 = W_1, W_2 = W_2, A = A, U_Y = U_Y)

  ## return a data.frame object
  out <- data.frame(W_1 = W_1, W_2 = W_2, W_S = W_S, A = A, Y = Y)
  return(out)
}

get_blip <- function(strata, n = 1e6) {
  # simulate observational data
  dat <- sim_rule_obs_scm(n = n)
  EY_A1_WS <- dat %>%
    dplyr::filter(W_S %in% strata & A == 1) %>%
    summarise(mean(Y)) %>%
    as.numeric()
  EY_A0_WS <- dat %>%
    dplyr::filter(W_S %in% strata & A == 0) %>%
    summarise(mean(Y)) %>%
    as.numeric()
  blip <- EY_A1_WS - EY_A0_WS
  return(blip)
}

# now compute the blip for each stratum
all_blips <- unlist(lapply(split(1:4, 1:4), get_blip))
all_blips

# which strata have blip < 0?
as.numeric(which(all_blips < 0))
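## Added sketch (closed-form cross-check, not in the original code): from the
## re-defined f_Y, the stratum-specific effect of A (the blip) is
## -W_1 * W_2 + 3 * W_2 - 1, i.e. -1, 2, -1, 1 for strata 1 through 4.
## Strata 1 and 3 should therefore have negative blips, matching the Monte
## Carlo values in all_blips and the optimal rule found by brute force above.
w_grid <- tibble(W_S = 1:4, W_1 = c(0, 0, 1, 1), W_2 = c(0, 1, 0, 1))
analytic_blips <- with(w_grid, -W_1 * W_2 + 3 * W_2 - 1)
analytic_blips
which(analytic_blips < 0)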