install.packages('grf')
The downloaded binary packages are in /var/folders/8b/hhnbt0nd4zsg2qhxc28q23w80000gn/T//RtmpydKe1A/downloaded_packages
library('grf')
Warning message: “package ‘grf’ was built under R version 3.6.2”
if(packageVersion("grf") < '0.10.2') {
warning("This script requires grf 0.10.2 or higher")
}
library(sandwich)
library(lmtest)
library(Hmisc)
library(ggplot2)
set.seed(1)
rm(list = ls())
data.all = read.csv("./data/synthetic_data.csv")
data.all$schoolid = factor(data.all$schoolid)
DF = data.all[,-1]
school.id = as.numeric(data.all$schoolid)
school.mat = model.matrix(~ schoolid + 0, data = data.all)
school.size = colSums(school.mat)
# It appears that school ID does not affect pscore. So ignore it
# in modeling, and just treat it as source of per-cluster error.
w.lm = glm(Z ~ ., data = data.all[,-3], family = binomial)
summary(w.lm)
Call: glm(formula = Z ~ ., family = binomial, data = data.all[, -3]) Deviance Residuals: Min 1Q Median 3Q Max -1.2079 -0.9088 -0.8297 1.4176 1.9556 Coefficients: (6 not defined because of singularities) Estimate Std. Error z value Pr(>|z|) (Intercept) -0.9524636 0.2845173 -3.348 0.000815 *** schoolid2 0.0697302 0.2766287 0.252 0.800986 schoolid3 0.0382080 0.2911323 0.131 0.895586 schoolid4 0.1761334 0.2784711 0.633 0.527059 schoolid5 -0.0033389 0.2950180 -0.011 0.990970 schoolid6 0.0583548 0.3067481 0.190 0.849124 schoolid7 -0.1313759 0.3188190 -0.412 0.680288 schoolid8 0.1233661 0.3023736 0.408 0.683279 schoolid9 -0.1955428 0.3073344 -0.636 0.524611 schoolid10 -0.1892794 0.2968750 -0.638 0.523752 schoolid11 -0.2224060 0.5461005 -0.407 0.683816 schoolid12 -0.3312420 0.5414374 -0.612 0.540682 schoolid13 -0.0408540 0.3989507 -0.102 0.918436 schoolid14 -0.8681934 0.6033674 -1.439 0.150175 schoolid15 -0.1059135 0.3263162 -0.325 0.745504 schoolid16 -0.1063268 0.2885387 -0.369 0.712500 schoolid17 0.0854323 0.3119435 0.274 0.784184 schoolid18 -0.1924441 0.2997822 -0.642 0.520908 schoolid19 -0.0265326 0.3229712 -0.082 0.934526 schoolid20 -0.2179554 0.3041336 -0.717 0.473594 schoolid21 -0.2147440 0.2982822 -0.720 0.471565 schoolid22 -0.5115966 0.4410779 -1.160 0.246098 schoolid23 0.0039231 0.3475373 0.011 0.990994 schoolid24 -0.0848314 0.3259572 -0.260 0.794668 schoolid25 0.0521087 0.2754586 0.189 0.849959 schoolid26 0.0241212 0.2876511 0.084 0.933171 schoolid27 -0.2300630 0.3104796 -0.741 0.458698 schoolid28 -0.3519010 0.2924774 -1.203 0.228909 schoolid29 -0.2198764 0.3293288 -0.668 0.504357 schoolid30 -0.3146292 0.3257994 -0.966 0.334187 schoolid31 0.1398555 0.6137901 0.228 0.819759 schoolid32 0.1555524 0.3916156 0.397 0.691215 schoolid33 -0.0991693 0.3939370 -0.252 0.801243 schoolid34 -0.0073688 0.2980808 -0.025 0.980278 schoolid35 -0.3528987 0.3997273 -0.883 0.377318 schoolid36 -0.3751465 0.3988972 -0.940 0.346982 schoolid37 -0.0343169 0.3219646 -0.107 0.915117 schoolid38 -0.1346432 0.3851869 -0.350 0.726674 schoolid39 -0.4339936 0.3612869 -1.201 0.229657 schoolid40 -0.3993958 0.3834495 -1.042 0.297604 schoolid41 -0.1490784 0.3542105 -0.421 0.673846 schoolid42 -0.1545715 0.3551857 -0.435 0.663428 schoolid43 -0.5679567 0.4277455 -1.328 0.184247 schoolid44 -0.1425896 0.3774795 -0.378 0.705623 schoolid45 -0.1337888 0.3232493 -0.414 0.678957 schoolid46 -0.2573249 0.3129119 -0.822 0.410874 schoolid47 0.0027726 0.2770108 0.010 0.992014 schoolid48 -0.3406079 0.3470361 -0.981 0.326358 schoolid49 -0.3236117 0.3434541 -0.942 0.346077 schoolid50 -0.1185119 0.4086074 -0.290 0.771787 schoolid51 0.4087898 0.4506822 0.907 0.364382 schoolid52 -0.3144014 0.4118342 -0.763 0.445214 schoolid53 -0.2733677 0.4511280 -0.606 0.544538 schoolid54 -0.0889588 0.3872532 -0.230 0.818311 schoolid55 -0.1558106 0.4155020 -0.375 0.707665 schoolid56 0.1050353 0.3149235 0.334 0.738737 schoolid57 -0.0314901 0.2901719 -0.109 0.913581 schoolid58 -0.0383183 0.2730077 -0.140 0.888379 schoolid59 -0.0529637 0.2934895 -0.180 0.856790 schoolid60 -0.1624792 0.3972885 -0.409 0.682561 schoolid61 -0.0289549 0.3201953 -0.090 0.927946 schoolid62 0.0993158 0.2669678 0.372 0.709882 schoolid63 0.1684702 0.3282167 0.513 0.607749 schoolid64 -0.0693060 0.2770896 -0.250 0.802493 schoolid65 -0.0004197 0.4072922 -0.001 0.999178 schoolid66 -0.2130911 0.2984091 -0.714 0.475171 schoolid67 0.0358440 0.2921158 0.123 0.902341 schoolid68 -0.0871303 0.3290814 -0.265 0.791188 schoolid69 -0.2550387 0.2908992 -0.877 0.380636 schoolid70 -0.0268947 0.4032160 -0.067 0.946820 schoolid71 0.0037464 0.4268290 0.009 0.992997 schoolid72 -0.1304085 0.2881512 -0.453 0.650859 schoolid73 -0.2160697 0.2840030 -0.761 0.446776 schoolid74 -0.0935320 0.2842612 -0.329 0.742129 schoolid75 -0.1056241 0.3024204 -0.349 0.726892 schoolid76 -0.1052261 0.2939262 -0.358 0.720342 S3 0.1036077 0.0197345 5.250 1.52e-07 *** C1 -0.0015919 0.0053900 -0.295 0.767728 C2 -0.1038596 0.0424020 -2.449 0.014309 * C3 -0.1319218 0.0461833 -2.856 0.004284 ** XC NA NA NA NA X1 NA NA NA NA X2 NA NA NA NA X3 NA NA NA NA X4 NA NA NA NA X5 NA NA NA NA --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 13115 on 10390 degrees of freedom Residual deviance: 13009 on 10311 degrees of freedom AIC: 13169 Number of Fisher Scoring iterations: 4
W = DF$Z
Y = DF$Y
X.raw = DF[,-(1:2)]
C1.exp = model.matrix(~ factor(X.raw$C1) + 0)
XC.exp = model.matrix(~ factor(X.raw$XC) + 0)
X = cbind(X.raw[,-which(names(X.raw) %in% c("C1", "XC"))], C1.exp, XC.exp)
#
# Grow a forest. Add extra trees for the causal forest.
#
Y.forest = regression_forest(X, Y, clusters = school.id, equalize.cluster.weights = TRUE)
Y.hat = predict(Y.forest)$predictions
W.forest = regression_forest(X, W, clusters = school.id, equalize.cluster.weights = TRUE)
W.hat = predict(W.forest)$predictions
cf.raw = causal_forest(X, Y, W,
Y.hat = Y.hat, W.hat = W.hat,
clusters = school.id,
equalize.cluster.weights = TRUE)
varimp = variable_importance(cf.raw)
selected.idx = which(varimp > mean(varimp))
cf = causal_forest(X[,selected.idx], Y, W,
Y.hat = Y.hat, W.hat = W.hat,
clusters = school.id,
equalize.cluster.weights = TRUE,
tune.parameters = "all")
tau.hat = predict(cf)$predictions
#
# Estimate ATE
#
ATE = average_treatment_effect(cf)
paste("95% CI for the ATE:", round(ATE[1], 3),
"+/-", round(qnorm(0.975) * ATE[2], 3))
#
# Omnibus tests for heterogeneity
#
# Run best linear predictor analysis
test_calibration(cf)
# Compare regions with high and low estimated CATEs
high_effect = tau.hat > median(tau.hat)
ate.high = average_treatment_effect(cf, subset = high_effect)
ate.low = average_treatment_effect(cf, subset = !high_effect)
paste("95% CI for difference in ATE:",
round(ate.high[1] - ate.low[1], 3), "+/-",
round(qnorm(0.975) * sqrt(ate.high[2]^2 + ate.low[2]^2), 3))
Best linear fit using forest predictions (on held-out data) as well as the mean forest prediction as regressors, along with one-sided heteroskedasticity-robust (HC3) SEs: Estimate Std. Error t value Pr(>t) mean.forest.prediction 1.008054 0.082129 12.2741 <2e-16 *** differential.forest.prediction -0.552783 1.063927 -0.5196 0.6983 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# formal test for X1 and X2
#
dr.score = tau.hat + W / cf$W.hat *
(Y - cf$Y.hat - (1 - cf$W.hat) * tau.hat) -
(1 - W) / (1 - cf$W.hat) * (Y - cf$Y.hat + cf$W.hat * tau.hat)
school.score = t(school.mat) %*% dr.score / school.size
school.X1 = t(school.mat) %*% X$X1 / school.size
high.X1 = school.X1 > median(school.X1)
t.test(school.score[high.X1], school.score[!high.X1])
school.X2 = (t(school.mat) %*% X$X2) / school.size
high.X2 = school.X2 > median(school.X2)
t.test(school.score[high.X2], school.score[!high.X2])
school.X2.levels = cut(school.X2,
breaks = c(-Inf, quantile(school.X2, c(1/3, 2/3)), Inf))
summary(aov(school.score ~ school.X2.levels))
Welch Two Sample t-test data: school.score[high.X1] and school.score[!high.X1] t = -3.0347, df = 71.45, p-value = 0.003357 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -0.19218352 -0.03978437 sample estimates: mean of x mean of y 0.1908525 0.3068365
Welch Two Sample t-test data: school.score[high.X2] and school.score[!high.X2] t = 0.9637, df = 72.286, p-value = 0.3384 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -0.04146936 0.11909811 sample estimates: mean of x mean of y 0.2682517 0.2294373
Df Sum Sq Mean Sq F value Pr(>F) school.X2.levels 2 0.0811 0.04054 1.328 0.271 Residuals 73 2.2283 0.03052
#
# formal test for S3
#
school.score.XS3.high = t(school.mat) %*% (dr.score * (X$S3 >= 6)) /
t(school.mat) %*% (X$S3 >= 6)
school.score.XS3.low = t(school.mat) %*% (dr.score * (X$S3 < 6)) /
t(school.mat) %*% (X$S3 < 6)
plot(school.score.XS3.low, school.score.XS3.high)
t.test(school.score.XS3.high - school.score.XS3.low)
One Sample t-test data: school.score.XS3.high - school.score.XS3.low t = 2.2397, df = 75, p-value = 0.02807 alternative hypothesis: true mean is not equal to 0 95 percent confidence interval: 0.009408619 0.160803922 sample estimates: mean of x 0.08510627
#
# Look at school-wise heterogeneity
#
#pdf("school_hist.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
hist(school.score, xlab = "School Treatment Effect Estimate", main = "")
#dev.off()
#
# Re-check ATE... sanity check only
#
ate.hat = mean(school.score)
se.hat = sqrt(var(school.score) / length(school.score - 1))
print(paste(round(ate.hat, 3), "+/-", round(1.96 * se.hat, 3)))
[1] "0.249 +/- 0.039"
#
# Look at variation in propensity scores
#
DF = X
DF$W.hat = cf$W.hat
#pdf("pscore.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
boxplot(W.hat ~ S3, data = DF, ylab = "Propensity Score", xlab = "Student Expectation of Success")
lines(smooth.spline(X$S3, cf$W.hat), lwd = 2, col = 4)
#dev.off()
#
# Analysis ignoring clusters
#
cf.noclust = causal_forest(X[,selected.idx], Y, W,
Y.hat = Y.hat, W.hat = W.hat,
tune.parameters = "all")
ATE.noclust = average_treatment_effect(cf.noclust)
paste("95% CI for the ATE:", round(ATE.noclust[1], 3),
"+/-", round(qnorm(0.975) * ATE.noclust[2], 3))
test_calibration(cf.noclust)
tau.hat.noclust = predict(cf.noclust)$predict
plot(school.id, tau.hat.noclust)
nfold = 5
school.levels = unique(school.id)
cluster.folds = sample.int(nfold, length(school.levels), replace = TRUE)
tau.hat.crossfold = rep(NA, length(Y))
for (foldid in 1:nfold) {
print(foldid)
infold = school.id %in% school.levels[cluster.folds == foldid]
cf.fold = causal_forest(X[!infold, selected.idx], Y[!infold], W[!infold],
Y.hat = Y.hat[!infold], W.hat = W.hat[!infold],
tune.parameters = "all")
pred.fold = predict(cf.fold, X[infold, selected.idx])$predictions
tau.hat.crossfold[infold] = pred.fold
}
cf.noclust.cpy = cf.noclust
cf.noclust.cpy$predictions = tau.hat.crossfold
cf.noclust.cpy$clusters = school.id
test_calibration(cf.noclust.cpy)
Rloss = mean(((Y - Y.hat) - tau.hat * (W - W.hat))^2)
Rloss.noclust = mean(((Y - Y.hat) - tau.hat.noclust * (W - W.hat))^2)
Rloss.crossfold = mean(((Y - Y.hat) - tau.hat.crossfold * (W - W.hat))^2)
c(Rloss.noclust - Rloss, Rloss.crossfold - Rloss)
summary(aov(dr.score ~ factor(school.id)))
Best linear fit using forest predictions (on held-out data) as well as the mean forest prediction as regressors, along with one-sided heteroskedasticity-robust (HC3) SEs: Estimate Std. Error t value Pr(>t) mean.forest.prediction 1.012757 0.045084 22.4639 < 2.2e-16 *** differential.forest.prediction 0.528275 0.133245 3.9647 3.699e-05 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
[1] 1 [1] 2 [1] 3 [1] 4 [1] 5
Best linear fit using forest predictions (on held-out data) as well as the mean forest prediction as regressors, along with one-sided heteroskedasticity-robust (HC3) SEs: Estimate Std. Error t value Pr(>t) mean.forest.prediction 0.988134 0.064817 15.2450 <2e-16 *** differential.forest.prediction 0.224634 0.213643 1.0514 0.1465 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Df Sum Sq Mean Sq F value Pr(>F) factor(school.id) 75 201 2.677 1.98 1.05e-06 *** Residuals 10315 13944 1.352 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Analaysis without fitting the propensity score
#
cf.noprop = causal_forest(X[,selected.idx], Y, W,
Y.hat = Y.hat, W.hat = mean(W),
tune.parameters = "all",
equalize.cluster.weights = TRUE,
clusters = school.id)
tau.hat.noprop = predict(cf.noprop)$predictions
ATE.noprop = average_treatment_effect(cf.noprop)
paste("95% CI for the ATE:", round(ATE.noprop[1], 3),
"+/-", round(qnorm(0.975) * ATE.noprop[2], 3))
#pdf("tauhat_noprop.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
plot(tau.hat, tau.hat.noprop,
xlim = range(tau.hat, tau.hat.noprop),
ylim = range(tau.hat, tau.hat.noprop),
xlab = "orthogonalized causal forest estimates",
ylab = "non-orthogonalized causal forest")
abline(0, 1, lwd = 2, lty = 2, col = 4)
par = pardef
#dev.off()
#
# Train forest on school-wise DR scores
#
school.X = (t(school.mat) %*% as.matrix(X[,c(4:8, 25:28)])) / school.size
school.X = data.frame(school.X)
colnames(school.X) = c("X1", "X2", "X3", "X4", "X5",
"XC.1", "XC.2", "XC.3", "XC.4")
dr.score = tau.hat + W / cf$W.hat * (Y - cf$Y.hat - (1 - cf$W.hat) * tau.hat) -
(1 - W) / (1 - cf$W.hat) * (Y - cf$Y.hat + cf$W.hat * tau.hat)
school.score = t(school.mat) %*% dr.score / school.size
school.forest = regression_forest(school.X, school.score)
school.pred = predict(school.forest)$predictions
test_calibration(school.forest)
# Alternative OLS analysis
school.DF = data.frame(school.X, school.score=school.score)
coeftest(lm(school.score ~ ., data = school.DF), vcov = vcovHC)
Best linear fit using forest predictions (on held-out data) as well as the mean forest prediction as regressors, along with one-sided heteroskedasticity-robust (HC3) SEs: Estimate Std. Error t value Pr(>t) mean.forest.prediction 1.00004 0.08179 12.2269 <2e-16 *** differential.forest.prediction 0.71717 0.67983 1.0549 0.1474 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
t test of coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.2414116 0.0770612 3.1327 0.002583 ** X1 -0.0506340 0.0291568 -1.7366 0.087121 . X2 0.0125656 0.0336563 0.3734 0.710084 X3 0.0102119 0.0266019 0.3839 0.702302 X4 0.0236645 0.0255092 0.9277 0.356950 X5 -0.0357828 0.0268560 -1.3324 0.187312 XC.1 0.0015231 0.0935067 0.0163 0.987054 XC.2 0.0887259 0.1047433 0.8471 0.400012 XC.3 -0.1341513 0.0875327 -1.5326 0.130158 XC.4 0.0424028 0.0816170 0.5195 0.605127 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Make some plots...
#
#pdf("tauhat_hist.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
hist(tau.hat, xlab = "estimated CATE", main = "")
#dev.off()
#pdf("tauhat_hist_noprop.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
hist(tau.hat.noprop, xlab = "estimated CATE", main = "")
#dev.off()
#pdf("tauhat_hist_noclust.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
hist(tau.hat.noclust, xlab = "estimated CATE", main = "",
breaks = seq(-0.0, 0.55, by = 0.55 / 25))
#dev.off()
#pdf("tauhat_vs_X1.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
boxplot(tau.hat ~ round(X$X1), xlab = "X1", ylab = "estimated CATE")
lines(smooth.spline(4 + X[,"X1"], tau.hat, df = 4), lwd = 2, col = 4)
#dev.off()
#pdf("tauhat_vs_X2.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
boxplot(tau.hat ~ round(X$X2), xlab = "X2", ylab = "estimated CATE")
lines(smooth.spline(4 + X[,"X2"], tau.hat, df = 4), lwd = 2, col = 4)
#dev.off()
school.avg.tauhat = t(school.mat) %*% tau.hat / school.size
#pdf("school_avg.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
plot(school.avg.tauhat, school.pred, cex = 1.5,
xlim = range(school.avg.tauhat, school.pred),
ylim = range(school.avg.tauhat, school.pred),
xlab = "average CATE estimate in school",
ylab = "school-wise forest predictions")
abline(0, 1, lwd = 2, lty = 2, col = 4)
par = pardef
#dev.off()
#
# Experiment with no orthogonalization
#
n.synth = 1000
p.synth = 10
X.synth = matrix(rnorm(n.synth * p.synth), n.synth, p.synth)
W.synth = rbinom(n.synth, 1, 1 / (1 + exp(-X.synth[,1])))
Y.synth = 2 * rowMeans(X.synth[,1:6]) + rnorm(n.synth)
Y.forest.synth = regression_forest(X.synth, Y.synth)
Y.hat.synth = predict(Y.forest.synth)$predictions
W.forest.synth = regression_forest(X.synth, W.synth)
W.hat.synth = predict(W.forest.synth)$predictions
cf.synth = causal_forest(X.synth, Y.synth, W.synth,
Y.hat = Y.hat.synth, W.hat = W.hat.synth)
ATE.synth = average_treatment_effect(cf.synth)
paste("95% CI for the ATE:", round(ATE.synth[1], 3),
"+/-", round(qnorm(0.975) * ATE.synth[2], 3))
cf.synth.noprop = causal_forest(X.synth, Y.synth, W.synth,
Y.hat = Y.hat.synth, W.hat = mean(W.synth))
ATE.synth.noprop = average_treatment_effect(cf.synth.noprop)
paste("95% CI for the ATE:", round(ATE.synth.noprop[1], 3),
"+/-", round(qnorm(0.975) * ATE.synth.noprop[2], 3))
#
# Visualize school-level covariates by treatment heterogeneity
#
school.X.std = scale(school.X)
school.tercile = cut(school.pred,
breaks = c(-Inf, quantile(school.pred, c(1/3, 2/3)), Inf))
school.tercile.mat = model.matrix(~ school.tercile + 0)
school.means = diag(1 / colSums(school.tercile.mat)) %*% t(school.tercile.mat) %*% as.matrix(school.X.std)
MM = max(abs(school.means))
HC = heat.colors(21)
school.col = apply(school.means, 1:2, function(aa) HC[1 + round(20 * (0.5 + aa))])
DF.plot = data.frame(tercile=rep(factor(1:3, labels=c("low", "mid", "high")), 9), mean=as.numeric(school.means),
feature = factor(rbind(colnames(school.X), colnames(school.X), colnames(school.X))))
ggplot(data = DF.plot, aes(x = feature, y = tercile, fill = mean)) +
geom_tile() + scale_fill_gradient(low = "white", high = "steelblue") +
theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
legend.title = element_text(size=14), legend.text = element_text(size=12)) +
theme(panel.background = element_blank())
#ggsave("tercile_plot.pdf", width = 8, height = 4.5, dpi = 120)
mean(school.X$XC.3)
mean(school.X$XC.3[as.numeric(school.tercile) == 1])
#
# CATE by school
#
ord = order(order(school.pred))
school.sort = ord[school.id]
#pdf("school_boxplot.pdf")
pardef = par(mar = c(5, 4, 4, 2) + 0.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
boxplot(tau.hat.noclust ~ school.sort, xaxt = "n",
xlab = "school", ylab = "estimated CATE")
points(1:76, sort(school.pred), col = 4, pch = 16)
legend("topleft", c("school mean CATE", "CATE w/o clustering"), pch = c(16, 1), col = c(4, 1), cex = 1.5)
par = pardef
#dev.off()