set.seed(49753) # packages we'll be using library(data.table) library(SuperLearner) library(origami) library(sl3) # load example data set data(cpp_imputed) # take a peek at the data head(cpp_imputed) # here are the covariates we are interested in and, of course, the outcome covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") outcome <- "haz" # create the sl3 task and take a look at it task <- make_sl3_Task(data = cpp_imputed, covariates = covars, outcome = outcome, outcome_type = "continuous") # let's take a look at the sl3 task task # make learner object lrnr_glm <- make_learner(Lrnr_glm) # fit learner to task data lrnr_glm_fit <- lrnr_glm$train(task) # verify that the learner is fit lrnr_glm_fit$is_trained # get learner predictions preds <- lrnr_glm_fit$predict() head(preds) screen_cor <- Lrnr_pkg_SuperLearner_screener$new("screen.corP") screen_fit <- screen_cor$train(task) print(screen_fit) sg_pipeline <- make_learner(Pipeline, screen_cor, lrnr_glm) sg_pipeline_fit <- sg_pipeline$train(task) sg_pipeline_preds <- sg_pipeline_fit$predict() head(sg_pipeline_preds) stack <- make_learner(Stack, lrnr_glm, sg_pipeline) stack_fit <- stack$train(task) stack_preds <- stack_fit$predict() head(stack_preds) cv_stack <- Lrnr_cv$new(stack) cv_fit <- cv_stack$train(task) cv_preds <- cv_fit$predict() risks <- cv_fit$cv_risk(loss_squared_error) print(risks) metalearner <- make_learner(Lrnr_nnls) cv_task <- cv_fit$chain() ml_fit <- metalearner$train(cv_task) sl_pipeline <- make_learner(Pipeline, stack_fit, ml_fit) sl_preds <- sl_pipeline$predict() head(sl_preds) sl <- Lrnr_sl$new(learners = stack, metalearner = metalearner) sl_fit <- sl$train(task) lrnr_sl_preds <- sl_fit$predict() head(lrnr_sl_preds) # let's split the data into training and validation sets train_cpp_imputed <- as.data.table(cpp_imputed[sample(nrow(cpp_imputed), 0.75 * nrow(cpp_imputed)), ]) valid_cpp_imputed <- as.data.table(cpp_imputed[!(seq_len(nrow(cpp_imputed)) %in% rownames(train_cpp_imputed)), ]) # create the sl3 task and take a look at it task_train <- make_sl3_Task(data = train_cpp_imputed, covariates = covars, outcome = outcome, outcome_type = "continuous") task_train # we'll also create an sl3 task for the holdout set task_valid <- make_sl3_Task(data = valid_cpp_imputed, covariates = covars, outcome = outcome, outcome_type = "continuous")