set.seed(49753)

# packages we'll be using
library(data.table)
library(SuperLearner)
library(origami)
library(sl3)

# load example data set
data(cpp_imputed)

# take a peek at the data
head(cpp_imputed)

# here are the covariates we are interested in and, of course, the outcome
covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs",
            "sexn")
outcome <- "haz"

# create the sl3 task and take a look at it
task <- make_sl3_Task(data = cpp_imputed, covariates = covars,
                            outcome = outcome, outcome_type = "continuous")

# let's take a look at the sl3 task
task

# make learner object
lrnr_glm <- make_learner(Lrnr_glm)

# fit learner to task data
lrnr_glm_fit <- lrnr_glm$train(task)

# verify that the learner is fit
lrnr_glm_fit$is_trained

# get learner predictions
preds <- lrnr_glm_fit$predict()
head(preds)

screen_cor <- Lrnr_pkg_SuperLearner_screener$new("screen.corP")
screen_fit <- screen_cor$train(task)
print(screen_fit)

sg_pipeline <- make_learner(Pipeline, screen_cor, lrnr_glm)
sg_pipeline_fit <- sg_pipeline$train(task)
sg_pipeline_preds <- sg_pipeline_fit$predict()
head(sg_pipeline_preds)

stack <- make_learner(Stack, lrnr_glm, sg_pipeline)
stack_fit <- stack$train(task)
stack_preds <- stack_fit$predict()
head(stack_preds)

cv_stack <- Lrnr_cv$new(stack)
cv_fit <- cv_stack$train(task)
cv_preds <- cv_fit$predict()

risks <- cv_fit$cv_risk(loss_squared_error)
print(risks)

metalearner <- make_learner(Lrnr_nnls)
cv_task <- cv_fit$chain()
ml_fit <- metalearner$train(cv_task)

sl_pipeline <- make_learner(Pipeline, stack_fit, ml_fit)
sl_preds <- sl_pipeline$predict()
head(sl_preds)

sl <- Lrnr_sl$new(learners = stack,
                  metalearner = metalearner)
sl_fit <- sl$train(task)
lrnr_sl_preds <- sl_fit$predict()
head(lrnr_sl_preds)

# let's split the data into training and validation sets
train_cpp_imputed <- as.data.table(cpp_imputed[sample(nrow(cpp_imputed), 0.75 * nrow(cpp_imputed)), ])
valid_cpp_imputed <- as.data.table(cpp_imputed[!(seq_len(nrow(cpp_imputed)) %in% rownames(train_cpp_imputed)), ])

# create the sl3 task and take a look at it
task_train <- make_sl3_Task(data = train_cpp_imputed, covariates = covars,
                            outcome = outcome, outcome_type = "continuous")
task_train

# we'll also create an sl3 task for the holdout set
task_valid <- make_sl3_Task(data = valid_cpp_imputed, covariates = covars,
                            outcome = outcome, outcome_type = "continuous")