This learner provides covariate screening procedures by running a test of
correlation (Pearson by default) with the cor.test
function, and then selecting either (1) the top-ranked variables (default), or
(2) the variables with a p-value lower than some pre-specified threshold.
R6Class object.
Learner object with methods for training and prediction. See
Lrnr_base for documentation on learners.
method = 'pearson': Correlation coefficient used for test.
type = c('rank', 'threshold'): Screen covariates by (1) rank
(default), which chooses the top num_screen correlated covariates;
or (2) threshold, which chooses covariates with a correlation-test-based
p-value lower than the threshold, subject to a minimum of min_screen covariates.
num_screen = 5: Number of covariates to select.
pvalue_threshold = 0.1: Maximum p-value threshold. Covariates
with a p-value lower than this threshold will be retained, and at least
the min_screen most significant covariates will be selected.
min_screen = 2: Minimum number of covariates to select. Used in the pvalue_threshold screening procedure.
Other Learners:
Custom_chain,
Lrnr_HarmonicReg,
Lrnr_arima,
Lrnr_bartMachine,
Lrnr_base,
Lrnr_bayesglm,
Lrnr_bilstm,
Lrnr_caret,
Lrnr_cv_selector,
Lrnr_cv,
Lrnr_dbarts,
Lrnr_define_interactions,
Lrnr_density_discretize,
Lrnr_density_hse,
Lrnr_density_semiparametric,
Lrnr_earth,
Lrnr_expSmooth,
Lrnr_gam,
Lrnr_ga,
Lrnr_gbm,
Lrnr_glm_fast,
Lrnr_glm_semiparametric,
Lrnr_glmnet,
Lrnr_glmtree,
Lrnr_glm,
Lrnr_grfcate,
Lrnr_grf,
Lrnr_gru_keras,
Lrnr_gts,
Lrnr_h2o_grid,
Lrnr_hal9001,
Lrnr_haldensify,
Lrnr_hts,
Lrnr_independent_binomial,
Lrnr_lightgbm,
Lrnr_lstm_keras,
Lrnr_mean,
Lrnr_multiple_ts,
Lrnr_multivariate,
Lrnr_nnet,
Lrnr_nnls,
Lrnr_optim,
Lrnr_pca,
Lrnr_pkg_SuperLearner,
Lrnr_polspline,
Lrnr_pooled_hazards,
Lrnr_randomForest,
Lrnr_ranger,
Lrnr_revere_task,
Lrnr_rpart,
Lrnr_rugarch,
Lrnr_screener_augment,
Lrnr_screener_coefs,
Lrnr_screener_importance,
Lrnr_sl,
Lrnr_solnp_density,
Lrnr_solnp,
Lrnr_stratified,
Lrnr_subset_covariates,
Lrnr_svm,
Lrnr_tsDyn,
Lrnr_ts_weights,
Lrnr_xgboost,
Pipeline,
Stack,
define_h2o_X(),
undocumented_learner
library(data.table)
# Load the example dataset and convert it to a data.table in place.
data(cpp_imputed)
setDT(cpp_imputed)
# Collapse parity values of 4 or more into a single level and add the result
# as a factor column via data.table's `:=` assignment.
cpp_imputed[, parity_cat := factor(pmin(parity, 4))]
#> subjid agedays wtkg htcm lencm bmi waz haz whz
#> 1: 1 1 4.621 55 55 15.27603 2.3800000 2.6100000 0.19
#> 2: 1 366 14.500 79 79 23.23346 3.8400000 1.3500000 4.02
#> 3: 2 1 3.345 51 51 12.86044 0.0600000 0.5000000 -0.64
#> 4: 2 366 8.400 73 73 15.76281 -1.2700000 -1.1700000 -0.96
#> 5: 2 2558 19.100 114 0 14.69683 -1.3727316 -1.4664795 0.00
#> ---
#> 1437: 500 1 3.629 52 52 13.42086 0.8900000 1.4400000 -0.49
#> 1438: 500 366 10.900 77 77 18.38421 1.5700000 1.1500000 1.47
#> 1439: 500 2558 26.300 126 0 16.56589 0.9932827 0.9455342 0.00
#> 1440: 501 1 3.232 46 46 15.27410 -0.1800000 -2.1400000 2.27
#> 1441: 501 366 9.700 77 77 16.36026 0.0400000 0.5100000 -0.24
#> baz siteid sexn sex feedingn feeding gagebrth birthwt birthlen
#> 1: 1.35 5 1 Male 90 Unknown 287 4621 55
#> 2: 3.89 5 1 Male 90 Unknown 287 4621 55
#> 3: -0.43 5 1 Male 90 Unknown 280 3345 51
#> 4: -0.80 5 1 Male 90 Unknown 280 3345 51
#> 5: 0.00 5 1 Male 90 Unknown 280 3345 51
#> ---
#> 1437: 0.08 5 2 Female 90 Unknown 287 3629 52
#> 1438: 1.30 5 2 Female 90 Unknown 287 3629 52
#> 1439: 0.00 5 2 Female 90 Unknown 287 3629 52
#> 1440: 1.35 5 1 Male 90 Unknown 287 3232 46
#> 1441: -0.33 5 1 Male 90 Unknown 287 3232 46
#> apgar1 apgar5 mage mracen mrace mmaritn mmarit meducyrs sesn
#> 1: 8 9 21 5 White 1 Married 12 50
#> 2: 8 9 21 5 White 1 Married 12 50
#> 3: 8 9 15 5 White 1 Married 0 0
#> 4: 8 9 15 5 White 1 Married 0 0
#> 5: 8 9 15 5 White 1 Married 0 0
#> ---
#> 1437: 6 9 20 5 White 1 Married 11 38
#> 1438: 6 9 20 5 White 1 Married 11 38
#> 1439: 6 9 20 5 White 1 Married 11 38
#> 1440: 5 9 19 5 White 1 Married 9 50
#> 1441: 5 9 19 5 White 1 Married 9 50
#> ses parity gravida smoked mcignum comprisk parity_cat
#> 1: Middle 1 1 0 0 none 1
#> 2: Middle 1 1 0 0 none 1
#> 3: . 0 0 1 35 none 0
#> 4: . 0 0 1 35 none 0
#> 5: . 0 0 1 35 none 0
#> ---
#> 1437: Lower-middle 0 0 1 10 none 0
#> 1438: Lower-middle 0 0 1 10 none 0
#> 1439: Lower-middle 0 0 1 10 none 0
#> 1440: Middle 1 1 0 0 none 1
#> 1441: Middle 1 1 0 0 none 1
# Outcome to predict and the candidate covariates offered to the screener.
outcome <- "haz"
covars <- c(
  "apgar1", "apgar5", "parity_cat", "gagebrth",
  "mage", "meducyrs", "sexn"
)

# Create the sl3 task from a copy of the example data.
task <- sl3_Task$new(
  data.table::copy(cpp_imputed),
  covariates = covars,
  outcome = outcome
)

# Instantiate the base learners. Only lrnr_glm and lrnr_mean are placed in the
# stack; lrnr_glmnet is created here but is not used by this pipeline.
lrnr_glmnet <- make_learner(Lrnr_glmnet)
lrnr_glm <- make_learner(Lrnr_glm)
lrnr_mean <- make_learner(Lrnr_mean)
lrnrs <- make_learner(Stack, lrnr_glm, lrnr_mean)

# Correlation screener in "threshold" mode, chained in front of the stack so
# the stacked learners are fit after the screening step.
screen_corP <- make_learner(Lrnr_screener_correlation, type = "threshold")
corP_pipeline <- make_learner(Pipeline, screen_corP, lrnrs)

# Train the pipeline on the task, then predict (no task is passed to
# predict()).
fit_corP <- corP_pipeline$train(task)
preds_corP_screener <- fit_corP$predict()