This learner provides covariate screening procedures by running a test of
correlation (Pearson by default) with the cor.test
function, and then selecting either (1) the top-ranked variables (default), or (2)
the variables with a p-value lower than some pre-specified threshold.
R6Class
object.
Learner object with methods for training and prediction. See
Lrnr_base
for documentation on learners.
method = 'pearson'
Correlation coefficient used for test.
type = c('rank', 'threshold')
Screen covariates by (1) rank
(default), which chooses the top num_screen
most correlated covariates;
or (2) threshold, which chooses covariates with a correlation-test-based
p-value lower than the threshold, subject to a minimum of min_screen
covariates.
num_screen = 5
Number of covariates to select.
pvalue_threshold = 0.1
Maximum p-value threshold. Covariates
with a p-value lower than this threshold will be retained, and at least the
min_screen
most significant covariates will be selected.
min_screen = 2
Minimum number of covariates to select. Used in the pvalue_threshold (type = 'threshold') screening procedure.
Other Learners:
Custom_chain
,
Lrnr_HarmonicReg
,
Lrnr_arima
,
Lrnr_bartMachine
,
Lrnr_base
,
Lrnr_bayesglm
,
Lrnr_bilstm
,
Lrnr_caret
,
Lrnr_cv_selector
,
Lrnr_cv
,
Lrnr_dbarts
,
Lrnr_define_interactions
,
Lrnr_density_discretize
,
Lrnr_density_hse
,
Lrnr_density_semiparametric
,
Lrnr_earth
,
Lrnr_expSmooth
,
Lrnr_gam
,
Lrnr_ga
,
Lrnr_gbm
,
Lrnr_glm_fast
,
Lrnr_glm_semiparametric
,
Lrnr_glmnet
,
Lrnr_glmtree
,
Lrnr_glm
,
Lrnr_grfcate
,
Lrnr_grf
,
Lrnr_gru_keras
,
Lrnr_gts
,
Lrnr_h2o_grid
,
Lrnr_hal9001
,
Lrnr_haldensify
,
Lrnr_hts
,
Lrnr_independent_binomial
,
Lrnr_lightgbm
,
Lrnr_lstm_keras
,
Lrnr_mean
,
Lrnr_multiple_ts
,
Lrnr_multivariate
,
Lrnr_nnet
,
Lrnr_nnls
,
Lrnr_optim
,
Lrnr_pca
,
Lrnr_pkg_SuperLearner
,
Lrnr_polspline
,
Lrnr_pooled_hazards
,
Lrnr_randomForest
,
Lrnr_ranger
,
Lrnr_revere_task
,
Lrnr_rpart
,
Lrnr_rugarch
,
Lrnr_screener_augment
,
Lrnr_screener_coefs
,
Lrnr_screener_importance
,
Lrnr_sl
,
Lrnr_solnp_density
,
Lrnr_solnp
,
Lrnr_stratified
,
Lrnr_subset_covariates
,
Lrnr_svm
,
Lrnr_tsDyn
,
Lrnr_ts_weights
,
Lrnr_xgboost
,
Pipeline
,
Stack
,
define_h2o_X()
,
undocumented_learner
library(data.table)
# Load the example data and convert it to a data.table by reference.
data(cpp_imputed)
setDT(cpp_imputed)
# Derive a categorical parity covariate: values below 4 are kept as-is and
# everything from 4 upwards is pooled into a single "4" level.
cpp_imputed[, parity_cat := factor(pmin(parity, 4))]
#> subjid agedays wtkg htcm lencm bmi waz haz whz
#> 1: 1 1 4.621 55 55 15.27603 2.3800000 2.6100000 0.19
#> 2: 1 366 14.500 79 79 23.23346 3.8400000 1.3500000 4.02
#> 3: 2 1 3.345 51 51 12.86044 0.0600000 0.5000000 -0.64
#> 4: 2 366 8.400 73 73 15.76281 -1.2700000 -1.1700000 -0.96
#> 5: 2 2558 19.100 114 0 14.69683 -1.3727316 -1.4664795 0.00
#> ---
#> 1437: 500 1 3.629 52 52 13.42086 0.8900000 1.4400000 -0.49
#> 1438: 500 366 10.900 77 77 18.38421 1.5700000 1.1500000 1.47
#> 1439: 500 2558 26.300 126 0 16.56589 0.9932827 0.9455342 0.00
#> 1440: 501 1 3.232 46 46 15.27410 -0.1800000 -2.1400000 2.27
#> 1441: 501 366 9.700 77 77 16.36026 0.0400000 0.5100000 -0.24
#> baz siteid sexn sex feedingn feeding gagebrth birthwt birthlen
#> 1: 1.35 5 1 Male 90 Unknown 287 4621 55
#> 2: 3.89 5 1 Male 90 Unknown 287 4621 55
#> 3: -0.43 5 1 Male 90 Unknown 280 3345 51
#> 4: -0.80 5 1 Male 90 Unknown 280 3345 51
#> 5: 0.00 5 1 Male 90 Unknown 280 3345 51
#> ---
#> 1437: 0.08 5 2 Female 90 Unknown 287 3629 52
#> 1438: 1.30 5 2 Female 90 Unknown 287 3629 52
#> 1439: 0.00 5 2 Female 90 Unknown 287 3629 52
#> 1440: 1.35 5 1 Male 90 Unknown 287 3232 46
#> 1441: -0.33 5 1 Male 90 Unknown 287 3232 46
#> apgar1 apgar5 mage mracen mrace mmaritn mmarit meducyrs sesn
#> 1: 8 9 21 5 White 1 Married 12 50
#> 2: 8 9 21 5 White 1 Married 12 50
#> 3: 8 9 15 5 White 1 Married 0 0
#> 4: 8 9 15 5 White 1 Married 0 0
#> 5: 8 9 15 5 White 1 Married 0 0
#> ---
#> 1437: 6 9 20 5 White 1 Married 11 38
#> 1438: 6 9 20 5 White 1 Married 11 38
#> 1439: 6 9 20 5 White 1 Married 11 38
#> 1440: 5 9 19 5 White 1 Married 9 50
#> 1441: 5 9 19 5 White 1 Married 9 50
#> ses parity gravida smoked mcignum comprisk parity_cat
#> 1: Middle 1 1 0 0 none 1
#> 2: Middle 1 1 0 0 none 1
#> 3: . 0 0 1 35 none 0
#> 4: . 0 0 1 35 none 0
#> 5: . 0 0 1 35 none 0
#> ---
#> 1437: Lower-middle 0 0 1 10 none 0
#> 1438: Lower-middle 0 0 1 10 none 0
#> 1439: Lower-middle 0 0 1 10 none 0
#> 1440: Middle 1 1 0 0 none 1
#> 1441: Middle 1 1 0 0 none 1
# Candidate covariates to be screened, and the outcome they are tested against.
covars <- c(
  "apgar1", "apgar5", "parity_cat", "gagebrth", "mage", "meducyrs",
  "sexn"
)
outcome <- "haz"

# create sl3 task; the data are copied so the task does not share
# cpp_imputed by reference
task <- sl3_Task$new(
  data.table::copy(cpp_imputed),
  covariates = covars,
  outcome = outcome
)

# Learners that are fit on the covariates retained by the screener.
lrnr_glm <- make_learner(Lrnr_glm)
lrnr_mean <- make_learner(Lrnr_mean)
lrnrs <- make_learner(Stack, lrnr_glm, lrnr_mean)

# Correlation screener in "threshold" mode: keeps covariates whose
# correlation-test p-value is below pvalue_threshold (default 0.1), and
# always at least min_screen (default 2) covariates.
screen_corP <- make_learner(Lrnr_screener_correlation, type = "threshold")

# Chain the screener into the stack, train on the task, and predict.
corP_pipeline <- make_learner(Pipeline, screen_corP, lrnrs)
fit_corP <- corP_pipeline$train(task)
preds_corP_screener <- fit_corP$predict()