This learner provides covariate screening procedures by running a test of correlation (Pearson by default) with the cor.test function, and then selecting either (1) the top-ranked variables (default), or (2) the variables with a p-value lower than some pre-specified threshold.

Format

R6Class object.

Value

Learner object with methods for training and prediction. See Lrnr_base for documentation on learners.

Parameters

method = 'pearson'

Correlation coefficient used for test.

type = c('rank', 'threshold')

Screen covariates by (1) rank (default), which chooses the top num_screen correlated covariates; or (2) threshold, which chooses covariates with a correlation-test-based p-value lower than the threshold, with a minimum of min_screen covariates.

num_screen = 5

Number of covariates to select.

pvalue_threshold = 0.1

Maximum p-value threshold. Covariates with a p-value lower than this threshold will be retained, and at least the min_screen most significant covariates will be selected.

min_screen = 2

Minimum number of covariates to select. Used in pvalue_threshold screening procedure.

Examples

library(data.table)

# Load the example data set and convert it to a data.table by reference.
data(cpp_imputed)
setDT(cpp_imputed)
# Add a categorical version of parity in which every value of 4 or more is
# collapsed into the single level 4.
cpp_imputed[, parity_cat := factor(pmin(parity, 4))]
#>       subjid agedays   wtkg htcm lencm      bmi        waz        haz   whz
#>    1:      1       1  4.621   55    55 15.27603  2.3800000  2.6100000  0.19
#>    2:      1     366 14.500   79    79 23.23346  3.8400000  1.3500000  4.02
#>    3:      2       1  3.345   51    51 12.86044  0.0600000  0.5000000 -0.64
#>    4:      2     366  8.400   73    73 15.76281 -1.2700000 -1.1700000 -0.96
#>    5:      2    2558 19.100  114     0 14.69683 -1.3727316 -1.4664795  0.00
#>   ---                                                                      
#> 1437:    500       1  3.629   52    52 13.42086  0.8900000  1.4400000 -0.49
#> 1438:    500     366 10.900   77    77 18.38421  1.5700000  1.1500000  1.47
#> 1439:    500    2558 26.300  126     0 16.56589  0.9932827  0.9455342  0.00
#> 1440:    501       1  3.232   46    46 15.27410 -0.1800000 -2.1400000  2.27
#> 1441:    501     366  9.700   77    77 16.36026  0.0400000  0.5100000 -0.24
#>         baz siteid sexn    sex feedingn feeding gagebrth birthwt birthlen
#>    1:  1.35      5    1   Male       90 Unknown      287    4621       55
#>    2:  3.89      5    1   Male       90 Unknown      287    4621       55
#>    3: -0.43      5    1   Male       90 Unknown      280    3345       51
#>    4: -0.80      5    1   Male       90 Unknown      280    3345       51
#>    5:  0.00      5    1   Male       90 Unknown      280    3345       51
#>   ---                                                                    
#> 1437:  0.08      5    2 Female       90 Unknown      287    3629       52
#> 1438:  1.30      5    2 Female       90 Unknown      287    3629       52
#> 1439:  0.00      5    2 Female       90 Unknown      287    3629       52
#> 1440:  1.35      5    1   Male       90 Unknown      287    3232       46
#> 1441: -0.33      5    1   Male       90 Unknown      287    3232       46
#>       apgar1 apgar5 mage mracen mrace mmaritn  mmarit meducyrs sesn
#>    1:      8      9   21      5 White       1 Married       12   50
#>    2:      8      9   21      5 White       1 Married       12   50
#>    3:      8      9   15      5 White       1 Married        0    0
#>    4:      8      9   15      5 White       1 Married        0    0
#>    5:      8      9   15      5 White       1 Married        0    0
#>   ---                                                              
#> 1437:      6      9   20      5 White       1 Married       11   38
#> 1438:      6      9   20      5 White       1 Married       11   38
#> 1439:      6      9   20      5 White       1 Married       11   38
#> 1440:      5      9   19      5 White       1 Married        9   50
#> 1441:      5      9   19      5 White       1 Married        9   50
#>                ses parity gravida smoked mcignum comprisk parity_cat
#>    1:       Middle      1       1      0       0     none          1
#>    2:       Middle      1       1      0       0     none          1
#>    3:            .      0       0      1      35     none          0
#>    4:            .      0       0      1      35     none          0
#>    5:            .      0       0      1      35     none          0
#>   ---                                                               
#> 1437: Lower-middle      0       0      1      10     none          0
#> 1438: Lower-middle      0       0      1      10     none          0
#> 1439: Lower-middle      0       0      1      10     none          0
#> 1440:       Middle      1       1      0       0     none          1
#> 1441:       Middle      1       1      0       0     none          1
# covariates offered to the screener, and the outcome to predict
covars <- c(
  "apgar1", "apgar5", "parity_cat", "gagebrth", "mage", "meducyrs",
  "sexn"
)
outcome <- "haz"

# create sl3 task from a copy of the example data
task <- sl3_Task$new(data.table::copy(cpp_imputed),
  covariates = covars,
  outcome = outcome
)

# learners to fit on the screened covariates, combined into one stack
lrnr_glm <- make_learner(Lrnr_glm)
lrnr_mean <- make_learner(Lrnr_mean)
lrnrs <- make_learner(Stack, lrnr_glm, lrnr_mean)

# screen covariates by correlation-test p-value (type = "threshold"), then
# chain the screener and the stack in a pipeline
screen_corP <- make_learner(Lrnr_screener_correlation, type = "threshold")
corP_pipeline <- make_learner(Pipeline, screen_corP, lrnrs)

# train the pipeline on the task and predict from the resulting fit
fit_corP <- corP_pipeline$train(task)
preds_corP_screener <- fit_corP$predict()