From 68e4cf6402c9fa4cc54de0bf7ef3887309d10104 Mon Sep 17 00:00:00 2001 From: Cesaire Joris Kuete Fouodo Date: Fri, 19 Jul 2024 11:31:37 +0200 Subject: [PATCH] TrainSrudy: 100% test coverage --- .covrignore | 1 - DESCRIPTION | 4 +- R/TrainStudy.R | 11 +- man/HashTable.Rd | 36 +++--- man/TrainStudy.Rd | 10 +- man/weightedMeanLearner.Rd | 6 +- tests/testthat/test-TrainStudy.R | 204 ++++++++++++++++++++++++++++++- 7 files changed, 238 insertions(+), 34 deletions(-) diff --git a/.covrignore b/.covrignore index ba0a9a3..fb90b09 100644 --- a/.covrignore +++ b/.covrignore @@ -12,5 +12,4 @@ ./R/TrainData.R ./R/TrainLayer.R ./R/TrainMetaLayer.R -./R/TrainStudy.R ./R/VarSel.R diff --git a/DESCRIPTION b/DESCRIPTION index 911e491..5ad0cf8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,9 @@ Imports: digest Suggests: testthat (>= 3.0.0), - UpSetR (>= 1.4.0) + UpSetR (>= 1.4.0), + caret, + ranger Config/testthat/edition: 3 Depends: R (>= 3.6.0) Roxygen: list(markdown = TRUE) diff --git a/R/TrainStudy.R b/R/TrainStudy.R index 084f5e0..f739693 100644 --- a/R/TrainStudy.R +++ b/R/TrainStudy.R @@ -225,8 +225,8 @@ TrainStudy <- R6Class("TrainStudy", resampling_method, resampling_arg) { # Test that the study contains ovelapping individuals - if (!self$test_overlap()) { - stop("This study does not contain overlapping individuals.") + if (!self$testOverlap()) { + stop("This study does not contain overlapping individuals.") #nocov } # 1) Train each layer self$trainLayer(ind_subset = ind_subset, @@ -421,8 +421,11 @@ TrainStudy <- R6Class("TrainStudy", #' #' @export #' - test_overlap = function () { + testOverlap = function () { layers = self$getKeyClass() + if (!nrow(layers)) { + stop ("No layer found in this study.") + } # This code accesses each layer (except TrainMetaLayer) level # and get the individual IDs. layers = layers[layers$class %in% "TrainLayer", ] @@ -436,7 +439,7 @@ TrainStudy <- R6Class("TrainStudy", if (sum(duplicated(ids_data[ , 1L])) > 5L) { return(TRUE) } else { - return(FALSE) + return(FALSE) # nocov } }, #' @description diff --git a/man/HashTable.Rd b/man/HashTable.Rd index 31048aa..ed8de82 100644 --- a/man/HashTable.Rd +++ b/man/HashTable.Rd @@ -10,11 +10,11 @@ Hashtable to contain object entities. Study and layers are extensions of this cl \subsection{Public methods}{ \itemize{ \item \href{#method-HashTable-new}{\code{HashTable$new()}} +\item \href{#method-HashTable-print}{\code{HashTable$print()}} \item \href{#method-HashTable-add2HashTable}{\code{HashTable$add2HashTable()}} \item \href{#method-HashTable-getFromHashTable}{\code{HashTable$getFromHashTable()}} \item \href{#method-HashTable-getKeyClass}{\code{HashTable$getKeyClass()}} \item \href{#method-HashTable-removeFromHashTable}{\code{HashTable$removeFromHashTable()}} -\item \href{#method-HashTable-print}{\code{HashTable$print()}} \item \href{#method-HashTable-getId}{\code{HashTable$getId()}} \item \href{#method-HashTable-getHashTable}{\code{HashTable$getHashTable()}} \item \href{#method-HashTable-checkClassExist}{\code{HashTable$checkClassExist()}} @@ -39,6 +39,23 @@ ID of the hash table. It must be unique.} } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-HashTable-print}{}}} +\subsection{Method \code{print()}}{ +Printer +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{HashTable$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(any) \cr} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-HashTable-add2HashTable}{}}} \subsection{Method \code{add2HashTable()}}{ @@ -111,23 +128,6 @@ Remove the object with the corresponding key from the hashtable. } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-HashTable-print}{}}} -\subsection{Method \code{print()}}{ -Printer -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{HashTable$print(...)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{...}}{(any) \cr} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-HashTable-getId}{}}} \subsection{Method \code{getId()}}{ diff --git a/man/TrainStudy.Rd b/man/TrainStudy.Rd index d2e9e96..e72456b 100644 --- a/man/TrainStudy.Rd +++ b/man/TrainStudy.Rd @@ -51,7 +51,7 @@ a new study. \item \href{#method-TrainStudy-getIndCol}{\code{TrainStudy$getIndCol()}} \item \href{#method-TrainStudy-getTarget}{\code{TrainStudy$getTarget()}} \item \href{#method-TrainStudy-increaseNbTrainedLayer}{\code{TrainStudy$increaseNbTrainedLayer()}} -\item \href{#method-TrainStudy-test_overlap}{\code{TrainStudy$test_overlap()}} +\item \href{#method-TrainStudy-testOverlap}{\code{TrainStudy$testOverlap()}} \item \href{#method-TrainStudy-upset}{\code{TrainStudy$upset()}} \item \href{#method-TrainStudy-summary}{\code{TrainStudy$summary()}} } @@ -334,13 +334,13 @@ Increase the number of trained layer. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TrainStudy-test_overlap}{}}} -\subsection{Method \code{test_overlap()}}{ +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TrainStudy-testOverlap}{}}} +\subsection{Method \code{testOverlap()}}{ Test that individuals overlap over layers. At least five individuals must overlapped. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TrainStudy$test_overlap()}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{TrainStudy$testOverlap()}\if{html}{\out{
}} } } diff --git a/man/weightedMeanLearner.Rd b/man/weightedMeanLearner.Rd index cd6fb77..fd2dbcd 100644 --- a/man/weightedMeanLearner.Rd +++ b/man/weightedMeanLearner.Rd @@ -20,9 +20,9 @@ A model object of class \code{weightedMeanLeaner}. Layer weights are estimated for each layer based on Brier score. } \examples{ -set.seed(20240624) -x = data.frame(x1 = rnorm(50)) -y = sample(x = 0:1, size = 50, replace = TRUE) +set.seed(20240624L) +x = data.frame(x1 = rnorm(50L)) +y = sample(x = 0L:1L, size = 50L, replace = TRUE) my_model = weightedMeanLearner(x = x, y = y) } diff --git a/tests/testthat/test-TrainStudy.R b/tests/testthat/test-TrainStudy.R index 8849056..c961015 100644 --- a/tests/testthat/test-TrainStudy.R +++ b/tests/testthat/test-TrainStudy.R @@ -1,3 +1,203 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) +data("entities") +train_study <- TrainStudy$new(id = "train_study", + ind_col = "IDS", + target = "disease") +# See also train_study$summary() +print(train_study) +test_that("TrainStudy: until training", { + expect_no_error({ + train_study <- TrainStudy$new(id = "train_study", + ind_col = "IDS", + target = "disease") + print(train_study) + }) + expect_error({ + train_study$train() + }) + expect_error({ + train_study$trainLayer() + }) + expect_error({ + train_study$createMetaTrainData() + }) + expect_error({ + train_study$varSelection() + }) + expect_error({ + train_study$test_overlap() + }) + + expect_error({ + train_study$train(ind_subset = NULL, + use_var_sel = TRUE, + resampling_method = character(0L), + resampling_arg = list()) + train_study$train(ind_subset = NULL, + use_var_sel = TRUE, + resampling_method = character(0L), + resampling_arg = list()) + }) + + # Layers can be added successfully + expect_no_error({ + tl_ge <- TrainLayer$new(id = "geneexpr", train_study = train_study) + tl_pr <- TrainLayer$new(id = "proteinexpr", train_study = train_study) + tl_me <- TrainLayer$new(id = "methylation", train_study = train_study) + # We also prepare the meta layer for the meta analysis. + tl_meta <- TrainMetaLayer$new(id = "meta_layer", train_study = train_study) + }) + + expect_error({ + train_study$train() + }) + expect_error({ + train_study$test_overlap() + }) + + # TrainData can be added successfully + expect_no_error({ + train_data_ge <- TrainData$new(id = "geneexpr", + train_layer = tl_ge, + data_frame = entities$training$geneexpr) + train_data_pr <- TrainData$new(id = "proteinexpr", + train_layer = tl_pr, + data_frame = entities$training$proteinexpr) + train_data_me <- TrainData$new(id = "methylation", + train_layer = tl_me, + data_frame = entities$training$methylation) + }) + + # Upset plot works + expect_no_error({ + train_study$upset(order.by = "freq") + }) + + # Variable selection works + expect_no_error({ + same_param_varsel <- ParamVarSel$new(id = "ParamVarSel", + param_list = list(num.trees = 1000L, + mtry = 3L)) + varsel_ge <- VarSel$new(id = "varsel_geneexpr", + package = "Boruta", + varsel_fct = "Boruta", + param = same_param_varsel, + train_layer = tl_ge) + + varsel_pr <- VarSel$new(id = "varsel_geneexpr", + package = "Boruta", + varsel_fct = "Boruta", + param = same_param_varsel, + train_layer = tl_pr) + + varsel_me <- VarSel$new(id = "varsel_geneexpr", + package = "Boruta", + varsel_fct = "Boruta", + param = same_param_varsel, + train_layer = tl_me) + + var_sel_res <- train_study$varSelection() + print(var_sel_res) + }) + + # Lrner parameters + expect_no_error({ + same_param <- ParamLrner$new(id = "ParamRanger", + param_list = list(probability = TRUE, + mtry = 2L), + hyperparam_list = list(num.trees = 1000L)) + }) + + # Lrner + expect_no_error({ + lrner_ge <- Lrner$new(id = "ranger", + package = "ranger", + lrn_fct = "ranger", + param = same_param, + train_layer = tl_ge) + lrner_pr <- Lrner$new(id = "ranger", + package = "ranger", + lrn_fct = "ranger", + param = same_param, + train_layer = tl_pr) + lrner_me <- Lrner$new(id = "ranger", + package = "ranger", + lrn_fct = "ranger", + param = same_param, + train_layer = tl_me) + lrner_meta <- Lrner$new(id = "weighted", + lrn_fct = "weightedMeanLearner", + param = ParamLrner$new(id = "ParamWeighted", + param_list = list(), + hyperparam_list = list()), + train_layer = tl_meta) + }) + + # Training + expect_no_error({ + same_param <- ParamLrner$new(id = "ParamRanger", + param_list = list(probability = TRUE, + mtry = 2L), + hyperparam_list = list(num.trees = 1000L)) + + disease <- train_study$getTargetValues()$disease + trained_study <- train_study$train(resampling_method = "caret::createFolds", + resampling_arg = list(y = disease, + k = 2L), + use_var_sel = TRUE) + print(trained_study) + }) + + expect_error({ + same_param <- ParamLrner$new(id = "ParamRanger", + param_list = list(probability = TRUE, + mtry = 2L), + hyperparam_list = list(num.trees = 1000L)) + + disease <- train_study$getTargetValues()$disease + trained_study <- train_study$train(resampling_method = "stats::rnorm", + resampling_arg = list(n = 10), + use_var_sel = TRUE) + }) + + + expect_no_error({ + trained_study$getId() + trained_study$getIndCol() + trained_study$getTarget() + trained_study$getTrainMetaLayer() + trained_study$getIndIDs() + trained_study$getTargetValues() + }) + + expect_no_error({ + print(train_study$summary()) + }) + + # Prediction + expect_no_error({ + new_study <- NewStudy$new(id = "new_study", ind_col = "IDS") + }) + + expect_no_error({ + nl_ge <- NewLayer$new(id = "geneexpr", new_study = new_study) + nl_pr <- NewLayer$new(id = "proteinexpr", new_study = new_study) + nl_me <- NewLayer$new(id = "methylation", new_study = new_study) + }) + + expect_no_error({ + new_data_ge <- NewData$new(id = "geneexpr", + new_layer = nl_ge, + data_frame = entities$testing$geneexpr) + new_data_pr <- NewData$new(id = "proteinexpr", + new_layer = nl_pr, + data_frame = entities$testing$proteinexpr) + new_data_me <- NewData$new(id = "methylation", + new_layer = nl_me, + data_frame = entities$testing$methylation) + }) + + expect_no_error({ + new_predictions <- train_study$predict(new_study = new_study) + print(new_predictions) + }) })