From 68e4cf6402c9fa4cc54de0bf7ef3887309d10104 Mon Sep 17 00:00:00 2001
From: Cesaire Joris Kuete Fouodo <fouodo@imbs.uni-luebeck.de>
Date: Fri, 19 Jul 2024 11:31:37 +0200
Subject: [PATCH] TrainSrudy: 100% test coverage

---
 .covrignore                      |   1 -
 DESCRIPTION                      |   4 +-
 R/TrainStudy.R                   |  11 +-
 man/HashTable.Rd                 |  36 +++---
 man/TrainStudy.Rd                |  10 +-
 man/weightedMeanLearner.Rd       |   6 +-
 tests/testthat/test-TrainStudy.R | 204 ++++++++++++++++++++++++++++++-
 7 files changed, 238 insertions(+), 34 deletions(-)

diff --git a/.covrignore b/.covrignore
index ba0a9a3..fb90b09 100644
--- a/.covrignore
+++ b/.covrignore
@@ -12,5 +12,4 @@
 ./R/TrainData.R
 ./R/TrainLayer.R
 ./R/TrainMetaLayer.R
-./R/TrainStudy.R
 ./R/VarSel.R
diff --git a/DESCRIPTION b/DESCRIPTION
index 911e491..5ad0cf8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,7 +14,9 @@ Imports:
     digest
 Suggests: 
     testthat (>= 3.0.0),
-    UpSetR (>= 1.4.0)
+    UpSetR (>= 1.4.0),
+    caret,
+    ranger
 Config/testthat/edition: 3
 Depends: R (>= 3.6.0)
 Roxygen: list(markdown = TRUE)
diff --git a/R/TrainStudy.R b/R/TrainStudy.R
index 084f5e0..f739693 100644
--- a/R/TrainStudy.R
+++ b/R/TrainStudy.R
@@ -225,8 +225,8 @@ TrainStudy <- R6Class("TrainStudy",
                                           resampling_method,
                                           resampling_arg) {
                           # Test that the study contains ovelapping individuals
-                          if (!self$test_overlap()) {
-                            stop("This study does not contain overlapping individuals.")
+                          if (!self$testOverlap()) {
+                            stop("This study does not contain overlapping individuals.") #nocov
                           }
                           # 1) Train each layer
                           self$trainLayer(ind_subset = ind_subset,
@@ -421,8 +421,11 @@ TrainStudy <- R6Class("TrainStudy",
                         #'
                         #' @export
                         #'
-                        test_overlap = function () {
+                        testOverlap = function () {
                           layers = self$getKeyClass()
+                          if (!nrow(layers)) {
+                            stop ("No layer found in this study.")
+                          }
                           # This code accesses each layer (except TrainMetaLayer) level
                           # and get the individual IDs.
                           layers = layers[layers$class %in% "TrainLayer", ]
@@ -436,7 +439,7 @@ TrainStudy <- R6Class("TrainStudy",
                           if (sum(duplicated(ids_data[ , 1L])) > 5L) {
                             return(TRUE)
                           } else {
-                            return(FALSE)
+                            return(FALSE) # nocov
                           }
                         },
                         #' @description
diff --git a/man/HashTable.Rd b/man/HashTable.Rd
index 31048aa..ed8de82 100644
--- a/man/HashTable.Rd
+++ b/man/HashTable.Rd
@@ -10,11 +10,11 @@ Hashtable to contain object entities. Study and layers are extensions of this cl
 \subsection{Public methods}{
 \itemize{
 \item \href{#method-HashTable-new}{\code{HashTable$new()}}
+\item \href{#method-HashTable-print}{\code{HashTable$print()}}
 \item \href{#method-HashTable-add2HashTable}{\code{HashTable$add2HashTable()}}
 \item \href{#method-HashTable-getFromHashTable}{\code{HashTable$getFromHashTable()}}
 \item \href{#method-HashTable-getKeyClass}{\code{HashTable$getKeyClass()}}
 \item \href{#method-HashTable-removeFromHashTable}{\code{HashTable$removeFromHashTable()}}
-\item \href{#method-HashTable-print}{\code{HashTable$print()}}
 \item \href{#method-HashTable-getId}{\code{HashTable$getId()}}
 \item \href{#method-HashTable-getHashTable}{\code{HashTable$getHashTable()}}
 \item \href{#method-HashTable-checkClassExist}{\code{HashTable$checkClassExist()}}
@@ -39,6 +39,23 @@ ID of the hash table. It must be unique.}
 }
 }
 \if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-HashTable-print"></a>}}
+\if{latex}{\out{\hypertarget{method-HashTable-print}{}}}
+\subsection{Method \code{print()}}{
+Printer
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{HashTable$print(...)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(any) \cr}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-HashTable-add2HashTable"></a>}}
 \if{latex}{\out{\hypertarget{method-HashTable-add2HashTable}{}}}
 \subsection{Method \code{add2HashTable()}}{
@@ -111,23 +128,6 @@ Remove the object with the corresponding key from the hashtable.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-HashTable-print"></a>}}
-\if{latex}{\out{\hypertarget{method-HashTable-print}{}}}
-\subsection{Method \code{print()}}{
-Printer
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{HashTable$print(...)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{...}}{(any) \cr}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-HashTable-getId"></a>}}
 \if{latex}{\out{\hypertarget{method-HashTable-getId}{}}}
 \subsection{Method \code{getId()}}{
diff --git a/man/TrainStudy.Rd b/man/TrainStudy.Rd
index d2e9e96..e72456b 100644
--- a/man/TrainStudy.Rd
+++ b/man/TrainStudy.Rd
@@ -51,7 +51,7 @@ a new study.
 \item \href{#method-TrainStudy-getIndCol}{\code{TrainStudy$getIndCol()}}
 \item \href{#method-TrainStudy-getTarget}{\code{TrainStudy$getTarget()}}
 \item \href{#method-TrainStudy-increaseNbTrainedLayer}{\code{TrainStudy$increaseNbTrainedLayer()}}
-\item \href{#method-TrainStudy-test_overlap}{\code{TrainStudy$test_overlap()}}
+\item \href{#method-TrainStudy-testOverlap}{\code{TrainStudy$testOverlap()}}
 \item \href{#method-TrainStudy-upset}{\code{TrainStudy$upset()}}
 \item \href{#method-TrainStudy-summary}{\code{TrainStudy$summary()}}
 }
@@ -334,13 +334,13 @@ Increase the number of trained layer.
 
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-TrainStudy-test_overlap"></a>}}
-\if{latex}{\out{\hypertarget{method-TrainStudy-test_overlap}{}}}
-\subsection{Method \code{test_overlap()}}{
+\if{html}{\out{<a id="method-TrainStudy-testOverlap"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainStudy-testOverlap}{}}}
+\subsection{Method \code{testOverlap()}}{
 Test that individuals overlap over layers.
 At least five individuals must overlapped.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{TrainStudy$test_overlap()}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{TrainStudy$testOverlap()}\if{html}{\out{</div>}}
 }
 
 }
diff --git a/man/weightedMeanLearner.Rd b/man/weightedMeanLearner.Rd
index cd6fb77..fd2dbcd 100644
--- a/man/weightedMeanLearner.Rd
+++ b/man/weightedMeanLearner.Rd
@@ -20,9 +20,9 @@ A model object of class \code{weightedMeanLeaner}.
 Layer weights are estimated for each layer based on Brier score.
 }
 \examples{
-set.seed(20240624)
-x = data.frame(x1 = rnorm(50))
-y = sample(x = 0:1, size = 50, replace = TRUE)
+set.seed(20240624L)
+x = data.frame(x1 = rnorm(50L))
+y = sample(x = 0L:1L, size = 50L, replace = TRUE)
 my_model = weightedMeanLearner(x = x, y = y)
 
 }
diff --git a/tests/testthat/test-TrainStudy.R b/tests/testthat/test-TrainStudy.R
index 8849056..c961015 100644
--- a/tests/testthat/test-TrainStudy.R
+++ b/tests/testthat/test-TrainStudy.R
@@ -1,3 +1,203 @@
-test_that("multiplication works", {
-  expect_equal(2 * 2, 4)
+data("entities")
+train_study <- TrainStudy$new(id = "train_study",
+                              ind_col = "IDS",
+                              target = "disease")
+# See also train_study$summary()
+print(train_study)
+test_that("TrainStudy: until training", {
+  expect_no_error({
+    train_study <- TrainStudy$new(id = "train_study",
+                                  ind_col = "IDS",
+                                  target = "disease")
+    print(train_study)
+  })
+  expect_error({
+    train_study$train()
+  })
+  expect_error({
+    train_study$trainLayer()
+  })
+  expect_error({
+    train_study$createMetaTrainData()
+  })
+  expect_error({
+    train_study$varSelection()
+  })
+  expect_error({
+    train_study$test_overlap()
+  })
+
+  expect_error({
+    train_study$train(ind_subset = NULL,
+                      use_var_sel = TRUE,
+                      resampling_method = character(0L),
+                      resampling_arg = list())
+    train_study$train(ind_subset = NULL,
+                      use_var_sel = TRUE,
+                      resampling_method = character(0L),
+                      resampling_arg = list())
+  })
+
+  # Layers can be added successfully
+  expect_no_error({
+    tl_ge <- TrainLayer$new(id = "geneexpr", train_study = train_study)
+    tl_pr <- TrainLayer$new(id = "proteinexpr", train_study = train_study)
+    tl_me <- TrainLayer$new(id = "methylation", train_study = train_study)
+    # We also prepare the meta layer for the meta analysis.
+    tl_meta <- TrainMetaLayer$new(id = "meta_layer", train_study = train_study)
+  })
+
+  expect_error({
+    train_study$train()
+  })
+  expect_error({
+    train_study$test_overlap()
+  })
+
+  # TrainData can be added successfully
+  expect_no_error({
+    train_data_ge <- TrainData$new(id = "geneexpr",
+                                   train_layer = tl_ge,
+                                   data_frame = entities$training$geneexpr)
+    train_data_pr <- TrainData$new(id = "proteinexpr",
+                                   train_layer = tl_pr,
+                                   data_frame = entities$training$proteinexpr)
+    train_data_me <- TrainData$new(id = "methylation",
+                                   train_layer = tl_me,
+                                   data_frame = entities$training$methylation)
+  })
+
+  # Upset plot works
+  expect_no_error({
+    train_study$upset(order.by = "freq")
+  })
+
+  # Variable selection works
+  expect_no_error({
+    same_param_varsel <- ParamVarSel$new(id = "ParamVarSel",
+                                         param_list = list(num.trees = 1000L,
+                                                           mtry = 3L))
+    varsel_ge <- VarSel$new(id = "varsel_geneexpr",
+                            package = "Boruta",
+                            varsel_fct = "Boruta",
+                            param = same_param_varsel,
+                            train_layer = tl_ge)
+
+    varsel_pr <- VarSel$new(id = "varsel_geneexpr",
+                            package = "Boruta",
+                            varsel_fct = "Boruta",
+                            param = same_param_varsel,
+                            train_layer = tl_pr)
+
+    varsel_me <- VarSel$new(id = "varsel_geneexpr",
+                            package = "Boruta",
+                            varsel_fct = "Boruta",
+                            param = same_param_varsel,
+                            train_layer = tl_me)
+
+    var_sel_res <- train_study$varSelection()
+    print(var_sel_res)
+  })
+
+  # Lrner parameters
+  expect_no_error({
+    same_param <- ParamLrner$new(id = "ParamRanger",
+                                 param_list = list(probability = TRUE,
+                                                   mtry = 2L),
+                                 hyperparam_list = list(num.trees = 1000L))
+  })
+
+  # Lrner
+  expect_no_error({
+    lrner_ge <- Lrner$new(id = "ranger",
+                          package = "ranger",
+                          lrn_fct = "ranger",
+                          param = same_param,
+                          train_layer = tl_ge)
+    lrner_pr <- Lrner$new(id = "ranger",
+                          package = "ranger",
+                          lrn_fct = "ranger",
+                          param = same_param,
+                          train_layer = tl_pr)
+    lrner_me <- Lrner$new(id = "ranger",
+                          package = "ranger",
+                          lrn_fct = "ranger",
+                          param = same_param,
+                          train_layer = tl_me)
+    lrner_meta <- Lrner$new(id = "weighted",
+                            lrn_fct = "weightedMeanLearner",
+                            param = ParamLrner$new(id = "ParamWeighted",
+                                                   param_list = list(),
+                                                   hyperparam_list = list()),
+                            train_layer = tl_meta)
+  })
+
+  # Training
+  expect_no_error({
+    same_param <- ParamLrner$new(id = "ParamRanger",
+                                 param_list = list(probability = TRUE,
+                                                   mtry = 2L),
+                                 hyperparam_list = list(num.trees = 1000L))
+
+    disease <- train_study$getTargetValues()$disease
+    trained_study <- train_study$train(resampling_method = "caret::createFolds",
+                                       resampling_arg = list(y = disease,
+                                                             k = 2L),
+                                       use_var_sel = TRUE)
+    print(trained_study)
+  })
+
+  expect_error({
+    same_param <- ParamLrner$new(id = "ParamRanger",
+                                 param_list = list(probability = TRUE,
+                                                   mtry = 2L),
+                                 hyperparam_list = list(num.trees = 1000L))
+
+    disease <- train_study$getTargetValues()$disease
+    trained_study <- train_study$train(resampling_method = "stats::rnorm",
+                                       resampling_arg = list(n = 10),
+                                       use_var_sel = TRUE)
+  })
+
+
+  expect_no_error({
+    trained_study$getId()
+    trained_study$getIndCol()
+    trained_study$getTarget()
+    trained_study$getTrainMetaLayer()
+    trained_study$getIndIDs()
+    trained_study$getTargetValues()
+  })
+
+  expect_no_error({
+    print(train_study$summary())
+  })
+
+  # Prediction
+  expect_no_error({
+    new_study <- NewStudy$new(id = "new_study", ind_col = "IDS")
+  })
+
+  expect_no_error({
+    nl_ge <- NewLayer$new(id = "geneexpr", new_study = new_study)
+    nl_pr <- NewLayer$new(id = "proteinexpr", new_study = new_study)
+    nl_me <- NewLayer$new(id = "methylation", new_study = new_study)
+  })
+
+  expect_no_error({
+    new_data_ge <- NewData$new(id = "geneexpr",
+                               new_layer = nl_ge,
+                               data_frame = entities$testing$geneexpr)
+    new_data_pr <- NewData$new(id = "proteinexpr",
+                               new_layer = nl_pr,
+                               data_frame = entities$testing$proteinexpr)
+    new_data_me <- NewData$new(id = "methylation",
+                               new_layer = nl_me,
+                               data_frame = entities$testing$methylation)
+  })
+
+  expect_no_error({
+    new_predictions <- train_study$predict(new_study = new_study)
+    print(new_predictions)
+  })
 })