diff --git a/.covrignore b/.covrignore
index ba0a9a3..fb90b09 100644
--- a/.covrignore
+++ b/.covrignore
@@ -12,5 +12,4 @@
./R/TrainData.R
./R/TrainLayer.R
./R/TrainMetaLayer.R
-./R/TrainStudy.R
./R/VarSel.R
diff --git a/DESCRIPTION b/DESCRIPTION
index 911e491..5ad0cf8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,7 +14,9 @@ Imports:
digest
Suggests:
testthat (>= 3.0.0),
- UpSetR (>= 1.4.0)
+ UpSetR (>= 1.4.0),
+ caret,
+ ranger
Config/testthat/edition: 3
Depends: R (>= 3.6.0)
Roxygen: list(markdown = TRUE)
diff --git a/R/TrainStudy.R b/R/TrainStudy.R
index 084f5e0..f739693 100644
--- a/R/TrainStudy.R
+++ b/R/TrainStudy.R
@@ -225,8 +225,8 @@ TrainStudy <- R6Class("TrainStudy",
resampling_method,
resampling_arg) {
# Test that the study contains ovelapping individuals
- if (!self$test_overlap()) {
- stop("This study does not contain overlapping individuals.")
+ if (!self$testOverlap()) {
+ stop("This study does not contain overlapping individuals.") #nocov
}
# 1) Train each layer
self$trainLayer(ind_subset = ind_subset,
@@ -421,8 +421,11 @@ TrainStudy <- R6Class("TrainStudy",
#'
#' @export
#'
- test_overlap = function () {
+ testOverlap = function () {
layers = self$getKeyClass()
+ if (!nrow(layers)) {
+ stop ("No layer found in this study.")
+ }
# This code accesses each layer (except TrainMetaLayer) level
# and get the individual IDs.
layers = layers[layers$class %in% "TrainLayer", ]
@@ -436,7 +439,7 @@ TrainStudy <- R6Class("TrainStudy",
if (sum(duplicated(ids_data[ , 1L])) > 5L) {
return(TRUE)
} else {
- return(FALSE)
+ return(FALSE) # nocov
}
},
#' @description
diff --git a/man/HashTable.Rd b/man/HashTable.Rd
index 31048aa..ed8de82 100644
--- a/man/HashTable.Rd
+++ b/man/HashTable.Rd
@@ -10,11 +10,11 @@ Hashtable to contain object entities. Study and layers are extensions of this cl
\subsection{Public methods}{
\itemize{
\item \href{#method-HashTable-new}{\code{HashTable$new()}}
+\item \href{#method-HashTable-print}{\code{HashTable$print()}}
\item \href{#method-HashTable-add2HashTable}{\code{HashTable$add2HashTable()}}
\item \href{#method-HashTable-getFromHashTable}{\code{HashTable$getFromHashTable()}}
\item \href{#method-HashTable-getKeyClass}{\code{HashTable$getKeyClass()}}
\item \href{#method-HashTable-removeFromHashTable}{\code{HashTable$removeFromHashTable()}}
-\item \href{#method-HashTable-print}{\code{HashTable$print()}}
\item \href{#method-HashTable-getId}{\code{HashTable$getId()}}
\item \href{#method-HashTable-getHashTable}{\code{HashTable$getHashTable()}}
\item \href{#method-HashTable-checkClassExist}{\code{HashTable$checkClassExist()}}
@@ -39,6 +39,23 @@ ID of the hash table. It must be unique.}
}
}
\if{html}{\out{
}}
+\if{html}{\out{}}
+\if{latex}{\out{\hypertarget{method-HashTable-print}{}}}
+\subsection{Method \code{print()}}{
+Printer
+\subsection{Usage}{
+\if{html}{\out{}}\preformatted{HashTable$print(...)}\if{html}{\out{
}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{}}
+\describe{
+\item{\code{...}}{(any) \cr}
+}
+\if{html}{\out{
}}
+}
+}
+\if{html}{\out{
}}
\if{html}{\out{}}
\if{latex}{\out{\hypertarget{method-HashTable-add2HashTable}{}}}
\subsection{Method \code{add2HashTable()}}{
@@ -111,23 +128,6 @@ Remove the object with the corresponding key from the hashtable.
}
}
\if{html}{\out{
}}
-\if{html}{\out{}}
-\if{latex}{\out{\hypertarget{method-HashTable-print}{}}}
-\subsection{Method \code{print()}}{
-Printer
-\subsection{Usage}{
-\if{html}{\out{}}\preformatted{HashTable$print(...)}\if{html}{\out{
}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{}}
-\describe{
-\item{\code{...}}{(any) \cr}
-}
-\if{html}{\out{
}}
-}
-}
-\if{html}{\out{
}}
\if{html}{\out{}}
\if{latex}{\out{\hypertarget{method-HashTable-getId}{}}}
\subsection{Method \code{getId()}}{
diff --git a/man/TrainStudy.Rd b/man/TrainStudy.Rd
index d2e9e96..e72456b 100644
--- a/man/TrainStudy.Rd
+++ b/man/TrainStudy.Rd
@@ -51,7 +51,7 @@ a new study.
\item \href{#method-TrainStudy-getIndCol}{\code{TrainStudy$getIndCol()}}
\item \href{#method-TrainStudy-getTarget}{\code{TrainStudy$getTarget()}}
\item \href{#method-TrainStudy-increaseNbTrainedLayer}{\code{TrainStudy$increaseNbTrainedLayer()}}
-\item \href{#method-TrainStudy-test_overlap}{\code{TrainStudy$test_overlap()}}
+\item \href{#method-TrainStudy-testOverlap}{\code{TrainStudy$testOverlap()}}
\item \href{#method-TrainStudy-upset}{\code{TrainStudy$upset()}}
\item \href{#method-TrainStudy-summary}{\code{TrainStudy$summary()}}
}
@@ -334,13 +334,13 @@ Increase the number of trained layer.
}
\if{html}{\out{
}}
-\if{html}{\out{}}
-\if{latex}{\out{\hypertarget{method-TrainStudy-test_overlap}{}}}
-\subsection{Method \code{test_overlap()}}{
+\if{html}{\out{}}
+\if{latex}{\out{\hypertarget{method-TrainStudy-testOverlap}{}}}
+\subsection{Method \code{testOverlap()}}{
Test that individuals overlap over layers.
At least five individuals must overlapped.
\subsection{Usage}{
-\if{html}{\out{}}\preformatted{TrainStudy$test_overlap()}\if{html}{\out{
}}
+\if{html}{\out{}}\preformatted{TrainStudy$testOverlap()}\if{html}{\out{
}}
}
}
diff --git a/man/weightedMeanLearner.Rd b/man/weightedMeanLearner.Rd
index cd6fb77..fd2dbcd 100644
--- a/man/weightedMeanLearner.Rd
+++ b/man/weightedMeanLearner.Rd
@@ -20,9 +20,9 @@ A model object of class \code{weightedMeanLeaner}.
Layer weights are estimated for each layer based on Brier score.
}
\examples{
-set.seed(20240624)
-x = data.frame(x1 = rnorm(50))
-y = sample(x = 0:1, size = 50, replace = TRUE)
+set.seed(20240624L)
+x = data.frame(x1 = rnorm(50L))
+y = sample(x = 0L:1L, size = 50L, replace = TRUE)
my_model = weightedMeanLearner(x = x, y = y)
}
diff --git a/tests/testthat/test-TrainStudy.R b/tests/testthat/test-TrainStudy.R
index 8849056..c961015 100644
--- a/tests/testthat/test-TrainStudy.R
+++ b/tests/testthat/test-TrainStudy.R
@@ -1,3 +1,203 @@
-test_that("multiplication works", {
- expect_equal(2 * 2, 4)
+data("entities")
+train_study <- TrainStudy$new(id = "train_study",
+ ind_col = "IDS",
+ target = "disease")
+# See also train_study$summary()
+print(train_study)
+test_that("TrainStudy: until training", {
+ expect_no_error({
+ train_study <- TrainStudy$new(id = "train_study",
+ ind_col = "IDS",
+ target = "disease")
+ print(train_study)
+ })
+ expect_error({
+ train_study$train()
+ })
+ expect_error({
+ train_study$trainLayer()
+ })
+ expect_error({
+ train_study$createMetaTrainData()
+ })
+ expect_error({
+ train_study$varSelection()
+ })
+ expect_error({
+ train_study$test_overlap()
+ })
+
+ expect_error({
+ train_study$train(ind_subset = NULL,
+ use_var_sel = TRUE,
+ resampling_method = character(0L),
+ resampling_arg = list())
+ train_study$train(ind_subset = NULL,
+ use_var_sel = TRUE,
+ resampling_method = character(0L),
+ resampling_arg = list())
+ })
+
+ # Layers can be added successfully
+ expect_no_error({
+ tl_ge <- TrainLayer$new(id = "geneexpr", train_study = train_study)
+ tl_pr <- TrainLayer$new(id = "proteinexpr", train_study = train_study)
+ tl_me <- TrainLayer$new(id = "methylation", train_study = train_study)
+ # We also prepare the meta layer for the meta analysis.
+ tl_meta <- TrainMetaLayer$new(id = "meta_layer", train_study = train_study)
+ })
+
+ expect_error({
+ train_study$train()
+ })
+ expect_error({
+ train_study$test_overlap()
+ })
+
+ # TrainData can be added successfully
+ expect_no_error({
+ train_data_ge <- TrainData$new(id = "geneexpr",
+ train_layer = tl_ge,
+ data_frame = entities$training$geneexpr)
+ train_data_pr <- TrainData$new(id = "proteinexpr",
+ train_layer = tl_pr,
+ data_frame = entities$training$proteinexpr)
+ train_data_me <- TrainData$new(id = "methylation",
+ train_layer = tl_me,
+ data_frame = entities$training$methylation)
+ })
+
+ # Upset plot works
+ expect_no_error({
+ train_study$upset(order.by = "freq")
+ })
+
+ # Variable selection works
+ expect_no_error({
+ same_param_varsel <- ParamVarSel$new(id = "ParamVarSel",
+ param_list = list(num.trees = 1000L,
+ mtry = 3L))
+ varsel_ge <- VarSel$new(id = "varsel_geneexpr",
+ package = "Boruta",
+ varsel_fct = "Boruta",
+ param = same_param_varsel,
+ train_layer = tl_ge)
+
+ varsel_pr <- VarSel$new(id = "varsel_geneexpr",
+ package = "Boruta",
+ varsel_fct = "Boruta",
+ param = same_param_varsel,
+ train_layer = tl_pr)
+
+ varsel_me <- VarSel$new(id = "varsel_geneexpr",
+ package = "Boruta",
+ varsel_fct = "Boruta",
+ param = same_param_varsel,
+ train_layer = tl_me)
+
+ var_sel_res <- train_study$varSelection()
+ print(var_sel_res)
+ })
+
+ # Lrner parameters
+ expect_no_error({
+ same_param <- ParamLrner$new(id = "ParamRanger",
+ param_list = list(probability = TRUE,
+ mtry = 2L),
+ hyperparam_list = list(num.trees = 1000L))
+ })
+
+ # Lrner
+ expect_no_error({
+ lrner_ge <- Lrner$new(id = "ranger",
+ package = "ranger",
+ lrn_fct = "ranger",
+ param = same_param,
+ train_layer = tl_ge)
+ lrner_pr <- Lrner$new(id = "ranger",
+ package = "ranger",
+ lrn_fct = "ranger",
+ param = same_param,
+ train_layer = tl_pr)
+ lrner_me <- Lrner$new(id = "ranger",
+ package = "ranger",
+ lrn_fct = "ranger",
+ param = same_param,
+ train_layer = tl_me)
+ lrner_meta <- Lrner$new(id = "weighted",
+ lrn_fct = "weightedMeanLearner",
+ param = ParamLrner$new(id = "ParamWeighted",
+ param_list = list(),
+ hyperparam_list = list()),
+ train_layer = tl_meta)
+ })
+
+ # Training
+ expect_no_error({
+ same_param <- ParamLrner$new(id = "ParamRanger",
+ param_list = list(probability = TRUE,
+ mtry = 2L),
+ hyperparam_list = list(num.trees = 1000L))
+
+ disease <- train_study$getTargetValues()$disease
+ trained_study <- train_study$train(resampling_method = "caret::createFolds",
+ resampling_arg = list(y = disease,
+ k = 2L),
+ use_var_sel = TRUE)
+ print(trained_study)
+ })
+
+ expect_error({
+ same_param <- ParamLrner$new(id = "ParamRanger",
+ param_list = list(probability = TRUE,
+ mtry = 2L),
+ hyperparam_list = list(num.trees = 1000L))
+
+ disease <- train_study$getTargetValues()$disease
+ trained_study <- train_study$train(resampling_method = "stats::rnorm",
+ resampling_arg = list(n = 10),
+ use_var_sel = TRUE)
+ })
+
+
+ expect_no_error({
+ trained_study$getId()
+ trained_study$getIndCol()
+ trained_study$getTarget()
+ trained_study$getTrainMetaLayer()
+ trained_study$getIndIDs()
+ trained_study$getTargetValues()
+ })
+
+ expect_no_error({
+ print(train_study$summary())
+ })
+
+ # Prediction
+ expect_no_error({
+ new_study <- NewStudy$new(id = "new_study", ind_col = "IDS")
+ })
+
+ expect_no_error({
+ nl_ge <- NewLayer$new(id = "geneexpr", new_study = new_study)
+ nl_pr <- NewLayer$new(id = "proteinexpr", new_study = new_study)
+ nl_me <- NewLayer$new(id = "methylation", new_study = new_study)
+ })
+
+ expect_no_error({
+ new_data_ge <- NewData$new(id = "geneexpr",
+ new_layer = nl_ge,
+ data_frame = entities$testing$geneexpr)
+ new_data_pr <- NewData$new(id = "proteinexpr",
+ new_layer = nl_pr,
+ data_frame = entities$testing$proteinexpr)
+ new_data_me <- NewData$new(id = "methylation",
+ new_layer = nl_me,
+ data_frame = entities$testing$methylation)
+ })
+
+ expect_no_error({
+ new_predictions <- train_study$predict(new_study = new_study)
+ print(new_predictions)
+ })
})