diff --git a/R/Lrner.R b/R/Lrner.R index 5375cf3..ddf3ffb 100644 --- a/R/Lrner.R +++ b/R/Lrner.R @@ -182,8 +182,8 @@ Lrner <- R6Class("Lrner", use_var_sel = FALSE, verbose = TRUE) { train_data = private$train_layer$getTrainData() - # Train only on complete data train_data = train_data$clone(deep = FALSE) + # Train only on complete data if (private$na_rm) { all_data = train_data$getDataFrame() complete_data = train_data$getCompleteData() diff --git a/R/TestMetaLayer.R b/R/TestMetaLayer.R index 1789184..ba8ea1a 100644 --- a/R/TestMetaLayer.R +++ b/R/TestMetaLayer.R @@ -94,15 +94,12 @@ TestMetaLayer <- R6Class("TestMetaLayer", #' Name of individual column IDs. #' @param data_frame `data.frame(1)` \cr #' \code{data.frame} of layer specific predictions. - #' @param meta_layer `TestLayer(1)` \cr - #' Layer where to store the [TestData] object. #' #' @export # TODO: Please do not export me. setTestData = function (id, ind_col, - data_frame, - meta_layer) { + data_frame) { TestData$new(id = id, ind_col = ind_col, data_frame = data_frame, diff --git a/R/TrainMetaLayer.R b/R/TrainMetaLayer.R index 0b42dd6..5c6588d 100644 --- a/R/TrainMetaLayer.R +++ b/R/TrainMetaLayer.R @@ -257,23 +257,26 @@ TrainMetaLayer <- R6Class("TrainMetaLayer", #' Name of individual column IDs. #' @param data_frame `data.frame` \cr #' \code{data.frame} of layer specific predictions. - #' @param target `character` \cr - #' Name of the target variable #' #' @export # TODO: Please do not export me. 
setTrainData = function (id, ind_col, - data_frame, - target) { + data_frame) { # nocov start if (sum(!complete.cases(data_frame)) == nrow(data_frame)) { warning("No individual fully overlaps across all layers.") } # nocov end - TrainData$new(id = id, - data_frame = data_frame, - train_layer = self) + if (self$getLrner()$getNaRm()) { + TrainData$new(id = id, + data_frame = data_frame[complete.cases(data_frame), ], + train_layer = self) + } else { + TrainData$new(id = id, + data_frame = data_frame, + train_layer = self) + } return(self) }, #' @description diff --git a/R/Training.R b/R/Training.R index 549f4ac..c11c080 100644 --- a/R/Training.R +++ b/R/Training.R @@ -591,7 +591,6 @@ Training <- R6Class("Training", # This code accesses each layer (except TrainMetaLayer) level # and get the individual IDs. layers = layers[layers$class %in% c("TrainLayer", "TrainMetaLayer"), ] - current_model = NULL models = list() for (k in layers$key) { layer = self$getFromHashTable(key = k) @@ -609,7 +608,6 @@ Training <- R6Class("Training", getData = function() { layers = self$getKeyClass() layers = layers[layers$class %in% c("TrainLayer", "TrainMetaLayer"), ] - current_model = NULL all_data = list() for (k in layers$key) { layer = self$getFromHashTable(key = k) diff --git a/doc/fuseMLR.html b/doc/fuseMLR.html index 8e721e8..c177a6f 100644 --- a/doc/fuseMLR.html +++ b/doc/fuseMLR.html @@ -12,7 +12,7 @@ - + How does fuseMLR work? @@ -340,7 +340,7 @@

How does fuseMLR work?

Cesaire Fouodo

-

2024-12-11

+

2024-12-12

@@ -471,7 +471,7 @@

C.1 - Creating a training

param_train_list = list(probability = TRUE, mtry = 1L), param_pred_list = list(), - na_action = "na.keep") + na_action = "na.rm") #> Training : training #> Problem type : classification #> Status : Not trained @@ -479,7 +479,7 @@

C.1 - Creating a training

#> Layers trained : 0 #> p : 131 #> n : 50 -#> na.action : na.keep +#> na.action : na.rm
# Create gene protein abundance layer
 createTrainLayer(training = training,
                  train_layer_id = "proteinexpr",
@@ -502,7 +502,7 @@ 

C.1 - Creating a training

#> Layers trained : 0 #> p : 131 | 160 #> n : 50 | 50 -#> na.action : na.keep | na.keep
+#> na.action : na.rm | na.keep
# Create methylation layer
 createTrainLayer(training = training,
                  train_layer_id = "methylation",
@@ -525,7 +525,7 @@ 

C.1 - Creating a training

#> Layers trained : 0 #> p : 131 | 160 | 367 #> n : 50 | 50 | 50 -#> na.action : na.keep | na.keep | na.keep
+#> na.action : na.rm | na.keep | na.keep

Also add a meta-layer. We use the weighted mean (internal function to fuseMLR) as meta-learner. Similarly to learners, a meta-learner should allow at least the arguments x and @@ -563,7 +563,7 @@

C.1 - Creating a training

#> Layers trained : 0 #> p : 131 | 160 | 367 #> n : 50 | 50 | 50 -#> na.action : na.keep | na.keep | na.keep +#> na.action : na.rm | na.keep | na.keep print(training) #> Training : training #> Problem type : classification @@ -572,7 +572,7 @@

C.1 - Creating a training

#> Layers trained : 0 #> p : 131 | 160 | 367 #> n : 50 | 50 | 50 -#> na.action : na.keep | na.keep | na.keep +#> na.action : na.rm | na.keep | na.keep

Function upsetplot() is available to generate an upset plot of the training data, i.e. an overview of how patients overlap across layers.

@@ -660,7 +660,7 @@

C.2 - Variable selection

#> Layers trained : 0 #> p : 19 | 1 | 35 #> n : 50 | 50 | 50 -#> na.action : na.keep | na.keep | na.keep +#> na.action : na.rm | na.keep | na.keep

For each layer, the variable selection results show the chosen variables.

@@ -713,8 +713,8 @@

C.2 - Train

#> Layers trained : 4 #> Var. sel. used : Yes #> p : 19 | 1 | 35 | 3 -#> n : 50 | 50 | 50 | 69 -#> na.action : na.keep | na.keep | na.keep | na.rm +#> n : 50 | 50 | 50 | 26 +#> na.action : na.rm | na.keep | na.keep | na.rm

We can also display a summary of training to see more details on layer levels. Information about the training data modality, the variable selection method and the learner stored at each layer will @@ -729,8 +729,8 @@

C.2 - Train

#> Layers trained : 4 #> Var. sel. used : Yes #> p : 19 | 1 | 35 | 3 -#> n : 50 | 50 | 50 | 69 -#> na.action : na.keep | na.keep | na.keep | na.rm +#> n : 50 | 50 | 50 | 26 +#> na.action : na.rm | na.keep | na.keep | na.rm #> ---------------- #> #> Layer geneexpr @@ -852,8 +852,8 @@

C.2 - Train

#> Layer : meta_layer #> Ind. id. : IDS #> Target : disease -#> n : 69 -#> Missing : 43 +#> n : 26 +#> Missing : 0 #> p : 3 #> ----------------

We use extractModel() to retrieve the list of stored @@ -875,7 +875,7 @@

C.2 - Train

#> $ geneexpr :'data.frame': 50 obs. of 21 variables: #> $ proteinexpr:'data.frame': 50 obs. of 3 variables: #> $ methylation:'data.frame': 50 obs. of 37 variables: -#> $ meta_layer :'data.frame': 69 obs. of 5 variables: +#> $ meta_layer :'data.frame': 26 obs. of 5 variables:

The three simulated training modalities and the meta-data are returned.

diff --git a/vignettes/fuseMLR.Rmd b/vignettes/fuseMLR.Rmd index c5b2147..b0bc08e 100644 --- a/vignettes/fuseMLR.Rmd +++ b/vignettes/fuseMLR.Rmd @@ -86,7 +86,7 @@ createTrainLayer(training = training, param_train_list = list(probability = TRUE, mtry = 1L), param_pred_list = list(), - na_action = "na.keep") + na_action = "na.rm") ``` ```{r proteinexpr, include=TRUE, eval=TRUE}