From a0cc4c88c054b08344935fcf77a2d639a428a3a3 Mon Sep 17 00:00:00 2001
From: Cesaire Joris Kuete Fouodo
Date: Wed, 27 Nov 2024 17:39:12 +0100
Subject: [PATCH] Version 1.1

---
 README.Rmd               | 34 +++++++++++-------
 README.md                | 78 ++++++++++++++++++++++++++++++++++------
 vignettes/how_to_use.Rmd | 24 ++++++++++---
 3 files changed, 107 insertions(+), 29 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index fb8272b..c13cd0b 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -1,12 +1,3 @@
----
-title: "fuseMLR"
-author: Cesaire J. K. Fouodo
-output:
-  md_document:
-    variant: gfm
-    preserve_yaml: true
----
-
 ```{r setup, include=FALSE}
 knitr::opts_chunk$set(echo = TRUE)
 ```
@@ -192,11 +183,21 @@ print(training)
 Use `extractModel` to retrieve the list of stored models and `extractData` to retrieve training data.
+
 ```{r basic_lrnr, include=TRUE, eval=TRUE}
 models_list <- extractModel(training = training)
-data_list <- extractData(training = training)
+print(str(object = models_list, max.level = 1L))
+```
+
+The list of the four trained models (three layer-specific random forests and one weighted meta-model) is returned.
+
+```{r basic_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
 ```

+The list of the four training datasets (the three simulated training modalities and the meta-data) is returned.
+
 #### E) Predicting

 In this section, we create a ```testing``` instance (from the *Testing* class) and make predictions for new data. This is done analogously to ```training```. The only difference that only the testing data modalities are required. Relevant functions are ```createTesting()``` and ```createTestLayer()```.
@@ -222,10 +223,18 @@ createTestLayer(testing = testing,
                 test_data = multi_omics$testing$proteinexpr)
 ```

-- An upset plot of the training data: Visualize patient overlap across layers.
+A look at the testing data.
+
+```{r basic_test_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+```
+
+An upset plot of the testing data: Visualize patient overlap across layers.

 ```{r upsetplot_new, include=TRUE, eval=TRUE, }
 upsetplot(object = testing, order.by = "freq")
+# See also extractData(object = testing)
 ```

 - Predict the testing object.
@@ -267,7 +276,6 @@ perf_overlapping <- sapply(X = actual_pred[complete.cases(actual_pred),
 print(perf_overlapping)
 ```

-Note that our example is based on simulated data for usage illustration; only one run is not enough to appreciate the performances of our models.

 # E - Interface and wrapping #

@@ -371,7 +379,7 @@ library(knitr)
 # Create a data frame
 data <- data.frame(
-  Leaner = c("weightedMeanLearner", "bestSpecificLearner"),
+  Leaner = c("weightedMeanLearner", "bestLayerLearner"),
   Description = c("The weighted mean meta learner. It uses meta data to estimate the weights of the modality-specific models",
                   "The best layer-specific model is used as meta model.")
 )
diff --git a/README.md b/README.md
index 7c20032..0d04fcb 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,12 @@
+---
+title: "fuseMLR"
+author: Cesaire J. K. Fouodo
+output:
+  md_document:
+    variant: gfm
+    preserve_yaml: true
+---
+
 [![R-CMD-check](https://github.com/imbs-hl/fuseMLR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/imbs-hl/fuseMLR/actions/workflows/R-CMD-check.yaml)

@@ -164,7 +173,7 @@ createTrainLayer(training = training,
                                  mtry = 1L,
                                  na.action = "na.learn"),
               param_pred_list = list(),
-              na_rm = FALSE)
+              na_action = "na.keep")
 ```

     ## Training : training
@@ -192,7 +201,7 @@ createTrainLayer(training = training,
                                  mtry = 1L,
                                  na.action = "na.learn"),
               param_pred_list = list(),
-              na_rm = FALSE)
+              na_action = "na.keep")
 ```

     ## Training : training
@@ -220,7 +229,7 @@ createTrainLayer(training = training,
                                  mtry = 1L,
                                  na.action = "na.learn"),
               param_pred_list = list(),
-              na_rm = FALSE)
+              na_action = "na.keep")
 ```

     ## Training : training
@@ -327,6 +336,9 @@ training <- fusemlr(training = training,
                                      k = 10L))
 ```

+    ## Warning in fusemlr(training = training, use_var_sel = TRUE, resampling_method =
+    ## NULL, : Variable selection has been already performed.
+
     ## Training for fold 1.

     ## Training on layer geneexpr started.
@@ -488,6 +500,7 @@ print(training)
     ## Status          : Trained
     ## Number of layers: 4
     ## Layers trained  : 4
+    ## Var. sel. used  : Yes
     ## p               : 131 | 160 | 160 | 3
     ## n               : 50 | 50 | 50 | 64
@@ -500,9 +513,34 @@ Use `extractModel` to retrieve the list of stored models and
 ``` r
 models_list <- extractModel(training = training)
-data_list <- extractData(training = training)
+print(str(object = models_list, max.level = 1L))
 ```

+    ## List of 4
+    ##  $ geneexpr   :List of 14
+    ##  $ proteinexpr:List of 14
+    ##  $ methylation:List of 14
+    ##  $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.512 0.276 0.212
+    ##   ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"
+    ##  NULL
+
+The list of the four trained models (three layer-specific random
+forests and one weighted meta-model) is returned.
+
+``` r
+data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
+```
+
+    ## List of 4
+    ##  $ geneexpr   :'data.frame': 50 obs. of  133 variables:
+    ##  $ proteinexpr:'data.frame': 50 obs. of  162 variables:
+    ##  $ methylation:'data.frame': 50 obs. of  162 variables:
+    ##  $ meta_layer :'data.frame': 64 obs. of  5 variables:
+
+The list of the four training datasets (the three simulated training
+modalities and the meta-data) is returned.
+
 #### E) Predicting

 In this section, we create a `testing` instance (from the *Testing*
@@ -551,8 +589,20 @@ createTestLayer(testing = testing,
     ## p               : 131 | 160 | 160
     ## n               : 20 | 20 | 20

-- An upset plot of the training data: Visualize patient overlap across
-  layers.
+A look at the testing data.
+
+``` r
+data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+```
+
+    ## List of 3
+    ##  $ geneexpr   :'data.frame': 20 obs. of  132 variables:
+    ##  $ proteinexpr:'data.frame': 20 obs. of  161 variables:
+    ##  $ methylation:'data.frame': 20 obs. of  161 variables:
+
+An upset plot of the testing data: Visualize patient overlap across
+layers.

 ``` r
 upsetplot(object = testing, order.by = "freq")
 ```

 ![](README_files/figure-gfm/upsetplot_new-1.png)

+``` r
+# See also extractData(object = testing)
+```
+
 - Predict the testing object.

 ``` r
@@ -640,9 +694,6 @@ print(perf_overlapping)
     ##    geneexpr proteinexpr methylation  meta_layer
     ##   0.3093583   0.3448970   0.2932064   0.2993118

-Note that our example is based on simulated data for usage illustration;
-only one run is not enough to appreciate the performances of our models.
-
 # E - Interface and wrapping

 We distinguish common supervised learning arguments from method specific
@@ -682,7 +733,7 @@ createTrainLayer(training = training,
                                  kernel = 'radial',
                                  probability = TRUE),
               param_pred_list = list(probability = TRUE),
-              na_rm = TRUE,
+              na_action = "na.keep",
               x = "x",
               y = "y",
               object = "object",
@@ -699,6 +750,7 @@ createTrainLayer(training = training,
     ## Status          : Trained
     ## Number of layers: 4
     ## Layers trained  : 4
+    ## Var. sel. used  : Yes
     ## p               : 131 | 160 | 160 | 3
     ## n               : 50 | 50 | 50 | 64
@@ -726,6 +778,9 @@ training <- fusemlr(training = training,
                     use_var_sel = TRUE)
 ```

+    ## Warning in fusemlr(training = training, use_var_sel = TRUE): Variable selection
+    ## has been already performed.
+
     ## Training for fold 1.

     ## Training on layer geneexpr started.
@@ -887,6 +942,7 @@ print(training)
     ## Status          : Trained
     ## Number of layers: 4
     ## Layers trained  : 5
+    ## Var. sel. used  : Yes
     ## p               : 131 | 160 | 160 | 3
     ## n               : 50 | 50 | 50 | 64
@@ -947,7 +1003,7 @@ implemented the following ones.
 | Leaner              | Description                                                                                                |
 |:--------------------|:-----------------------------------------------------------------------------------------------------------|
 | weightedMeanLearner | The weighted mean meta learner. It uses meta data to estimate the weights of the modality-specific models |
-| bestSpecificLearner | The best layer-specific model is used as meta model.                                                      |
+| bestLayerLearner    | The best layer-specific model is used as meta model.                                                      |

 © 2024 Institute of Medical Biometry and Statistics (IMBS). All rights
 reserved.
diff --git a/vignettes/how_to_use.Rmd b/vignettes/how_to_use.Rmd
index 62792f4..60c09c2 100644
--- a/vignettes/how_to_use.Rmd
+++ b/vignettes/how_to_use.Rmd
@@ -165,9 +165,17 @@ We use `extractModel()` to retrieve the list of stored models and `extractData()
 ```{r basic_lrnr, include=TRUE, eval=TRUE}
 models_list <- extractModel(training = training)
-data_list <- extractData(training = training)
+print(str(object = models_list, max.level = 1L))
 ```
+The list of the four trained models (three layer-specific random forests and one weighted meta-model) is returned.
+
+```{r basic_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
+```
+
+The list of the four training datasets (the three simulated training modalities and the meta-data) is returned.

 # D - Predicting #
@@ -194,6 +202,13 @@ createTestLayer(testing = testing,
                 test_data = multi_omics$testing$methylation)
 ```

+A look at the testing data.
+
+```{r basic_test_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+```
+
 We can also generate an upset plot to visualize patient overlap across testing layers.

 ```{r upsetplot_new, include=TRUE, eval=TRUE, }
@@ -238,7 +253,6 @@ perf_overlapping <- sapply(X = actual_pred[complete.cases(actual_pred),
 print(perf_overlapping)
 ```

-Note that our example is based on simulated data for usage illustration; only one run is not enough to appreciate the performances of our models.

 # E - Interface and wrapping #

@@ -294,12 +308,12 @@ mylasso <- function (x, y, nlambda = 25, nfolds = 5) {
   # Perform cross-validation to find the optimal lambda
-  cv_lasso <- cv.glmnet(x = as.matrix(x), y = y,
+  cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
                         family = "binomial",
                         type.measure = "deviance",
                         nfolds = nfolds)
   best_lambda <- cv_lasso$lambda.min
-  lasso_best <- glmnet(x = as.matrix(x), y = y,
+  lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
                        family = "binomial",
                        alpha = 1,
                        lambda = best_lambda
@@ -342,7 +356,7 @@ library(knitr)
 # Create a data frame
 data <- data.frame(
-  Leaner = c("weightedMeanLearner", "bestSpecificLearner"),
+  Leaner = c("weightedMeanLearner", "bestLayerLearner"),
   Description = c("The weighted mean meta learner. It uses meta data to estimate the weights of the modality-specific models",
                   "The best layer-specific model is used as meta model.")
 )
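The interface-and-wrapping hunks above show the vignette wrapping `glmnet` behind a learner function whose fitted object is later consumed by a matching predict function. Below is a minimal, self-contained sketch of that pattern, assuming the hunk's truncated function body. The `glmnet::cv.glmnet()` and `glmnet::glmnet()` calls mirror the patch; the `my_lasso` class tag, the `predict.my_lasso()` method, and its `(object, data)` argument names are illustrative assumptions, not necessarily the exact interface fuseMLR requires.

``` r
# Sketch of the lasso wrapper pattern referenced in the vignette hunk.
library(glmnet)

mylasso <- function(x, y, nlambda = 25, nfolds = 5) {
  # Cross-validate to choose the penalty (lambda.min), as in the hunk above.
  cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
                                family = "binomial",
                                type.measure = "deviance",
                                nfolds = nfolds)
  best_lambda <- cv_lasso$lambda.min
  # Refit the lasso at the selected penalty.
  lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
                               family = "binomial",
                               alpha = 1,
                               lambda = best_lambda)
  # Tag the fit so an S3 predict method can be dispatched on it
  # ("my_lasso" is an assumed, illustrative class name).
  class(lasso_best) <- c("my_lasso", class(lasso_best))
  lasso_best
}

# Assumed companion predict method returning class probabilities.
predict.my_lasso <- function(object, data, ...) {
  # Drop the wrapper class so predict() dispatches to glmnet's own method.
  class(object) <- setdiff(class(object), "my_lasso")
  as.vector(predict(object, newx = as.matrix(data), type = "response"))
}
```

Such a wrapper is then registered through `createTrainLayer()`, with its argument names mapped via the `x = "x"`, `y = "y"`, and `object = "object"` entries visible in the README hunk above.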