From a0cc4c88c054b08344935fcf77a2d639a428a3a3 Mon Sep 17 00:00:00 2001
From: Cesaire Joris Kuete Fouodo
Date: Wed, 27 Nov 2024 17:39:12 +0100
Subject: [PATCH] Version 1.1

---
 README.Rmd               | 34 +++++++++++-------
 README.md                | 78 ++++++++++++++++++++++++++++++++++------
 vignettes/how_to_use.Rmd | 24 ++++++++++---
 3 files changed, 107 insertions(+), 29 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index fb8272b..c13cd0b 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -1,12 +1,3 @@
----
-title: "fuseMLR"
-author: Cesaire J. K. Fouodo
-output:
-  md_document:
-    variant: gfm
-    preserve_yaml: true
----
-
 ```{r setup, include=FALSE}
 knitr::opts_chunk$set(echo = TRUE)
 ```
@@ -192,11 +183,21 @@ print(training)
 Use `extractModel` to retrieve the list of stored models and `extractData` to retrieve training data.
+
 ```{r basic_lrnr, include=TRUE, eval=TRUE}
 models_list <- extractModel(training = training)
-data_list <- extractData(training = training)
+print(str(object = models_list, max.level = 1L))
+```
+
+The list of the four trained models (three layer-specific random forests and one weighted meta-model) is returned.
+
+```{r basic_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
 ```

+The list of the four training datasets (the three simulated training modalities and the meta-data) is returned.
+
 #### E) Predicting

 In this section, we create a ```testing``` instance (from the *Testing* class) and make predictions for new data. This is done analogously to ```training```. The only difference that only the testing data modalities are required. Relevant functions are ```createTesting()``` and ```createTestLayer()```.
@@ -222,10 +223,18 @@ createTestLayer(testing = testing,
                 test_data = multi_omics$testing$proteinexpr)
 ```

-- An upset plot of the training data: Visualize patient overlap across layers.
+A look at the testing data.
+
+```{r basic_test_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+```
+
+An upset plot of the testing data: Visualize patient overlap across layers.

 ```{r upsetplot_new, include=TRUE, eval=TRUE, }
 upsetplot(object = testing, order.by = "freq")
+# See also extractData(object = testing)
 ```

 - Predict the testing object.
@@ -267,7 +276,6 @@ perf_overlapping <- sapply(X = actual_pred[complete.cases(actual_pred),
 print(perf_overlapping)
 ```

-Note that our example is based on simulated data for usage illustration; only one run is not enough to appreciate the performances of our models.

 # E - Interface and wrapping #

@@ -371,7 +379,7 @@ library(knitr)
 # Create a data frame
 data <- data.frame(
-  Leaner = c("weightedMeanLearner", "bestSpecificLearner"),
+  Leaner = c("weightedMeanLearner", "bestLayerLearner"),
   Description = c("The weighted mean meta learner. It uses meta data to estimate the weights of the modality-specific models",
                   "The best layer-specific model is used as meta model.")
 )
diff --git a/README.md b/README.md
index 7c20032..0d04fcb 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,12 @@
+---
+title: "fuseMLR"
+author: Cesaire J. K. Fouodo
+output:
+  md_document:
+    variant: gfm
+    preserve_yaml: true
+---
+
 [![R-CMD-check](https://github.com/imbs-hl/fuseMLR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/imbs-hl/fuseMLR/actions/workflows/R-CMD-check.yaml)

@@ -164,7 +173,7 @@ createTrainLayer(training = training,
                                  mtry = 1L,
                                  na.action = "na.learn"),
               param_pred_list = list(),
-              na_rm = FALSE)
+              na_action = "na.keep")
 ```

     ## Training : training
@@ -192,7 +201,7 @@ createTrainLayer(training = training,
                                  mtry = 1L,
                                  na.action = "na.learn"),
               param_pred_list = list(),
-              na_rm = FALSE)
+              na_action = "na.keep")
 ```

     ## Training : training
@@ -220,7 +229,7 @@ createTrainLayer(training = training,
                                  mtry = 1L,
                                  na.action = "na.learn"),
               param_pred_list = list(),
-              na_rm = FALSE)
+              na_action = "na.keep")
 ```

     ## Training : training
@@ -327,6 +336,9 @@ training <- fusemlr(training = training,
                                      k = 10L))
 ```

+    ## Warning in fusemlr(training = training, use_var_sel = TRUE, resampling_method =
+    ## NULL, : Variable selection has been already performed.
+
     ## Training for fold 1.

     ## Training on layer geneexpr started.
@@ -488,6 +500,7 @@ print(training)
     ## Status          : Trained
     ## Number of layers: 4
     ## Layers trained  : 4
+    ## Var. sel. used  : Yes
     ## p               : 131 | 160 | 160 | 3
     ## n               : 50 | 50 | 50 | 64
@@ -500,9 +513,34 @@ Use `extractModel` to retrieve the list of stored models and
 ``` r
 models_list <- extractModel(training = training)
-data_list <- extractData(training = training)
+print(str(object = models_list, max.level = 1L))
 ```

+    ## List of 4
+    ##  $ geneexpr   :List of 14
+    ##  $ proteinexpr:List of 14
+    ##  $ methylation:List of 14
+    ##  $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.512 0.276 0.212
+    ##   ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"
+    ##  NULL
+
+The list of the four trained models (three layer-specific random
+forests and one weighted meta-model) is returned.
+
+``` r
+data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
+```
+
+    ## List of 4
+    ##  $ geneexpr   :'data.frame': 50 obs. of  133 variables:
+    ##  $ proteinexpr:'data.frame': 50 obs. of  162 variables:
+    ##  $ methylation:'data.frame': 50 obs. of  162 variables:
+    ##  $ meta_layer :'data.frame': 64 obs. of  5 variables:
+
+The list of the four training datasets (the three simulated training
+modalities and the meta-data) is returned.
+
 #### E) Predicting

 In this section, we create a `testing` instance (from the *Testing*
@@ -551,8 +589,20 @@ createTestLayer(testing = testing,
     ## p               : 131 | 160 | 160
     ## n               : 20 | 20 | 20

-- An upset plot of the training data: Visualize patient overlap across
-  layers.
+A look at the testing data.
+
+``` r
+data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+```
+
+    ## List of 3
+    ##  $ geneexpr   :'data.frame': 20 obs. of  132 variables:
+    ##  $ proteinexpr:'data.frame': 20 obs. of  161 variables:
+    ##  $ methylation:'data.frame': 20 obs. of  161 variables:
+
+An upset plot of the testing data: Visualize patient overlap across
+layers.

 ``` r
 upsetplot(object = testing, order.by = "freq")
 ```

 ![](README_files/figure-gfm/upsetplot_new-1.png)

+``` r
+# See also extractData(object = testing)
+```
+
 - Predict the testing object.

 ``` r
@@ -640,9 +694,6 @@ print(perf_overlapping)
     ##    geneexpr proteinexpr methylation  meta_layer
     ##   0.3093583   0.3448970   0.2932064   0.2993118

-Note that our example is based on simulated data for usage illustration;
-only one run is not enough to appreciate the performances of our models.
-
 # E - Interface and wrapping

 We distinguish common supervised learning arguments from method specific
@@ -682,7 +733,7 @@ createTrainLayer(training = training,
                                  kernel = 'radial',
                                  probability = TRUE),
               param_pred_list = list(probability = TRUE),
-              na_rm = TRUE,
+              na_action = "na.keep",
               x = "x",
               y = "y",
               object = "object",
@@ -699,6 +750,7 @@ createTrainLayer(training = training,
     ## Status          : Trained
     ## Number of layers: 4
     ## Layers trained  : 4
+    ## Var. sel. used  : Yes
     ## p               : 131 | 160 | 160 | 3
     ## n               : 50 | 50 | 50 | 64
@@ -726,6 +778,9 @@ training <- fusemlr(training = training,
                     use_var_sel = TRUE)
 ```

+    ## Warning in fusemlr(training = training, use_var_sel = TRUE): Variable selection
+    ## has been already performed.
+
     ## Training for fold 1.

     ## Training on layer geneexpr started.
@@ -887,6 +942,7 @@ print(training)
     ## Status          : Trained
     ## Number of layers: 4
     ## Layers trained  : 5
+    ## Var. sel. used  : Yes
     ## p               : 131 | 160 | 160 | 3
     ## n               : 50 | 50 | 50 | 64
@@ -947,7 +1003,7 @@ implemented the following ones.
 | Leaner              | Description                                                                                                |
 |:--------------------|:-----------------------------------------------------------------------------------------------------------|
 | weightedMeanLearner | The weighted mean meta learner. It uses meta data to estimate the weights of the modality-specific models |
-| bestSpecificLearner | The best layer-specific model is used as meta model.                                                      |
+| bestLayerLearner    | The best layer-specific model is used as meta model.                                                      |

 © 2024 Institute of Medical Biometry and Statistics (IMBS). All rights
 reserved.
diff --git a/vignettes/how_to_use.Rmd b/vignettes/how_to_use.Rmd
index 62792f4..60c09c2 100644
--- a/vignettes/how_to_use.Rmd
+++ b/vignettes/how_to_use.Rmd
@@ -165,9 +165,17 @@ We use `extractModel()` to retrieve the list of stored models and `extractData()
 ```{r basic_lrnr, include=TRUE, eval=TRUE}
 models_list <- extractModel(training = training)
-data_list <- extractData(training = training)
+print(str(object = models_list, max.level = 1L))
 ```
+The list of the four trained models (three layer-specific random forests and one weighted meta-model) is returned.
+
+```{r basic_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
+```
+
+The list of the four training datasets (the three simulated training modalities and the meta-data) is returned.

 # D - Predicting #
@@ -194,6 +202,13 @@ createTestLayer(testing = testing,
                 test_data = multi_omics$testing$methylation)
 ```

+A look at the testing data.
+
+```{r basic_test_data, include=TRUE, eval=TRUE}
+data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+```
+
 We can also generate an upset plot to visualize patient overlap across testing layers.

 ```{r upsetplot_new, include=TRUE, eval=TRUE, }
@@ -238,7 +253,6 @@ perf_overlapping <- sapply(X = actual_pred[complete.cases(actual_pred),
 print(perf_overlapping)
 ```

-Note that our example is based on simulated data for usage illustration; only one run is not enough to appreciate the performances of our models.

 # E - Interface and wrapping #

@@ -294,12 +308,12 @@ mylasso <- function (x, y, nlambda = 25, nfolds = 5) {
   # Perform cross-validation to find the optimal lambda
-  cv_lasso <- cv.glmnet(x = as.matrix(x), y = y,
+  cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
                         family = "binomial",
                         type.measure = "deviance",
                         nfolds = nfolds)
   best_lambda <- cv_lasso$lambda.min
-  lasso_best <- glmnet(x = as.matrix(x), y = y,
+  lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
                        family = "binomial",
                        alpha = 1,
                        lambda = best_lambda
@@ -342,7 +356,7 @@ library(knitr)
 # Create a data frame
 data <- data.frame(
-  Leaner = c("weightedMeanLearner", "bestSpecificLearner"),
+  Leaner = c("weightedMeanLearner", "bestLayerLearner"),
   Description = c("The weighted mean meta learner. It uses meta data to estimate the weights of the modality-specific models",
                   "The best layer-specific model is used as meta model.")
 )
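The interface-and-wrapping hunks above show the vignette wrapping `glmnet` behind a learner function whose fitted object is later consumed by a matching predict function. Below is a minimal, self-contained sketch of that pattern, assuming the hunk's truncated function body. The `glmnet::cv.glmnet()` and `glmnet::glmnet()` calls mirror the patch; the `my_lasso` class tag, the `predict.my_lasso()` method, and its `(object, data)` argument names are illustrative assumptions, not necessarily the exact interface fuseMLR requires.

``` r
# Sketch of the lasso wrapper pattern referenced in the vignette hunk.
library(glmnet)

mylasso <- function(x, y, nlambda = 25, nfolds = 5) {
  # Cross-validate to choose the penalty (lambda.min), as in the hunk above.
  cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
                                family = "binomial",
                                type.measure = "deviance",
                                nfolds = nfolds)
  best_lambda <- cv_lasso$lambda.min
  # Refit the lasso at the selected penalty.
  lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
                               family = "binomial",
                               alpha = 1,
                               lambda = best_lambda)
  # Tag the fit so an S3 predict method can be dispatched on it
  # ("my_lasso" is an assumed, illustrative class name).
  class(lasso_best) <- c("my_lasso", class(lasso_best))
  lasso_best
}

# Assumed companion predict method returning class probabilities.
predict.my_lasso <- function(object, data, ...) {
  # Drop the wrapper class so predict() dispatches to glmnet's own method.
  class(object) <- setdiff(class(object), "my_lasso")
  as.vector(predict(object, newx = as.matrix(data), type = "response"))
}
```

Such a wrapper is then registered through `createTrainLayer()`, with its argument names mapped via the `x = "x"`, `y = "y"`, and `object = "object"` entries visible in the README hunk above.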