From 0eed9fa335067f9ecd361320a03cedcd7d92f38e Mon Sep 17 00:00:00 2001 From: Cesaire Joris Kuete Fouodo Date: Wed, 11 Dec 2024 21:19:27 +0100 Subject: [PATCH] on outputs --- doc/fuseMLR.html | 794 ++++++++++++++++++++++++----------------------- 1 file changed, 410 insertions(+), 384 deletions(-) diff --git a/doc/fuseMLR.html b/doc/fuseMLR.html index 4af5a3d..8e721e8 100644 --- a/doc/fuseMLR.html +++ b/doc/fuseMLR.html @@ -12,7 +12,7 @@ - + How does fuseMLR work? @@ -340,7 +340,7 @@

How does fuseMLR work?

Cesaire Fouodo

-

2024-12-09

+

2024-12-11

@@ -650,6 +650,17 @@

C.2 - Variable selection

#> 53 methylation cg19393006 #> 54 methylation cg12507125 #> 55 methylation cg01442799 +

Let us display the training object again to see the update at the
+variable level.

+
print(training)
+#> Training        : training
+#> Problem type    : classification
+#> Status          : Not trained
+#> Number of layers: 4
+#> Layers trained  : 0
+#> p               : 19 |  1 | 35
+#> n               : 50 | 50 | 50
+#> na.action       : na.keep | na.keep | na.keep

For each layer, the variable selection results show the chosen variables.

@@ -659,197 +670,212 @@

C.2 - Train

the subset of selected variables. Here we set use_var_sel = TRUE to previously perform variable selection before model training.

-
set.seed(5462)
-fusemlr(training = training,
-        use_var_sel = TRUE)
-#> Creating fold predictions.
-#> ================================================================================
-#> Training on layer geneexpr started.
-#> Training on layer geneexpr done.
-#> Training on layer proteinexpr started.
-#> Training on layer proteinexpr done.
-#> Training on layer methylation started.
-#> Training on layer methylation done.
-#> Training        : training
-#> Problem type    : classification
-#> Status          : Trained
-#> Number of layers: 4
-#> Layers trained  : 4
-#> Var. sel. used  : Yes
-#> p               : 131 | 160 | 367 |  3
-#> n               :  50 |  50 |  50 | 69
-#> na.action       : na.keep | na.keep | na.keep | na.rm
-print(training)
-#> Training        : training
-#> Problem type    : classification
-#> Status          : Trained
-#> Number of layers: 4
-#> Layers trained  : 4
-#> Var. sel. used  : Yes
-#> p               : 131 | 160 | 367 |  3
-#> n               :  50 |  50 |  50 | 69
-#> na.action       : na.keep | na.keep | na.keep | na.rm
+
set.seed(5462)
+fusemlr(training = training,
+        use_var_sel = TRUE)
+#> Creating fold predictions.
+#> 
+  |                                                                            
+  |                                                                      |   0%
+  |                                                                            
+  |=======                                                               |  10%
+  |                                                                            
+  |==============                                                        |  20%
+  |                                                                            
+  |=====================                                                 |  30%
+  |                                                                            
+  |============================                                          |  40%
+  |                                                                            
+  |===================================                                   |  50%
+  |                                                                            
+  |==========================================                            |  60%
+  |                                                                            
+  |=================================================                     |  70%
+  |                                                                            
+  |========================================================              |  80%
+  |                                                                            
+  |===============================================================       |  90%
+  |                                                                            
+  |======================================================================| 100%
+#> Training of base model on layer geneexpr started.
+#> Training of base model on layer geneexpr done.
+#> Training of base model on layer proteinexpr started.
+#> Training of base model on layer proteinexpr done.
+#> Training of base model on layer methylation started.
+#> Training of base model on layer methylation done.
+

The display of the training object now shows updated information about the
+trained layers.

+
print(training)
+#> Training        : training
+#> Problem type    : classification
+#> Status          : Trained
+#> Number of layers: 4
+#> Layers trained  : 4
+#> Var. sel. used  : Yes
+#> p               : 19 |  1 | 35 |  3
+#> n               : 50 | 50 | 50 | 69
+#> na.action       : na.keep | na.keep | na.keep | na.rm

We can also display a summary of training to see more details on layer levels. Information about the training data modality, the variable selection method and the learner stored at each layer will be displayed.

-
summary(training)
-#> Training training
-#> ----------------
-#> Training        : training
-#> Problem type    : classification
-#> Status          : Trained
-#> Number of layers: 4
-#> Layers trained  : 4
-#> Var. sel. used  : Yes
-#> p               : 131 | 160 | 367 |  3
-#> n               :  50 |  50 |  50 | 69
-#> na.action       : na.keep | na.keep | na.keep | na.rm
-#> ----------------
-#> 
-#>    Layer geneexpr
-#>    ----------------
-#>    TrainLayer            : geneexpr
-#>    Status                : Trained
-#>    Nb. of objects stored : 4
-#>    ----------------
-#>    Object(s) on layer geneexpr
-#> 
-#>       ----------------
-#>       TrainData : geneexpr_data
-#>       Layer      : geneexpr
-#>       Ind. id.   : IDS
-#>       Target     : disease
-#>       n          : 50
-#>       Missing    : 0
-#>       p          : 131
-#>       ----------------
-#> 
-#>       ----------------
-#>       VarSel           : geneexpr_varsel
-#>       TrainLayer       : geneexpr
-#>       Package          : Boruta
-#>       Function         : Boruta
-#>       ----------------
-#> 
-#>       ----------------
-#>       Learner          : geneexpr_lrner
-#>       TrainLayer       : geneexpr
-#>       Package          : ranger
-#>       Learn function   : ranger
-#>       ----------------
-#> 
-#> 
-#>    Layer proteinexpr
-#>    ----------------
-#>    TrainLayer            : proteinexpr
-#>    Status                : Trained
-#>    Nb. of objects stored : 4
-#>    ----------------
-#>    Object(s) on layer proteinexpr
-#> 
-#>       ----------------
-#>       TrainData : proteinexpr_data
-#>       Layer      : proteinexpr
-#>       Ind. id.   : IDS
-#>       Target     : disease
-#>       n          : 50
-#>       Missing    : 0
-#>       p          : 160
-#>       ----------------
-#> 
-#>       ----------------
-#>       VarSel           : proteinexpr_varsel
-#>       TrainLayer       : proteinexpr
-#>       Package          : Boruta
-#>       Function         : Boruta
-#>       ----------------
-#> 
-#>       ----------------
-#>       Learner          : proteinexpr_lrner
-#>       TrainLayer       : proteinexpr
-#>       Package          : ranger
-#>       Learn function   : ranger
-#>       ----------------
-#> 
-#> 
-#>    Layer methylation
-#>    ----------------
-#>    TrainLayer            : methylation
-#>    Status                : Trained
-#>    Nb. of objects stored : 4
-#>    ----------------
-#>    Object(s) on layer methylation
-#> 
-#>       ----------------
-#>       TrainData : methylation_data
-#>       Layer      : methylation
-#>       Ind. id.   : IDS
-#>       Target     : disease
-#>       n          : 50
-#>       Missing    : 0
-#>       p          : 367
-#>       ----------------
-#> 
-#>       ----------------
-#>       VarSel           : methylation_varsel
-#>       TrainLayer       : methylation
-#>       Package          : Boruta
-#>       Function         : Boruta
-#>       ----------------
-#> 
-#>       ----------------
-#>       Learner          : methylation_lrner
-#>       TrainLayer       : methylation
-#>       Package          : ranger
-#>       Learn function   : ranger
-#>       ----------------
-#> 
-#> 
-#>    MetaLayer
-#>    ----------------
-#>    TrainMetaLayer    : meta_layer
-#>    Status            : Trained
-#>    Nb. of objects stored : 3
-#> 
-#>    ----------------
-#>    Object(s) on MetaLayer
-#> 
-#>       ----------------
-#>       Learner          : meta_layer_lrner
-#>       TrainLayer       : meta_layer
-#>       Learn function   : weightedMeanLearner
-#>       ----------------
-#> 
-#>       ----------------
-#>       TrainData : modality-specific predictions
-#>       Layer      : meta_layer
-#>       Ind. id.   : IDS
-#>       Target     : disease
-#>       n          : 69
-#>       Missing    : 43
-#>       p          : 3
-#>       ----------------
+
summary(training)
+#> Training training
+#> ----------------
+#> Training        : training
+#> Problem type    : classification
+#> Status          : Trained
+#> Number of layers: 4
+#> Layers trained  : 4
+#> Var. sel. used  : Yes
+#> p               : 19 |  1 | 35 |  3
+#> n               : 50 | 50 | 50 | 69
+#> na.action       : na.keep | na.keep | na.keep | na.rm
+#> ----------------
+#> 
+#>    Layer geneexpr
+#>    ----------------
+#>    TrainLayer            : geneexpr
+#>    Status                : Trained
+#>    Nb. of objects stored : 4
+#>    ----------------
+#>    Object(s) on layer geneexpr
+#> 
+#>       ----------------
+#>       TrainData : geneexpr_data
+#>       Layer      : geneexpr
+#>       Ind. id.   : IDS
+#>       Target     : disease
+#>       n          : 50
+#>       Missing    : 0
+#>       p          : 19
+#>       ----------------
+#> 
+#>       ----------------
+#>       VarSel           : geneexpr_varsel
+#>       TrainLayer       : geneexpr
+#>       Package          : Boruta
+#>       Function         : Boruta
+#>       ----------------
+#> 
+#>       ----------------
+#>       Learner          : geneexpr_lrner
+#>       TrainLayer       : geneexpr
+#>       Package          : ranger
+#>       Learn function   : ranger
+#>       ----------------
+#> 
+#> 
+#>    Layer proteinexpr
+#>    ----------------
+#>    TrainLayer            : proteinexpr
+#>    Status                : Trained
+#>    Nb. of objects stored : 4
+#>    ----------------
+#>    Object(s) on layer proteinexpr
+#> 
+#>       ----------------
+#>       TrainData : proteinexpr_data
+#>       Layer      : proteinexpr
+#>       Ind. id.   : IDS
+#>       Target     : disease
+#>       n          : 50
+#>       Missing    : 0
+#>       p          : 1
+#>       ----------------
+#> 
+#>       ----------------
+#>       VarSel           : proteinexpr_varsel
+#>       TrainLayer       : proteinexpr
+#>       Package          : Boruta
+#>       Function         : Boruta
+#>       ----------------
+#> 
+#>       ----------------
+#>       Learner          : proteinexpr_lrner
+#>       TrainLayer       : proteinexpr
+#>       Package          : ranger
+#>       Learn function   : ranger
+#>       ----------------
+#> 
+#> 
+#>    Layer methylation
+#>    ----------------
+#>    TrainLayer            : methylation
+#>    Status                : Trained
+#>    Nb. of objects stored : 4
+#>    ----------------
+#>    Object(s) on layer methylation
+#> 
+#>       ----------------
+#>       TrainData : methylation_data
+#>       Layer      : methylation
+#>       Ind. id.   : IDS
+#>       Target     : disease
+#>       n          : 50
+#>       Missing    : 0
+#>       p          : 35
+#>       ----------------
+#> 
+#>       ----------------
+#>       VarSel           : methylation_varsel
+#>       TrainLayer       : methylation
+#>       Package          : Boruta
+#>       Function         : Boruta
+#>       ----------------
+#> 
+#>       ----------------
+#>       Learner          : methylation_lrner
+#>       TrainLayer       : methylation
+#>       Package          : ranger
+#>       Learn function   : ranger
+#>       ----------------
+#> 
+#> 
+#>    MetaLayer
+#>    ----------------
+#>    TrainMetaLayer    : meta_layer
+#>    Status            : Trained
+#>    Nb. of objects stored : 3
+#> 
+#>    ----------------
+#>    Object(s) on MetaLayer
+#> 
+#>       ----------------
+#>       Learner          : meta_layer_lrner
+#>       TrainLayer       : meta_layer
+#>       Learn function   : weightedMeanLearner
+#>       ----------------
+#> 
+#>       ----------------
+#>       TrainData : modality-specific predictions
+#>       Layer      : meta_layer
+#>       Ind. id.   : IDS
+#>       Target     : disease
+#>       n          : 69
+#>       Missing    : 43
+#>       p          : 3
+#>       ----------------

We use extractModel() to retrieve the list of stored models and extractData() to retrieve training data.

-
models_list <- extractModel(training = training)
-str(object = models_list, max.level = 1L)
-#> List of 4
-#>  $ geneexpr   :List of 14
-#>  $ proteinexpr:List of 14
-#>  $ methylation:List of 14
-#>  $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.599 0.157 0.244
-#>   ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"
+
models_list <- extractModel(training = training)
+str(object = models_list, max.level = 1L)
+#> List of 4
+#>  $ geneexpr   :List of 14
+#>  $ proteinexpr:List of 14
+#>  $ methylation:List of 14
+#>  $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.599 0.157 0.244
+#>   ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"

Three random forests and one weighted meta-model trained on each layer are returned. The smallest weight is assigned to protein abundance, while the highest is given to gene expression.

-
data_list <- extractData(object = training)
-str(object = data_list, max.level = 1)
-#> List of 4
-#>  $ geneexpr   :'data.frame': 50 obs. of  133 variables:
-#>  $ proteinexpr:'data.frame': 50 obs. of  162 variables:
-#>  $ methylation:'data.frame': 50 obs. of  369 variables:
-#>  $ meta_layer :'data.frame': 69 obs. of  5 variables:
+
data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
+#> List of 4
+#>  $ geneexpr   :'data.frame': 50 obs. of  21 variables:
+#>  $ proteinexpr:'data.frame': 50 obs. of  3 variables:
+#>  $ methylation:'data.frame': 50 obs. of  37 variables:
+#>  $ meta_layer :'data.frame': 69 obs. of  5 variables:

The three simulated training modalities and the meta-data are returned.

@@ -861,135 +887,135 @@

D - Predicting

analogously to training. Only the testing data modalities are required. Relevant functions are createTesting() and createTestLayer().

-
# Create testing for predictions
-testing <- createTesting(id = "testing",
-                         ind_col = "IDS")
-
# Create gene expression layer
-createTestLayer(testing = testing,
-                test_layer_id = "geneexpr",
-                test_data = multi_omics$testing$geneexpr)
-#> Testing         : testing
-#> Number of layers: 1
-#> p               : 131
-#> n               :  20
-
# Create gene protein abundance layer
-createTestLayer(testing = testing,
-                test_layer_id = "proteinexpr",
-                test_data = multi_omics$testing$proteinexpr)
-#> Testing         : testing
-#> Number of layers: 2
-#> p               : 131 | 160
-#> n               :  20 |  20
-
# Create methylation layer
+
# Create testing for predictions
+testing <- createTesting(id = "testing",
+                         ind_col = "IDS")
+
# Create gene expression layer
 createTestLayer(testing = testing,
-                test_layer_id = "methylation",
-                test_data = multi_omics$testing$methylation)
+                test_layer_id = "geneexpr",
+                test_data = multi_omics$testing$geneexpr)
 #> Testing         : testing
-#> Number of layers: 3
-#> p               : 131 | 160 | 367
-#> n               :  20 |  20 |  20
+#> Number of layers: 1
+#> p               : 131
+#> n               :  20
+
# Create protein abundance layer
+createTestLayer(testing = testing,
+                test_layer_id = "proteinexpr",
+                test_data = multi_omics$testing$proteinexpr)
+#> Testing         : testing
+#> Number of layers: 2
+#> p               : 131 | 160
+#> n               :  20 |  20
+
# Create methylation layer
+createTestLayer(testing = testing,
+                test_layer_id = "methylation",
+                test_data = multi_omics$testing$methylation)
+#> Testing         : testing
+#> Number of layers: 3
+#> p               : 131 | 160 | 367
+#> n               :  20 |  20 |  20

A summary of testing.

-
summary(testing)
-#> Testing testing
-#> ----------------
-#> Testing         : testing
-#> Number of layers: 3
-#> p               : 131 | 160 | 367
-#> n               :  20 |  20 |  20
-#> ----------------
-#> 
-#> Class     : TestData
-#> name      : geneexpr_data
-#> ind. id.  : IDS
-#> n         : 20
-#> p         : 132
-#> 
-#> 
-#> Class     : TestData
-#> name      : proteinexpr_data
-#> ind. id.  : IDS
-#> n         : 20
-#> p         : 161
-#> 
-#> 
-#> Class     : TestData
-#> name      : methylation_data
-#> ind. id.  : IDS
-#> n         : 20
-#> p         : 368
+
summary(testing)
+#> Testing testing
+#> ----------------
+#> Testing         : testing
+#> Number of layers: 3
+#> p               : 131 | 160 | 367
+#> n               :  20 |  20 |  20
+#> ----------------
+#> 
+#> Class     : TestData
+#> name      : geneexpr_data
+#> ind. id.  : IDS
+#> n         : 20
+#> p         : 132
+#> 
+#> 
+#> Class     : TestData
+#> name      : proteinexpr_data
+#> ind. id.  : IDS
+#> n         : 20
+#> p         : 161
+#> 
+#> 
+#> Class     : TestData
+#> name      : methylation_data
+#> ind. id.  : IDS
+#> n         : 20
+#> p         : 368

A look on testing data.

-
data_list <- extractData(object = testing)
-str(object = data_list, max.level = 1)
-#> List of 3
-#>  $ geneexpr   :'data.frame': 20 obs. of  132 variables:
-#>  $ proteinexpr:'data.frame': 20 obs. of  161 variables:
-#>  $ methylation:'data.frame': 20 obs. of  368 variables:
+
data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+#> List of 3
+#>  $ geneexpr   :'data.frame': 20 obs. of  132 variables:
+#>  $ proteinexpr:'data.frame': 20 obs. of  161 variables:
+#>  $ methylation:'data.frame': 20 obs. of  368 variables:

We can also generate an upset plot to visualize patient overlap across testing layers.

-
upsetplot(object = testing, order.by = "freq")
+
upsetplot(object = testing, order.by = "freq")

Function predict() is available for predicting.

-
predictions <- predict(object = training, testing = testing)
-print(predictions)
-#> $predicting
-#> Predicting   : testing
-#> Nb. layers   : 4
-#> 
-#> $predicted_values
-#>               IDS  geneexpr proteinexpr methylation meta_layer
-#> 1  participant100        NA   0.6326063   0.1251825  0.3242845
-#> 2   participant20        NA          NA   0.1571294  0.1571294
-#> 3   participant24 0.7400333   0.6601421   0.6843302  0.7138681
-#> 4   participant25 0.3868008          NA          NA  0.3868008
-#> 5   participant27 0.3497389          NA   0.3822556  0.3591507
-#> 6   participant28 0.6292540   0.1459032          NA  0.5285877
-#> 7    participant3 0.6919667   0.8694921          NA  0.7289395
-#> 8   participant32 0.1178167   0.4782968   0.2063500  0.1961747
-#> 9   participant34 0.5838603   0.8426365   0.7550254  0.6663532
-#> 10  participant39 0.5797437          NA   0.7959706  0.6423299
-#> 11  participant42 0.7620048   0.5215286   0.4115976  0.6386829
-#> 12  participant51        NA   0.2175794          NA  0.2175794
-#> 13  participant53        NA   0.5215286          NA  0.5215286
-#> 14  participant54 0.3963286          NA          NA  0.3963286
-#> 15  participant55 0.4559016          NA          NA  0.4559016
-#> 16   participant6        NA   0.2678444   0.7796278  0.5788153
-#> 17  participant63        NA   0.5215286   0.2305881  0.3447467
-#> 18  participant64 0.2425183   0.8734579          NA  0.3739225
-#> 19  participant68 0.6432230          NA   0.8299405  0.6972678
-#> 20  participant71 0.5282675   0.1441889   0.5876659  0.4822686
-#> 21  participant75        NA   0.6960524   0.2517754  0.4260999
-#> 22  participant77 0.2102119   0.9070841   0.1535889  0.3061458
-#> 23  participant79 0.3001992   0.2175794          NA  0.2829922
-#> 24  participant81 0.2856817   0.3816246   0.2459389  0.2910988
-#> 25  participant84 0.7755270   0.4325476   0.6476595  0.6903327
-#> 26  participant86 0.5468381          NA   0.6990262  0.5908885
-#> 27  participant94 0.4678897   0.4060643   0.7376230  0.5239321
-#> 28  participant97        NA          NA   0.2271817  0.2271817
-#> 29  participant98        NA   0.1441889   0.1903873  0.1722601
+
predictions <- predict(object = training, testing = testing)
+print(predictions)
+#> $predicting
+#> Predicting   : testing
+#> Nb. layers   : 4
+#> 
+#> $predicted_values
+#>               IDS  geneexpr proteinexpr methylation meta_layer
+#> 1  participant100        NA   0.6326063   0.1251825  0.3242845
+#> 2   participant20        NA          NA   0.1571294  0.1571294
+#> 3   participant24 0.7400333   0.6601421   0.6843302  0.7138681
+#> 4   participant25 0.3868008          NA          NA  0.3868008
+#> 5   participant27 0.3497389          NA   0.3822556  0.3591507
+#> 6   participant28 0.6292540   0.1459032          NA  0.5285877
+#> 7    participant3 0.6919667   0.8694921          NA  0.7289395
+#> 8   participant32 0.1178167   0.4782968   0.2063500  0.1961747
+#> 9   participant34 0.5838603   0.8426365   0.7550254  0.6663532
+#> 10  participant39 0.5797437          NA   0.7959706  0.6423299
+#> 11  participant42 0.7620048   0.5215286   0.4115976  0.6386829
+#> 12  participant51        NA   0.2175794          NA  0.2175794
+#> 13  participant53        NA   0.5215286          NA  0.5215286
+#> 14  participant54 0.3963286          NA          NA  0.3963286
+#> 15  participant55 0.4559016          NA          NA  0.4559016
+#> 16   participant6        NA   0.2678444   0.7796278  0.5788153
+#> 17  participant63        NA   0.5215286   0.2305881  0.3447467
+#> 18  participant64 0.2425183   0.8734579          NA  0.3739225
+#> 19  participant68 0.6432230          NA   0.8299405  0.6972678
+#> 20  participant71 0.5282675   0.1441889   0.5876659  0.4822686
+#> 21  participant75        NA   0.6960524   0.2517754  0.4260999
+#> 22  participant77 0.2102119   0.9070841   0.1535889  0.3061458
+#> 23  participant79 0.3001992   0.2175794          NA  0.2829922
+#> 24  participant81 0.2856817   0.3816246   0.2459389  0.2910988
+#> 25  participant84 0.7755270   0.4325476   0.6476595  0.6903327
+#> 26  participant86 0.5468381          NA   0.6990262  0.5908885
+#> 27  participant94 0.4678897   0.4060643   0.7376230  0.5239321
+#> 28  participant97        NA          NA   0.2271817  0.2271817
+#> 29  participant98        NA   0.1441889   0.1903873  0.1722601

Prediction performances for layer-specific levels and the meta-layer are estimated. We use the Brier Score (BS) to assess calibration performance and the Area Under the Curve (AUC) to evaluate classification accuracy.

-
pred_values <- predictions$predicted_values
-actual_pred <- merge(x = pred_values,
-                     y = multi_omics$testing$target,
-                     by = "IDS",
-                     all.y = TRUE)
-y <- as.numeric(actual_pred$disease == "1")
-
-# On all patients
-perf_bs <- sapply(X = actual_pred[ , 2L:5L], FUN = function (my_pred) {
-  bs <- mean((y[complete.cases(my_pred)] - my_pred[complete.cases(my_pred)])^2)
-  roc_obj <- pROC::roc(y[complete.cases(my_pred)], my_pred[complete.cases(my_pred)])
-  auc <- pROC::auc(roc_obj)
-  performances = rbind(bs, auc)
-  return(performances)
-})
-rownames(perf_bs) <- c("BS", "AUC")
-print(perf_bs)
-#>      geneexpr proteinexpr methylation meta_layer
-#> BS  0.1304363   0.3260078  0.07980678  0.1286471
-#> AUC 1.0000000   0.5350000  1.00000000  1.0000000
+
pred_values <- predictions$predicted_values
+actual_pred <- merge(x = pred_values,
+                     y = multi_omics$testing$target,
+                     by = "IDS",
+                     all.y = TRUE)
+y <- as.numeric(actual_pred$disease == "1")
+
+# On all patients
+perf_bs <- sapply(X = actual_pred[ , 2L:5L], FUN = function (my_pred) {
+  bs <- mean((y[complete.cases(my_pred)] - my_pred[complete.cases(my_pred)])^2)
+  roc_obj <- pROC::roc(y[complete.cases(my_pred)], my_pred[complete.cases(my_pred)])
+  auc <- pROC::auc(roc_obj)
+  performances = rbind(bs, auc)
+  return(performances)
+})
+rownames(perf_bs) <- c("BS", "AUC")
+print(perf_bs)
+#>      geneexpr proteinexpr methylation meta_layer
+#> BS  0.1304363   0.3260078  0.07980678  0.1286471
+#> AUC 1.0000000   0.5350000  1.00000000  1.0000000

As expected, the performance of the meta-learner in terms of Brier Score falls between the worst and best modality-specific performance measures. For AUC, the meta-learner performs as well as the best @@ -1042,39 +1068,39 @@

Interface

createTrainLayer to extract the predicted values. Similar arguments are also available for the createTrainMetaLayer function to generate meta-layer.

-
# Re-create the gene expression layer with support vector machine as learner.
-createTrainLayer(training = training,
-                 train_layer_id = "geneexpr",
-                 train_data = multi_omics$training$geneexpr,
-                 varsel_package = "Boruta",
-                 varsel_fct = "Boruta",
-                 varsel_param = list(num.trees = 1000L,
-                                     mtry = 3L,
-                                     probability = TRUE),
-                 lrner_package = "e1071",
-                 lrn_fct = "svm",
-                 param_train_list = list(type = 'C-classification',
-                                         kernel = 'radial',
-                                         probability = TRUE),
-                 param_pred_list = list(probability = TRUE),
-                 na_action = "na.rm",
-                 x_lrn = "x",
-                 y_lrn = "y",
-                 object = "object",
-                 data = "newdata", # Name discrepancy resolved.
-                 extract_pred_fct = function (pred) { 
-                   pred <- attr(pred, "probabilities")
-                   return(pred[ , 1L])
-                 }
-)
-# Variable selection
-set.seed(5467)
-var_sel_res <- varSelection(training = training)
-set.seed(5462)
-training <- fusemlr(training = training,
-                    use_var_sel = TRUE)
-
-print(training)
+
# Re-create the gene expression layer with support vector machine as learner.
+createTrainLayer(training = training,
+                 train_layer_id = "geneexpr",
+                 train_data = multi_omics$training$geneexpr,
+                 varsel_package = "Boruta",
+                 varsel_fct = "Boruta",
+                 varsel_param = list(num.trees = 1000L,
+                                     mtry = 3L,
+                                     probability = TRUE),
+                 lrner_package = "e1071",
+                 lrn_fct = "svm",
+                 param_train_list = list(type = 'C-classification',
+                                         kernel = 'radial',
+                                         probability = TRUE),
+                 param_pred_list = list(probability = TRUE),
+                 na_action = "na.rm",
+                 x_lrn = "x",
+                 y_lrn = "y",
+                 object = "object",
+                 data = "newdata", # Name discrepancy resolved.
+                 extract_pred_fct = function (pred) { 
+                   pred <- attr(pred, "probabilities")
+                   return(pred[ , 1L])
+                 }
+)
+# Variable selection
+set.seed(5467)
+var_sel_res <- varSelection(training = training)
+set.seed(5462)
+training <- fusemlr(training = training,
+                    use_var_sel = TRUE)
+
+print(training)

Wrapping

@@ -1084,62 +1110,62 @@

Wrapping

-
# We wrap the original functions
-mylasso <- function (x, y,
-                     nlambda = 25,
-                     nfolds = 5) {
-  # Perform cross-validation to find the optimal lambda
-  cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
-                        family = "binomial",
-                        type.measure = "deviance",
-                        nfolds = nfolds)
-  best_lambda <- cv_lasso$lambda.min
-  lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
-                       family = "binomial",
-                       alpha = 1,
-                       lambda = best_lambda
-  )
-  lasso_model <- list(model = lasso_best)
-  class(lasso_model) <- "mylasso"
-  return(lasso_model)
-}
+
# We wrap the original functions
+mylasso <- function (x, y,
+                     nlambda = 25,
+                     nfolds = 5) {
+  # Perform cross-validation to find the optimal lambda
+  cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
+                        family = "binomial",
+                        type.measure = "deviance",
+                        nfolds = nfolds)
+  best_lambda <- cv_lasso$lambda.min
+  lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
+                       family = "binomial",
+                       alpha = 1,
+                       lambda = best_lambda
+  )
+  lasso_model <- list(model = lasso_best)
+  class(lasso_model) <- "mylasso"
+  return(lasso_model)
+}
-
# We extend the generic predict function mylasso. 
-predict.mylasso <- function (object, data) {
-  glmnet_pred <- predict(object = object$model,
-                         newx = as.matrix(data),
-                         type = "response",
-                         s = object$model$lambda)
-  return(as.vector(glmnet_pred))
-}
-
-# Re-create the gene expression layer with support vector machine as learner.
-createTrainMetaLayer(training = training,
-                     meta_layer_id = "meta_layer",
-                     lrner_package = NULL,
-                     lrn_fct = "mylasso",
-                     param_train_list = list(nlambda = 100L),
-                     na_action = "na.impute")
-set.seed(5462)
-training <- fusemlr(training = training,
-                    use_var_sel = TRUE)
-print(training)
+
# We extend the generic predict function with a method for class mylasso.
+predict.mylasso <- function (object, data) {
+  glmnet_pred <- predict(object = object$model,
+                         newx = as.matrix(data),
+                         type = "response",
+                         s = object$model$lambda)
+  return(as.vector(glmnet_pred))
+}
+
+# Re-create the meta layer with the lasso wrapper (mylasso) as meta-learner.
+createTrainMetaLayer(training = training,
+                     meta_layer_id = "meta_layer",
+                     lrner_package = NULL,
+                     lrn_fct = "mylasso",
+                     param_train_list = list(nlambda = 100L),
+                     na_action = "na.impute")
+set.seed(5462)
+training <- fusemlr(training = training,
+                    use_var_sel = TRUE)
+print(training)
-
# Re-create the gene expression layer with support vector machine as learner.
-createTrainMetaLayer(training = training,
-                     meta_layer_id = "meta_layer",
-                     lrner_package = NULL,
-                     lrn_fct = "mylasso",
-                     param_train_list = list(nlambda = 100L),
-                     na_action = "na.impute")
-set.seed(5462)
-training <- fusemlr(training = training,
-                    use_var_sel = TRUE)
-print(training)
+
# Re-create the meta layer with the lasso wrapper (mylasso) as meta-learner.
+createTrainMetaLayer(training = training,
+                     meta_layer_id = "meta_layer",
+                     lrner_package = NULL,
+                     lrn_fct = "mylasso",
+                     param_train_list = list(nlambda = 100L),
+                     na_action = "na.impute")
+set.seed(5462)
+training <- fusemlr(training = training,
+                    use_var_sel = TRUE)
+print(training)