From 0eed9fa335067f9ecd361320a03cedcd7d92f38e Mon Sep 17 00:00:00 2001
From: Cesaire Joris Kuete Fouodo Let us display the training object again to see the update on
+variable level. For each layer, the variable selection results show the chosen
variables.How does fuseMLR work?
Cesaire Fouodo
-2024-12-09
+2024-12-11
@@ -650,6 +650,17 @@ C.2 - Variable selection
#> 53 methylation cg19393006
#> 54 methylation cg12507125
#> 55 methylation cg01442799
+print(training)
+#> Training : training
+#> Problem type : classification
+#> Status : Not trained
+#> Number of layers: 4
+#> Layers trained : 0
+#> p : 19 | 1 | 35
+#> n : 50 | 50 | 50
+#> na.action : na.keep | na.keep | na.keep
C.2 - Train
the subset of selected variables. Here we set
use_var_sel = TRUE
to perform variable selection
before model training.
set.seed(5462)
-fusemlr(training = training,
- use_var_sel = TRUE)
-#> Creating fold predictions.
-#> ================================================================================
-#> Training on layer geneexpr started.
-#> Training on layer geneexpr done.
-#> Training on layer proteinexpr started.
-#> Training on layer proteinexpr done.
-#> Training on layer methylation started.
-#> Training on layer methylation done.
-#> Training : training
-#> Problem type : classification
-#> Status : Trained
-#> Number of layers: 4
-#> Layers trained : 4
-#> Var. sel. used : Yes
-#> p : 131 | 160 | 367 | 3
-#> n : 50 | 50 | 50 | 69
-#> na.action : na.keep | na.keep | na.keep | na.rm
-print(training)
-#> Training : training
-#> Problem type : classification
-#> Status : Trained
-#> Number of layers: 4
-#> Layers trained : 4
-#> Var. sel. used : Yes
-#> p : 131 | 160 | 367 | 3
-#> n : 50 | 50 | 50 | 69
-#> na.action : na.keep | na.keep | na.keep | na.rm
set.seed(5462)
+fusemlr(training = training,
+ use_var_sel = TRUE)
+#> Creating fold predictions.
+#>
+ |
+ | | 0%
+ |
+ |======= | 10%
+ |
+ |============== | 20%
+ |
+ |===================== | 30%
+ |
+ |============================ | 40%
+ |
+ |=================================== | 50%
+ |
+ |========================================== | 60%
+ |
+ |================================================= | 70%
+ |
+ |======================================================== | 80%
+ |
+ |=============================================================== | 90%
+ |
+ |======================================================================| 100%
+#> Training of base model on layer geneexpr started.
+#> Training of base model on layer geneexpr done.
+#> Training of base model on layer proteinexpr started.
+#> Training of base model on layer proteinexpr done.
+#> Training of base model on layer methylation started.
+#> Training of base model on layer methylation done.
The display of the training object now updates information about the trained layers.
+print(training)
+#> Training : training
+#> Problem type : classification
+#> Status : Trained
+#> Number of layers: 4
+#> Layers trained : 4
+#> Var. sel. used : Yes
+#> p : 19 | 1 | 35 | 3
+#> n : 50 | 50 | 50 | 69
+#> na.action : na.keep | na.keep | na.keep | na.rm
We can also display a summary of training
to see more
details on layer levels. Information about the training data modality,
the variable selection method and the learner stored at each layer will
be displayed.
summary(training)
-#> Training training
-#> ----------------
-#> Training : training
-#> Problem type : classification
-#> Status : Trained
-#> Number of layers: 4
-#> Layers trained : 4
-#> Var. sel. used : Yes
-#> p : 131 | 160 | 367 | 3
-#> n : 50 | 50 | 50 | 69
-#> na.action : na.keep | na.keep | na.keep | na.rm
-#> ----------------
-#>
-#> Layer geneexpr
-#> ----------------
-#> TrainLayer : geneexpr
-#> Status : Trained
-#> Nb. of objects stored : 4
-#> ----------------
-#> Object(s) on layer geneexpr
-#>
-#> ----------------
-#> TrainData : geneexpr_data
-#> Layer : geneexpr
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 50
-#> Missing : 0
-#> p : 131
-#> ----------------
-#>
-#> ----------------
-#> VarSel : geneexpr_varsel
-#> TrainLayer : geneexpr
-#> Package : Boruta
-#> Function : Boruta
-#> ----------------
-#>
-#> ----------------
-#> Learner : geneexpr_lrner
-#> TrainLayer : geneexpr
-#> Package : ranger
-#> Learn function : ranger
-#> ----------------
-#>
-#>
-#> Layer proteinexpr
-#> ----------------
-#> TrainLayer : proteinexpr
-#> Status : Trained
-#> Nb. of objects stored : 4
-#> ----------------
-#> Object(s) on layer proteinexpr
-#>
-#> ----------------
-#> TrainData : proteinexpr_data
-#> Layer : proteinexpr
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 50
-#> Missing : 0
-#> p : 160
-#> ----------------
-#>
-#> ----------------
-#> VarSel : proteinexpr_varsel
-#> TrainLayer : proteinexpr
-#> Package : Boruta
-#> Function : Boruta
-#> ----------------
-#>
-#> ----------------
-#> Learner : proteinexpr_lrner
-#> TrainLayer : proteinexpr
-#> Package : ranger
-#> Learn function : ranger
-#> ----------------
-#>
-#>
-#> Layer methylation
-#> ----------------
-#> TrainLayer : methylation
-#> Status : Trained
-#> Nb. of objects stored : 4
-#> ----------------
-#> Object(s) on layer methylation
-#>
-#> ----------------
-#> TrainData : methylation_data
-#> Layer : methylation
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 50
-#> Missing : 0
-#> p : 367
-#> ----------------
-#>
-#> ----------------
-#> VarSel : methylation_varsel
-#> TrainLayer : methylation
-#> Package : Boruta
-#> Function : Boruta
-#> ----------------
-#>
-#> ----------------
-#> Learner : methylation_lrner
-#> TrainLayer : methylation
-#> Package : ranger
-#> Learn function : ranger
-#> ----------------
-#>
-#>
-#> MetaLayer
-#> ----------------
-#> TrainMetaLayer : meta_layer
-#> Status : Trained
-#> Nb. of objects stored : 3
-#>
-#> ----------------
-#> Object(s) on MetaLayer
-#>
-#> ----------------
-#> Learner : meta_layer_lrner
-#> TrainLayer : meta_layer
-#> Learn function : weightedMeanLearner
-#> ----------------
-#>
-#> ----------------
-#> TrainData : modality-specific predictions
-#> Layer : meta_layer
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 69
-#> Missing : 43
-#> p : 3
-#> ----------------
summary(training)
+#> Training training
+#> ----------------
+#> Training : training
+#> Problem type : classification
+#> Status : Trained
+#> Number of layers: 4
+#> Layers trained : 4
+#> Var. sel. used : Yes
+#> p : 19 | 1 | 35 | 3
+#> n : 50 | 50 | 50 | 69
+#> na.action : na.keep | na.keep | na.keep | na.rm
+#> ----------------
+#>
+#> Layer geneexpr
+#> ----------------
+#> TrainLayer : geneexpr
+#> Status : Trained
+#> Nb. of objects stored : 4
+#> ----------------
+#> Object(s) on layer geneexpr
+#>
+#> ----------------
+#> TrainData : geneexpr_data
+#> Layer : geneexpr
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 50
+#> Missing : 0
+#> p : 19
+#> ----------------
+#>
+#> ----------------
+#> VarSel : geneexpr_varsel
+#> TrainLayer : geneexpr
+#> Package : Boruta
+#> Function : Boruta
+#> ----------------
+#>
+#> ----------------
+#> Learner : geneexpr_lrner
+#> TrainLayer : geneexpr
+#> Package : ranger
+#> Learn function : ranger
+#> ----------------
+#>
+#>
+#> Layer proteinexpr
+#> ----------------
+#> TrainLayer : proteinexpr
+#> Status : Trained
+#> Nb. of objects stored : 4
+#> ----------------
+#> Object(s) on layer proteinexpr
+#>
+#> ----------------
+#> TrainData : proteinexpr_data
+#> Layer : proteinexpr
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 50
+#> Missing : 0
+#> p : 1
+#> ----------------
+#>
+#> ----------------
+#> VarSel : proteinexpr_varsel
+#> TrainLayer : proteinexpr
+#> Package : Boruta
+#> Function : Boruta
+#> ----------------
+#>
+#> ----------------
+#> Learner : proteinexpr_lrner
+#> TrainLayer : proteinexpr
+#> Package : ranger
+#> Learn function : ranger
+#> ----------------
+#>
+#>
+#> Layer methylation
+#> ----------------
+#> TrainLayer : methylation
+#> Status : Trained
+#> Nb. of objects stored : 4
+#> ----------------
+#> Object(s) on layer methylation
+#>
+#> ----------------
+#> TrainData : methylation_data
+#> Layer : methylation
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 50
+#> Missing : 0
+#> p : 35
+#> ----------------
+#>
+#> ----------------
+#> VarSel : methylation_varsel
+#> TrainLayer : methylation
+#> Package : Boruta
+#> Function : Boruta
+#> ----------------
+#>
+#> ----------------
+#> Learner : methylation_lrner
+#> TrainLayer : methylation
+#> Package : ranger
+#> Learn function : ranger
+#> ----------------
+#>
+#>
+#> MetaLayer
+#> ----------------
+#> TrainMetaLayer : meta_layer
+#> Status : Trained
+#> Nb. of objects stored : 3
+#>
+#> ----------------
+#> Object(s) on MetaLayer
+#>
+#> ----------------
+#> Learner : meta_layer_lrner
+#> TrainLayer : meta_layer
+#> Learn function : weightedMeanLearner
+#> ----------------
+#>
+#> ----------------
+#> TrainData : modality-specific predictions
+#> Layer : meta_layer
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 69
+#> Missing : 43
+#> p : 3
+#> ----------------
We use extractModel()
to retrieve the list of stored
models and extractData()
to retrieve training data.
models_list <- extractModel(training = training)
-str(object = models_list, max.level = 1L)
-#> List of 4
-#> $ geneexpr :List of 14
-#> $ proteinexpr:List of 14
-#> $ methylation:List of 14
-#> $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.599 0.157 0.244
-#> ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"
models_list <- extractModel(training = training)
+str(object = models_list, max.level = 1L)
+#> List of 4
+#> $ geneexpr :List of 14
+#> $ proteinexpr:List of 14
+#> $ methylation:List of 14
+#> $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.599 0.157 0.244
+#> ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"
Three random forests and one weighted meta-model trained on each layer are returned. The smallest weight is assigned to protein abundance, while the highest is given to gene expression.
-data_list <- extractData(object = training)
-str(object = data_list, max.level = 1)
-#> List of 4
-#> $ geneexpr :'data.frame': 50 obs. of 133 variables:
-#> $ proteinexpr:'data.frame': 50 obs. of 162 variables:
-#> $ methylation:'data.frame': 50 obs. of 369 variables:
-#> $ meta_layer :'data.frame': 69 obs. of 5 variables:
data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
+#> List of 4
+#> $ geneexpr :'data.frame': 50 obs. of 21 variables:
+#> $ proteinexpr:'data.frame': 50 obs. of 3 variables:
+#> $ methylation:'data.frame': 50 obs. of 37 variables:
+#> $ meta_layer :'data.frame': 69 obs. of 5 variables:
The three simulated training modalities and the meta-data are returned.
@@ -861,135 +887,135 @@training
. Only the testing data modalities
are required. Relevant functions are createTesting()
and
createTestLayer()
.
-
-# Create gene expression layer
-createTestLayer(testing = testing,
- test_layer_id = "geneexpr",
- test_data = multi_omics$testing$geneexpr)
-#> Testing : testing
-#> Number of layers: 1
-#> p : 131
-#> n : 20
# Create protein abundance layer
-createTestLayer(testing = testing,
- test_layer_id = "proteinexpr",
- test_data = multi_omics$testing$proteinexpr)
-#> Testing : testing
-#> Number of layers: 2
-#> p : 131 | 160
-#> n : 20 | 20
# Create methylation layer
+
+# Create gene expression layer
createTestLayer(testing = testing,
- test_layer_id = "methylation",
- test_data = multi_omics$testing$methylation)
+ test_layer_id = "geneexpr",
+ test_data = multi_omics$testing$geneexpr)
#> Testing : testing
-#> Number of layers: 3
-#> p : 131 | 160 | 367
-#> n : 20 | 20 | 20
+#> Number of layers: 1
+#> p : 131
+#> n : 20
# Create protein abundance layer
+createTestLayer(testing = testing,
+ test_layer_id = "proteinexpr",
+ test_data = multi_omics$testing$proteinexpr)
+#> Testing : testing
+#> Number of layers: 2
+#> p : 131 | 160
+#> n : 20 | 20
# Create methylation layer
+createTestLayer(testing = testing,
+ test_layer_id = "methylation",
+ test_data = multi_omics$testing$methylation)
+#> Testing : testing
+#> Number of layers: 3
+#> p : 131 | 160 | 367
+#> n : 20 | 20 | 20
A summary of testing
.
summary(testing)
-#> Testing testing
-#> ----------------
-#> Testing : testing
-#> Number of layers: 3
-#> p : 131 | 160 | 367
-#> n : 20 | 20 | 20
-#> ----------------
-#>
-#> Class : TestData
-#> name : geneexpr_data
-#> ind. id. : IDS
-#> n : 20
-#> p : 132
-#>
-#>
-#> Class : TestData
-#> name : proteinexpr_data
-#> ind. id. : IDS
-#> n : 20
-#> p : 161
-#>
-#>
-#> Class : TestData
-#> name : methylation_data
-#> ind. id. : IDS
-#> n : 20
-#> p : 368
summary(testing)
+#> Testing testing
+#> ----------------
+#> Testing : testing
+#> Number of layers: 3
+#> p : 131 | 160 | 367
+#> n : 20 | 20 | 20
+#> ----------------
+#>
+#> Class : TestData
+#> name : geneexpr_data
+#> ind. id. : IDS
+#> n : 20
+#> p : 132
+#>
+#>
+#> Class : TestData
+#> name : proteinexpr_data
+#> ind. id. : IDS
+#> n : 20
+#> p : 161
+#>
+#>
+#> Class : TestData
+#> name : methylation_data
+#> ind. id. : IDS
+#> n : 20
+#> p : 368
A look at the testing data.
-data_list <- extractData(object = testing)
-str(object = data_list, max.level = 1)
-#> List of 3
-#> $ geneexpr :'data.frame': 20 obs. of 132 variables:
-#> $ proteinexpr:'data.frame': 20 obs. of 161 variables:
-#> $ methylation:'data.frame': 20 obs. of 368 variables:
data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+#> List of 3
+#> $ geneexpr :'data.frame': 20 obs. of 132 variables:
+#> $ proteinexpr:'data.frame': 20 obs. of 161 variables:
+#> $ methylation:'data.frame': 20 obs. of 368 variables:
We can also generate an upset plot to visualize patient overlap across testing layers.
- +Function predict()
is available for predicting.
predictions <- predict(object = training, testing = testing)
-print(predictions)
-#> $predicting
-#> Predicting : testing
-#> Nb. layers : 4
-#>
-#> $predicted_values
-#> IDS geneexpr proteinexpr methylation meta_layer
-#> 1 participant100 NA 0.6326063 0.1251825 0.3242845
-#> 2 participant20 NA NA 0.1571294 0.1571294
-#> 3 participant24 0.7400333 0.6601421 0.6843302 0.7138681
-#> 4 participant25 0.3868008 NA NA 0.3868008
-#> 5 participant27 0.3497389 NA 0.3822556 0.3591507
-#> 6 participant28 0.6292540 0.1459032 NA 0.5285877
-#> 7 participant3 0.6919667 0.8694921 NA 0.7289395
-#> 8 participant32 0.1178167 0.4782968 0.2063500 0.1961747
-#> 9 participant34 0.5838603 0.8426365 0.7550254 0.6663532
-#> 10 participant39 0.5797437 NA 0.7959706 0.6423299
-#> 11 participant42 0.7620048 0.5215286 0.4115976 0.6386829
-#> 12 participant51 NA 0.2175794 NA 0.2175794
-#> 13 participant53 NA 0.5215286 NA 0.5215286
-#> 14 participant54 0.3963286 NA NA 0.3963286
-#> 15 participant55 0.4559016 NA NA 0.4559016
-#> 16 participant6 NA 0.2678444 0.7796278 0.5788153
-#> 17 participant63 NA 0.5215286 0.2305881 0.3447467
-#> 18 participant64 0.2425183 0.8734579 NA 0.3739225
-#> 19 participant68 0.6432230 NA 0.8299405 0.6972678
-#> 20 participant71 0.5282675 0.1441889 0.5876659 0.4822686
-#> 21 participant75 NA 0.6960524 0.2517754 0.4260999
-#> 22 participant77 0.2102119 0.9070841 0.1535889 0.3061458
-#> 23 participant79 0.3001992 0.2175794 NA 0.2829922
-#> 24 participant81 0.2856817 0.3816246 0.2459389 0.2910988
-#> 25 participant84 0.7755270 0.4325476 0.6476595 0.6903327
-#> 26 participant86 0.5468381 NA 0.6990262 0.5908885
-#> 27 participant94 0.4678897 0.4060643 0.7376230 0.5239321
-#> 28 participant97 NA NA 0.2271817 0.2271817
-#> 29 participant98 NA 0.1441889 0.1903873 0.1722601
predictions <- predict(object = training, testing = testing)
+print(predictions)
+#> $predicting
+#> Predicting : testing
+#> Nb. layers : 4
+#>
+#> $predicted_values
+#> IDS geneexpr proteinexpr methylation meta_layer
+#> 1 participant100 NA 0.6326063 0.1251825 0.3242845
+#> 2 participant20 NA NA 0.1571294 0.1571294
+#> 3 participant24 0.7400333 0.6601421 0.6843302 0.7138681
+#> 4 participant25 0.3868008 NA NA 0.3868008
+#> 5 participant27 0.3497389 NA 0.3822556 0.3591507
+#> 6 participant28 0.6292540 0.1459032 NA 0.5285877
+#> 7 participant3 0.6919667 0.8694921 NA 0.7289395
+#> 8 participant32 0.1178167 0.4782968 0.2063500 0.1961747
+#> 9 participant34 0.5838603 0.8426365 0.7550254 0.6663532
+#> 10 participant39 0.5797437 NA 0.7959706 0.6423299
+#> 11 participant42 0.7620048 0.5215286 0.4115976 0.6386829
+#> 12 participant51 NA 0.2175794 NA 0.2175794
+#> 13 participant53 NA 0.5215286 NA 0.5215286
+#> 14 participant54 0.3963286 NA NA 0.3963286
+#> 15 participant55 0.4559016 NA NA 0.4559016
+#> 16 participant6 NA 0.2678444 0.7796278 0.5788153
+#> 17 participant63 NA 0.5215286 0.2305881 0.3447467
+#> 18 participant64 0.2425183 0.8734579 NA 0.3739225
+#> 19 participant68 0.6432230 NA 0.8299405 0.6972678
+#> 20 participant71 0.5282675 0.1441889 0.5876659 0.4822686
+#> 21 participant75 NA 0.6960524 0.2517754 0.4260999
+#> 22 participant77 0.2102119 0.9070841 0.1535889 0.3061458
+#> 23 participant79 0.3001992 0.2175794 NA 0.2829922
+#> 24 participant81 0.2856817 0.3816246 0.2459389 0.2910988
+#> 25 participant84 0.7755270 0.4325476 0.6476595 0.6903327
+#> 26 participant86 0.5468381 NA 0.6990262 0.5908885
+#> 27 participant94 0.4678897 0.4060643 0.7376230 0.5239321
+#> 28 participant97 NA NA 0.2271817 0.2271817
+#> 29 participant98 NA 0.1441889 0.1903873 0.1722601
Prediction performances for layer-specific levels and the meta-layer are estimated. We use the Brier Score (BS) to assess calibration performance and the Area Under the Curve (AUC) to evaluate classification accuracy.
-pred_values <- predictions$predicted_values
-actual_pred <- merge(x = pred_values,
- y = multi_omics$testing$target,
- by = "IDS",
- all.y = TRUE)
-y <- as.numeric(actual_pred$disease == "1")
-
-# On all patients
-perf_bs <- sapply(X = actual_pred[ , 2L:5L], FUN = function (my_pred) {
- bs <- mean((y[complete.cases(my_pred)] - my_pred[complete.cases(my_pred)])^2)
- roc_obj <- pROC::roc(y[complete.cases(my_pred)], my_pred[complete.cases(my_pred)])
- auc <- pROC::auc(roc_obj)
- performances = rbind(bs, auc)
- return(performances)
-})
-rownames(perf_bs) <- c("BS", "AUC")
-print(perf_bs)
-#> geneexpr proteinexpr methylation meta_layer
-#> BS 0.1304363 0.3260078 0.07980678 0.1286471
-#> AUC 1.0000000 0.5350000 1.00000000 1.0000000
pred_values <- predictions$predicted_values
+actual_pred <- merge(x = pred_values,
+ y = multi_omics$testing$target,
+ by = "IDS",
+ all.y = TRUE)
+y <- as.numeric(actual_pred$disease == "1")
+
+# On all patients
+perf_bs <- sapply(X = actual_pred[ , 2L:5L], FUN = function (my_pred) {
+ bs <- mean((y[complete.cases(my_pred)] - my_pred[complete.cases(my_pred)])^2)
+ roc_obj <- pROC::roc(y[complete.cases(my_pred)], my_pred[complete.cases(my_pred)])
+ auc <- pROC::auc(roc_obj)
+ performances = rbind(bs, auc)
+ return(performances)
+})
+rownames(perf_bs) <- c("BS", "AUC")
+print(perf_bs)
+#> geneexpr proteinexpr methylation meta_layer
+#> BS 0.1304363 0.3260078 0.07980678 0.1286471
+#> AUC 1.0000000 0.5350000 1.00000000 1.0000000
As expected, the performance of the meta-learner in terms of Brier Score falls between the worst and best modality-specific performance measures. For AUC, the meta-learner performs as well as the best @@ -1042,39 +1068,39 @@
createTrainLayer
to extract the predicted values. Similar
arguments are also available for the createTrainMetaLayer
function to generate meta-layer.
-# Re-create the gene expression layer with support vector machine as learner.
-createTrainLayer(training = training,
- train_layer_id = "geneexpr",
- train_data = multi_omics$training$geneexpr,
- varsel_package = "Boruta",
- varsel_fct = "Boruta",
- varsel_param = list(num.trees = 1000L,
- mtry = 3L,
- probability = TRUE),
- lrner_package = "e1071",
- lrn_fct = "svm",
- param_train_list = list(type = 'C-classification',
- kernel = 'radial',
- probability = TRUE),
- param_pred_list = list(probability = TRUE),
- na_action = "na.rm",
- x_lrn = "x",
- y_lrn = "y",
- object = "object",
- data = "newdata", # Name discrepancy resolved.
- extract_pred_fct = function (pred) {
- pred <- attr(pred, "probabilities")
- return(pred[ , 1L])
- }
-)
-# Variable selection
-set.seed(5467)
-var_sel_res <- varSelection(training = training)
-set.seed(5462)
-training <- fusemlr(training = training,
- use_var_sel = TRUE)
-
-print(training)
# Re-create the gene expression layer with support vector machine as learner.
+createTrainLayer(training = training,
+ train_layer_id = "geneexpr",
+ train_data = multi_omics$training$geneexpr,
+ varsel_package = "Boruta",
+ varsel_fct = "Boruta",
+ varsel_param = list(num.trees = 1000L,
+ mtry = 3L,
+ probability = TRUE),
+ lrner_package = "e1071",
+ lrn_fct = "svm",
+ param_train_list = list(type = 'C-classification',
+ kernel = 'radial',
+ probability = TRUE),
+ param_pred_list = list(probability = TRUE),
+ na_action = "na.rm",
+ x_lrn = "x",
+ y_lrn = "y",
+ object = "object",
+ data = "newdata", # Name discrepancy resolved.
+ extract_pred_fct = function (pred) {
+ pred <- attr(pred, "probabilities")
+ return(pred[ , 1L])
+ }
+)
+# Variable selection
+set.seed(5467)
+var_sel_res <- varSelection(training = training)
+set.seed(5462)
+training <- fusemlr(training = training,
+ use_var_sel = TRUE)
+
+print(training)
# We wrap the original functions
-mylasso <- function (x, y,
- nlambda = 25,
- nfolds = 5) {
- # Perform cross-validation to find the optimal lambda
- cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
- family = "binomial",
- type.measure = "deviance",
- nfolds = nfolds)
- best_lambda <- cv_lasso$lambda.min
- lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
- family = "binomial",
- alpha = 1,
- lambda = best_lambda
- )
- lasso_model <- list(model = lasso_best)
- class(lasso_model) <- "mylasso"
- return(lasso_model)
-}
# We wrap the original functions
+mylasso <- function (x, y,
+ nlambda = 25,
+ nfolds = 5) {
+ # Perform cross-validation to find the optimal lambda
+ cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
+ family = "binomial",
+ type.measure = "deviance",
+ nfolds = nfolds)
+ best_lambda <- cv_lasso$lambda.min
+ lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
+ family = "binomial",
+ alpha = 1,
+ lambda = best_lambda
+ )
+ lasso_model <- list(model = lasso_best)
+ class(lasso_model) <- "mylasso"
+ return(lasso_model)
+}
predict
.# We extend the generic predict function for objects of class mylasso.
-predict.mylasso <- function (object, data) {
- glmnet_pred <- predict(object = object$model,
- newx = as.matrix(data),
- type = "response",
- s = object$model$lambda)
- return(as.vector(glmnet_pred))
-}
-
-# Re-create the gene expression layer with support vector machine as learner.
-createTrainMetaLayer(training = training,
- meta_layer_id = "meta_layer",
- lrner_package = NULL,
- lrn_fct = "mylasso",
- param_train_list = list(nlambda = 100L),
- na_action = "na.impute")
-set.seed(5462)
-training <- fusemlr(training = training,
- use_var_sel = TRUE)
-print(training)
# We extend the generic predict function for objects of class mylasso.
+predict.mylasso <- function (object, data) {
+ glmnet_pred <- predict(object = object$model,
+ newx = as.matrix(data),
+ type = "response",
+ s = object$model$lambda)
+ return(as.vector(glmnet_pred))
+}
+
+# Re-create the meta-layer with the lasso wrapper (mylasso) as meta-learner.
+createTrainMetaLayer(training = training,
+ meta_layer_id = "meta_layer",
+ lrner_package = NULL,
+ lrn_fct = "mylasso",
+ param_train_list = list(nlambda = 100L),
+ na_action = "na.impute")
+set.seed(5462)
+training <- fusemlr(training = training,
+ use_var_sel = TRUE)
+print(training)
# Re-create the meta-layer with the lasso wrapper (mylasso) as meta-learner.
-createTrainMetaLayer(training = training,
- meta_layer_id = "meta_layer",
- lrner_package = NULL,
- lrn_fct = "mylasso",
- param_train_list = list(nlambda = 100L),
- na_action = "na.impute")
-set.seed(5462)
-training <- fusemlr(training = training,
- use_var_sel = TRUE)
-print(training)
# Re-create the meta-layer with the lasso wrapper (mylasso) as meta-learner.
+createTrainMetaLayer(training = training,
+ meta_layer_id = "meta_layer",
+ lrner_package = NULL,
+ lrn_fct = "mylasso",
+ param_train_list = list(nlambda = 100L),
+ na_action = "na.impute")
+set.seed(5462)
+training <- fusemlr(training = training,
+ use_var_sel = TRUE)
+print(training)