From 0eed9fa335067f9ecd361320a03cedcd7d92f38e Mon Sep 17 00:00:00 2001
From: Cesaire Joris Kuete Fouodo Let us display the training object again to see the update on
+variable level. For each layer, the variable selection results show the chosen
variables.How does fuseMLR work?
Cesaire Fouodo
@@ -650,6 +650,17 @@ C.2 - Variable selection
#> 53 methylation cg19393006
#> 54 methylation cg12507125
#> 55 methylation cg01442799
+#> Training : training
+#> Problem type : classification
+#> Status : Not trained
+#> Number of layers: 4
+#> Layers trained : 0
+#> p : 19 | 1 | 35
+#> n : 50 | 50 | 50
+#> na.action : na.keep | na.keep | na.keep
C.2 - Train
the subset of selected variables. Here we set
use_var_sel = TRUE
so that variable selection is carried out
prior to model training.
-fusemlr(training = training,
- use_var_sel = TRUE)
-#> Creating fold predictions.
-#> ================================================================================
-#> Training on layer geneexpr started.
-#> Training on layer geneexpr done.
-#> Training on layer proteinexpr started.
-#> Training on layer proteinexpr done.
-#> Training on layer methylation started.
-#> Training on layer methylation done.
-#> Training : training
-#> Problem type : classification
-#> Status : Trained
-#> Number of layers: 4
-#> Layers trained : 4
-#> Var. sel. used : Yes
-#> p : 131 | 160 | 367 | 3
-#> n : 50 | 50 | 50 | 69
-#> na.action : na.keep | na.keep | na.keep | na.rm
-#> Training : training
-#> Problem type : classification
-#> Status : Trained
-#> Number of layers: 4
-#> Layers trained : 4
-#> Var. sel. used : Yes
-#> p : 131 | 160 | 367 | 3
-#> n : 50 | 50 | 50 | 69
-#> na.action : na.keep | na.keep | na.keep | na.rm
+fusemlr(training = training,
+ use_var_sel = TRUE)
+#> Creating fold predictions.
+ |
+ | | 0%
+ |
+ |======= | 10%
+ |
+ |============== | 20%
+ |
+ |===================== | 30%
+ |
+ |============================ | 40%
+ |
+ |=================================== | 50%
+ |
+ |========================================== | 60%
+ |
+ |================================================= | 70%
+ |
+ |======================================================== | 80%
+ |
+ |=============================================================== | 90%
+ |
+ |======================================================================| 100%
+#> Training of base model on layer geneexpr started.
+#> Training of base model on layer geneexpr done.
+#> Training of base model on layer proteinexpr started.
+#> Training of base model on layer proteinexpr done.
+#> Training of base model on layer methylation started.
+#> Training of base model on layer methylation done.
The display of the training object now updates information about the +trained layers.
+#> Training : training
+#> Problem type : classification
+#> Status : Trained
+#> Number of layers: 4
+#> Layers trained : 4
+#> Var. sel. used : Yes
+#> p : 19 | 1 | 35 | 3
+#> n : 50 | 50 | 50 | 69
+#> na.action : na.keep | na.keep | na.keep | na.rm
We can also display a summary of training
to see more
details on layer levels. Information about the training data modality,
the variable selection method and the learner stored at each layer will
be displayed.
-#> Training training
-#> ----------------
-#> Training : training
-#> Problem type : classification
-#> Status : Trained
-#> Number of layers: 4
-#> Layers trained : 4
-#> Var. sel. used : Yes
-#> p : 131 | 160 | 367 | 3
-#> n : 50 | 50 | 50 | 69
-#> na.action : na.keep | na.keep | na.keep | na.rm
-#> ----------------
-#> Layer geneexpr
-#> ----------------
-#> TrainLayer : geneexpr
-#> Status : Trained
-#> Nb. of objects stored : 4
-#> ----------------
-#> Object(s) on layer geneexpr
-#> ----------------
-#> TrainData : geneexpr_data
-#> Layer : geneexpr
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 50
-#> Missing : 0
-#> p : 131
-#> ----------------
-#> ----------------
-#> VarSel : geneexpr_varsel
-#> TrainLayer : geneexpr
-#> Package : Boruta
-#> Function : Boruta
-#> ----------------
-#> ----------------
-#> Learner : geneexpr_lrner
-#> TrainLayer : geneexpr
-#> Package : ranger
-#> Learn function : ranger
-#> ----------------
-#> Layer proteinexpr
-#> ----------------
-#> TrainLayer : proteinexpr
-#> Status : Trained
-#> Nb. of objects stored : 4
-#> ----------------
-#> Object(s) on layer proteinexpr
-#> ----------------
-#> TrainData : proteinexpr_data
-#> Layer : proteinexpr
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 50
-#> Missing : 0
-#> p : 160
-#> ----------------
-#> ----------------
-#> VarSel : proteinexpr_varsel
-#> TrainLayer : proteinexpr
-#> Package : Boruta
-#> Function : Boruta
-#> ----------------
-#> ----------------
-#> Learner : proteinexpr_lrner
-#> TrainLayer : proteinexpr
-#> Package : ranger
-#> Learn function : ranger
-#> ----------------
-#> Layer methylation
-#> ----------------
-#> TrainLayer : methylation
-#> Status : Trained
-#> Nb. of objects stored : 4
-#> ----------------
-#> Object(s) on layer methylation
-#> ----------------
-#> TrainData : methylation_data
-#> Layer : methylation
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 50
-#> Missing : 0
-#> p : 367
-#> ----------------
-#> ----------------
-#> VarSel : methylation_varsel
-#> TrainLayer : methylation
-#> Package : Boruta
-#> Function : Boruta
-#> ----------------
-#> ----------------
-#> Learner : methylation_lrner
-#> TrainLayer : methylation
-#> Package : ranger
-#> Learn function : ranger
-#> ----------------
-#> MetaLayer
-#> ----------------
-#> TrainMetaLayer : meta_layer
-#> Status : Trained
-#> Nb. of objects stored : 3
-#> ----------------
-#> Object(s) on MetaLayer
-#> ----------------
-#> Learner : meta_layer_lrner
-#> TrainLayer : meta_layer
-#> Learn function : weightedMeanLearner
-#> ----------------
-#> ----------------
-#> TrainData : modality-specific predictions
-#> Layer : meta_layer
-#> Ind. id. : IDS
-#> Target : disease
-#> n : 69
-#> Missing : 43
-#> p : 3
-#> ----------------
+#> Training training
+#> ----------------
+#> Training : training
+#> Problem type : classification
+#> Status : Trained
+#> Number of layers: 4
+#> Layers trained : 4
+#> Var. sel. used : Yes
+#> p : 19 | 1 | 35 | 3
+#> n : 50 | 50 | 50 | 69
+#> na.action : na.keep | na.keep | na.keep | na.rm
+#> ----------------
+#> Layer geneexpr
+#> ----------------
+#> TrainLayer : geneexpr
+#> Status : Trained
+#> Nb. of objects stored : 4
+#> ----------------
+#> Object(s) on layer geneexpr
+#> ----------------
+#> TrainData : geneexpr_data
+#> Layer : geneexpr
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 50
+#> Missing : 0
+#> p : 19
+#> ----------------
+#> ----------------
+#> VarSel : geneexpr_varsel
+#> TrainLayer : geneexpr
+#> Package : Boruta
+#> Function : Boruta
+#> ----------------
+#> ----------------
+#> Learner : geneexpr_lrner
+#> TrainLayer : geneexpr
+#> Package : ranger
+#> Learn function : ranger
+#> ----------------
+#> Layer proteinexpr
+#> ----------------
+#> TrainLayer : proteinexpr
+#> Status : Trained
+#> Nb. of objects stored : 4
+#> ----------------
+#> Object(s) on layer proteinexpr
+#> ----------------
+#> TrainData : proteinexpr_data
+#> Layer : proteinexpr
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 50
+#> Missing : 0
+#> p : 1
+#> ----------------
+#> ----------------
+#> VarSel : proteinexpr_varsel
+#> TrainLayer : proteinexpr
+#> Package : Boruta
+#> Function : Boruta
+#> ----------------
+#> ----------------
+#> Learner : proteinexpr_lrner
+#> TrainLayer : proteinexpr
+#> Package : ranger
+#> Learn function : ranger
+#> ----------------
+#> Layer methylation
+#> ----------------
+#> TrainLayer : methylation
+#> Status : Trained
+#> Nb. of objects stored : 4
+#> ----------------
+#> Object(s) on layer methylation
+#> ----------------
+#> TrainData : methylation_data
+#> Layer : methylation
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 50
+#> Missing : 0
+#> p : 35
+#> ----------------
+#> ----------------
+#> VarSel : methylation_varsel
+#> TrainLayer : methylation
+#> Package : Boruta
+#> Function : Boruta
+#> ----------------
+#> ----------------
+#> Learner : methylation_lrner
+#> TrainLayer : methylation
+#> Package : ranger
+#> Learn function : ranger
+#> ----------------
+#> MetaLayer
+#> ----------------
+#> TrainMetaLayer : meta_layer
+#> Status : Trained
+#> Nb. of objects stored : 3
+#> ----------------
+#> Object(s) on MetaLayer
+#> ----------------
+#> Learner : meta_layer_lrner
+#> TrainLayer : meta_layer
+#> Learn function : weightedMeanLearner
+#> ----------------
+#> ----------------
+#> TrainData : modality-specific predictions
+#> Layer : meta_layer
+#> Ind. id. : IDS
+#> Target : disease
+#> n : 69
+#> Missing : 43
+#> p : 3
+#> ----------------
We use extractModel()
to retrieve the list of stored
models and extractData()
to retrieve training data.
models_list <- extractModel(training = training)
-str(object = models_list, max.level = 1L)
-#> List of 4
-#> $ geneexpr :List of 14
-#> $ proteinexpr:List of 14
-#> $ methylation:List of 14
-#> $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.599 0.157 0.244
-#> ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"
models_list <- extractModel(training = training)
+str(object = models_list, max.level = 1L)
+#> List of 4
+#> $ geneexpr :List of 14
+#> $ proteinexpr:List of 14
+#> $ methylation:List of 14
+#> $ meta_layer : 'weightedMeanLearner' Named num [1:3] 0.599 0.157 0.244
+#> ..- attr(*, "names")= chr [1:3] "geneexpr" "proteinexpr" "methylation"
Three random forests (one per modality-specific layer) and one weighted meta-model (on the meta-layer) are returned. The smallest weight is assigned to protein abundance, while the highest is given to gene expression.
-data_list <- extractData(object = training)
-str(object = data_list, max.level = 1)
-#> List of 4
-#> $ geneexpr :'data.frame': 50 obs. of 133 variables:
-#> $ proteinexpr:'data.frame': 50 obs. of 162 variables:
-#> $ methylation:'data.frame': 50 obs. of 369 variables:
-#> $ meta_layer :'data.frame': 69 obs. of 5 variables:
data_list <- extractData(object = training)
+str(object = data_list, max.level = 1)
+#> List of 4
+#> $ geneexpr :'data.frame': 50 obs. of 21 variables:
+#> $ proteinexpr:'data.frame': 50 obs. of 3 variables:
+#> $ methylation:'data.frame': 50 obs. of 37 variables:
+#> $ meta_layer :'data.frame': 69 obs. of 5 variables:
The three simulated training modalities and the meta-data are returned.
@@ -861,135 +887,135 @@training
. Only the testing data modalities
are required. Relevant functions are createTesting()
-# Create gene expression layer
-createTestLayer(testing = testing,
- test_layer_id = "geneexpr",
- test_data = multi_omics$testing$geneexpr)
-#> Testing : testing
-#> Number of layers: 1
-#> p : 131
-#> n : 20
# Create protein abundance layer
-createTestLayer(testing = testing,
- test_layer_id = "proteinexpr",
- test_data = multi_omics$testing$proteinexpr)
-#> Testing : testing
-#> Number of layers: 2
-#> p : 131 | 160
-#> n : 20 | 20
# Create methylation layer
+# Create gene expression layer
createTestLayer(testing = testing,
- test_layer_id = "methylation",
- test_data = multi_omics$testing$methylation)
+ test_layer_id = "geneexpr",
+ test_data = multi_omics$testing$geneexpr)
#> Testing : testing
-#> Number of layers: 3
-#> p : 131 | 160 | 367
-#> n : 20 | 20 | 20
+#> Number of layers: 1
+#> p : 131
+#> n : 20
# Create protein abundance layer
+createTestLayer(testing = testing,
+ test_layer_id = "proteinexpr",
+ test_data = multi_omics$testing$proteinexpr)
+#> Testing : testing
+#> Number of layers: 2
+#> p : 131 | 160
+#> n : 20 | 20
# Create methylation layer
+createTestLayer(testing = testing,
+ test_layer_id = "methylation",
+ test_data = multi_omics$testing$methylation)
+#> Testing : testing
+#> Number of layers: 3
+#> p : 131 | 160 | 367
+#> n : 20 | 20 | 20
A summary of testing
-#> Testing testing
-#> ----------------
-#> Testing : testing
-#> Number of layers: 3
-#> p : 131 | 160 | 367
-#> n : 20 | 20 | 20
-#> ----------------
-#> Class : TestData
-#> name : geneexpr_data
-#> ind. id. : IDS
-#> n : 20
-#> p : 132
-#> Class : TestData
-#> name : proteinexpr_data
-#> ind. id. : IDS
-#> n : 20
-#> p : 161
-#> Class : TestData
-#> name : methylation_data
-#> ind. id. : IDS
-#> n : 20
-#> p : 368
+#> Testing testing
+#> ----------------
+#> Testing : testing
+#> Number of layers: 3
+#> p : 131 | 160 | 367
+#> n : 20 | 20 | 20
+#> ----------------
+#> Class : TestData
+#> name : geneexpr_data
+#> ind. id. : IDS
+#> n : 20
+#> p : 132
+#> Class : TestData
+#> name : proteinexpr_data
+#> ind. id. : IDS
+#> n : 20
+#> p : 161
+#> Class : TestData
+#> name : methylation_data
+#> ind. id. : IDS
+#> n : 20
+#> p : 368
A look at the testing data.
-data_list <- extractData(object = testing)
-str(object = data_list, max.level = 1)
-#> List of 3
-#> $ geneexpr :'data.frame': 20 obs. of 132 variables:
-#> $ proteinexpr:'data.frame': 20 obs. of 161 variables:
-#> $ methylation:'data.frame': 20 obs. of 368 variables:
data_list <- extractData(object = testing)
+str(object = data_list, max.level = 1)
+#> List of 3
+#> $ geneexpr :'data.frame': 20 obs. of 132 variables:
+#> $ proteinexpr:'data.frame': 20 obs. of 161 variables:
+#> $ methylation:'data.frame': 20 obs. of 368 variables:
We can also generate an upset plot to visualize patient overlap across testing layers.
- +Function predict()
is available for predicting.
predictions <- predict(object = training, testing = testing)
-#> $predicting
-#> Predicting : testing
-#> Nb. layers : 4
-#> $predicted_values
-#> IDS geneexpr proteinexpr methylation meta_layer
-#> 1 participant100 NA 0.6326063 0.1251825 0.3242845
-#> 2 participant20 NA NA 0.1571294 0.1571294
-#> 3 participant24 0.7400333 0.6601421 0.6843302 0.7138681
-#> 4 participant25 0.3868008 NA NA 0.3868008
-#> 5 participant27 0.3497389 NA 0.3822556 0.3591507
-#> 6 participant28 0.6292540 0.1459032 NA 0.5285877
-#> 7 participant3 0.6919667 0.8694921 NA 0.7289395
-#> 8 participant32 0.1178167 0.4782968 0.2063500 0.1961747
-#> 9 participant34 0.5838603 0.8426365 0.7550254 0.6663532
-#> 10 participant39 0.5797437 NA 0.7959706 0.6423299
-#> 11 participant42 0.7620048 0.5215286 0.4115976 0.6386829
-#> 12 participant51 NA 0.2175794 NA 0.2175794
-#> 13 participant53 NA 0.5215286 NA 0.5215286
-#> 14 participant54 0.3963286 NA NA 0.3963286
-#> 15 participant55 0.4559016 NA NA 0.4559016
-#> 16 participant6 NA 0.2678444 0.7796278 0.5788153
-#> 17 participant63 NA 0.5215286 0.2305881 0.3447467
-#> 18 participant64 0.2425183 0.8734579 NA 0.3739225
-#> 19 participant68 0.6432230 NA 0.8299405 0.6972678
-#> 20 participant71 0.5282675 0.1441889 0.5876659 0.4822686
-#> 21 participant75 NA 0.6960524 0.2517754 0.4260999
-#> 22 participant77 0.2102119 0.9070841 0.1535889 0.3061458
-#> 23 participant79 0.3001992 0.2175794 NA 0.2829922
-#> 24 participant81 0.2856817 0.3816246 0.2459389 0.2910988
-#> 25 participant84 0.7755270 0.4325476 0.6476595 0.6903327
-#> 26 participant86 0.5468381 NA 0.6990262 0.5908885
-#> 27 participant94 0.4678897 0.4060643 0.7376230 0.5239321
-#> 28 participant97 NA NA 0.2271817 0.2271817
-#> 29 participant98 NA 0.1441889 0.1903873 0.1722601
predictions <- predict(object = training, testing = testing)
+#> $predicting
+#> Predicting : testing
+#> Nb. layers : 4
+#> $predicted_values
+#> IDS geneexpr proteinexpr methylation meta_layer
+#> 1 participant100 NA 0.6326063 0.1251825 0.3242845
+#> 2 participant20 NA NA 0.1571294 0.1571294
+#> 3 participant24 0.7400333 0.6601421 0.6843302 0.7138681
+#> 4 participant25 0.3868008 NA NA 0.3868008
+#> 5 participant27 0.3497389 NA 0.3822556 0.3591507
+#> 6 participant28 0.6292540 0.1459032 NA 0.5285877
+#> 7 participant3 0.6919667 0.8694921 NA 0.7289395
+#> 8 participant32 0.1178167 0.4782968 0.2063500 0.1961747
+#> 9 participant34 0.5838603 0.8426365 0.7550254 0.6663532
+#> 10 participant39 0.5797437 NA 0.7959706 0.6423299
+#> 11 participant42 0.7620048 0.5215286 0.4115976 0.6386829
+#> 12 participant51 NA 0.2175794 NA 0.2175794
+#> 13 participant53 NA 0.5215286 NA 0.5215286
+#> 14 participant54 0.3963286 NA NA 0.3963286
+#> 15 participant55 0.4559016 NA NA 0.4559016
+#> 16 participant6 NA 0.2678444 0.7796278 0.5788153
+#> 17 participant63 NA 0.5215286 0.2305881 0.3447467
+#> 18 participant64 0.2425183 0.8734579 NA 0.3739225
+#> 19 participant68 0.6432230 NA 0.8299405 0.6972678
+#> 20 participant71 0.5282675 0.1441889 0.5876659 0.4822686
+#> 21 participant75 NA 0.6960524 0.2517754 0.4260999
+#> 22 participant77 0.2102119 0.9070841 0.1535889 0.3061458
+#> 23 participant79 0.3001992 0.2175794 NA 0.2829922
+#> 24 participant81 0.2856817 0.3816246 0.2459389 0.2910988
+#> 25 participant84 0.7755270 0.4325476 0.6476595 0.6903327
+#> 26 participant86 0.5468381 NA 0.6990262 0.5908885
+#> 27 participant94 0.4678897 0.4060643 0.7376230 0.5239321
+#> 28 participant97 NA NA 0.2271817 0.2271817
+#> 29 participant98 NA 0.1441889 0.1903873 0.1722601
Prediction performances for layer-specific levels and the meta-layer are estimated. We use the Brier Score (BS) to assess calibration performance and the Area Under the Curve (AUC) to evaluate classification accuracy.
-pred_values <- predictions$predicted_values
-actual_pred <- merge(x = pred_values,
- y = multi_omics$testing$target,
- by = "IDS",
- all.y = TRUE)
-y <- as.numeric(actual_pred$disease == "1")
-# On all patients
-perf_bs <- sapply(X = actual_pred[ , 2L:5L], FUN = function (my_pred) {
- bs <- mean((y[complete.cases(my_pred)] - my_pred[complete.cases(my_pred)])^2)
- roc_obj <- pROC::roc(y[complete.cases(my_pred)], my_pred[complete.cases(my_pred)])
- auc <- pROC::auc(roc_obj)
- performances = rbind(bs, auc)
- return(performances)
-rownames(perf_bs) <- c("BS", "AUC")
-#> geneexpr proteinexpr methylation meta_layer
-#> BS 0.1304363 0.3260078 0.07980678 0.1286471
-#> AUC 1.0000000 0.5350000 1.00000000 1.0000000
pred_values <- predictions$predicted_values
+actual_pred <- merge(x = pred_values,
+ y = multi_omics$testing$target,
+ by = "IDS",
+ all.y = TRUE)
+y <- as.numeric(actual_pred$disease == "1")
+# On all patients
+perf_bs <- sapply(X = actual_pred[ , 2L:5L], FUN = function (my_pred) {
+ bs <- mean((y[complete.cases(my_pred)] - my_pred[complete.cases(my_pred)])^2)
+ roc_obj <- pROC::roc(y[complete.cases(my_pred)], my_pred[complete.cases(my_pred)])
+ auc <- pROC::auc(roc_obj)
+ performances = rbind(bs, auc)
+ return(performances)
+rownames(perf_bs) <- c("BS", "AUC")
+#> geneexpr proteinexpr methylation meta_layer
+#> BS 0.1304363 0.3260078 0.07980678 0.1286471
+#> AUC 1.0000000 0.5350000 1.00000000 1.0000000
As expected, the performance of the meta-learner in terms of Brier Score falls between the worst and best modality-specific performance measures. For AUC, the meta-learner performs as well as the best @@ -1042,39 +1068,39 @@
to extract the predicted values. Similar
arguments are also available for the createTrainMetaLayer
function to generate meta-layer.
-# Re-create the gene expression layer with support vector machine as learner.
-createTrainLayer(training = training,
- train_layer_id = "geneexpr",
- train_data = multi_omics$training$geneexpr,
- varsel_package = "Boruta",
- varsel_fct = "Boruta",
- varsel_param = list(num.trees = 1000L,
- mtry = 3L,
- probability = TRUE),
- lrner_package = "e1071",
- lrn_fct = "svm",
- param_train_list = list(type = 'C-classification',
- kernel = 'radial',
- probability = TRUE),
- param_pred_list = list(probability = TRUE),
- na_action = "na.rm",
- x_lrn = "x",
- y_lrn = "y",
- object = "object",
- data = "newdata", # Name discrepancy resolved.
- extract_pred_fct = function (pred) {
- pred <- attr(pred, "probabilities")
- return(pred[ , 1L])
- }
-# Variable selection
-var_sel_res <- varSelection(training = training)
-training <- fusemlr(training = training,
- use_var_sel = TRUE)
# Re-create the gene expression layer with support vector machine as learner.
+createTrainLayer(training = training,
+ train_layer_id = "geneexpr",
+ train_data = multi_omics$training$geneexpr,
+ varsel_package = "Boruta",
+ varsel_fct = "Boruta",
+ varsel_param = list(num.trees = 1000L,
+ mtry = 3L,
+ probability = TRUE),
+ lrner_package = "e1071",
+ lrn_fct = "svm",
+ param_train_list = list(type = 'C-classification',
+ kernel = 'radial',
+ probability = TRUE),
+ param_pred_list = list(probability = TRUE),
+ na_action = "na.rm",
+ x_lrn = "x",
+ y_lrn = "y",
+ object = "object",
+ data = "newdata", # Name discrepancy resolved.
+ extract_pred_fct = function (pred) {
+ pred <- attr(pred, "probabilities")
+ return(pred[ , 1L])
+ }
+# Variable selection
+var_sel_res <- varSelection(training = training)
+training <- fusemlr(training = training,
+ use_var_sel = TRUE)
# We wrap the original functions
-mylasso <- function (x, y,
- nlambda = 25,
- nfolds = 5) {
- # Perform cross-validation to find the optimal lambda
- cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
- family = "binomial",
- type.measure = "deviance",
- nfolds = nfolds)
- best_lambda <- cv_lasso$lambda.min
- lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
- family = "binomial",
- alpha = 1,
- lambda = best_lambda
- )
- lasso_model <- list(model = lasso_best)
- class(lasso_model) <- "mylasso"
- return(lasso_model)
# We wrap the original functions
+mylasso <- function (x, y,
+ nlambda = 25,
+ nfolds = 5) {
+ # Perform cross-validation to find the optimal lambda
+ cv_lasso <- glmnet::cv.glmnet(x = as.matrix(x), y = y,
+ family = "binomial",
+ type.measure = "deviance",
+ nfolds = nfolds)
+ best_lambda <- cv_lasso$lambda.min
+ lasso_best <- glmnet::glmnet(x = as.matrix(x), y = y,
+ family = "binomial",
+ alpha = 1,
+ lambda = best_lambda
+ )
+ lasso_model <- list(model = lasso_best)
+ class(lasso_model) <- "mylasso"
+ return(lasso_model)
.# We extend the generic predict function mylasso.
-predict.mylasso <- function (object, data) {
- glmnet_pred <- predict(object = object$model,
- newx = as.matrix(data),
- type = "response",
- s = object$model$lambda)
- return(as.vector(glmnet_pred))
-# Re-create the gene expression layer with support vector machine as learner.
-createTrainMetaLayer(training = training,
- meta_layer_id = "meta_layer",
- lrner_package = NULL,
- lrn_fct = "mylasso",
- param_train_list = list(nlambda = 100L),
- na_action = "na.impute")
-training <- fusemlr(training = training,
- use_var_sel = TRUE)
# We extend the generic predict function for objects of class mylasso.
+predict.mylasso <- function (object, data) {
+ glmnet_pred <- predict(object = object$model,
+ newx = as.matrix(data),
+ type = "response",
+ s = object$model$lambda)
+ return(as.vector(glmnet_pred))
+# Re-create the meta-layer with the lasso wrapper (mylasso) as meta-learner.
+createTrainMetaLayer(training = training,
+ meta_layer_id = "meta_layer",
+ lrner_package = NULL,
+ lrn_fct = "mylasso",
+ param_train_list = list(nlambda = 100L),
+ na_action = "na.impute")
+training <- fusemlr(training = training,
+ use_var_sel = TRUE)
# Re-create the meta-layer with the lasso wrapper (mylasso) as meta-learner.
-createTrainMetaLayer(training = training,
- meta_layer_id = "meta_layer",
- lrner_package = NULL,
- lrn_fct = "mylasso",
- param_train_list = list(nlambda = 100L),
- na_action = "na.impute")
-training <- fusemlr(training = training,
- use_var_sel = TRUE)
# Re-create the meta-layer with the lasso wrapper (mylasso) as meta-learner.
+createTrainMetaLayer(training = training,
+ meta_layer_id = "meta_layer",
+ lrner_package = NULL,
+ lrn_fct = "mylasso",
+ param_train_list = list(nlambda = 100L),
+ na_action = "na.impute")
+training <- fusemlr(training = training,
+ use_var_sel = TRUE)