diff --git a/R/Lrner.R b/R/Lrner.R
index 44ac483..ec0a6ce 100644
--- a/R/Lrner.R
+++ b/R/Lrner.R
@@ -60,6 +60,17 @@ Lrner <- R6Class("Lrner",
       cat(sprintf("Param id : %s\n", private$param$id))
     },
     #' @description
+    #' Summary
+    #' @param ... (any) \cr
+    #'
+    summary = function (...) {
+      cat(sprintf(" Learner : %s\n", private$id))
+      cat(sprintf(" TrainLayer : %s\n", private$train_layer$getId()))
+      cat(sprintf(" Package : %s\n", private$package))
+      cat(sprintf(" Learn function : %s\n", private$lrn_fct))
+      cat(sprintf(" Param id : %s\n", private$param$id))
+    },
+    #' @description
     #' Tains the current learner (from class [Lrner]) on the current training data (from class [TrainData]).
     #'
     #' @param ind_subset `vector(1)` \cr
@@ -157,6 +168,24 @@ Lrner <- R6Class("Lrner",
     #'
     getPackage = function () {
       return(private$package)
+    },
+    #' @description
+    #' Getter of the subset of individuals used for training.
+    #'
+    #' @return
+    #' The IDs of the individuals used for training.
+    #'
+    getIndSubset = function () {
+      return(private$ind_subset)
+    },
+    #' @description
+    #' Getter of the subset of variables used for training.
+    #'
+    #' @return
+    #' The variables used for training.
+    #'
+    getVarSubset = function () {
+      return(private$var_subset)
     }
   ),
   private = list(
@@ -173,7 +202,6 @@ Lrner <- R6Class("Lrner",
     # Individuals subset IDs.
     ind_subset = NULL,
     # Variable subset IDs.
-    # TODO: Set it after variable selection.
     var_subset = NULL
   ),
   cloneable = FALSE
diff --git a/R/Model.R b/R/Model.R
index 4a4f593..1844159 100644
--- a/R/Model.R
+++ b/R/Model.R
@@ -62,6 +62,17 @@ Model <- R6Class("Model",
       print(private$train_data)
     },
     #' @description
+    #' Summary
+    #' @param ... (any) \cr
+    #'
+    summary = function (...) {
+      cat(" Model \n\n")
+      cat(" -----------------------\n")
+      cat(sprintf("Individual(s) used : %s\n", length(private$lrner$getIndSubset())))
+      cat(sprintf("Variable(s) used   : %s\n", length(private$lrner$getVarSubset())))
+      cat(" -----------------------\n")
+    },
+    #' @description
     #' Getter of the base model
     #'
     #' @export
@@ -169,8 +180,8 @@ Model <- R6Class("Model",
                            id = ind_subset,
                            pred = predicted_obj)
       pred_colnames = c("Layer",
-                       new_data$getIndCol(),
-                       "Prediction")
+                        new_data$getIndCol(),
+                        "Prediction")
       names(predicted_obj) = pred_colnames
     } else {
       if (is.list(predicted_obj)) {
diff --git a/R/NewLayer.R b/R/NewLayer.R
index f404693..c83c491 100644
--- a/R/NewLayer.R
+++ b/R/NewLayer.R
@@ -35,8 +35,7 @@ NewLayer <- R6Class("NewLayer",
     #' @param ... (any) \cr
     #'
     print = function (...){
-      cat("Class : NewLayer\n")
-      cat(sprintf("id : %s\n", private$id))
+      cat(sprintf("NewLayer : %s\n", private$id))
       cat(sprintf("Contains %s object.\n", length(private$hash_table)))
     },
     #' @description
@@ -96,6 +95,19 @@ NewLayer <- R6Class("NewLayer",
     #'
     checkNewDataExist = function () {
       return(super$checkClassExist(.class = "NewData"))
+    },
+    #' @description
+    #' Generate summary.
+    #'
+    #' @export
+    #'
+    summary = function () {
+      layer_kc = self$getKeyClass()
+      for (k in layer_kc[ , "key"]) {
+        current_obj = self$getFromHashTable(key = k)
+        print(current_obj)
+        cat("\n")
+      }
+    }
     }
   ),
   private = list(
diff --git a/R/PredictLayer.R b/R/PredictLayer.R
index 6d4fe03..790fe78 100644
--- a/R/PredictLayer.R
+++ b/R/PredictLayer.R
@@ -99,6 +99,19 @@ PredictLayer <- R6Class("PredictLayer",
         }
       }
       return(self)
+    },
+    #' @description
+    #' Generate summary.
+    #'
+    #' @export
+    #'
+    summary = function () {
+      layer_kc = self$getKeyClass()
+      for (k in layer_kc[ , "key"]) {
+        current_obj = self$getFromHashTable(key = k)
+        print(current_obj)
+        cat("\n")
+      }
+    }
     }
   ),
   private = list(
diff --git a/R/TrainData.R b/R/TrainData.R
index 3b04e1f..cce1ef0 100644
--- a/R/TrainData.R
+++ b/R/TrainData.R
@@ -26,10 +26,16 @@ TrainData <- R6Class("TrainData",
       ind_col = train_layer$getTrainStudy()$getIndCol()
       target = train_layer$getTrainStudy()$getTarget()
       if (!any(c("TrainLayer", "TrainMetaLayer") %in% class(train_layer))) {
-        stop("A Traindata can belong a TrainLayer or a TrainMetaLayer object.")
+        stop("A TrainData object can only belong to a TrainLayer or a TrainMetaLayer object.\n")
       }
       if (!all(c(ind_col, target) %in% colnames(data_frame))) {
-        stop("Individual column ID or target variable not found in the provided data.frame.")
+        stop("Individual column ID or target variable not found in the provided data.frame.\n")
+      }
+      missing_target = is.na(data_frame[ , target])
+      if (any(missing_target)) {
+        warning(sprintf("%s individual(s) with missing target value(s) detected and removed.\n",
+                        sum(missing_target)))
+        data_frame = data_frame[!missing_target, ]
       }
       super$initialize(id = id,
                        ind_col = train_layer$getTrainStudy()$getIndCol(),
@@ -69,9 +75,23 @@ TrainData <- R6Class("TrainData",
       cat(sprintf("ind. id. : %s\n", private$ind_col))
       cat(sprintf("target : %s\n", private$target))
       cat(sprintf("n : %s\n", nrow(private$data_frame)))
+      cat(sprintf("Missing : %s\n", sum(!complete.cases(private$data_frame))))
       cat(sprintf("p : %s\n", ncol(private$data_frame)))
     },
     #' @description
+    #' Summary
+    #' @param ... (any) \cr
+    #'
+    summary = function (...) {
+      cat(sprintf(" TrainData : %s\n", private$id))
+      cat(sprintf(" Layer : %s\n", private$train_layer$getId()))
+      cat(sprintf(" ind. id. : %s\n", private$ind_col))
+      cat(sprintf(" target : %s\n", private$target))
+      cat(sprintf(" n : %s\n", nrow(private$data_frame)))
+      cat(sprintf(" Missing : %s\n", sum(!complete.cases(private$data_frame))))
+      cat(sprintf(" p : %s\n", ncol(private$data_frame)))
+    },
+    #' @description
     #' Getter of the current \code{data.frame} wihtout individual
     #' ID nor target variables.
     #'
diff --git a/R/TrainLayer.R b/R/TrainLayer.R
index abe7825..ab1b179 100644
--- a/R/TrainLayer.R
+++ b/R/TrainLayer.R
@@ -18,345 +18,381 @@
 #' @importFrom R6 R6Class
 #' @seealso [TrainStudy], [Lrner], [TrainData], [NewData] and [Model]
 TrainLayer <- R6Class("TrainLayer",
-                      inherit = HashTable,
-                      public = list(
-                        #' @description
-                        #' constructor
-                        #'
-                        #' @param id (`character(1)`)\cr
-                        #' See class Param
-                        #' @param train_study (`TrainStudy(1)`)\cr
-                        #'
-                        initialize = function (id, train_study) {
-                          super$initialize(id = id)
-                          private$train_study = train_study
-                          if ("TrainStudy" %in% class(train_study)) {
-                            train_study$add2HashTable(key = id,
-                                                      value = self,
-                                                      .class = "TrainLayer")
-                          } else {
-                            stop("A TrainLayer can only belong to a TrainStudy.")
-                          }
-                          private$status = FALSE
-                        },
-                        #' @description
-                        #' Printer
-                        #' @param ... (any) \cr
-                        #'
-                        print = function (...){
-                          if (!private$status) {
-                            status = "Not trained"
-                          } else {
-                            status = "Trained"
-                          }
-                          cat(sprintf("TrainLayer : %s\n", private$id))
-                          cat(sprintf("Status : %s\n", status))
-                          stored_obj = self$getKeyClass()
-                          if (!nrow(stored_obj)) {
-                            cat("Empty layer.")
-                          } else {
-                            cat(sprintf("Nb. of objects stored : %s\n", nrow(stored_obj)))
-                            print(stored_obj)
-                          }
-                        },
-                        #' @description
-                        #' Getter of the current study
-                        #'
-                        #' @return
-                        #' The current study is returned.
- #' - getTrainStudy = function () { - return(private$train_study) - }, - #' @description - #' Trains the current layer. - #' - #' @param ind_subset `vector(1)` \cr - #' ID subset of individuals to be used for training. - #' @param use_var_sel `boolean(1)` \cr - #' If TRUE, variable selection is performed before training. - #' - #' @return - #' The current layer is returned with the resulting model. - #' @export - #' - train = function (ind_subset = NULL, use_var_sel = FALSE) { - layer_kc = self$getKeyClass() - # Stop if either learner of data is missing on this layer. - if (!("Lrner" %in% layer_kc[ , "class"])){ - stop(sprintf("No learner on layer %s.", self$getId())) - } else { - if (!("TrainData" %in% layer_kc[ , "class"])) { - stop(sprintf("No data on layer %s.", self$getId())) - } - } - # The learner is trained on the current dataset - lrner_key = layer_kc[layer_kc$class == "Lrner" , "key"] - lrner = self$getFromHashTable(key = lrner_key[1L]) - model = lrner$train(ind_subset = ind_subset, - use_var_sel = use_var_sel) - # Updating the training status. - if (!private$status) { - # The training layer has not been trained before. - private$train_study$increaseNbTrainedLayer() - private$status = TRUE - } else { - # The training layer has been trained before. - private$status = TRUE - } - return(model) - }, - #' @description - #' Variable selection on the current layer. - #' - #' @param ind_subset `vector(1)` \cr - #' ID subset of individuals to be used for variable selection. - #' - #' @return - #' The current layer is returned with the resulting model. - #' @export - #' - varSelection = function (ind_subset = NULL) { - layer_kc = self$getKeyClass() - # Stop if either selector or data is missing on this layer. - if (!("VarSel" %in% layer_kc[ , "class"])){ - stop(sprintf("No var. sel. method on layer %s.", self$getId())) - } else { - if (!("TrainData" %in% layer_kc[ , "class"])) { - stop(sprintf("No data on layer %s.", self$getId())) - } - } - # The learner is trained on the current dataset - varsel_key = layer_kc[layer_kc$class == "VarSel" , "key"] - varsel = self$getFromHashTable(key = varsel_key[1L]) - selected = varsel$varSelection(ind_subset = ind_subset) - return(selected) - }, - #' @description - #' Predicts values for the new layer taking as argument. - #' - #' @param new_layer `TrainLayer()` \cr - #' @param ind_subset `vector()` \cr - #' - #' @return - #' A new [PredictLayer] object with the predicted data is returned. - #' @export - #' - predict = function (new_layer, - ind_subset = NULL) { - k = self$getId() - # Layer IDs must match together. - if (k == new_layer$getId()) { - m_layer = self$getModel() - } else { - stop("The new layer ID does not match with the current layer ID.") - } - # Check that a model exists on the current layer - if (is.null(m_layer)) { - stop(sprintf("There is no model stored on layer %s.", - self$getId())) - } - new_data = new_layer$getNewData() - # Predicting: Data and model exist on this layer. + inherit = HashTable, + public = list( + #' @description + #' constructor + #' + #' @param id (`character(1)`)\cr + #' See class Param + #' @param train_study (`TrainStudy(1)`)\cr + #' + initialize = function (id, train_study) { + super$initialize(id = id) + private$train_study = train_study + if ("TrainStudy" %in% class(train_study)) { + train_study$add2HashTable(key = id, + value = self, + .class = "TrainLayer") + } else { + stop("A TrainLayer can only belong to a TrainStudy.") + } + private$status = FALSE + }, + #' @description + #' Printer + #' @param ... 
(any) \cr
+    #'
+    print = function (...){
+      if (!private$status) {
+        status = "Not trained"
+      } else {
+        status = "Trained"
+      }
+      cat(sprintf("TrainLayer : %s\n", private$id))
+      cat(sprintf("Status : %s\n", status))
+      stored_obj = self$getKeyClass()
+      if (!nrow(stored_obj)) {
+        cat("Empty layer.\n")
+      } else {
+        cat(sprintf("Nb. of objects stored : %s\n", nrow(stored_obj)))
+        cat("-----------------------\n")
+        print(stored_obj)
+      }
+    },
+    #' @description
+    #' Getter of the current study
+    #'
+    #' @return
+    #' The current study is returned.
+    #'
+    getTrainStudy = function () {
+      return(private$train_study)
+    },
+    #' @description
+    #' Trains the current layer.
+    #'
+    #' @param ind_subset `vector(1)` \cr
+    #' ID subset of individuals to be used for training.
+    #' @param use_var_sel `boolean(1)` \cr
+    #' If TRUE, variable selection is performed before training.
+    #'
+    #' @return
+    #' The model resulting from training is returned.
+    #' @export
+    #'
+    train = function (ind_subset = NULL, use_var_sel = FALSE) {
+      layer_kc = self$getKeyClass()
+      # Stop if either the learner or the data is missing on this layer.
+      if (!("Lrner" %in% layer_kc[ , "class"])){
+        stop(sprintf("No learner on layer %s.", self$getId()))
+      } else {
+        if (!("TrainData" %in% layer_kc[ , "class"])) {
+          stop(sprintf("No data on layer %s.", self$getId()))
+        }
+      }
+      # The learner is trained on the current dataset
+      lrner_key = layer_kc[layer_kc$class == "Lrner" , "key"]
+      lrner = self$getFromHashTable(key = lrner_key[1L])
+      model = lrner$train(ind_subset = ind_subset,
+                          use_var_sel = use_var_sel)
+      # Updating the training status.
+      if (!private$status) {
+        # The training layer has not been trained before.
+        private$train_study$increaseNbTrainedLayer()
+        private$status = TRUE
+      } else {
+        # The training layer has been trained before.
+        private$status = TRUE
+      }
+      return(model)
+    },
+    #' @description
+    #' Variable selection on the current layer.
+    #'
+    #' @param ind_subset `vector(1)` \cr
+    #' ID subset of individuals to be used for variable selection.
+    #'
+    #' @return
+    #' The selected variables are returned.
+    #' @export
+    #'
+    varSelection = function (ind_subset = NULL) {
+      layer_kc = self$getKeyClass()
+      # Stop if either the selector or the data is missing on this layer.
+      if (!("VarSel" %in% layer_kc[ , "class"])){
+        stop(sprintf("No var. sel. method on layer %s.", self$getId()))
+      } else {
+        if (!("TrainData" %in% layer_kc[ , "class"])) {
+          stop(sprintf("No data on layer %s.", self$getId()))
+        }
+      }
+      # Variable selection is performed on the current dataset
+      varsel_key = layer_kc[layer_kc$class == "VarSel" , "key"]
+      varsel = self$getFromHashTable(key = varsel_key[1L])
+      selected = varsel$varSelection(ind_subset = ind_subset)
+      return(selected)
+    },
+    #' @description
+    #' Predicts values for the new layer passed as argument.
+    #'
+    #' @param new_layer `TrainLayer()` \cr
+    #' @param ind_subset `vector()` \cr
+    #'
+    #' @return
+    #' A new [PredictLayer] object with the predicted data is returned.
+    #' @export
+    #'
+    predict = function (new_layer,
+                        ind_subset = NULL) {
+      k = self$getId()
+      # Layer IDs must match.
+      if (k == new_layer$getId()) {
+        m_layer = self$getModel()
+      } else {
+        stop("The new layer ID does not match with the current layer ID.")
+      }
+      # Check that a model exists on the current layer
+      if (is.null(m_layer)) {
+        stop(sprintf("There is no model stored on layer %s.",
+                     self$getId()))
+      }
+      new_data = new_layer$getNewData()
+      # Predicting: Data and model exist on this layer.
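+      # The stored model is applied to the new data, and the resulting
+      # PredictData object is attached to a fresh PredictLayer (see below).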
- model = self$getModel() - pred_data = model$predict(new_data = new_data, - ind_subset = ind_subset) - # Initialize a predicted layer to store predictions - pred_layer = PredictLayer$new( - id = private$id - ) - pred_data$setPredictLayer(pred_layer) - return(pred_layer) - }, - #' @description - #' Getter of the training dataset stored on the current layer. - #' - #' @return - #' The stored [TrainData] object is returned. - #' @export - #' - getTrainData = function () { - layer_kc = self$getKeyClass() - if ("TrainData" %in% layer_kc[ , "class"]) { - train_data_key = layer_kc[layer_kc$class == "TrainData" , + model = self$getModel() + pred_data = model$predict(new_data = new_data, + ind_subset = ind_subset) + # Initialize a predicted layer to store predictions + pred_layer = PredictLayer$new( + id = private$id + ) + pred_data$setPredictLayer(pred_layer) + return(pred_layer) + }, + #' @description + #' Getter of the training dataset stored on the current layer. + #' + #' @return + #' The stored [TrainData] object is returned. + #' @export + #' + getTrainData = function () { + layer_kc = self$getKeyClass() + if ("TrainData" %in% layer_kc[ , "class"]) { + train_data_key = layer_kc[layer_kc$class == "TrainData" , + "key"] + train_data = self$getFromHashTable(key = train_data_key[1L]) + } else { + stop(sprintf("No train data on layer %s.", self$getId())) + } + return(train_data) + }, + #' @description + #' Getter of target values from the current layer. + #' + #' @return + #' A \code{data.frame} containing individuals IDs and corresponding target values. + #' @export + #' + getTargetValues = function () { + layer_kc = self$getKeyClass() + # Stop if training data is missing on this layer. + if (("TrainData" %in% layer_kc[ , "class"])) { + # Searching for layer specific training dataset + train_data_key = layer_kc[layer_kc$class == "TrainData" , + "key"] + train_data = self$getTrainData() + train_data_frame = train_data$getDataFrame() + target_data = train_data_frame[ , c(train_data$getIndCol(), + train_data$getTargetName())] + return(target_data) + } else { + stop(sprintf("No data on layer %s.", self$getId())) + } + }, + #' @description + #' Getter of IDS from the current layer. + #' + #' @return + #' A \code{data.frame} containing individuals IDs values. + #' @export + #' + getIndIDs = function () { + layer_kc = self$getKeyClass() + # Stop if training data is missing on this layer. + # FIXME: Restrict this function to TrainData only. + if (("NewData" %in% layer_kc[ , "class"])) { + # Searching for layer specific new dataset + data_key = layer_kc[layer_kc$class == "NewData" , + "key"] + current_data = self$getNewData() + } else { + if (("TrainData" %in% layer_kc[ , "class"])) { + # Searching for layer specific new dataset + data_key = layer_kc[layer_kc$class == "TrainData" , + "key"] + current_data = self$getTrainData() + } else { + stop(sprintf("No data on layer %s.", self$getId())) + } + } + current_data_frame = current_data$getDataFrame() + ids_data = current_data_frame[ , current_data$getIndCol(), drop = FALSE] + return(ids_data) + }, + #' @description + #' Getter of the new data. + #' + #' @return + #' The stored [NewData] object is returned. + #' @export + # FIXME: This function has been moved to NewLayer, so remove it after testing. 
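+    # Note: when no NewData object is stored on this layer, the getter below
+    # falls back to the TrainData object, so training data can also be
+    # predicted (this path is used when building the meta training data).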
+ getNewData = function () { + layer_kc = self$getKeyClass() + if (any(c("NewData", "TrainData") %in% layer_kc[ , "class"])) { + if ("NewData" %in% layer_kc[ , "class"]) { + new_data_key = layer_kc[layer_kc$class == "NewData" , + "key"] + new_data = self$getFromHashTable(key = new_data_key[1L]) + } else { + new_data = self$getTrainData() + } + } else { + stop(sprintf("No new data on layer %s.", self$getId())) + } + return(new_data) + }, + #' @description + #' Getter of the learner. + #' + #' @return + #' The stored [Lrner] object is returned. + #' @export + getLrner = function () { + layer_kc = self$getKeyClass() + if (!("Lrner" %in% layer_kc[ , "class"])) { + stop(sprintf("No Lrner on layer %s.", self$getId())) + } else { + lrner_key = layer_kc[layer_kc$class == "Lrner" , "key"] - train_data = self$getFromHashTable(key = train_data_key[1L]) - } else { - stop(sprintf("No train data on layer %s.", self$getId())) - } - return(train_data) - }, - #' @description - #' Getter of target values from the current layer. - #' - #' @return - #' A \code{data.frame} containing individuals IDs and corresponding target values. - #' @export - #' - getTargetValues = function () { - layer_kc = self$getKeyClass() - # Stop if training data is missing on this layer. - if (("TrainData" %in% layer_kc[ , "class"])) { - # Searching for layer specific training dataset - train_data_key = layer_kc[layer_kc$class == "TrainData" , + lrner = self$getFromHashTable(key = lrner_key[1L]) + } + return(lrner) + }, + #' @description + #' Getter of the variable selector. + #' + #' @return + #' The stored [VarSel] object is returned. + #' @export + getVarSel = function () { + layer_kc = self$getKeyClass() + if (!("VarSel" %in% layer_kc[ , "class"])) { + stop(sprintf("No VarSel on layer %s.", self$getId())) + } else { + varsel_key = layer_kc[layer_kc$class == "VarSel" , + "key"] + varsel = self$getFromHashTable(key = varsel_key[1L]) + } + return(varsel) + }, + #' @description + #' Getter of the model. + #' + #' @return + #' The stored [Model] object is returned. + #' @export + #' + getModel = function () { + layer_kc = self$getKeyClass() + if (!("Model" %in% layer_kc[ , "class"])) { + stop(sprintf("No Model on layer %s.", self$getId())) + } else { + model_key = layer_kc[layer_kc$class == "Model" , "key"] - train_data = self$getTrainData() - train_data_frame = train_data$getDataFrame() - target_data = train_data_frame[ , c(train_data$getIndCol(), - train_data$getTargetName())] - return(target_data) - } else { - stop(sprintf("No data on layer %s.", self$getId())) - } - }, - #' @description - #' Getter of IDS from the current layer. - #' - #' @return - #' A \code{data.frame} containing individuals IDs values. - #' @export - #' - getIndIDs = function () { - layer_kc = self$getKeyClass() - # Stop if training data is missing on this layer. - # FIXME: Restrict this function to TrainData only. 
- if (("NewData" %in% layer_kc[ , "class"])) { - # Searching for layer specific new dataset - data_key = layer_kc[layer_kc$class == "NewData" , - "key"] - current_data = self$getNewData() - } else { - if (("TrainData" %in% layer_kc[ , "class"])) { - # Searching for layer specific new dataset - data_key = layer_kc[layer_kc$class == "TrainData" , - "key"] - current_data = self$getTrainData() - } else { - stop(sprintf("No data on layer %s.", self$getId())) - } - } - current_data_frame = current_data$getDataFrame() - ids_data = current_data_frame[ , current_data$getIndCol(), drop = FALSE] - return(ids_data) - }, - #' @description - #' Getter of the new data. - #' - #' @return - #' The stored [NewData] object is returned. - #' @export - # FIXME: This function has been moved to NewLayer, so remove it after testing. - getNewData = function () { - layer_kc = self$getKeyClass() - if (any(c("NewData", "TrainData") %in% layer_kc[ , "class"])) { - if ("NewData" %in% layer_kc[ , "class"]) { - new_data_key = layer_kc[layer_kc$class == "NewData" , - "key"] - new_data = self$getFromHashTable(key = new_data_key[1L]) - } else { - new_data = self$getTrainData() - } - } else { - stop(sprintf("No new data on layer %s.", self$getId())) - } - return(new_data) - }, - #' @description - #' Getter of the learner. - #' - #' @return - #' The stored [Lrner] object is returned. - #' @export - getLrner = function () { - layer_kc = self$getKeyClass() - if (!("Lrner" %in% layer_kc[ , "class"])) { - stop(sprintf("No Lrner on layer %s.", self$getId())) - } else { - lrner_key = layer_kc[layer_kc$class == "Lrner" , - "key"] - lrner = self$getFromHashTable(key = lrner_key[1L]) - } - return(lrner) - }, - #' @description - #' Getter of the variable selector. - #' - #' @return - #' The stored [VarSel] object is returned. - #' @export - getVarSel = function () { - layer_kc = self$getKeyClass() - if (!("VarSel" %in% layer_kc[ , "class"])) { - stop(sprintf("No VarSel on layer %s.", self$getId())) - } else { - varsel_key = layer_kc[layer_kc$class == "VarSel" , - "key"] - varsel = self$getFromHashTable(key = varsel_key[1L]) - } - return(varsel) - }, - #' @description - #' Getter of the model. - #' - #' @return - #' The stored [Model] object is returned. - #' @export - #' - getModel = function () { - layer_kc = self$getKeyClass() - if (!("Model" %in% layer_kc[ , "class"])) { - stop(sprintf("No Model on layer %s.", self$getId())) - } else { - model_key = layer_kc[layer_kc$class == "Model" , - "key"] - model = self$getFromHashTable(key = model_key[1L]) - } - return(model) - }, - #' @description - #' Getter of predictions. - #' - #' @return - #' The stored predictions are returned. - #' @export - # FIXME: Move this function to PredictLayer - getPredictions = function () { - layer_kc = self$getKeyClass() - if (!("Prediction" %in% layer_kc[ , "class"])) { - stop(sprintf("No Prediction on layer %s.", self$getId())) - } else { - prediction_key = layer_kc[layer_kc$class == "Prediction", - "key"] - predictions = self$getFromHashTable( - key = prediction_key[1L]) - } - return(predictions) - }, - #' @description - #' Check whether a training data has been already stored. - #' - #' @return - #' Boolean value - #' - #TODO: checkLrnerExist with "s" - checkLrnerExist = function () { - return(super$checkClassExist(.class = "Lrner")) - }, - #' @description - #' Check whether a training data has been already stored. 
-                        #'
-                        #' @return
-                        #' Boolean value
-                        #'
-                        checkTrainDataExist = function () {
-                          return(super$checkClassExist(.class = "TrainData"))
-                        }
-                      ),
-                      private = list(
-                        train_study = NULL,
-                        status = FALSE
-                      ),
-                      # TODO: define a deep_clone function for this class.
-                      cloneable = FALSE
+        model = self$getFromHashTable(key = model_key[1L])
+      }
+      return(model)
+    },
+    #' @description
+    #' Getter of predictions.
+    #'
+    #' @return
+    #' The stored predictions are returned.
+    #' @export
+    # FIXME: Move this function to PredictLayer
+    getPredictions = function () {
+      layer_kc = self$getKeyClass()
+      if (!("Prediction" %in% layer_kc[ , "class"])) {
+        stop(sprintf("No Prediction on layer %s.", self$getId()))
+      } else {
+        prediction_key = layer_kc[layer_kc$class == "Prediction",
+                                  "key"]
+        predictions = self$getFromHashTable(
+          key = prediction_key[1L])
+      }
+      return(predictions)
+    },
+    #' @description
+    #' Check whether a learner has already been stored.
+    #'
+    #' @return
+    #' Boolean value
+    #'
+    #TODO: checkLrnerExist with "s"
+    checkLrnerExist = function () {
+      return(super$checkClassExist(.class = "Lrner"))
+    },
+    #' @description
+    #' Check whether a training data has already been stored.
+    #'
+    #' @return
+    #' Boolean value
+    #'
+    checkTrainDataExist = function () {
+      return(super$checkClassExist(.class = "TrainData"))
+    },
+    #' @description
+    #' Generate summary.
+    #'
+    #' @export
+    #'
+    summary = function () {
+      cat(sprintf(" Layer %s\n", self$getId()))
+      cat(" ----------------\n")
+      if (!private$status) {
+        status = "Not trained"
+      } else {
+        status = "Trained"
+      }
+      cat(sprintf(" TrainLayer : %s\n", private$id))
+      cat(sprintf(" Status : %s\n", status))
+      stored_obj = self$getKeyClass()
+      if (!nrow(stored_obj)) {
+        cat(" Empty layer.\n")
+      } else {
+        cat(sprintf(" Nb. of objects stored : %s\n", nrow(stored_obj)))
+      }
+      cat(" ----------------\n")
+      layer_kc = self$getKeyClass()
+      cat(sprintf(" Object(s) on layer %s\n\n", self$getId()))
+      if (!nrow(layer_kc)) {
+        cat(" Empty layer\n")
+      }
+      for (k in layer_kc[ , "key"]) {
+        cat(" ----------------\n")
+        current_obj = self$getFromHashTable(key = k)
+        current_obj$summary()
+        cat(" ----------------\n")
+        cat("\n")
+      }
+    }
+  ),
+  private = list(
+    train_study = NULL,
+    status = FALSE
+  ),
+  # TODO: define a deep_clone function for this class.
+  cloneable = FALSE
 )
diff --git a/R/TrainMetaLayer.R b/R/TrainMetaLayer.R
index 282e1b8..e8c5f81 100644
--- a/R/TrainMetaLayer.R
+++ b/R/TrainMetaLayer.R
@@ -47,9 +47,9 @@ TrainMetaLayer <- R6Class("TrainMetaLayer",
       cat(sprintf("Status : %s\n", status))
       stored_obj = self$getKeyClass()
       if (!nrow(stored_obj)) {
-        cat("Empty layer.")
+        cat("Empty layer.\n")
       } else {
-        cat(sprintf("Nb. of objects stroed : %s\n", nrow(stored_obj)))
+        cat(sprintf("Nb. of objects stored : %s\n", nrow(stored_obj)))
         print(stored_obj)
       }
     },
@@ -288,6 +288,42 @@ TrainMetaLayer <- R6Class("TrainMetaLayer",
     #' Only usefull to reset status FALSE after cross validation.
     set2NotTrained = function () {
       private$status = FALSE
+    },
+    #' @description
+    #' Generate summary.
+    #'
+    #' @export
+    #'
+    summary = function () {
+      cat(" MetaLayer\n")
+      cat(" ----------------\n")
+      if (!private$status) {
+        status = "Not trained"
+      } else {
+        status = "Trained"
+      }
+      cat(sprintf(" TrainMetaLayer : %s\n", private$id))
+      cat(sprintf(" Status : %s\n", status))
+      stored_obj = self$getKeyClass()
+      if (!nrow(stored_obj)) {
+        cat(" Empty layer.\n")
+      } else {
+        cat(sprintf(" Nb. 
of objects stored : %s\n", nrow(stored_obj))) + } + cat("\n") + cat(" ----------------\n") + layer_kc = self$getKeyClass() + cat(" Object(s) on MetaLayer\n\n") + if (!nrow(layer_kc)) { + cat(" Empty layer\n") + } + for (k in layer_kc[ , "key"]) { + cat(" ----------------\n") + current_obj = self$getFromHashTable(key = k) + current_obj$summary() + cat(" ----------------\n") + cat("\n") + } } ), private = list( diff --git a/R/TrainStudy.R b/R/TrainStudy.R index 7f0d2f7..084f5e0 100644 --- a/R/TrainStudy.R +++ b/R/TrainStudy.R @@ -23,400 +23,472 @@ #' #' @seealso [TrainLayer] TrainStudy <- R6Class("TrainStudy", - inherit = HashTable, - public = list( - #' @description - #' constructor - #' - #' @param id (`character(1)`)\cr - #' @param ind_col (`character(1)`) - #' Name of column of individuals IDS - #' @param target (`character(1)`) - #' Name of the target variable - #' @seealso [NewStudy] and [PredictStudy] - initialize = function (id, ind_col, target) { - super$initialize(id = id) - private$ind_col = ind_col - private$target = target - private$status = FALSE - }, - #' @description - #' Printer - #' - #' @param ... (any) \cr - #' - print = function (...) { - nb_layers = length(private$hash_table) - if (!private$status) { - status = "Not trained" - } else { - status = "Trained" - } - cat(sprintf("TrainStudy : %s\n", private$id)) - cat(sprintf("Status : %s\n", status)) - cat(sprintf("Number of layers: %s\n", nb_layers)) - cat(sprintf("Layers trained : %s\n", private$nb_trained_layer)) - }, - #' @description - #' Train each layer of the current Trainstudy. - #' - #' @param ind_subset (`character(1)`)\cr - #' Subset of individuals IDs to be used for training. - #' @param use_var_sel `boolean(1)` \cr - #' If TRUE, selected variables available at each layer are used. - #' - #' @return - #' Returns the object itself, with a model for each layer. - #' @export - #' - #' - trainLayer = function (ind_subset = NULL, use_var_sel = FALSE) { - layers = self$getKeyClass() - if (nrow(layers)) { + inherit = HashTable, + public = list( + #' @description + #' constructor + #' + #' @param id (`character(1)`)\cr + #' @param ind_col (`character(1)`) + #' Name of column of individuals IDS + #' @param target (`character(1)`) + #' Name of the target variable + #' @seealso [NewStudy] and [PredictStudy] + initialize = function (id, ind_col, target) { + super$initialize(id = id) + private$ind_col = ind_col + private$target = target + private$status = FALSE + }, + #' @description + #' Printer + #' + #' @param ... (any) \cr + #' + print = function (...) { + nb_layers = length(private$hash_table) + if (!private$status) { + status = "Not trained" + } else { + status = "Trained" + } + cat(sprintf("TrainStudy : %s\n", private$id)) + cat(sprintf("Status : %s\n", status)) + cat(sprintf("Number of layers: %s\n", nb_layers)) + cat(sprintf("Layers trained : %s\n", private$nb_trained_layer)) + }, + #' @description + #' Train each layer of the current Trainstudy. + #' + #' @param ind_subset (`character(1)`)\cr + #' Subset of individuals IDs to be used for training. + #' @param use_var_sel `boolean(1)` \cr + #' If TRUE, selected variables available at each layer are used. + #' + #' @return + #' Returns the object itself, with a model for each layer. + #' @export + #' + #' + trainLayer = function (ind_subset = NULL, use_var_sel = FALSE) { + layers = self$getKeyClass() + if (nrow(layers)) { - # This code accesses each layer (except MetaLayer) level and trains the corres- - # ponding learner. 
- layers = layers[layers$class %in% "TrainLayer", ] - for (k in layers$key) { - layer = self$getFromHashTable(key = k) - layer$train(ind_subset = ind_subset, - use_var_sel = use_var_sel) - } - } else { - stop("No existing layer in the current training study.") - } - invisible(self) - }, - #' @description - #' Predicts values given new data. - #' - #' @param new_study (`NewData(1)`) \cr - #' Object of class [NewData]. - #' @param ind_subset (`vector(1)`) \cr - #' Subset of individuals IDs to be used for training. - #' - #' @return - #' A new [TrainStudy] with predicted values for each layer. - #' @export - #' - predictLayer = function (new_study, - ind_subset = NULL) { - # Initialize a Trainstudy to store predictions - pred_study = PredictStudy$new(id = new_study$getId(), - ind_col = new_study$getIndCol()) - layers = new_study$getKeyClass() - # This code accesses each layer (except MetaLayer) level - # and make predictions for the new layer in input. - # TODO: A TrainLayer can be predicted as a NewLayer. - layers = layers[layers$class %in% c("NewLayer", "TrainLayer"), ] - for (k in layers$key) { - new_layer = new_study$getFromHashTable(key = k) - new_layer_kc = new_layer$getKeyClass() - m_layer = self$getFromHashTable(key = k) - pred_layer = m_layer$predict(new_layer = new_layer, - ind_subset = ind_subset) - # Add a new predicted layer to the predicted study - pred_layer$setPredictStudy(pred_study) - } - return(pred_study) - }, - #' @description - #' Creates a meta training dataset and assigns it to the meta layer. - #' - #' - #' @param resampling_method (`function(1)`) \cr - #' Function for internal validation. - #' @param resampling_arg (`list(1)`) \cr - #' List of arguments to be passed to the function. - #' @param use_var_sel `boolean(1)` \cr - #' If TRUE, selected variables available at each layer are used. - #' - #' @return - #' The current object is returned, with a meta training dataset assigned to the meta layer. - #' @export - #' - createMetaTrainData = function (resampling_method, - resampling_arg, - use_var_sel) { - layers = self$getKeyClass() - if (!("TrainMetaLayer" %in% layers$class)) { - stop("No existing meta layer. I cannot create meta training data.") - } - resampling = do.call(eval(parse(text = resampling_method)), - resampling_arg) - if (!is.list(resampling)) { - stop("The resampling method must return a list of folds, with each fold containing a vector of training IDs.\n See example for details.") - } else { - train_layer_res_list = lapply(X = 1:length(resampling), - function (fold) { - test_index = resampling[[fold]] - train_index = setdiff(unlist(resampling), test_index) - train_ids = self$getTargetValues()[train_index, 1L] - self$trainLayer(ind_subset = train_ids, - use_var_sel = use_var_sel) - test_ids = self$getTargetValues()[test_index, 1L] - # TODO: Note: The current object is not a NewStudy, but a TrainStudy object. 
- pred_study = self$predictLayer(new_study = self, - ind_subset = test_ids) - pred_study_kc = pred_study$getKeyClass() - ## Assess each layer and extract model - current_pred = NULL - for (k in pred_study_kc$key) { - pred_layer = pred_study$getFromHashTable(key = k) - # pred_layer = layer$getFromHashTable(key = "PredictLayer") - pred_data = pred_layer$getPredictData() - pred_values = pred_data$getPredictData() - current_pred = rbind(current_pred, pred_values) - } - return(current_pred) - }) - predicted_values = data.frame(do.call(what = "rbind", - args = train_layer_res_list)) - # Will transform meta data.frame into wide format - predicted_values_wide = reshape(predicted_values, - idvar = colnames(predicted_values)[2], - timevar = colnames(predicted_values)[1], - direction = "wide") - colname_vector = gsub(pattern = "Prediction.", - replacement = "", - x = names(predicted_values_wide)) - names(predicted_values_wide) = colname_vector - target_df = self$getTargetValues() - predicted_values_wide = merge(x = target_df, - y = predicted_values_wide, - by = colnames(target_df)[1], - all.y = TRUE) - # Add layer specific predictions to meta layer - layers = self$getKeyClass() - meta_layer_key = layers[layers$class == "TrainMetaLayer" , "key"] - meta_layer = self$getFromHashTable(key = meta_layer_key) - # TODO: Test and remove comment. - meta_layer$openAccess() - # predicted20242806 this word just serves as temporary key - # TODO: Maybe remove this object from the training meta layer after crossvalidation. - meta_layer$setTrainData(id = "predicted20242806", - ind_col = names(predicted_values_wide)[1L], - data_frame = predicted_values_wide, - meta_layer = meta_layer, - target = colnames(target_df)[2L]) - meta_layer$set2NotTrained() - meta_layer$closeAccess() - return(predicted_values_wide) - } - }, - #' @description - #' Trains the current study. All leaners and the meta learner are trained. - #' - #' @param ind_subset (`vector(1)`) \cr - #' ID subset to be used for training. - #' @param use_var_sel `boolean(1)` \cr - #' If TRUE, variable selection is performed before training. - #' @param resampling_method (`function(1)`) \cr - #' Function for internal validation. - #' @param resampling_arg (`list(1)`) \cr - #' List of arguments to be passed to the function. - #' - #' @return - #' The current object is returned, with each learner trained on each layer. - #' @export - #' - train = function (ind_subset = NULL, - use_var_sel = FALSE, - resampling_method, - resampling_arg) { - # 1) Train each layer - self$trainLayer(ind_subset = ind_subset, - use_var_sel = use_var_sel) - # 2) Create meta training data - self$createMetaTrainData(resampling_method, - resampling_arg, - use_var_sel = use_var_sel) - # 3) Train the meta layer - # Add layer specific predictions to meta training layer - layers = self$getKeyClass() - meta_layer_key = layers[layers$class == "TrainMetaLayer" , "key"] - meta_layer = self$getFromHashTable(key = meta_layer_key) - meta_layer$train(ind_subset = ind_subset) - return(self) - }, - #' @description - #' Predicts a new study. - #' - #' @param new_study (`TrainStudy(1)`) \cr - #' A new study to be predicted. - #' @param ind_subset (`vector(1)`) \cr - #' Vector of IDs to be predicted. - #' - #' @return - #' The predicted object. All layers and the meta layer are predicted. This is the final predicted object. 
- #' @export - #' - predict = function (new_study, - ind_subset = NULL) { - # 1) Layer predictions - predicted_study = self$predictLayer(new_study = new_study, + # This code accesses each layer (except MetaLayer) level and trains the corres- + # ponding learner. + layers = layers[layers$class %in% "TrainLayer", ] + for (k in layers$key) { + layer = self$getFromHashTable(key = k) + layer$train(ind_subset = ind_subset, + use_var_sel = use_var_sel) + } + } else { + stop("No existing layer in the current training study.") + } + invisible(self) + }, + #' @description + #' Predicts values given new data. + #' + #' @param new_study (`NewData(1)`) \cr + #' Object of class [NewData]. + #' @param ind_subset (`vector(1)`) \cr + #' Subset of individuals IDs to be used for training. + #' + #' @return + #' A new [TrainStudy] with predicted values for each layer. + #' @export + #' + predictLayer = function (new_study, + ind_subset = NULL) { + # Initialize a Trainstudy to store predictions + pred_study = PredictStudy$new(id = new_study$getId(), + ind_col = new_study$getIndCol()) + layers = new_study$getKeyClass() + # This code accesses each layer (except MetaLayer) level + # and make predictions for the new layer in input. + # TODO: A TrainLayer can be predicted as a NewLayer. + layers = layers[layers$class %in% c("NewLayer", "TrainLayer"), ] + for (k in layers$key) { + new_layer = new_study$getFromHashTable(key = k) + new_layer_kc = new_layer$getKeyClass() + m_layer = self$getFromHashTable(key = k) + pred_layer = m_layer$predict(new_layer = new_layer, ind_subset = ind_subset) - # 2) Meta layer predicted new data; resume layer specific - # predictions and create a new data. - meta_layer_id = self$getTrainMetaLayer()$getId() - new_meta_data = predicted_study$createMetaNewData( - meta_layer_id = meta_layer_id) - # 3) Predict new meta layer by the trained meta layer - layers = self$getKeyClass() - meta_layer_key = layers[layers$class == "TrainMetaLayer", "key"] - meta_layer = self$getFromHashTable(key = meta_layer_key) - # TODO: getNewLayer maybe rename it getLayer? - predicted_layer = meta_layer$predict(new_layer = new_meta_data$getNewLayer(), - ind_subset = ind_subset) - # Store final meta predicted values on meta layer - predicted_study$removeFromHashTable(key = predicted_layer$getId()) - predicted_study$add2HashTable(key = predicted_layer$getId(), - value = predicted_layer, - .class = "PredictData") - # Updating the predicted meta layer - # TODO: This is already done by predicting the meta layer. If no error, remove me. - # predicted_study$add2HashTable(key = meta_layer_key, - # value = predicted_layer, - # .class = "Predict") - # Resume predictions - key_class_study = predicted_study$getKeyClass() - predicted_values = NULL - for (k in key_class_study[ , "key"]) { - # TODO: Please ensure the difference between [PredictData] and - # predicted values (predicted data.frame) when writting the paper. 
-                                          pred_layer = predicted_study$getFromHashTable(key = k)
-                                          pred_data = pred_layer$getPredictData()
-                                          pred_values = pred_data$getPredictData()
-                                          predicted_values = data.frame(rbind(predicted_values,
-                                                                              pred_values))
-                                        }
-                                        # Will transform meta data.frame into wide format
-                                        predicted_values_wide = reshape(predicted_values,
-                                                                        idvar = colnames(predicted_values)[2L],
-                                                                        timevar = colnames(predicted_values)[1L],
-                                                                        direction = "wide")
-                                        colname_vector = gsub(pattern = "Prediction.",
-                                                              replacement = "",
-                                                              x = names(predicted_values_wide))
-                                        names(predicted_values_wide) = colname_vector
+      # This code accesses each layer (except MetaLayer) level and trains the corres-
+      # ponding learner.
+      layers = layers[layers$class %in% "TrainLayer", ]
+      for (k in layers$key) {
+        layer = self$getFromHashTable(key = k)
+        layer$train(ind_subset = ind_subset,
+                    use_var_sel = use_var_sel)
+      }
+    } else {
+      stop("No existing layer in the current training study.")
+    }
+    invisible(self)
+  },
+  #' @description
+  #' Predicts values given new data.
+  #'
+  #' @param new_study (`NewStudy(1)`) \cr
+  #' Object of class [NewStudy].
+  #' @param ind_subset (`vector(1)`) \cr
+  #' Subset of individuals IDs to be used for prediction.
+  #'
+  #' @return
+  #' A new [PredictStudy] with predicted values for each layer.
+  #' @export
+  #'
+  predictLayer = function (new_study,
+                           ind_subset = NULL) {
+    # Initialize a PredictStudy to store the predictions
+    pred_study = PredictStudy$new(id = new_study$getId(),
+                                  ind_col = new_study$getIndCol())
+    layers = new_study$getKeyClass()
+    # This code accesses each layer (except MetaLayer) level
+    # and makes predictions for the new layer passed as input.
+    # TODO: A TrainLayer can be predicted as a NewLayer.
+    layers = layers[layers$class %in% c("NewLayer", "TrainLayer"), ]
+    for (k in layers$key) {
+      new_layer = new_study$getFromHashTable(key = k)
+      new_layer_kc = new_layer$getKeyClass()
+      m_layer = self$getFromHashTable(key = k)
+      pred_layer = m_layer$predict(new_layer = new_layer,
+                                   ind_subset = ind_subset)
+      # Add a new predicted layer to the predicted study
+      pred_layer$setPredictStudy(pred_study)
+    }
+    return(pred_study)
+  },
+  #' @description
+  #' Creates a meta training dataset and assigns it to the meta layer.
+  #'
+  #'
+  #' @param resampling_method (`function(1)`) \cr
+  #' Function for internal validation.
+  #' @param resampling_arg (`list(1)`) \cr
+  #' List of arguments to be passed to the function.
+  #' @param use_var_sel `boolean(1)` \cr
+  #' If TRUE, selected variables available at each layer are used.
+  #'
+  #' @return
+  #' The current object is returned, with a meta training dataset assigned to the meta layer.
+  #' @export
+  #'
+  createMetaTrainData = function (resampling_method,
+                                  resampling_arg,
+                                  use_var_sel) {
+    layers = self$getKeyClass()
+    if (!("TrainMetaLayer" %in% layers$class)) {
+      stop("No existing meta layer. I cannot create meta training data.")
+    }
+    resampling = do.call(eval(parse(text = resampling_method)),
+                         resampling_arg)
+    if (!is.list(resampling)) {
+      stop("The resampling method must return a list of folds, with each fold containing a vector of training IDs.\n See example for details.")
+    } else {
+      train_layer_res_list = lapply(X = 1:length(resampling),
+                                    function (fold) {
+                                      test_index = resampling[[fold]]
+                                      train_index = setdiff(unlist(resampling), test_index)
+                                      train_ids = self$getTargetValues()[train_index, 1L]
+                                      self$trainLayer(ind_subset = train_ids,
+                                                      use_var_sel = use_var_sel)
+                                      test_ids = self$getTargetValues()[test_index, 1L]
+                                      # TODO: Note: The current object is not a NewStudy, but a TrainStudy object.
+                                      pred_study = self$predictLayer(new_study = self,
+                                                                     ind_subset = test_ids)
+                                      pred_study_kc = pred_study$getKeyClass()
+                                      ## Assess each layer and extract model
+                                      current_pred = NULL
+                                      for (k in pred_study_kc$key) {
+                                        pred_layer = pred_study$getFromHashTable(key = k)
+                                        # pred_layer = layer$getFromHashTable(key = "PredictLayer")
+                                        pred_data = pred_layer$getPredictData()
+                                        pred_values = pred_data$getPredictData()
+                                        current_pred = rbind(current_pred, pred_values)
+                                      }
+                                      return(current_pred)
+                                    })
+      predicted_values = data.frame(do.call(what = "rbind",
+                                            args = train_layer_res_list))
+      # Will transform meta data.frame into wide format
+      predicted_values_wide = reshape(predicted_values,
+                                      idvar = colnames(predicted_values)[2],
+                                      timevar = colnames(predicted_values)[1],
+                                      direction = "wide")
+      colname_vector = gsub(pattern = "Prediction.",
+                            replacement = "",
+                            x = names(predicted_values_wide))
+      names(predicted_values_wide) = colname_vector
+      target_df = self$getTargetValues()
+      predicted_values_wide = merge(x = target_df,
+                                    y = predicted_values_wide,
+                                    by = colnames(target_df)[1],
+                                    all.y = TRUE)
+      # Add layer specific predictions to meta layer
+      layers = self$getKeyClass()
+      meta_layer_key = layers[layers$class == "TrainMetaLayer" , "key"]
+      meta_layer = self$getFromHashTable(key = meta_layer_key)
+      # TODO: Test and remove comment.
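+      # The stacked layer predictions are stored as the meta layer's training
+      # data; openAccess()/closeAccess() bracket this write so the meta layer
+      # is only modified through this controlled path.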
+      meta_layer$openAccess()
+      # predicted20242806: this word just serves as a temporary key
+      # TODO: Maybe remove this object from the training meta layer after crossvalidation.
+      meta_layer$setTrainData(id = "predicted20242806",
+                              ind_col = names(predicted_values_wide)[1L],
+                              data_frame = predicted_values_wide,
+                              meta_layer = meta_layer,
+                              target = colnames(target_df)[2L])
+      meta_layer$set2NotTrained()
+      meta_layer$closeAccess()
+      return(predicted_values_wide)
+    }
+  },
+  #' @description
+  #' Trains the current study. All learners and the meta learner are trained.
+  #'
+  #' @param ind_subset (`vector(1)`) \cr
+  #' ID subset to be used for training.
+  #' @param use_var_sel `boolean(1)` \cr
+  #' If TRUE, variable selection is performed before training.
+  #' @param resampling_method (`function(1)`) \cr
+  #' Function for internal validation.
+  #' @param resampling_arg (`list(1)`) \cr
+  #' List of arguments to be passed to the function.
+  #'
+  #' @return
+  #' The current object is returned, with each learner trained on each layer.
+  #' @export
+  #'
+  train = function (ind_subset = NULL,
+                    use_var_sel = FALSE,
+                    resampling_method,
+                    resampling_arg) {
+    # Test that the study contains overlapping individuals
+    if (!self$test_overlap()) {
+      stop("This study does not contain overlapping individuals.")
+    }
+    # 1) Train each layer
+    self$trainLayer(ind_subset = ind_subset,
+                    use_var_sel = use_var_sel)
+    # 2) Create meta training data
+    self$createMetaTrainData(resampling_method,
+                             resampling_arg,
+                             use_var_sel = use_var_sel)
+    # 3) Train the meta layer
+    # Add layer specific predictions to meta training layer
+    layers = self$getKeyClass()
+    meta_layer_key = layers[layers$class == "TrainMetaLayer" , "key"]
+    meta_layer = self$getFromHashTable(key = meta_layer_key)
+    meta_layer$train(ind_subset = ind_subset)
+    return(self)
+  },
+  #' @description
+  #' Predicts a new study.
+  #'
+  #' @param new_study (`NewStudy(1)`) \cr
+  #' A new study to be predicted.
+  #' @param ind_subset (`vector(1)`) \cr
+  #' Vector of IDs to be predicted.
+  #'
+  #' @return
+  #' The predicted object. All layers and the meta layer are predicted. This is the final predicted object.
+  #' @export
+  #'
+  predict = function (new_study,
+                      ind_subset = NULL) {
+    # 1) Layer predictions
+    predicted_study = self$predictLayer(new_study = new_study,
+                                        ind_subset = ind_subset)
+    # 2) Meta layer new data: collect the layer-specific
+    # predictions and create a new data object.
+    meta_layer_id = self$getTrainMetaLayer()$getId()
+    new_meta_data = predicted_study$createMetaNewData(
+      meta_layer_id = meta_layer_id)
+    # 3) Predict new meta layer by the trained meta layer
+    layers = self$getKeyClass()
+    meta_layer_key = layers[layers$class == "TrainMetaLayer", "key"]
+    meta_layer = self$getFromHashTable(key = meta_layer_key)
+    # TODO: getNewLayer maybe rename it getLayer?
+    predicted_layer = meta_layer$predict(new_layer = new_meta_data$getNewLayer(),
+                                         ind_subset = ind_subset)
+    # Store final meta predicted values on meta layer
+    predicted_study$removeFromHashTable(key = predicted_layer$getId())
+    predicted_study$add2HashTable(key = predicted_layer$getId(),
+                                  value = predicted_layer,
+                                  .class = "PredictData")
+    # Updating the predicted meta layer
+    # TODO: This is already done by predicting the meta layer. If no error, remove me.
+ # predicted_study$add2HashTable(key = meta_layer_key, + # value = predicted_layer, + # .class = "Predict") + # Resume predictions + key_class_study = predicted_study$getKeyClass() + predicted_values = NULL + for (k in key_class_study[ , "key"]) { + # TODO: Please ensure the difference between [PredictData] and + # predicted values (predicted data.frame) when writting the paper. + pred_layer = predicted_study$getFromHashTable(key = k) + pred_data = pred_layer$getPredictData() + pred_values = pred_data$getPredictData() + predicted_values = data.frame(rbind(predicted_values, + pred_values)) + } + # Will transform meta data.frame into wide format + predicted_values_wide = reshape(predicted_values, + idvar = colnames(predicted_values)[2L], + timevar = colnames(predicted_values)[1L], + direction = "wide") + colname_vector = gsub(pattern = "Prediction.", + replacement = "", + x = names(predicted_values_wide)) + names(predicted_values_wide) = colname_vector - return(list(predicted_study = predicted_study, - predicted_values = predicted_values_wide)) - }, - #' @description - #' Variable selection on the current training study. - #' - #' @param ind_subset `vector(1)` \cr - #' ID subset of individuals to be used for variable selection. - #' - #' @return - #' The current layer is returned with the resulting model. - #' @export - #' - varSelection = function (ind_subset = NULL) { - layers = self$getKeyClass() - if (nrow(layers)) { - # This code accesses each layer (except MetaLayer) level and - # perform variable selection. - layers = layers[layers$class %in% "TrainLayer", ] - selected = NULL - for (k in layers$key) { - layer = self$getFromHashTable(key = k) - layer_var_sel = layer$varSelection(ind_subset = ind_subset) - selected = rbind(selected, - data.frame(Layer = layer$getId(), - variable = layer_var_sel)) - } - } else { - stop("No existing layer in the current training study.") - } - return(selected) - }, - #' @description - #' Gather target values from all layer. - #' - #' @return - #' A \code{data.frame} containing individuals IDs and corresponding target values. - #' @export - #' - getTargetValues = function() { - layers = self$getKeyClass() - # This code accesses each layer (except TrainMetaLayer) level - # and get the target variable - layers = layers[layers$class %in% "TrainLayer", ] - target_data = NULL - train_data = NULL - for (k in layers$key) { - layer = self$getFromHashTable(key = k) - target_data = as.data.frame(rbind(target_data, - layer$getTargetValues())) - train_data = layer$getTrainData() - } - target_data = target_data[!duplicated(target_data[ , train_data$getIndCol()]), ] - return(target_data) - }, - #' @description - #' Gather individual IDs from all layer. - #' - #' @return - #' A \code{data.frame} containing individuals IDs. - #' @export - #' - getIndIDs = function() { - layers = self$getKeyClass() - # This code accesses each layer (except TrainMetaLayer) level - # and get the target variable - layers = layers[layers$class %in% "TrainLayer", ] - ids_data = NULL - current_data = NULL - for (k in layers$key) { - layer = self$getFromHashTable(key = k) - ids_data = as.data.frame(rbind(ids_data, - layer$getIndIDs())) - } - ids_data = ids_data[!duplicated(ids_data[ , 1L]), , - drop = FALSE] - return(ids_data) - }, - #' @description - #' Getter of the meta layer. 
- #' - #' @return - #' Object from class [TrainMetaLayer] - #' @export - #' - getTrainMetaLayer = function () { - layers = self$getKeyClass() - meta_layer_key = layers[layers$class == "TrainMetaLayer" , "key"] - meta_layer = self$getFromHashTable(key = meta_layer_key) - return(meta_layer) - }, - #' @description - #' Getter of the individual column name. - #' @export - getIndCol = function () { - return(private$ind_col) - }, - #' @description - #' Getter of the target variable name. - #' @export - getTarget = function () { - return(private$target) - }, - #' @description - #' Increase the number of trained layer. - increaseNbTrainedLayer = function () { - private$nb_trained_layer = private$nb_trained_layer + 1 - if (private$nb_trained_layer == length(private$hash_table)) { - private$status = TRUE - } - } - ), - private = list( - ind_col = character(0L), - target = character(0L), - nb_trained_layer = 0L, - status = FALSE - ), - cloneable = FALSE + return(list(predicted_study = predicted_study, + predicted_values = predicted_values_wide)) + }, + #' @description + #' Variable selection on the current training study. + #' + #' @param ind_subset `vector(1)` \cr + #' ID subset of individuals to be used for variable selection. + #' + #' @return + #' The current layer is returned with the resulting model. + #' @export + #' + varSelection = function (ind_subset = NULL) { + layers = self$getKeyClass() + if (nrow(layers)) { + # This code accesses each layer (except MetaLayer) level and + # perform variable selection. + layers = layers[layers$class %in% "TrainLayer", ] + selected = NULL + for (k in layers$key) { + layer = self$getFromHashTable(key = k) + layer_var_sel = layer$varSelection(ind_subset = ind_subset) + selected = rbind(selected, + data.frame(Layer = layer$getId(), + variable = layer_var_sel)) + } + } else { + stop("No existing layer in the current training study.") + } + return(selected) + }, + #' @description + #' Gather target values from all layer. + #' + #' @return + #' A \code{data.frame} containing individuals IDs and corresponding target values. + #' @export + #' + getTargetValues = function() { + layers = self$getKeyClass() + # This code accesses each layer (except TrainMetaLayer) level + # and get the target variable + layers = layers[layers$class %in% "TrainLayer", ] + target_data = NULL + train_data = NULL + for (k in layers$key) { + layer = self$getFromHashTable(key = k) + target_data = as.data.frame(rbind(target_data, + layer$getTargetValues())) + train_data = layer$getTrainData() + } + target_data = target_data[!duplicated(target_data[ , train_data$getIndCol()]), ] + return(target_data) + }, + #' @description + #' Gather individual IDs from all layer. + #' + #' @return + #' A \code{data.frame} containing individuals IDs. + #' @export + #' + getIndIDs = function() { + layers = self$getKeyClass() + # This code accesses each layer (except TrainMetaLayer) level + # and get the individual IDs. + layers = layers[layers$class %in% "TrainLayer", ] + ids_data = NULL + current_data = NULL + for (k in layers$key) { + layer = self$getFromHashTable(key = k) + ids_data = as.data.frame(rbind(ids_data, + layer$getIndIDs())) + } + ids_data = ids_data[!duplicated(ids_data[ , 1L]), , + drop = FALSE] + return(ids_data) + }, + #' @description + #' Getter of the meta layer. 
+  #'
+  #' @return
+  #' Object from class [TrainMetaLayer]
+  #' @export
+  #'
+  getTrainMetaLayer = function () {
+    layers = self$getKeyClass()
+    meta_layer_key = layers[layers$class == "TrainMetaLayer" , "key"]
+    meta_layer = self$getFromHashTable(key = meta_layer_key)
+    return(meta_layer)
+  },
+  #' @description
+  #' Getter of the individual column name.
+  #' @export
+  getIndCol = function () {
+    return(private$ind_col)
+  },
+  #' @description
+  #' Getter of the target variable name.
+  #' @export
+  getTarget = function () {
+    return(private$target)
+  },
+  #' @description
+  #' Increase the number of trained layers.
+  increaseNbTrainedLayer = function () {
+    private$nb_trained_layer = private$nb_trained_layer + 1L
+    if (private$nb_trained_layer == length(private$hash_table)) {
+      private$status = TRUE
+    }
+  },
+  #' @description
+  #' Test whether individuals overlap across layers.
+  #' More than five overlapping individuals are required.
+  #'
+  #' @export
+  #'
+  test_overlap = function () {
+    layers = self$getKeyClass()
+    # This code accesses each layer (except TrainMetaLayer) level
+    # and gets the individual IDs.
+    layers = layers[layers$class %in% "TrainLayer", ]
+    ids_data = NULL
+    current_data = NULL
+    for (k in layers$key) {
+      layer = self$getFromHashTable(key = k)
+      ids_data = as.data.frame(rbind(ids_data,
+                                     layer$getIndIDs()))
+    }
+    if (sum(duplicated(ids_data[ , 1L])) > 5L) {
+      return(TRUE)
+    } else {
+      return(FALSE)
+    }
+  },
+  #' @description
+  #' UpSet plot to show an overview of the overlap of individuals across various layers.
+  #'
+  #' @param ... \cr
+  #' Further parameters to be passed to the \code{upset} function from package \code{UpSetR}.
+  #'
+  #' @export
+  #'
+  upset = function (...) {
+    layers = self$getKeyClass()
+    # This code accesses each layer (except TrainMetaLayer) level
+    # and gets the individual IDs.
+    layers = layers[layers$class %in% "TrainLayer", ]
+    ids_list = lapply(layers$key, function (k) {
+      layer = self$getFromHashTable(key = k)
+      return(layer$getIndIDs()[ , 1L])
+    })
+    param_upset = list(...)
+    from_list_ids = do.call(eval(parse(text = "UpSetR::fromList")),
+                            list(input = ids_list))
+    names(from_list_ids) = layers$key
+    param_upset$data = from_list_ids
+    print(do.call(eval(parse(text = "UpSetR::upset")),
+                  param_upset))
+    invisible(TRUE)
+  },
+  #' @description
+  #' Generate the study summary.
+  #'
+  #' @export
+  #'
+  summary = function () {
+    cat(sprintf("Study %s\n", self$getId()))
+    cat("----------------\n")
+    self$print()
+    cat("----------------\n")
+    cat("\n")
+    layers = self$getKeyClass()
+    for (k in layers$key) {
+      layer = self$getFromHashTable(key = k)
+      layer$summary()
+      cat("\n")
+    }
+  }
+  ),
+  private = list(
+    ind_col = character(0L),
+    target = character(0L),
+    nb_trained_layer = 0L,
+    status = FALSE
+  ),
+  cloneable = FALSE
 )
diff --git a/R/VarSel.R b/R/VarSel.R
index 8bdd651..755c2cf 100644
--- a/R/VarSel.R
+++ b/R/VarSel.R
@@ -57,8 +57,17 @@ VarSel <- R6Class("VarSel",
     cat(sprintf("VarSel : %s\n", private$id))
     cat(sprintf("TrainLayer : %s\n", private$train_layer$getId()))
     cat(sprintf("Package : %s\n", private$package))
-    cat(sprintf("Var. Sel. fct. : %s\n", private$lrn_fct))
-    cat(sprintf("Param id : %s\n", private$param$id))
+    cat(sprintf("Function : %s\n", private$varsel_fct))
+  },
+  #' @description
+  #' Summary
+  #' @param ... (any) \cr
+  #'
+  summary = function (...) 
diff --git a/R/VarSel.R b/R/VarSel.R
index 8bdd651..755c2cf 100644
--- a/R/VarSel.R
+++ b/R/VarSel.R
@@ -57,8 +57,17 @@ VarSel <- R6Class("VarSel",
     cat(sprintf("VarSel : %s\n", private$id))
     cat(sprintf("TrainLayer : %s\n", private$train_layer$getId()))
     cat(sprintf("Package : %s\n", private$package))
-    cat(sprintf("Var. Sel. fct. : %s\n", private$lrn_fct))
-    cat(sprintf("Param id : %s\n", private$param$id))
+    cat(sprintf("Function : %s\n", private$varsel_fct))
+  },
+  #' @description
+  #' Summary
+  #' @param ... (any) \cr
+  #'
+  summary = function (...) {
+    cat(sprintf(" VarSel : %s\n", private$id))
+    cat(sprintf(" TrainLayer : %s\n", private$train_layer$getId()))
+    cat(sprintf(" Package : %s\n", private$package))
+    cat(sprintf(" Function : %s\n", private$varsel_fct))
   },
   #' @description
   #' Trains the current learner (from class [Lrner]) on the current training data (from class [TrainData]).
diff --git a/man/Lrner.Rd b/man/Lrner.Rd
index 475097c..2fbb263 100644
--- a/man/Lrner.Rd
+++ b/man/Lrner.Rd
@@ -12,10 +12,13 @@ This class implements a learner. A \link{Lrner} object can only exist as a compo
 \itemize{
 \item \href{#method-Lrner-new}{\code{Lrner$new()}}
 \item \href{#method-Lrner-print}{\code{Lrner$print()}}
+\item \href{#method-Lrner-summary}{\code{Lrner$summary()}}
 \item \href{#method-Lrner-train}{\code{Lrner$train()}}
 \item \href{#method-Lrner-getTrainLayer}{\code{Lrner$getTrainLayer()}}
 \item \href{#method-Lrner-getId}{\code{Lrner$getId()}}
 \item \href{#method-Lrner-getPackage}{\code{Lrner$getPackage()}}
+\item \href{#method-Lrner-getIndSubset}{\code{Lrner$getIndSubset()}}
+\item \href{#method-Lrner-getVarSubset}{\code{Lrner$getVarSubset()}}
 }
 }
 \if{html}{\out{<hr>}}
@@ -60,6 +63,23 @@
 Printer
 \if{html}{\out{<div class="r">}}\preformatted{Lrner$print(...)}\if{html}{\out{</div>}}
 }
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(any) \cr}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Lrner-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-Lrner-summary}{}}}
+\subsection{Method \code{summary()}}{
+Summary
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Lrner$summary(...)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
@@ -127,6 +147,32 @@
 Getter of the learner package implementing the learn function.
 \if{html}{\out{<div class="r">}}\preformatted{Lrner$getPackage()}\if{html}{\out{</div>}}
 }
+\subsection{Returns}{
+The name of the package implementing the learn function.
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Lrner-getIndSubset"></a>}}
+\if{latex}{\out{\hypertarget{method-Lrner-getIndSubset}{}}}
+\subsection{Method \code{getIndSubset()}}{
+Getter of the IDs of the individuals used for training.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Lrner$getIndSubset()}\if{html}{\out{</div>}}
+}
+
+\subsection{Returns}{
+The IDs of the individuals used for training.
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Lrner-getVarSubset"></a>}}
+\if{latex}{\out{\hypertarget{method-Lrner-getVarSubset}{}}}
+\subsection{Method \code{getVarSubset()}}{
+Getter of the IDs of the variables used for training.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Lrner$getVarSubset()}\if{html}{\out{</div>}}
+}
+
 \subsection{Returns}{
 The IDs of the variables used for training.
 }
diff --git a/man/Model.Rd b/man/Model.Rd
index 0388a04..b1a93d1 100644
--- a/man/Model.Rd
+++ b/man/Model.Rd
@@ -15,6 +15,7 @@ A \link{Model} object can compute predictions for a \link{NewData} object. See t
 \itemize{
 \item \href{#method-Model-new}{\code{Model$new()}}
 \item \href{#method-Model-print}{\code{Model$print()}}
+\item \href{#method-Model-summary}{\code{Model$summary()}}
 \item \href{#method-Model-getBaseModel}{\code{Model$getBaseModel()}}
 \item \href{#method-Model-getTrainData}{\code{Model$getTrainData()}}
 \item \href{#method-Model-getTrainLabel}{\code{Model$getTrainLabel()}}
@@ -63,6 +64,23 @@
 Printer
 \if{html}{\out{<div class="r">}}\preformatted{Model$print(...)}\if{html}{\out{</div>}}
 }
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(any) \cr}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Model-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-Model-summary}{}}}
+\subsection{Method \code{summary()}}{
+Summary
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Model$summary(...)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/man/NewLayer.Rd b/man/NewLayer.Rd
index 464c3ef..e26ce78 100644
--- a/man/NewLayer.Rd
+++ b/man/NewLayer.Rd
@@ -23,6 +23,7 @@ A predicted layer can only contain \link{NewData}.
 \item \href{#method-NewLayer-getIndIDs}{\code{NewLayer$getIndIDs()}}
 \item \href{#method-NewLayer-getNewData}{\code{NewLayer$getNewData()}}
 \item \href{#method-NewLayer-checkNewDataExist}{\code{NewLayer$checkNewDataExist()}}
+\item \href{#method-NewLayer-summary}{\code{NewLayer$summary()}}
 }
 }
 \if{html}{\out{<hr>}}
@@ -126,5 +127,15 @@ Check whether a new data has been already stored.
 \subsection{Returns}{
 Boolean value
 }
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-NewLayer-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-NewLayer-summary}{}}}
+\subsection{Method \code{summary()}}{
+Generate summary.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{NewLayer$summary()}\if{html}{\out{</div>}}
+}
+
 }
 }
diff --git a/man/PredictLayer.Rd b/man/PredictLayer.Rd
index 6e268a6..474428d 100644
--- a/man/PredictLayer.Rd
+++ b/man/PredictLayer.Rd
@@ -23,6 +23,7 @@ A predicted layer can only contain \link{PredictData}.
 \item \href{#method-PredictLayer-getIndIDs}{\code{PredictLayer$getIndIDs()}}
 \item \href{#method-PredictLayer-getPredictData}{\code{PredictLayer$getPredictData()}}
 \item \href{#method-PredictLayer-setPredictStudy}{\code{PredictLayer$setPredictStudy()}}
+\item \href{#method-PredictLayer-summary}{\code{PredictLayer$summary()}}
 }
 }
 \if{html}{\out{<hr>}}
@@ -131,5 +132,15 @@ Assigns a predicted study to the predicted layer.
 \subsection{Returns}{
 The current object
 }
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-PredictLayer-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-PredictLayer-summary}{}}}
+\subsection{Method \code{summary()}}{
+Generate summary.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{PredictLayer$summary()}\if{html}{\out{</div>}}
+}
+
 }
 }
diff --git a/man/TrainData.Rd b/man/TrainData.Rd
index 63eb3de..67d1501 100644
--- a/man/TrainData.Rd
+++ b/man/TrainData.Rd
@@ -18,6 +18,7 @@ exist as a component of a \link{TrainLayer} or a \link{TrainMetaLayer} object.
 \itemize{
 \item \href{#method-TrainData-new}{\code{TrainData$new()}}
 \item \href{#method-TrainData-print}{\code{TrainData$print()}}
+\item \href{#method-TrainData-summary}{\code{TrainData$summary()}}
 \item \href{#method-TrainData-getData}{\code{TrainData$getData()}}
 \item \href{#method-TrainData-getTargetValues}{\code{TrainData$getTargetValues()}}
 \item \href{#method-TrainData-getTargetName}{\code{TrainData$getTargetName()}}
@@ -73,6 +74,23 @@
 Printer
 \if{html}{\out{<div class="r">}}\preformatted{TrainData$print(...)}\if{html}{\out{</div>}}
 }
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(any) \cr}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainData-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainData-summary}{}}}
+\subsection{Method \code{summary()}}{
+Summary
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainData$summary(...)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
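
On a TrainData object, print() and summary() now coexist: print() is the
standalone display, while summary() renders the variant intended to be nested
inside a layer or study summary (train_data is illustrative):

    train_data$print()      # standalone report
    train_data$summary()    # variant used by enclosing summaries
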
diff --git a/man/TrainLayer.Rd b/man/TrainLayer.Rd
index 05d8e69..625575c 100644
--- a/man/TrainLayer.Rd
+++ b/man/TrainLayer.Rd
@@ -43,6 +43,7 @@ A layer can make predictions for a new layer passed as argument to its predict f
 \item \href{#method-TrainLayer-getPredictions}{\code{TrainLayer$getPredictions()}}
 \item \href{#method-TrainLayer-checkLrnerExist}{\code{TrainLayer$checkLrnerExist()}}
 \item \href{#method-TrainLayer-checkTrainDataExist}{\code{TrainLayer$checkTrainDataExist()}}
+\item \href{#method-TrainLayer-summary}{\code{TrainLayer$summary()}}
 }
 }
 \if{html}{\out{<hr>}}
@@ -304,5 +305,15 @@ Check whether a training data has been already stored.
 \subsection{Returns}{
 Boolean value
 }
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainLayer-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainLayer-summary}{}}}
+\subsection{Method \code{summary()}}{
+Generate summary.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainLayer$summary()}\if{html}{\out{</div>}}
+}
+
 }
 }
diff --git a/man/TrainMetaLayer.Rd b/man/TrainMetaLayer.Rd
index f1f3fd1..4c90418 100644
--- a/man/TrainMetaLayer.Rd
+++ b/man/TrainMetaLayer.Rd
@@ -39,6 +39,7 @@ The meta layer can predict values given a new meta layer.
 \item \href{#method-TrainMetaLayer-checkLrnerExist}{\code{TrainMetaLayer$checkLrnerExist()}}
 \item \href{#method-TrainMetaLayer-checkTrainDataExist}{\code{TrainMetaLayer$checkTrainDataExist()}}
 \item \href{#method-TrainMetaLayer-set2NotTrained}{\code{TrainMetaLayer$set2NotTrained()}}
+\item \href{#method-TrainMetaLayer-summary}{\code{TrainMetaLayer$summary()}}
 }
 }
 \if{html}{\out{<hr>}}
@@ -297,5 +298,15 @@
 Only useful to reset the status to FALSE after cross validation.
 \if{html}{\out{<div class="r">}}\preformatted{TrainMetaLayer$set2NotTrained()}\if{html}{\out{</div>}}
 }
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainMetaLayer-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainMetaLayer-summary}{}}}
+\subsection{Method \code{summary()}}{
+Generate summary.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainMetaLayer$summary()}\if{html}{\out{</div>}}
+}
+
 }
 }
diff --git a/man/TrainStudy.Rd b/man/TrainStudy.Rd
index d422d81..e34d44d 100644
--- a/man/TrainStudy.Rd
+++ b/man/TrainStudy.Rd
@@ -51,6 +51,9 @@ a new study.
 \item \href{#method-TrainStudy-getIndCol}{\code{TrainStudy$getIndCol()}}
 \item \href{#method-TrainStudy-getTarget}{\code{TrainStudy$getTarget()}}
 \item \href{#method-TrainStudy-increaseNbTrainedLayer}{\code{TrainStudy$increaseNbTrainedLayer()}}
+\item \href{#method-TrainStudy-test_overlap}{\code{TrainStudy$test_overlap()}}
+\item \href{#method-TrainStudy-upset}{\code{TrainStudy$upset()}}
+\item \href{#method-TrainStudy-summary}{\code{TrainStudy$summary()}}
 }
 }
 \if{html}{\out{<hr>}}
@@ -329,5 +332,44 @@
 Increase the number of trained layers.
 \if{html}{\out{<div class="r">}}\preformatted{TrainStudy$increaseNbTrainedLayer()}\if{html}{\out{</div>}}
 }
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainStudy-test_overlap"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainStudy-test_overlap}{}}}
+\subsection{Method \code{test_overlap()}}{
+Test whether individuals overlap across layers.
+More than five overlapping individuals are required.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainStudy$test_overlap()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainStudy-upset"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainStudy-upset}{}}}
+\subsection{Method \code{upset()}}{
+UpSet plot to show an overview of the overlap of individuals across various layers.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainStudy$upset(...)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{\cr
+Further parameters to be passed to the \code{upset} function from package \code{UpSetR}.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-TrainStudy-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-TrainStudy-summary}{}}}
+\subsection{Method \code{summary()}}{
+Generate the study summary.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{TrainStudy$summary()}\if{html}{\out{</div>}}
+}
+
 }
 }
diff --git a/man/VarSel.Rd b/man/VarSel.Rd
index 1c9e849..499283a 100644
--- a/man/VarSel.Rd
+++ b/man/VarSel.Rd
@@ -12,6 +12,7 @@ This class implements a learner. A \link{VarSel} object can only exist as a comp
 \itemize{
 \item \href{#method-VarSel-new}{\code{VarSel$new()}}
 \item \href{#method-VarSel-print}{\code{VarSel$print()}}
+\item \href{#method-VarSel-summary}{\code{VarSel$summary()}}
 \item \href{#method-VarSel-varSelection}{\code{VarSel$varSelection()}}
 \item \href{#method-VarSel-getTrainLayer}{\code{VarSel$getTrainLayer()}}
 \item \href{#method-VarSel-getId}{\code{VarSel$getId()}}
@@ -62,6 +63,23 @@
 Printer
 \if{html}{\out{<div class="r">}}\preformatted{VarSel$print(...)}\if{html}{\out{</div>}}
 }
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(any) \cr}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-VarSel-summary"></a>}}
+\if{latex}{\out{\hypertarget{method-VarSel-summary}{}}}
+\subsection{Method \code{summary()}}{
+Summary
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{VarSel$summary(...)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{