Remove col_0_1 param from PipeOpImpute. Ref ModelOriented#38

jjanborowka · Sep 3, 2020 · adcbbec · adcbbec
1 parent a55e867
commit adcbbec
Show file tree

Hide file tree

Showing 26 changed files with 79 additions and 130 deletions.
diff --git a/EMMA_package/EMMA/R/PipeOPAmelia.R b/EMMA_package/EMMA/R/PipeOPAmelia.R
@@ -26,8 +26,6 @@
 #' Number indicating level of the empirical (or ridge) prior. This prior shrinks the covariances of the data, but keeps the means and variances the same for problems of high missingness, small N's or large correlations among the variables. Should be kept small, perhaps 0.5 to 1 percent of the rows of the data; a reasonable upper bound is around 10 percent of the rows of the data. If empir is not set, empir=nrow(df)*0.015, default \code{NULL}.
 #' \item \code{parallel} :: \code{logical(1)}\cr
 #' If TRUE, parallel calculation is used, default \code{TRUE}.
-#' \item \code{col_0_1} :: \code{logical(1)}\cr
-#' Decides whether to add a bonus column informing where values were imputed. 0 - value was in dataset, 1 - value was imputed, default \code{FALSE}.
 #' \item \code{out_file} :: \code{character(1)}\cr
 #' Output log file location. If file already exists log message will be added. If NULL no log will be produced, default \code{NULL}.
 #'}
@@ -37,16 +35,16 @@
 PipeOpAmelia <-  R6::R6Class("Amelia_imputation",lock_objects=FALSE,
                                  inherit = PipeOpImpute,  # inherit from PipeOp
                                  public = list(
-                                   initialize = function(id = "imput_Amelia", col_0_1=FALSE,polytime=NULL,splinetime=NULL,intercs=FALSE,empir=NULL,m=3,parallel=TRUE,out_file=NULL
+                                   initialize = function(id = "imput_Amelia", polytime=NULL,splinetime=NULL,intercs=FALSE,empir=NULL,m=3,parallel=TRUE,out_file=NULL
                                    ) {
-                                     super$initialize(id, whole_task_dependent=TRUE,param_vals = list(col_0_1=col_0_1,polytime=polytime,splinetime=splinetime,intercs=intercs,empir=empir,m=m,parallel=parallel,out_file=out_file),
+                                     super$initialize(id, whole_task_dependent=TRUE,param_vals = list(polytime=polytime,splinetime=splinetime,intercs=intercs,empir=empir,m=m,parallel=parallel,out_file=out_file),
                                                       param_set= ParamSet$new(list(
                                                         'polytime'=ParamUty$new('polytime', default = NULL, tags = 'amelia'),
                                                         'splinetime'=ParamUty$new('splinetime',default = NULL,tags='amelia'),
                                                         'empir'=ParamUty$new('empir',default = NULL,tags='amelia'),
                                                         'parallel'=ParamLgl$new('parallel',default = TRUE,tags = 'amelia'),
                                                         'intercs'=ParamLgl$new('intercs',default = FALSE,tags='amelia'),
-                                                        'col_0_1'=ParamLgl$new('col_0_1',default = F,tags='amelia'),
+
                                                         'm'=ParamInt$new('m',lower = 1,upper = Inf,default = 3,tags='amelia'),
                                                         'out_file'=ParamUty$new('out_file',default = NULL,tags = 'amelia')
 
@@ -88,7 +86,7 @@ PipeOpAmelia <-  R6::R6Class("Amelia_imputation",lock_objects=FALSE,
                                          col_no_miss <- colnames(data_to_impute)[percent_of_missing==0]
 
 
-                                         data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,col_0_1 = self$param_set$values$col_0_1,
+                                         data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,
                                                                          parallel = self$param_set$values$parallel,polytime = self$param_set$values$polytime,
                                                                          splinetime = self$param_set$values$splinetime, intercs = self$param_set$values$intercs,
                                                                          empir = self$param_set$values$empir,m=self$param_set$values$m,
@@ -141,7 +139,7 @@ PipeOpAmelia <-  R6::R6Class("Amelia_imputation",lock_objects=FALSE,
                                          col_no_miss <- colnames(data_to_impute)[percent_of_missing==0]
 
 
-                                         data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,col_0_1 = self$param_set$values$col_0_1,
+                                         data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,
                                                                          parallel = self$param_set$values$parallel,polytime = self$param_set$values$polytime,
                                                                          splinetime = self$param_set$values$splinetime, intercs = self$param_set$values$intercs,
                                                                          empir = self$param_set$values$empir,m=self$param_set$values$m,
@@ -205,7 +203,5 @@ mlr_pipeops$add("Amelia_imputation", PipeOpAmelia)
 # #
 
 
-
-
 #  resample(task,graph_learner,rsmp("holdout"))
 #
diff --git a/EMMA_package/EMMA/R/PipeOPAmelia_T.R b/EMMA_package/EMMA/R/PipeOPAmelia_T.R
@@ -84,7 +84,9 @@ PipeOpAmelia_T <-  R6::R6Class("Amelia_imputation",lock_objects=FALSE,
                                                                        empir = self$param_set$values$empir,m=self$param_set$values$m,
                                                                        out_file=self$param_set$values$out_file)
 
-                                       task$cbind(as.data.table(cbind(targer,data_imputed)))
+                                       data_imputed <-  cbind(data_imputed,task$row_ids)
+                                       colnames(data_imputed)[ncol(data_imputed)] <- task$backend$primary_key
+                                       task$cbind(as.data.table(data_imputed))
 
                                      },
                                      .predict_task=function(task){
@@ -110,7 +112,9 @@ PipeOpAmelia_T <-  R6::R6Class("Amelia_imputation",lock_objects=FALSE,
                                                                        out_file=self$param_set$values$out_file)
 
 
-                                       task$cbind(as.data.table(cbind(targer,data_imputed)))
+                                       data_imputed <-  cbind(data_imputed,task$row_ids)
+                                       colnames(data_imputed)[ncol(data_imputed)] <- task$backend$primary_key
+                                       task$cbind(as.data.table(data_imputed))
 
 
 

diff --git a/EMMA_package/EMMA/R/PipeOPVIM_IRMI.R b/EMMA_package/EMMA/R/PipeOPVIM_IRMI.R
@@ -17,8 +17,6 @@
 #' Threshold for convergence, default \code{5}.
 #' \item \code{maxit} :: \code{integer(1)}\cr
 #' Maximum number of iterations, default \code{100}
-#' \item \code{col_0_1} :: \code{logical(1)}\cr
-#' Decides if add bonus column informing where imputation has been done. 0 - value was in dataset, 1 - value was imputed, default \code{FALSE}.
 #' \item \code{step} :: \code{logical(1)}\cr
 #' Stepwise model selection is applied when the parameter is set to TRUE, default \code{FALSE}.
 #' \item \code{robust} :: \code{logical(1)}\cr
@@ -38,14 +36,14 @@
 PipeOpVIM_IRMI <-  R6::R6Class("VIM_IRMI_imputation",lock_objects=FALSE,
                              inherit = PipeOpImpute,  # inherit from PipeOp
                              public = list(
-                               initialize = function(id = "imput_VIM_IRMI",eps=5,maxit=100,step=FALSE,robust=FALSE,init.method='kNN',force=FALSE,col_0_1= FALSE,
+                               initialize = function(id = "imput_VIM_IRMI",eps=5,maxit=100,step=FALSE,robust=FALSE,init.method='kNN',force=FALSE,
                                                      out_file=NULL
                                ) {
-                                 super$initialize(id, whole_task_dependent=TRUE, param_vals = list( col_0_1=col_0_1,eps=eps,maxit=maxit,step=step,robust=robust,
+                                 super$initialize(id, whole_task_dependent=TRUE, param_vals = list( eps=eps,maxit=maxit,step=step,robust=robust,
                                                                                                     init.method=init.method,force=force,out_file=out_file),
                                                   param_set= ParamSet$new(list(
 
-                                                    'col_0_1'=ParamLgl$new('col_0_1',default = F,tags='VIM_IRMI'),
+
                                                     'eps'=ParamDbl$new('eps',lower = 0,upper = Inf,default = 5,tags = 'VIM_IRMI'),
                                                     'maxit'=ParamInt$new('maxit',lower = 10,upper = Inf,default = 100,tags = 'VIM_IRMI'),
                                                     'step'=ParamLgl$new('step',default = FALSE,tags = 'VIM_IRMI'),
@@ -87,7 +85,7 @@ PipeOpVIM_IRMI <-  R6::R6Class("VIM_IRMI_imputation",lock_objects=FALSE,
 
 
                                    data_imputed <- autotune_VIM_Irmi(data_to_impute,col_type,percent_of_missing,eps = self$param_set$values$eps,maxit = self$param_set$values$maxit,
-                                                                     step = self$param_set$values$step,robust = self$param_set$values$robust,col_0_1 = self$param_set$values$col_0_1,
+                                                                     step = self$param_set$values$step,robust = self$param_set$values$robust,
                                                                      init.method = self$param_set$values$init.method,force = self$param_set$values$force,
                                                                      out_file =self$param_set$values$out_file)
 
@@ -138,7 +136,7 @@ PipeOpVIM_IRMI <-  R6::R6Class("VIM_IRMI_imputation",lock_objects=FALSE,
 
 
                                    data_imputed <- autotune_VIM_Irmi(data_to_impute,col_type,percent_of_missing,eps = self$param_set$values$eps,maxit = self$param_set$values$maxit,
-                                                                     step = self$param_set$values$step,robust = self$param_set$values$robust,col_0_1 = self$param_set$values$col_0_1,
+                                                                     step = self$param_set$values$step,robust = self$param_set$values$robust,
                                                                      init.method = self$param_set$values$init.method,force = self$param_set$values$force,
                                                                      out_file =self$param_set$values$out_file)
 

diff --git a/EMMA_package/EMMA/R/PipeOPmissForest.R b/EMMA_package/EMMA/R/PipeOPmissForest.R
@@ -22,8 +22,6 @@
 #' Vector with \emph{number of variables} values randomly sampled at each split, used only when optimize=TRUE, default \code{NULL}.
 #' \item \code{parallel} :: \code{logical(1)}\cr
 #' If TRUE parallel calculations are used, default \code{FALSE}.
-#' \item \code{col_0_1} :: \code{logical(1)}\cr
-#' Decides whether to add a bonus column informing where values were imputed. 0 - value was in dataset, 1 - value was imputed, default \code{FALSE}.
 #' \item \code{ntree} :: \code{integer(1)}\cr
 #' ntree from missForest function, default \code{100}.
 #' \item \code{optimize} :: \code{logical(1)}\cr
@@ -44,17 +42,16 @@ PipeOpmissForest <-  R6::R6Class("missForest_imputation",lock_objects=FALSE,
                            inherit = PipeOpImpute,  # inherit from PipeOp
                            public = list(
                              initialize = function(id = "imput_missForest", cores=NULL,ntree_set=c(100,200,500,1000),mtry_set=NULL,parallel=F
-                                                  ,col_0_1=FALSE,mtry=NULL,ntree=100,optimize=FALSE,maxiter=20,maxnodes=NULL,out_file=NULL
+                                                  ,mtry=NULL,ntree=100,optimize=FALSE,maxiter=20,maxnodes=NULL,out_file=NULL
                              ) {
                                super$initialize(id,whole_task_dependent=TRUE,param_vals = list(cores =cores,ntree_set =ntree_set,mtry_set=mtry_set,parallel=parallel,
-                                                                      col_0_1=col_0_1,mtry=mtry,ntree=ntree,optimize=optimize,
+                                                                      mtry=mtry,ntree=ntree,optimize=optimize,
                                                                       maxiter=maxiter,maxnodes=maxnodes,out_file=out_file),
                                                 param_set= ParamSet$new(list(
                                                   'ntree_set'=ParamUty$new('ntree_set', default = c(100,200,500,1000), tags = 'missForest'),
                                                   'cores'=ParamUty$new('cores',default = NULL,tags='missForest'),
                                                   'mtry_set'=ParamUty$new('mtry_set',default = NULL,tags='missForest'),
                                                   'parallel'=ParamLgl$new('parallel',default = FALSE,tags = 'missForest'),
-                                                  'col_0_1'=ParamLgl$new('col_0_1',default = F,tags='missForest'),
                                                   'mtry'=ParamUty$new('mtry',default = NULL,tags='missForest'),
                                                   'ntree'=ParamInt$new('ntree',lower = 10,upper = Inf,default = 100,tags='missForest'),
                                                   'optimize'=ParamLgl$new('optimize',default = FALSE,tags='missForest'),
@@ -100,7 +97,7 @@ PipeOpmissForest <-  R6::R6Class("missForest_imputation",lock_objects=FALSE,
                                  data_imputed <- autotune_missForest(data_to_impute,col_type,percent_of_missing = percent_of_missing,cores = self$param_set$values$cores,
                                                                      ntree_set = self$param_set$values$ntree_set,mtry_set = self$param_set$values$mtry_set,
                                                                      parallel = self$param_set$values$parallel,
-                                                                     col_0_1 = self$param_set$values$col_0_1,optimize = self$param_set$values$optimize,
+                                                                     optimize = self$param_set$values$optimize,
                                                                      ntree = self$param_set$values$ntree,mtry = self$param_set$values$mtry,
                                                                      maxiter=self$param_set$values$maxiter,maxnodes=self$param_set$values$maxnodes,verbose = F,
                                                                      out_file =self$param_set$values$out_file)
@@ -156,7 +153,7 @@ PipeOpmissForest <-  R6::R6Class("missForest_imputation",lock_objects=FALSE,
                                  data_imputed <- autotune_missForest(data_to_impute,col_type,percent_of_missing = percent_of_missing,cores = self$param_set$values$cores,
                                                                      ntree_set = self$param_set$values$ntree_set,mtry_set = self$param_set$values$mtry_set,
                                                                      parallel = self$param_set$values$parallel,
-                                                                     col_0_1 = self$param_set$values$col_0_1,optimize = self$param_set$values$optimize,
+                                                                     optimize = self$param_set$values$optimize,
                                                                      ntree = self$param_set$values$ntree,mtry = self$param_set$values$mtry,
                                                                      maxiter=self$param_set$values$maxiter,maxnodes=self$param_set$values$maxnodes,verbose = F,
                                                                      out_file =self$param_set$values$out_file)