Skip to content

Commit

Permalink
Remove col_0_1 param from PipeOpImpute.Ref ModelOriented#38
Browse files Browse the repository at this point in the history
  • Loading branch information
jjanborowka committed Sep 3, 2020
1 parent a55e867 commit adcbbec
Show file tree
Hide file tree
Showing 26 changed files with 79 additions and 130 deletions.
14 changes: 5 additions & 9 deletions EMMA_package/EMMA/R/PipeOPAmelia.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
#' Number indicating level of the empirical (or ridge) prior. This prior shrinks the covariances of the data, but keeps the means and variances the same for problems of high missingness, small N's or large correlations among the variables. Should be kept small, perhaps 0.5 to 1 percent of the rows of the data; a reasonable upper bound is around 10 percent of the rows of the data. If empir is not set, empir=nrow(df)*0.015, default \code{NULL}.
#' \item \code{parallel} :: \code{logical(1)}\cr
#' If \code{TRUE}, parallel calculation is used, default \code{TRUE}.
#' \item \code{col_0_1} :: \code{logical(1)}\cr
#' Decides whether to add a bonus column informing where values were imputed. 0 - value was in dataset, 1 - value was imputed, default \code{FALSE}.
#' \item \code{out_file} :: \code{character(1)}\cr
#' Output log file location. If the file already exists, log messages are appended to it. If NULL, no log is produced, default \code{NULL}.
#'}
Expand All @@ -37,16 +35,16 @@
PipeOpAmelia <- R6::R6Class("Amelia_imputation",lock_objects=FALSE,
inherit = PipeOpImpute, # inherit from PipeOp
public = list(
initialize = function(id = "imput_Amelia", col_0_1=FALSE,polytime=NULL,splinetime=NULL,intercs=FALSE,empir=NULL,m=3,parallel=TRUE,out_file=NULL
initialize = function(id = "imput_Amelia", polytime=NULL,splinetime=NULL,intercs=FALSE,empir=NULL,m=3,parallel=TRUE,out_file=NULL
) {
super$initialize(id, whole_task_dependent=TRUE,param_vals = list(col_0_1=col_0_1,polytime=polytime,splinetime=splinetime,intercs=intercs,empir=empir,m=m,parallel=parallel,out_file=out_file),
super$initialize(id, whole_task_dependent=TRUE,param_vals = list(polytime=polytime,splinetime=splinetime,intercs=intercs,empir=empir,m=m,parallel=parallel,out_file=out_file),
param_set= ParamSet$new(list(
'polytime'=ParamUty$new('polytime', default = NULL, tags = 'amelia'),
'splinetime'=ParamUty$new('splinetime',default = NULL,tags='amelia'),
'empir'=ParamUty$new('empir',default = NULL,tags='amelia'),
'parallel'=ParamLgl$new('parallel',default = TRUE,tags = 'amelia'),
'intercs'=ParamLgl$new('intercs',default = FALSE,tags='amelia'),
'col_0_1'=ParamLgl$new('col_0_1',default = F,tags='amelia'),

'm'=ParamInt$new('m',lower = 1,upper = Inf,default = 3,tags='amelia'),
'out_file'=ParamUty$new('out_file',default = NULL,tags = 'amelia')

Expand Down Expand Up @@ -88,7 +86,7 @@ PipeOpAmelia <- R6::R6Class("Amelia_imputation",lock_objects=FALSE,
col_no_miss <- colnames(data_to_impute)[percent_of_missing==0]


data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,col_0_1 = self$param_set$values$col_0_1,
data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,
parallel = self$param_set$values$parallel,polytime = self$param_set$values$polytime,
splinetime = self$param_set$values$splinetime, intercs = self$param_set$values$intercs,
empir = self$param_set$values$empir,m=self$param_set$values$m,
Expand Down Expand Up @@ -141,7 +139,7 @@ PipeOpAmelia <- R6::R6Class("Amelia_imputation",lock_objects=FALSE,
col_no_miss <- colnames(data_to_impute)[percent_of_missing==0]


data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,col_0_1 = self$param_set$values$col_0_1,
data_imputed <- autotune_Amelia(data_to_impute,col_type,percent_of_missing,
parallel = self$param_set$values$parallel,polytime = self$param_set$values$polytime,
splinetime = self$param_set$values$splinetime, intercs = self$param_set$values$intercs,
empir = self$param_set$values$empir,m=self$param_set$values$m,
Expand Down Expand Up @@ -205,7 +203,5 @@ mlr_pipeops$add("Amelia_imputation", PipeOpAmelia)
# #




# resample(task,graph_learner,rsmp("holdout"))
#
8 changes: 6 additions & 2 deletions EMMA_package/EMMA/R/PipeOPAmelia_T.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ PipeOpAmelia_T <- R6::R6Class("Amelia_imputation",lock_objects=FALSE,
empir = self$param_set$values$empir,m=self$param_set$values$m,
out_file=self$param_set$values$out_file)

task$cbind(as.data.table(cbind(targer,data_imputed)))
data_imputed <- cbind(data_imputed,task$row_ids)
colnames(data_imputed)[ncol(data_imputed)] <- task$backend$primary_key
task$cbind(as.data.table(data_imputed))

},
.predict_task=function(task){
Expand All @@ -110,7 +112,9 @@ PipeOpAmelia_T <- R6::R6Class("Amelia_imputation",lock_objects=FALSE,
out_file=self$param_set$values$out_file)


task$cbind(as.data.table(cbind(targer,data_imputed)))
data_imputed <- cbind(data_imputed,task$row_ids)
colnames(data_imputed)[ncol(data_imputed)] <- task$backend$primary_key
task$cbind(as.data.table(data_imputed))



Expand Down
12 changes: 5 additions & 7 deletions EMMA_package/EMMA/R/PipeOPVIM_IRMI.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
#' Threshold for convergence, default \code{5}.
#' \item \code{maxit} :: \code{integer(1)}\cr
#' Maximum number of iterations, default \code{100}
#' \item \code{col_0_1} :: \code{logical(1)}\cr
#' Decides if add bonus column informing where imputation has been done. 0 - value was in dataset, 1 - value was imputed, default \code{FALSE}.
#' \item \code{step} :: \code{logical(1)}\cr
#' Stepwise model selection is applied when the parameter is set to TRUE, default \code{FALSE}.
#' \item \code{robust} :: \code{logical(1)}\cr
Expand All @@ -38,14 +36,14 @@
PipeOpVIM_IRMI <- R6::R6Class("VIM_IRMI_imputation",lock_objects=FALSE,
inherit = PipeOpImpute, # inherit from PipeOp
public = list(
initialize = function(id = "imput_VIM_IRMI",eps=5,maxit=100,step=FALSE,robust=FALSE,init.method='kNN',force=FALSE,col_0_1= FALSE,
initialize = function(id = "imput_VIM_IRMI",eps=5,maxit=100,step=FALSE,robust=FALSE,init.method='kNN',force=FALSE,
out_file=NULL
) {
super$initialize(id, whole_task_dependent=TRUE, param_vals = list( col_0_1=col_0_1,eps=eps,maxit=maxit,step=step,robust=robust,
super$initialize(id, whole_task_dependent=TRUE, param_vals = list( eps=eps,maxit=maxit,step=step,robust=robust,
init.method=init.method,force=force,out_file=out_file),
param_set= ParamSet$new(list(

'col_0_1'=ParamLgl$new('col_0_1',default = F,tags='VIM_IRMI'),

'eps'=ParamDbl$new('eps',lower = 0,upper = Inf,default = 5,tags = 'VIM_IRMI'),
'maxit'=ParamInt$new('maxit',lower = 10,upper = Inf,default = 100,tags = 'VIM_IRMI'),
'step'=ParamLgl$new('step',default = FALSE,tags = 'VIM_IRMI'),
Expand Down Expand Up @@ -87,7 +85,7 @@ PipeOpVIM_IRMI <- R6::R6Class("VIM_IRMI_imputation",lock_objects=FALSE,


data_imputed <- autotune_VIM_Irmi(data_to_impute,col_type,percent_of_missing,eps = self$param_set$values$eps,maxit = self$param_set$values$maxit,
step = self$param_set$values$step,robust = self$param_set$values$robust,col_0_1 = self$param_set$values$col_0_1,
step = self$param_set$values$step,robust = self$param_set$values$robust,
init.method = self$param_set$values$init.method,force = self$param_set$values$force,
out_file =self$param_set$values$out_file)

Expand Down Expand Up @@ -138,7 +136,7 @@ PipeOpVIM_IRMI <- R6::R6Class("VIM_IRMI_imputation",lock_objects=FALSE,


data_imputed <- autotune_VIM_Irmi(data_to_impute,col_type,percent_of_missing,eps = self$param_set$values$eps,maxit = self$param_set$values$maxit,
step = self$param_set$values$step,robust = self$param_set$values$robust,col_0_1 = self$param_set$values$col_0_1,
step = self$param_set$values$step,robust = self$param_set$values$robust,
init.method = self$param_set$values$init.method,force = self$param_set$values$force,
out_file =self$param_set$values$out_file)

Expand Down
11 changes: 4 additions & 7 deletions EMMA_package/EMMA/R/PipeOPmissForest.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#' Vector with \emph{number of variables} values randomly sampled at each split, used only when optimize=TRUE, default \code{NULL}.
#' \item \code{parallel} :: \code{logical(1)}\cr
#' If TRUE parallel calculations are used, default \code{FALSE}.
#' \item \code{col_0_1} :: \code{logical(1)}\cr
#' Decides whether to add a bonus column informing where values were imputed. 0 - value was in dataset, 1 - value was imputed, default \code{FALSE}.
#' \item \code{ntree} :: \code{integer(1)}\cr
#' ntree from missForest function, default \code{100}.
#' \item \code{optimize} :: \code{logical(1)}\cr
Expand All @@ -44,17 +42,16 @@ PipeOpmissForest <- R6::R6Class("missForest_imputation",lock_objects=FALSE,
inherit = PipeOpImpute, # inherit from PipeOp
public = list(
initialize = function(id = "imput_missForest", cores=NULL,ntree_set=c(100,200,500,1000),mtry_set=NULL,parallel=F
,col_0_1=FALSE,mtry=NULL,ntree=100,optimize=FALSE,maxiter=20,maxnodes=NULL,out_file=NULL
,mtry=NULL,ntree=100,optimize=FALSE,maxiter=20,maxnodes=NULL,out_file=NULL
) {
super$initialize(id,whole_task_dependent=TRUE,param_vals = list(cores =cores,ntree_set =ntree_set,mtry_set=mtry_set,parallel=parallel,
col_0_1=col_0_1,mtry=mtry,ntree=ntree,optimize=optimize,
mtry=mtry,ntree=ntree,optimize=optimize,
maxiter=maxiter,maxnodes=maxnodes,out_file=out_file),
param_set= ParamSet$new(list(
'ntree_set'=ParamUty$new('ntree_set', default = c(100,200,500,1000), tags = 'missForest'),
'cores'=ParamUty$new('cores',default = NULL,tags='missForest'),
'mtry_set'=ParamUty$new('mtry_set',default = NULL,tags='missForest'),
'parallel'=ParamLgl$new('parallel',default = FALSE,tags = 'missForest'),
'col_0_1'=ParamLgl$new('col_0_1',default = F,tags='missForest'),
'mtry'=ParamUty$new('mtry',default = NULL,tags='missForest'),
'ntree'=ParamInt$new('ntree',lower = 10,upper = Inf,default = 100,tags='missForest'),
'optimize'=ParamLgl$new('optimize',default = FALSE,tags='missForest'),
Expand Down Expand Up @@ -100,7 +97,7 @@ PipeOpmissForest <- R6::R6Class("missForest_imputation",lock_objects=FALSE,
data_imputed <- autotune_missForest(data_to_impute,col_type,percent_of_missing = percent_of_missing,cores = self$param_set$values$cores,
ntree_set = self$param_set$values$ntree_set,mtry_set = self$param_set$values$mtry_set,
parallel = self$param_set$values$parallel,
col_0_1 = self$param_set$values$col_0_1,optimize = self$param_set$values$optimize,
optimize = self$param_set$values$optimize,
ntree = self$param_set$values$ntree,mtry = self$param_set$values$mtry,
maxiter=self$param_set$values$maxiter,maxnodes=self$param_set$values$maxnodes,verbose = F,
out_file =self$param_set$values$out_file)
Expand Down Expand Up @@ -156,7 +153,7 @@ PipeOpmissForest <- R6::R6Class("missForest_imputation",lock_objects=FALSE,
data_imputed <- autotune_missForest(data_to_impute,col_type,percent_of_missing = percent_of_missing,cores = self$param_set$values$cores,
ntree_set = self$param_set$values$ntree_set,mtry_set = self$param_set$values$mtry_set,
parallel = self$param_set$values$parallel,
col_0_1 = self$param_set$values$col_0_1,optimize = self$param_set$values$optimize,
optimize = self$param_set$values$optimize,
ntree = self$param_set$values$ntree,mtry = self$param_set$values$mtry,
maxiter=self$param_set$values$maxiter,maxnodes=self$param_set$values$maxnodes,verbose = F,
out_file =self$param_set$values$out_file)
Expand Down
Loading

0 comments on commit adcbbec

Please sign in to comment.