diff --git a/NEWS.md b/NEWS.md index 8b6ba88da..d78e81ee9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # datawizard 0.9.2 +BREAKING CHANGES + +* The following arguments were deprecated in 0.5.0 and are now removed: + + * in `data_to_wide()`: `colnames_from`, `rows_from`, `sep` + * in `data_to_long()`: `colnames_to` + * in `data_partition()`: `training_proportion` + NEW FUNCTIONS * `data_summary()`, to compute summary statistics of (grouped) data frames. @@ -13,7 +21,7 @@ CHANGES argument, to compute weighted frequency tables. `include_na` allows to include or omit missing values from the table. Furthermore, a `by` argument was added, to compute crosstables (#479, #481). - + # datawizard 0.9.1 CHANGES @@ -121,7 +129,7 @@ CHANGES (similar to other data frame methods of transformation functions), to append recoded variables to the input data frame instead of overwriting existing variables. - + NEW FUNCTIONS * `rowid_as_column()` to complement `rownames_as_column()` (and to mimic diff --git a/R/data_partition.R b/R/data_partition.R index 60eb57e35..5953480c5 100644 --- a/R/data_partition.R +++ b/R/data_partition.R @@ -8,7 +8,6 @@ #' @param proportion Scalar (between 0 and 1) or numeric vector, indicating the #' proportion(s) of the training set(s). The sum of `proportion` must not be #' greater than 1. The remaining part will be used for the test set. -#' @param training_proportion Deprecated, please use `proportion`. #' @param group A character vector indicating the name(s) of the column(s) used #' for stratified partitioning. #' @param seed A random number generator seed. Enter an integer (e.g. 123) so @@ -50,7 +49,6 @@ data_partition <- function(data, seed = NULL, row_id = ".row_id", verbose = TRUE, - training_proportion = proportion, ...) { # validation checks data <- .coerce_to_dataframe(data) @@ -132,7 +130,9 @@ data_partition <- function(data, }) # we need to move all list elements one level higher. - if (!is.null(group)) { + if (is.null(group)) { + training_sets <- training_sets[[1]] + } else { # for grouped training sets, we need to row-bind all sampled training # sets from each group. currently, we have a list of data frames, # grouped by "group"; but we want one data frame per proportion that @@ -140,18 +140,16 @@ data_partition <- function(data, training_sets <- lapply(seq_along(proportion), function(p) { do.call(rbind, lapply(training_sets, function(i) i[[p]])) }) - } else { - # else, just move first list element one level higher - training_sets <- training_sets[[1]] } # use probabilies as element names names(training_sets) <- sprintf("p_%g", proportion) # remove all training set id's from data, add remaining data (= test set) + all_ids <- lapply(training_sets, data_extract, select = row_id, as_data_frame = FALSE) out <- c( training_sets, - list(test = data[-unlist(lapply(training_sets, data_extract, select = row_id, as_data_frame = FALSE), use.names = FALSE), ]) + list(test = data[-unlist(all_ids, use.names = FALSE), ]) ) lapply(out, `row.names<-`, NULL) diff --git a/R/data_to_long.R b/R/data_to_long.R index 6dd92d0a8..711793d14 100644 --- a/R/data_to_long.R +++ b/R/data_to_long.R @@ -25,7 +25,6 @@ #' @param cols Identical to `select`. This argument is here to ensure compatibility #' with `tidyr::pivot_longer()`. If both `select` and `cols` are provided, `cols` #' is used. -#' @param colnames_to Deprecated. Use `names_to` instead. #' #' @return If a tibble was provided as input, `reshape_longer()` also returns a #' tibble. Otherwise, it returns a data frame. @@ -78,17 +77,10 @@ data_to_long <- function(data, ignore_case = FALSE, regex = FALSE, ..., - cols, - colnames_to) { - # Check args - if (!missing(colnames_to)) { - .is_deprecated("colnames_to", "names_to") - if (is.null(names_to)) { - names_to <- colnames_to - } - } + cols) { # nolint # Prefer "cols" over "select" for compat with tidyr::pivot_longer + # nolint start if (!missing(cols)) { select <- substitute(cols) cols <- .select_nse( @@ -115,6 +107,7 @@ data_to_long <- function(data, ) } } + # nolint end # nothing to select? if (length(cols) == 0L) { @@ -205,7 +198,7 @@ data_to_long <- function(data, header = FALSE ) names(tmp) <- paste0("V", seq_len(ncol(tmp))) - tmp[tmp == ""] <- NA + tmp[tmp == ""] <- NA # nolint stacked_data$ind <- NULL stacked_data <- cbind(tmp, stacked_data) diff --git a/R/data_to_wide.R b/R/data_to_wide.R index b85072991..e48314866 100644 --- a/R/data_to_wide.R +++ b/R/data_to_wide.R @@ -25,9 +25,6 @@ #' missing values in the new columns created. #' @param verbose Toggle warnings. #' @param ... Not used for now. -#' @param colnames_from Deprecated. Use `names_from` instead. -#' @param rows_from Deprecated. Use `id_cols` instead. -#' @param sep Deprecated. Use `names_sep` instead. #' #' @return If a tibble was provided as input, `reshape_wider()` also returns a #' tibble. Otherwise, it returns a data frame. @@ -93,29 +90,8 @@ data_to_wide <- function(data, names_glue = NULL, values_fill = NULL, verbose = TRUE, - ..., - colnames_from, - rows_from, - sep) { - if (!missing(colnames_from)) { - .is_deprecated("colnames_from", "names_from") - if (is.null(names_from)) { - names_from <- colnames_from - } - } - if (!missing(rows_from)) { - .is_deprecated("rows_from", "id_cols") + ...) { if (is.null(id_cols)) { - id_cols <- rows_from - } - } - if (!missing(sep)) { - .is_deprecated("sep", "names_sep") - if (is.null(names_sep)) { - names_sep <- sep - } - } - if (is.null(id_cols)) { id_cols <- setdiff(names(data), c(names_from, values_from)) } @@ -218,28 +194,26 @@ data_to_wide <- function(data, if (!is.null(values_fill)) { if (length(values_fill) == 1L) { if (is.numeric(new_data[[values_from]])) { - if (!is.numeric(values_fill)) { - insight::format_error(paste0("`values_fill` must be of type numeric.")) - } else { + if (is.numeric(values_fill)) { new_data <- convert_na_to(new_data, replace_num = values_fill) + } else { + insight::format_error(paste0("`values_fill` must be of type numeric.")) } } else if (is.character(new_data[[values_from]])) { - if (!is.character(values_fill)) { - insight::format_error(paste0("`values_fill` must be of type character.")) - } else { + if (is.character(values_fill)) { new_data <- convert_na_to(new_data, replace_char = values_fill) + } else { + insight::format_error(paste0("`values_fill` must be of type character.")) } } else if (is.factor(new_data[[values_from]])) { - if (!is.factor(values_fill)) { - insight::format_error(paste0("`values_fill` must be of type factor.")) - } else { + if (is.factor(values_fill)) { new_data <- convert_na_to(new_data, replace_fac = values_fill) + } else { + insight::format_error(paste0("`values_fill` must be of type factor.")) } } - } else { - if (verbose) { - insight::format_error("`values_fill` must be of length 1.") - } + } else if (verbose) { + insight::format_error("`values_fill` must be of length 1.") } } diff --git a/man/data_partition.Rd b/man/data_partition.Rd index 49ae37536..8e7cae95b 100644 --- a/man/data_partition.Rd +++ b/man/data_partition.Rd @@ -11,7 +11,6 @@ data_partition( seed = NULL, row_id = ".row_id", verbose = TRUE, - training_proportion = proportion, ... ) } @@ -33,8 +32,6 @@ contains the row-id's.} \item{verbose}{Toggle messages and warnings.} -\item{training_proportion}{Deprecated, please use \code{proportion}.} - \item{...}{Other arguments passed to or from other functions.} } \value{ diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index cc5989dee..f6abe5f39 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -18,8 +18,7 @@ data_to_long( ignore_case = FALSE, regex = FALSE, ..., - cols, - colnames_to + cols ) reshape_longer( @@ -35,8 +34,7 @@ reshape_longer( ignore_case = FALSE, regex = FALSE, ..., - cols, - colnames_to + cols ) } \arguments{ @@ -113,8 +111,6 @@ functions (see 'Details'), this argument may be used as workaround.} \item{cols}{Identical to \code{select}. This argument is here to ensure compatibility with \code{tidyr::pivot_longer()}. If both \code{select} and \code{cols} are provided, \code{cols} is used.} - -\item{colnames_to}{Deprecated. Use \code{names_to} instead.} } \value{ If a tibble was provided as input, \code{reshape_longer()} also returns a diff --git a/man/data_to_wide.Rd b/man/data_to_wide.Rd index 2eaa72439..38ede8352 100644 --- a/man/data_to_wide.Rd +++ b/man/data_to_wide.Rd @@ -15,10 +15,7 @@ data_to_wide( names_glue = NULL, values_fill = NULL, verbose = TRUE, - ..., - colnames_from, - rows_from, - sep + ... ) reshape_wider( @@ -31,10 +28,7 @@ reshape_wider( names_glue = NULL, values_fill = NULL, verbose = TRUE, - ..., - colnames_from, - rows_from, - sep + ... ) } \arguments{ @@ -68,12 +62,6 @@ missing values in the new columns created.} \item{verbose}{Toggle warnings.} \item{...}{Not used for now.} - -\item{colnames_from}{Deprecated. Use \code{names_from} instead.} - -\item{rows_from}{Deprecated. Use \code{id_cols} instead.} - -\item{sep}{Deprecated. Use \code{names_sep} instead.} } \value{ If a tibble was provided as input, \code{reshape_wider()} also returns a