diff --git a/DESCRIPTION b/DESCRIPTION index fc79e6496..68cfb6741 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.13.0.18 +Version: 0.13.0.19 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531")), diff --git a/NEWS.md b/NEWS.md index 6f85cf237..35e549ffa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -39,6 +39,9 @@ CHANGES * `data_summary()` also accepts the results of `bayestestR::ci()` as summary function (#483). +* `ranktransform()` has a new argument `zeros` to determine how zeros should be + handled when `sign = TRUE` (#573). + BUG FIXES * `describe_distribution()` no longer errors if the sample was too sparse to compute diff --git a/R/ranktransform.R b/R/ranktransform.R index f48c07981..b3ccce87c 100644 --- a/R/ranktransform.R +++ b/R/ranktransform.R @@ -10,6 +10,9 @@ #' @param method Treatment of ties. Can be one of `"average"` (default), #' `"first"`, `"last"`, `"random"`, `"max"` or `"min"`. See [rank()] for #' details. +#' @param zeros How to handle zeros. If `"na"` (default), they are marked as +#' `NA`. If `"signrank"`, they are kept during the ranking and marked as zeros. +#' This is only used when `sign = TRUE`. #' @param ... Arguments passed to or from other methods. #' @inheritParams extract_column_names #' @inheritParams standardize.data.frame @@ -19,8 +22,11 @@ #' @examples #' ranktransform(c(0, 1, 5, -5, -2)) #' -#' # Won't work -#' # ranktransform(c(0, 1, 5, -5, -2), sign = TRUE) +#' # By default, zeros are converted to NA +#' suppressWarnings( +#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE) +#' ) +#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE, zeros = "signrank") #' #' head(ranktransform(trees)) #' @return A rank-transformed object. @@ -38,6 +44,7 @@ ranktransform <- function(x, ...) { ranktransform.numeric <- function(x, sign = FALSE, method = "average", + zeros = "na", verbose = TRUE, ...) { # no change if all values are `NA`s @@ -45,6 +52,12 @@ ranktransform.numeric <- function(x, return(x) } + zeros <- match.arg(zeros, c("na", "signrank")) + method <- match.arg( + method, + c("average", "first", "last", "random", "max", "min") + ) + # Warning if only one value and return early if (insight::has_single_value(x)) { if (is.null(names(x))) { @@ -74,12 +87,18 @@ ranktransform.numeric <- function(x, } } - if (sign) { - ZEROES <- x == 0 - if (any(ZEROES) && verbose) insight::format_warning("Zeros detected. These cannot be sign-rank transformed.") - out <- rep(NA, length(x)) - out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]), ties.method = method, na.last = "keep") + if (zeros == "na") { + out <- rep(NA, length(x)) + ZEROES <- x == 0 + if (any(ZEROES) && verbose) insight::format_warning("Zeros detected. These cannot be sign-rank transformed.") + out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]), + ties.method = method, + na.last = "keep" + ) + } else if (zeros == "signrank") { + out <- sign(x) * rank(abs(x), ties.method = method, na.last = "keep") + } } else { out <- rank(x, ties.method = method, na.last = "keep") } @@ -102,6 +121,7 @@ ranktransform.grouped_df <- function(x, method = "average", ignore_case = FALSE, regex = FALSE, + zeros = "na", verbose = TRUE, ...) { info <- attributes(x) @@ -143,6 +163,7 @@ ranktransform.data.frame <- function(x, method = "average", ignore_case = FALSE, regex = FALSE, + zeros = "na", verbose = TRUE, ...) { # evaluate arguments diff --git a/man/ranktransform.Rd b/man/ranktransform.Rd index 44b0052c4..6bb7cfdc3 100644 --- a/man/ranktransform.Rd +++ b/man/ranktransform.Rd @@ -8,7 +8,14 @@ \usage{ ranktransform(x, ...) -\method{ranktransform}{numeric}(x, sign = FALSE, method = "average", verbose = TRUE, ...) +\method{ranktransform}{numeric}( + x, + sign = FALSE, + method = "average", + zeros = "na", + verbose = TRUE, + ... +) \method{ranktransform}{data.frame}( x, @@ -18,6 +25,7 @@ ranktransform(x, ...) method = "average", ignore_case = FALSE, regex = FALSE, + zeros = "na", verbose = TRUE, ... ) @@ -33,6 +41,10 @@ ranktransform(x, ...) \code{"first"}, \code{"last"}, \code{"random"}, \code{"max"} or \code{"min"}. See \code{\link[=rank]{rank()}} for details.} +\item{zeros}{How to handle zeros. If \code{"na"} (default), they are marked as +\code{NA}. If \code{"signrank"}, they are kept during the ranking and marked as zeros. +This is only used when \code{sign = TRUE}.} + \item{verbose}{Toggle warnings.} \item{select}{Variables that will be included when performing the required @@ -110,8 +122,11 @@ from the input data frame. \examples{ ranktransform(c(0, 1, 5, -5, -2)) -# Won't work -# ranktransform(c(0, 1, 5, -5, -2), sign = TRUE) +# By default, zeros are converted to NA +suppressWarnings( + ranktransform(c(0, 1, 5, -5, -2), sign = TRUE) +) +ranktransform(c(0, 1, 5, -5, -2), sign = TRUE, zeros = "signrank") head(ranktransform(trees)) } diff --git a/tests/testthat/test-ranktransform.R b/tests/testthat/test-ranktransform.R index 5996ba4f3..fc845cf11 100644 --- a/tests/testthat/test-ranktransform.R +++ b/tests/testthat/test-ranktransform.R @@ -46,6 +46,22 @@ test_that("signed rank works as expected", { )))) }) +test_that("argument 'zeros' works", { + x <- c(-1, 0, 2, -3, 4) + expect_warning( + ranktransform(x, sign = TRUE), + "cannot be sign-rank" + ) + expect_identical( + ranktransform(x, sign = TRUE, zeros = "signrank"), + c(-2, 0, 3, -4, 5) + ) + expect_error( + ranktransform(x, sign = TRUE, zeros = "foo"), + "should be one of" + ) +}) + test_that("ranktransform works with data frames", { set.seed(123) expect_snapshot(ranktransform(BOD))