Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New argument zeros in ranktransform() #573

Merged
merged 3 commits into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.13.0.18
Version: 0.13.0.19
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531")),
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ CHANGES
* `data_summary()` also accepts the results of `bayestestR::ci()` as summary
function (#483).

* `ranktransform()` has a new argument `zeros` to determine how zeros should be
handled when `sign = TRUE` (#573).

BUG FIXES

* `describe_distribution()` no longer errors if the sample was too sparse to compute
Expand Down
54 changes: 45 additions & 9 deletions R/ranktransform.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#' @param method Treatment of ties. Can be one of `"average"` (default),
#' `"first"`, `"last"`, `"random"`, `"max"` or `"min"`. See [rank()] for
#' details.
#' @param zeros How to handle zeros when `sign = TRUE`. If `"na"` (default),
#' zeros are excluded from the ranking and returned as `NA`. If `"signrank"`,
#' zeros are included in the ranking and returned as `0`.
#' This argument only has an effect when `sign = TRUE`.
#' @param ... Arguments passed to or from other methods.
#' @inheritParams extract_column_names
#' @inheritParams standardize.data.frame
Expand All @@ -19,8 +22,11 @@
#' @examples
#' ranktransform(c(0, 1, 5, -5, -2))
#'
#' # Won't work
#' # ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)
#' # By default, zeros are converted to NA
#' suppressWarnings(
#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)
#' )
#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE, zeros = "signrank")
#'
#' head(ranktransform(trees))
#' @return A rank-transformed object.
Expand All @@ -38,13 +44,20 @@ ranktransform <- function(x, ...) {
ranktransform.numeric <- function(x,
sign = FALSE,
method = "average",
zeros = "na",
verbose = TRUE,
...) {
# no change if all values are `NA`s
if (all(is.na(x))) {
return(x)
}

zeros <- match.arg(zeros, c("na", "signrank"))
method <- match.arg(
method,
c("average", "first", "last", "random", "max", "min")
)

# Warning if only one value and return early
if (insight::has_single_value(x)) {
if (is.null(names(x))) {
Expand All @@ -54,7 +67,13 @@ ranktransform.numeric <- function(x,
}

if (verbose) {
insight::format_warning(paste0("Variable `", name, "` contains only one unique value and will not be normalized."))
insight::format_warning(
paste0(
"Variable `",
name,
"` contains only one unique value and will not be normalized."
)
)
}

return(x)
Expand All @@ -70,16 +89,31 @@ ranktransform.numeric <- function(x,
}

if (verbose) {
insight::format_warning(paste0("Variable `", name, "` contains only two different values. Consider converting it to a factor."))
# nolint
insight::format_warning(
paste0(
"Variable `",
name,
"` contains only two different values. Consider converting it to a factor."
)
)
}
}


if (sign) {
ZEROES <- x == 0
if (any(ZEROES) && verbose) insight::format_warning("Zeros detected. These cannot be sign-rank transformed.")
out <- rep(NA, length(x))
out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]), ties.method = method, na.last = "keep")
if (zeros == "na") {
out <- rep(NA, length(x))
ZEROES <- x == 0
if (any(ZEROES) && verbose) {
insight::format_warning("Zeros detected. These cannot be sign-rank transformed.") # nolint
}
out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]),
ties.method = method,
na.last = "keep"
)
} else if (zeros == "signrank") {
out <- sign(x) * rank(abs(x), ties.method = method, na.last = "keep")
}
} else {
out <- rank(x, ties.method = method, na.last = "keep")
}
Expand All @@ -102,6 +136,7 @@ ranktransform.grouped_df <- function(x,
method = "average",
ignore_case = FALSE,
regex = FALSE,
zeros = "na",
verbose = TRUE,
...) {
info <- attributes(x)
Expand Down Expand Up @@ -143,6 +178,7 @@ ranktransform.data.frame <- function(x,
method = "average",
ignore_case = FALSE,
regex = FALSE,
zeros = "na",
verbose = TRUE,
...) {
# evaluate arguments
Expand Down
21 changes: 18 additions & 3 deletions man/ranktransform.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 22 additions & 2 deletions tests/testthat/test-ranktransform.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@ test_that("signed rank works as expected", {
))))
})

test_that("argument 'zeros' works", {
  x <- c(-1, 0, 2, -3, 4)

  # Default (`zeros = "na"`): a warning is raised when zeros are present ...
  expect_warning(
    ranktransform(x, sign = TRUE),
    "cannot be sign-rank"
  )
  # ... and zeros are left out of the ranking and returned as NA. The
  # remaining values are ranked on their absolute value and keep their sign.
  expect_identical(
    ranktransform(x, sign = TRUE, verbose = FALSE),
    c(-1, NA, 2, -3, 4)
  )
  # Spelling out the default must give the same result as omitting it.
  expect_identical(
    ranktransform(x, sign = TRUE, zeros = "na", verbose = FALSE),
    c(-1, NA, 2, -3, 4)
  )

  # `zeros = "signrank"`: zeros take part in the ranking (shifting the other
  # ranks up by one here) and come back as 0 because their sign is 0.
  expect_identical(
    ranktransform(x, sign = TRUE, zeros = "signrank"),
    c(-2, 0, 3, -4, 5)
  )

  # `zeros` only matters for signed ranks; plain ranks are unaffected.
  expect_identical(
    ranktransform(x, zeros = "signrank"),
    ranktransform(x)
  )

  # Invalid choices are rejected by `match.arg()`.
  expect_error(
    ranktransform(x, sign = TRUE, zeros = "foo"),
    "should be one of"
  )
})

test_that("ranktransform works with data frames", {
set.seed(123)
expect_snapshot(ranktransform(BOD))
Expand All @@ -58,9 +74,9 @@ test_that("ranktransform works with data frames (grouped data)", {
skip_if_not_installed("poorman")

set.seed(123)
value1 <- sample(1:20, 9, replace = TRUE)
value1 <- sample.int(20, 9, replace = TRUE)
set.seed(456)
value2 <- sample(1:20, 9, replace = TRUE)
value2 <- sample.int(20, 9, replace = TRUE)

test_df <- data.frame(
id = rep(c("A", "B", "C"), each = 3),
Expand All @@ -69,6 +85,7 @@ test_that("ranktransform works with data frames (grouped data)", {
stringsAsFactors = FALSE
)

# nolint start: nested_pipe_linter
expect_identical(
test_df %>%
poorman::group_by(id) %>%
Expand All @@ -81,6 +98,7 @@ test_that("ranktransform works with data frames (grouped data)", {
stringsAsFactors = FALSE
)
)
# nolint end
})


Expand All @@ -99,6 +117,7 @@ test_that("ranktransform works with data frames containing NAs (grouped data)",
stringsAsFactors = FALSE
)

# nolint start: nested_pipe_linter
expect_identical(
test_df %>%
poorman::group_by(id) %>%
Expand All @@ -111,6 +130,7 @@ test_that("ranktransform works with data frames containing NAs (grouped data)",
stringsAsFactors = FALSE
)
)
# nolint end
})

# select helpers ------------------------------
Expand Down
Loading