Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rescale() can expand ranges #467

Merged
merged 7 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.9.0.1
Version: 0.9.0.2
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down Expand Up @@ -33,7 +33,7 @@ BugReports: https://github.com/easystats/datawizard/issues
Depends:
R (>= 3.6)
Imports:
insight (>= 0.19.4),
insight (>= 0.19.6),
stats,
utils
Suggests:
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# datawizard 0.9.0.9000 (development version)

CHANGES

* `rescale()` gains `multiply` and `add` arguments, to expand ranges by a given
factor or value.

# datawizard 0.9.0

NEW FUNCTIONS
Expand Down
99 changes: 94 additions & 5 deletions R/data_rescale.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
#' Rescale Variables to a New Range
#'
#' Rescale variables to a new range.
#' Can also be used to reverse-score variables (change the keying/scoring direction).
#' Rescale variables to a new range. Can also be used to reverse-score variables
#' (change the keying/scoring direction), or to expand a range.
#'
#' @inheritParams categorize
#' @inheritParams find_columns
#' @inheritParams standardize.data.frame
#'
#' @param to Numeric vector of length 2 giving the new range that the variable will have after rescaling.
#' To reverse-score a variable, the range should be given with the maximum value first.
#' See examples.
#' @param to Numeric vector of length 2 giving the new range that the variable
#' will have after rescaling. To reverse-score a variable, the range should
#' be given with the maximum value first. See examples.
#' @param multiply If not `NULL`, `to` is ignored and `multiply` will be used,
#' giving the factor by which the actual range of `x` should be expanded.
#' For example, if a vector ranges from 5 to 15 and `multiply = 1.1`, the current
#' range of 10 will be expanded by the factor of 1.1, giving a new range of
#' 11. Thus, the rescaled vector would range from 4.5 to 15.5.
#' @param add A vector of length 1 or 2. If not `NULL`, `to` is ignored and `add`
#' will be used, giving the amount by which the minimum and maximum of the
#' actual range of `x` should be expanded. For example, if a vector ranges from
#' 5 to 15 and `add = 1`, the expanded range will run from 4 to 16. If `add` is
#' of length 2, then the first value is used for the lower bound and the second
#' value for the upper bound.
#' @param range Initial (old) range of values. If `NULL`, will take the range of
#' the input vector (`range(x)`).
#' @param ... Arguments passed to or from other methods.
Expand Down Expand Up @@ -37,6 +48,21 @@
#' "Sepal.Length" = c(0, 1),
#' "Petal.Length" = c(-1, 0)
#' )))
#'
#' # "expand" ranges by a factor or a given value
#' x <- 5:15
#' x
#' # both will expand the range by 10%
#' rescale(x, multiply = 1.1)
#' rescale(x, add = 0.5)
#'
#' # expand range by different values
#' rescale(x, add = c(1, 3))
#'
#' # Specify list of multipliers
#' d <- data.frame(x = 5:15, y = 5:15)
#' rescale(d, multiply = list(x = 1.1, y = 0.5))
#'
#' @inherit data_rename
#'
#' @return A rescaled object.
Expand Down Expand Up @@ -75,6 +101,8 @@ rescale.default <- function(x, verbose = TRUE, ...) {
#' @export
rescale.numeric <- function(x,
to = c(0, 100),
multiply = NULL,
add = NULL,
range = NULL,
verbose = TRUE,
...) {
Expand All @@ -91,6 +119,9 @@ rescale.numeric <- function(x,
range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE))
}

# check if user specified "multiply" or "add", and then update "to"
to <- .update_to(x, to, multiply, add)

# called from "makepredictcal()"? Then we have additional arguments
dot_args <- list(...)
required_dot_args <- c("min_value", "max_value", "new_min", "new_max")
Expand Down Expand Up @@ -144,6 +175,8 @@ rescale.grouped_df <- function(x,
select = NULL,
exclude = NULL,
to = c(0, 100),
multiply = NULL,
add = NULL,
range = NULL,
append = FALSE,
ignore_case = FALSE,
Expand Down Expand Up @@ -188,6 +221,8 @@ rescale.grouped_df <- function(x,
select = select,
exclude = exclude,
to = to,
multiply = multiply,
add = add,
range = range,
append = FALSE, # need to set to FALSE here, else variable will be doubled
add_transform_class = FALSE,
Expand All @@ -207,6 +242,8 @@ rescale.data.frame <- function(x,
select = NULL,
exclude = NULL,
to = c(0, 100),
multiply = NULL,
add = NULL,
range = NULL,
append = FALSE,
ignore_case = FALSE,
Expand Down Expand Up @@ -245,9 +282,61 @@ rescale.data.frame <- function(x,
if (!is.list(to)) {
to <- stats::setNames(rep(list(to), length(select)), select)
}
# Transform the 'multiply' so that it is a list now
if (!is.null(multiply) && !is.list(multiply)) {
multiply <- stats::setNames(rep(list(multiply), length(select)), select)
}
# Transform the 'add' so that it is a list now
if (!is.null(add) && !is.list(add)) {
add <- stats::setNames(rep(list(add), length(select)), select)
}
# update "to" if user specified "multiply" or "add"
to[] <- lapply(names(to), function(i) {
.update_to(x[[i]], to[[i]], multiply[[i]], add[[i]])
})

x[select] <- as.data.frame(sapply(select, function(n) {
rescale(x[[n]], to = to[[n]], range = range[[n]], add_transform_class = FALSE)
}, simplify = FALSE))
x
}


# helper ----------------------------------------------------------------------

# Derive the target range `to` from `multiply` or `add`, if either was given.
#
# x:        vector whose observed minimum and maximum (ignoring `NA`) anchor
#           the expanded range.
# to:       target range; returned unchanged when both `multiply` and `add`
#           are `NULL`.
# multiply: `NULL`, or a single factor by which the observed range of `x` is
#           scaled. The extra width is split evenly between both ends.
# add:      `NULL`, or a vector of length 1 or 2 giving the amount subtracted
#           from the minimum and added to the maximum of `x`. With length 2,
#           the first value applies to the lower and the second to the upper
#           bound.
#
# Returns `to` as is, or `c(min(x) - lower, max(x) + upper)`.
# Errors if both `multiply` and `add` are given, or if either one has an
# invalid length (including length 0).
.update_to <- function(x, to, multiply, add) {
  has_multiply <- !is.null(multiply)
  has_add <- !is.null(add)

  # nothing to do: keep the range the user asked for
  if (!has_multiply && !has_add) {
    return(to)
  }
  # only one of "multiply" or "add" can be specified
  if (has_multiply && has_add) {
    insight::format_error("Only one of `multiply` or `add` can be specified.")
  }
  # exact length checks: a zero-length value would otherwise slip through and
  # silently produce a zero-length `to`
  if (has_multiply && length(multiply) != 1) {
    insight::format_error("The length of `multiply` must be 1.")
  }
  if (has_add && !length(add) %in% c(1L, 2L)) {
    insight::format_error("The length of `add` must be 1 or 2.")
  }

  x_range <- range(x, na.rm = TRUE)

  # a multiplier is translated into the equivalent symmetric "add" value:
  # half of the additional width goes to each end of the range
  if (has_multiply) {
    add <- (diff(x_range) * (multiply - 1)) / 2
  }

  # first element expands the lower bound, last element the upper bound
  # (both are the same element when `add` has length 1)
  c(x_range[1] - add[1], x_range[2] + add[length(add)])
}
50 changes: 44 additions & 6 deletions man/rescale.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions tests/testthat/test-data_rescale.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,67 @@ test_that("data_rescale regex", {
ignore_attr = TRUE
)
})


# expanding range ------------------------------
# Covers the `multiply` and `add` arguments of `rescale()` for plain vectors
# and for data frames, including the argument validation errors.
test_that("data_rescale can expand range", {
  # for vectors
  x <- 5:15
  expect_equal(
    rescale(x, multiply = 1.1),
    c(4.5, 5.6, 6.7, 7.8, 8.9, 10, 11.1, 12.2, 13.3, 14.4, 15.5),
    ignore_attr = TRUE
  )
  # a range of 10 multiplied by 1.1 is the same as adding 0.5 at both ends
  expect_equal(
    rescale(x, multiply = 1.1),
    rescale(x, add = 0.5),
    ignore_attr = TRUE
  )
  # `regexp` is spelled out in full; `regex` only works via partial matching
  expect_error(
    rescale(x, multiply = 0.9, add = 1),
    regexp = "Only one of"
  )
  expect_error(
    rescale(x, multiply = c(1.2, 1.4)),
    regexp = "The length of"
  )

  # different values for add
  expect_equal(
    rescale(x, add = c(1, 3)),
    c(4, 5.4, 6.8, 8.2, 9.6, 11, 12.4, 13.8, 15.2, 16.6, 18),
    ignore_attr = TRUE
  )
  expect_error(rescale(x, add = 1:3), regexp = "The length of")

  # works with NA
  expect_equal(
    rescale(rep(NA_real_, 3), multiply = 1.1),
    rep(NA_real_, 3),
    ignore_attr = TRUE
  )
  expect_equal(
    rescale(rep(NA_real_, 3), add = 2),
    rep(NA_real_, 3),
    ignore_attr = TRUE
  )

  # for data frames
  d <- data.frame(x = 5:15, y = 5:15)
  expect_equal(
    rescale(d, multiply = 1.1),
    rescale(d, add = 0.5),
    ignore_attr = TRUE
  )
  # per-column multipliers: 0.5 shrinks the range of 10 by 2.5 on each side
  expect_equal(
    rescale(d, multiply = list(x = 1.1, y = 0.5)),
    rescale(d, add = list(x = 0.5, y = -2.5)),
    ignore_attr = TRUE
  )
  # data frames accept multiple add-values per column
  out <- rescale(d, add = list(x = c(1, 3), y = c(2, 4)))
  expect_equal(
    out$x,
    rescale(d$x, add = c(1, 3)),
    ignore_attr = TRUE
  )
  expect_equal(
    out$y,
    rescale(d$y, add = c(2, 4)),
    ignore_attr = TRUE
  )

  expect_error(
    rescale(d, multiply = 0.9, add = 1),
    regexp = "Only one of"
  )
  expect_error(
    rescale(d, multiply = list(x = 0.9, y = 2), add = list(y = 1)),
    regexp = "Only one of"
  )
  expect_error(
    rescale(d, multiply = c(0.9, 1.5)),
    regexp = "The length of"
  )
})
Loading