Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow glue-styled pattern for data_rename() #563

Merged
merged 13 commits into from
Nov 27, 2024
48 changes: 32 additions & 16 deletions R/data_rename.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
#' - `NULL`, in which case columns are numbered in sequential order.
#' - A string (i.e. character vector of length 1) with a "glue" styled pattern.
#' Currently supported tokens are:
#' - `{col}` (or `{name}`), which will be replaced by the column name, i.e.
#' the corresponding value in `pattern`.
#' - `{col}` which will be replaced by the column name, i.e. the
#' corresponding value in `pattern`.
#' - `{n}` will be replaced by the number of the variable that is replaced.
#' - `{letter}` will be replaced by alphabetical letters in sequential order.
strengejacke marked this conversation as resolved.
Show resolved Hide resolved
#' If more than 26 letters are required, letters are repeated, but have
#' seqential numeric indices (e.g., `a1` to `z1`, followed by `a2` to `z2`).
#' - Finally, the name of a user-defined object that is available in the
#' environment can be used.
#' environment can be used. Note that the object's name is not allowed to
#' be one of the pre-defined tokens, `"col"`, `"n"` and `"letter"`.
#'
#' An example for the use of tokens is...
#' ```r
Expand Down Expand Up @@ -158,9 +161,7 @@ data_rename <- function(data,
}

# check if we have "glue" styled replacement-string
glue_style <- length(replacement) == 1 &&
grepl("{", replacement, fixed = TRUE) &&
length(pattern) > 1
glue_style <- length(replacement) == 1 && grepl("{", replacement, fixed = TRUE)

if (length(replacement) > length(pattern) && verbose) {
insight::format_alert(
Expand Down Expand Up @@ -215,9 +216,23 @@ data_rename <- function(data,
.glue_replacement <- function(pattern, replacement) {
# this function replaces "glue" tokens into their related
# real names/values. Currently, following tokens are accepted:
# - {col}/{name}: replacement is the name of the column (indicated in "pattern")
# - {col}: replacement is the name of the column (indicated in "pattern")
# - {letter}: replacement is lower-case alphabetically letter, in sequential order
# - {n}: replacement is the number of the variable out of n, that should be renamed
strengejacke marked this conversation as resolved.
Show resolved Hide resolved
out <- rep_len("", length(pattern))

# for alphabetical letters, we prepare a string if we have more than
# 26 columns # to rename
if (length(out) > 26) {
long_letters <- paste0(
rep.int(letters[1:26], times = ceiling(length(out) / 26)),
rep(1:ceiling(length(out) / 26), each = 26)
)
} else {
long_letters <- letters[1:26]
}
long_letters <- long_letters[seq_len(length(out))]

for (i in seq_along(out)) {
# prepare pattern
column_name <- pattern[i]
Expand All @@ -228,12 +243,6 @@ data_rename <- function(data,
replacement = paste0("\\1", column_name, "\\3"),
x = out[i]
)
# alias of {col} is {name}
out[i] <- gsub(
"(.*)(\\{name\\})(.*)",
replacement = paste0("\\1", column_name, "\\3"),
x = out[i]
)
# replace second pre-defined token
out[i] <- gsub(
"(.*)(\\{n\\})(.*)",
Expand All @@ -243,7 +252,7 @@ data_rename <- function(data,
# replace third pre-defined token
out[i] <- gsub(
"(.*)(\\{letter\\})(.*)",
replacement = paste0("\\1", letters[i], "\\3"),
replacement = paste0("\\1", long_letters[i], "\\3"),
x = out[i]
)
# extract all non-standard tokens
Expand All @@ -257,7 +266,14 @@ data_rename <- function(data,
# if so, iterate all tokens
for (token in matches) {
# evaluate token-object from the environment
values <- .dynEval(str2lang(gsub("\\{(.*)\\}", "\\1", token)))
values <- tryCatch(
.dynEval(str2lang(gsub("\\{(.*)\\}", "\\1", token))),
error = function(e) {
insight::format_error(paste0(
"The object `", token, "` was not found. Please check if it really exists."
))
}
)
strengejacke marked this conversation as resolved.
Show resolved Hide resolved
# check for correct length
if (length(values) != length(pattern)) {
insight::format_error(paste0(
Expand All @@ -267,7 +283,7 @@ data_rename <- function(data,
))
}
# replace token with values from the object
if (!is.null(values) && length(values)) {
if (length(values)) {
out[i] <- gsub(token, values[i], out[i], fixed = TRUE)
}
}
Expand Down
9 changes: 6 additions & 3 deletions man/data_rename.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions tests/testthat/test-data_rename.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,35 @@ test_that("data_rename glue-style", {
expect_named(out, c("new_a", "new_b", "new_c"))
})

test_that("data_rename enough letters", {
data(efc, package = "datawizard")
data(mtcars)
data(iris)
data(ChickWeight)
data(ToothGrowth)
data(USArrests)
data(airquality)
x <- cbind(
mtcars[1:5, ], iris[1:5, ], efc[1:5, ], ChickWeight[1:5, ], ToothGrowth[1:5, ],
USArrests[1:5, ], airquality[1:5, ]
)
expect_names(
data_rename(x, replacement = "long_letter_{letter}"),
c(
"long_letter_a1", "long_letter_b1", "long_letter_c1", "long_letter_d1",
"long_letter_e1", "long_letter_f1", "long_letter_g1", "long_letter_h1",
"long_letter_i1", "long_letter_j1", "long_letter_k1", "long_letter_l1",
"long_letter_m1", "long_letter_n1", "long_letter_o1", "long_letter_p1",
"long_letter_q1", "long_letter_r1", "long_letter_s1", "long_letter_t1",
"long_letter_u1", "long_letter_v1", "long_letter_w1", "long_letter_x1",
"long_letter_y1", "long_letter_z1", "long_letter_a2", "long_letter_b2",
"long_letter_c2", "long_letter_d2", "long_letter_e2", "long_letter_f2",
"long_letter_g2", "long_letter_h2", "long_letter_i2", "long_letter_j2",
"long_letter_k2", "long_letter_l2"
)
)
})

skip_if_not_installed("withr")
withr::with_environment(
new.env(),
Expand All @@ -168,3 +197,32 @@ withr::with_environment(
)
})
)

withr::with_environment(
new.env(),
test_that("data_rename glue-style, object not in environment", {
data(mtcars)
expect_error(
data_rename(mtcars[1:3], c("mpg", "cyl", "disp"), "col_{x}"),
regex = "The object"
)
})
)

withr::with_environment(
new.env(),
test_that("data_rename glue-style, function in environment", {
data(mtcars)
my_fun <- function(cols_to_rename) {
data_rename(head(mtcars)[, 1:6], cols_to_rename, "new_{col}")
}
expect_named(
my_fun(c("mpg", "drat")),
c("new_mpg", "cyl", "disp", "hp", "new_drat", "wt")
)
expect_named(
my_fun("mpg"),
c("new_mpg", "cyl", "disp", "hp", "drat", "wt")
)
})
)
Loading