Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow named character vector in select, to rename variables #512

Merged
merged 13 commits into from
Jun 8, 2024
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.11.0.1
Version: 0.11.0.2
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# datawizard 0.11.0.1

## Changes

* `data_select()` can directly rename selected variables when a named vector
is provided in `select`, e.g. `data_select(mtcars, c(new1 = "mpg", new2 = "cyl"))`.

# datawizard 0.11.0

BREAKING CHANGES
Expand Down
6 changes: 6 additions & 0 deletions R/data_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ data_select <- function(data,
exclude,
ignore_case = ignore_case,
regex = regex,
allow_rename = TRUE,
verbose = FALSE
)

Expand All @@ -28,6 +29,11 @@ data_select <- function(data,

out <- data[columns]

# for named character vectors, we offer the service to directly rename the columns
if (!is.null(names(columns))) {
colnames(out) <- names(columns)
}

# add back attributes
out <- .replace_attrs(out, a)
out
Expand Down
7 changes: 7 additions & 0 deletions R/extract_column_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@
#'
#' @details
#'
#' Specifically for `data_select()`, `select` can also be a named character
#' vector. In this case, the names are used to rename the columns in the
#' output data frame. See 'Examples'.
#'
#' Note that it is possible to either pass an entire select helper or only the
#' pattern inside a select helper as a function argument:
#'
Expand Down Expand Up @@ -124,6 +128,9 @@
#' # find numeric with mean > 3.5
#' numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5
#' extract_column_names(iris, numeric_mean_35)
#'
#' # rename returned columns for "data_select()"
#' head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl")))
#' @export
extract_column_names <- function(data,
select = NULL,
Expand Down
6 changes: 3 additions & 3 deletions R/row_means.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ row_means <- function(data,
}

# proceed here if min_valid is not NULL
if (!is.null(min_valid)) {
if (is.null(min_valid)) {
out <- rowMeans(data, na.rm = remove_na)
} else {
# is 'min_valid' indicating a proportion?
decimals <- min_valid %% 1
if (decimals != 0) {
Expand All @@ -126,8 +128,6 @@ row_means <- function(data,
to_na <- rowSums(is.na(data)) > ncol(data) - min_valid
out <- rowMeans(data, na.rm = TRUE)
out[to_na] <- NA
} else {
out <- rowMeans(data, na.rm = remove_na)
}

# round, if requested
Expand Down
27 changes: 26 additions & 1 deletion R/select_nse.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
# https://github.com/nathaneastwood/poorman/blob/master/R/select_positions.R

.select_nse <- function(select, data, exclude, ignore_case, regex = FALSE,
remove_group_var = FALSE, verbose = FALSE) {
remove_group_var = FALSE, allow_rename = FALSE,
verbose = FALSE) {
.check_data(data)
columns <- colnames(data)

Expand Down Expand Up @@ -75,6 +76,30 @@
out <- setdiff(out, grp_vars)
}

# for named character vectors, we offer the service to rename the columns
if (allow_rename && typeof(expr_select) == "language") {
# safe evaluation of the expression, to get the named vector from "select"
new_names <- tryCatch(eval(expr_select), error = function(e) NULL)
# check if we really have a named vector
if (!is.null(new_names) && !is.null(names(new_names))) {
# if so, copy names
all_names <- names(new_names)
# if some of the elements don't have a name, we set the value as name
names(new_names)[!nzchar(all_names)] <- new_names[!nzchar(all_names)]
# after inclusion and exclusion, the original values in "select"
# may have changed, so we check that we only add names of valid values
out <- stats::setNames(out, names(new_names)[new_names %in% out])
# check if we have any duplicated names, and if so, give an error
if (anyDuplicated(names(out)) > 0) {
insight::format_error(paste0(
"Following names were duplicated after renaming: ",
strengejacke marked this conversation as resolved.
Show resolved Hide resolved
text_concatenate(names(out)[duplicated(names(out))], enclose = "`"),
". Using duplicated names is no good practice and therefore discouraged. Please provide unique names."
))
}
}
}

out
}

Expand Down
6 changes: 3 additions & 3 deletions R/to_factor.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ to_factor.data.frame <- function(x,
# drop factors, when append is not FALSE
select <- colnames(x[select])[!vapply(x[select], is.factor, FUN.VALUE = logical(1L))]
# process arguments
args <- .process_append(
my_args <- .process_append(
x,
select,
append,
Expand All @@ -125,8 +125,8 @@ to_factor.data.frame <- function(x,
preserve_value_labels = TRUE
)
# update processed arguments
x <- args$x
select <- args$select
x <- my_args$x
select <- my_args$select
}

x[select] <- lapply(x[select], to_factor, verbose = verbose, ...)
Expand Down
18 changes: 9 additions & 9 deletions R/unnormalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ unnormalize.numeric <- function(x, verbose = TRUE, ...) {
dots <- match.call(expand.dots = FALSE)[["..."]]
grp_attr_dw <- eval(dots$grp_attr_dw, envir = parent.frame(1L))

if (!is.null(grp_attr_dw)) {
if (is.null(grp_attr_dw)) {
include_bounds <- attr(x, "include_bounds")
min_value <- attr(x, "min_value")
range_difference <- attr(x, "range_difference")
to_range <- attr(x, "to_range")
} else {
names(grp_attr_dw) <- gsub(".*\\.", "", names(grp_attr_dw))
include_bounds <- grp_attr_dw["include_bounds"]
min_value <- grp_attr_dw["min_value"]
Expand All @@ -33,11 +38,6 @@ unnormalize.numeric <- function(x, verbose = TRUE, ...) {
if (is.na(to_range)) {
to_range <- NULL
}
} else {
include_bounds <- attr(x, "include_bounds")
min_value <- attr(x, "min_value")
range_difference <- attr(x, "range_difference")
to_range <- attr(x, "to_range")
}

if (is.null(min_value) || is.null(range_difference)) {
Expand Down Expand Up @@ -78,10 +78,10 @@ unnormalize.data.frame <- function(x,

dots <- match.call(expand.dots = FALSE)[["..."]]

if (!is.null(dots$grp_attr_dw)) {
grp_attr_dw <- eval(dots$grp_attr_dw, envir = parent.frame(1L))
} else {
if (is.null(dots$grp_attr_dw)) {
grp_attr_dw <- NULL
} else {
grp_attr_dw <- eval(dots$grp_attr_dw, envir = parent.frame(1L))
}

for (i in select) {
Expand Down
7 changes: 7 additions & 0 deletions man/extract_column_names.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 39 additions & 4 deletions tests/testthat/test-data_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,22 +106,22 @@
test_that("data_select works with negated select-functions", {
expect_identical(
data_select(iris, -is.numeric()),
iris[sapply(iris, function(i) !is.numeric(i))]
iris[sapply(iris, function(i) !is.numeric(i))] # nolint
)

expect_identical(
data_select(iris, -is.numeric),
iris[sapply(iris, function(i) !is.numeric(i))]
iris[sapply(iris, function(i) !is.numeric(i))] # nolint
)

expect_identical(
data_select(iris, -is.factor()),
iris[sapply(iris, function(i) !is.factor(i))]
iris[sapply(iris, function(i) !is.factor(i))] # nolint
)

expect_identical(
data_select(iris, -is.factor),
iris[sapply(iris, function(i) !is.factor(i))]
iris[sapply(iris, function(i) !is.factor(i))] # nolint
)

expect_identical(data_select(iris, -is.logical), iris)
Expand Down Expand Up @@ -363,7 +363,7 @@
)
})

test_that("select helpers work in functions and loops even if there's an object with the same name in the environment above", {

Check warning on line 366 in tests/testthat/test-data_select.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=tests/testthat/test-data_select.R,line=366,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 127 characters.
i <- "Petal"
foo <- function(data, i) {
extract_column_names(data, select = starts_with(i))
Expand Down Expand Up @@ -402,3 +402,38 @@
c("Sepal.Length", "Sepal.Width")
)
})

test_that("data_select renames variables on the fly", {
data(mtcars)
expect_named(
data_select(mtcars, c(new = "mpg", old = "cyl", hoho = "wt")),
c("new", "old", "hoho")
)
expect_named(
data_select(mtcars, c(new = "mpg", "cyl", hoho = "wt")),
c("new", "cyl", "hoho")
)
expect_named(
data_select(mtcars, c("mpg", "cyl", "wt")),
c("mpg", "cyl", "wt")
)
# don't fail for non-existing columns
expect_named(
data_select(mtcars, c(new = "mpg", "cyl", hoho = "wt", test = "grea")),
c("new", "cyl", "hoho")
)
# check that excluded variables don't cause troubles
expect_named(
data_select(mtcars, c(new = "mpg", "cyl", hoho = "wt"), exclude = "wt"),
c("new", "cyl")
)
# error when names are not unique
expect_error(
data_select(mtcars, c(new = "mpg", old = "cyl", new = "wt")), # nolint
regex = "Following names were duplicated"
strengejacke marked this conversation as resolved.
Show resolved Hide resolved
)
expect_error(
data_select(mtcars, c(new = "mpg", "cyl", cyl = "wt")), # nolint
regex = "Following names were duplicated"
strengejacke marked this conversation as resolved.
Show resolved Hide resolved
)
})
Loading