diff --git a/DESCRIPTION b/DESCRIPTION index 44785810d..2ed1150ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.11.0.1 +Version: 0.11.0.2 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index 4b346e9cb..2feffab87 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # datawizard 0.11.0.1 +## Changes + +* `data_select()` can directly rename selected variables when a named vector + is provided in `select`, e.g. `data_select(mtcars, c(new = "mpg", old = "cyl"))`. + # datawizard 0.11.0 BREAKING CHANGES diff --git a/R/data_select.R b/R/data_select.R index c59d41b72..cdfc6e754 100644 --- a/R/data_select.R +++ b/R/data_select.R @@ -28,6 +28,11 @@ data_select <- function(data, out <- data[columns] + # for named character vectors, we offer the service to directly rename the columns + if (!is.null(names(columns))) { + colnames(out) <- names(columns) + } + # add back attributes out <- .replace_attrs(out, a) out diff --git a/R/extract_column_names.R b/R/extract_column_names.R index f1cd68be2..fee70a07c 100644 --- a/R/extract_column_names.R +++ b/R/extract_column_names.R @@ -63,6 +63,10 @@ #' #' @details #' +#' Specifically for `data_select()`, `select` can also be a named character +#' vector. In tis case, the names are used to rename the columns in the +#' output data frame. See 'Examples'. +#' #' Note that it is possible to either pass an entire select helper or only the #' pattern inside a select helper as a function argument: #' @@ -124,6 +128,9 @@ #' # find numeric with mean > 3.5 #' numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 #' extract_column_names(iris, numeric_mean_35) +#' +#' # rename returned columns for "data_select()" +#' head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl"))) #' @export extract_column_names <- function(data, select = NULL, diff --git a/R/select_nse.R b/R/select_nse.R index eb1c6012a..a671c9bfd 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -75,6 +75,22 @@ out <- setdiff(out, grp_vars) } + # for named character vectors, we offer the service to rename the columns + if (typeof(expr_select) == "language") { + # safe evaluation of the expression, to get the named vector from "select" + new_names <- tryCatch(eval(expr_select), error = function(e) NULL) + # check if we really have a named vector + if (!is.null(new_names) && !is.null(names(new_names))) { + # if so, copy names + all_names <- names(new_names) + # if some of the element don't have a name, we set the value as name + names(new_names)[!nzchar(all_names)] <- new_names[!nzchar(all_names)] + # after inclusion and exclusion, the original values in "select" + # may have changed, so we check that we only add names of valid values + out <- stats::setNames(out, names(new_names)[new_names %in% out]) + } + } + out } diff --git a/man/extract_column_names.Rd b/man/extract_column_names.Rd index b8c646789..fc4a54411 100644 --- a/man/extract_column_names.Rd +++ b/man/extract_column_names.Rd @@ -125,6 +125,10 @@ columns. match a certain search pattern, while \code{data_select()} returns the found data. } \details{ +Specifically for \code{data_select()}, \code{select} can also be a named character +vector. In tis case, the names are used to rename the columns in the +output data frame. See 'Examples'. + Note that it is possible to either pass an entire select helper or only the pattern inside a select helper as a function argument: @@ -182,6 +186,9 @@ extract_column_names(iris, starts_with("Sepal"), exclude = contains("Width")) # find numeric with mean > 3.5 numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 extract_column_names(iris, numeric_mean_35) + +# rename returned columns for "data_select()" +head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl"))) } \seealso{ \itemize{ diff --git a/tests/testthat/test-data_select.R b/tests/testthat/test-data_select.R index 2557a1f7b..4168f368c 100644 --- a/tests/testthat/test-data_select.R +++ b/tests/testthat/test-data_select.R @@ -106,22 +106,22 @@ test_that("data_select works with user-defined select-functions", { test_that("data_select works with negated select-functions", { expect_identical( data_select(iris, -is.numeric()), - iris[sapply(iris, function(i) !is.numeric(i))] + iris[sapply(iris, function(i) !is.numeric(i))] # nolint ) expect_identical( data_select(iris, -is.numeric), - iris[sapply(iris, function(i) !is.numeric(i))] + iris[sapply(iris, function(i) !is.numeric(i))] # nolint ) expect_identical( data_select(iris, -is.factor()), - iris[sapply(iris, function(i) !is.factor(i))] + iris[sapply(iris, function(i) !is.factor(i))] # nolint ) expect_identical( data_select(iris, -is.factor), - iris[sapply(iris, function(i) !is.factor(i))] + iris[sapply(iris, function(i) !is.factor(i))] # nolint ) expect_identical(data_select(iris, -is.logical), iris) @@ -402,3 +402,13 @@ test_that("old solution still works", { c("Sepal.Length", "Sepal.Width") ) }) + +test_that("data_select renames variables on the fly", { + data(mtcars) + out <- data_select(mtcars, c(new = "mpg", old = "cyl", hoho = "wt")) + expect_named(out, c("new", "old", "hoho")) + data_select(mtcars, c(new = "mpg", "cyl", hoho = "wt")) + expect_named(out, c("new", "cyl", "hoho")) + data_select(mtcars, c("mpg", "cyl", "wt")) + expect_named(out, c("mpg", "cyl", "wt")) +})