From a18182368f200bd6afa718aed70d5f76e39bc654 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 10 Oct 2024 14:37:21 +0200 Subject: [PATCH] Allow : in select strings --- DESCRIPTION | 2 +- NEWS.md | 4 +++ R/extract_column_names.R | 11 +++++-- R/select_nse.R | 56 +++++++++++++++++++++++++------- man/adjust.Rd | 6 ++-- man/assign_labels.Rd | 6 ++-- man/categorize.Rd | 6 ++-- man/center.Rd | 6 ++-- man/convert_na_to.Rd | 6 ++-- man/convert_to_na.Rd | 6 ++-- man/data_codebook.Rd | 6 ++-- man/data_duplicated.Rd | 6 ++-- man/data_extract.Rd | 6 ++-- man/data_group.Rd | 6 ++-- man/data_peek.Rd | 6 ++-- man/data_relocate.Rd | 6 ++-- man/data_rename.Rd | 6 ++-- man/data_replicate.Rd | 6 ++-- man/data_separate.Rd | 6 ++-- man/data_tabulate.Rd | 6 ++-- man/data_to_long.Rd | 6 ++-- man/data_unique.Rd | 6 ++-- man/data_unite.Rd | 6 ++-- man/describe_distribution.Rd | 6 ++-- man/extract_column_names.Rd | 11 +++++-- man/labels_to_levels.Rd | 6 ++-- man/means_by_group.Rd | 6 ++-- man/normalize.Rd | 6 ++-- man/ranktransform.Rd | 6 ++-- man/recode_values.Rd | 6 ++-- man/rescale.Rd | 6 ++-- man/reverse.Rd | 6 ++-- man/row_means.Rd | 6 ++-- man/slide.Rd | 6 ++-- man/standardize.Rd | 6 ++-- man/to_factor.Rd | 6 ++-- man/to_numeric.Rd | 6 ++-- tests/testthat/test-select_nse.R | 19 +++++++++++ 38 files changed, 212 insertions(+), 83 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8dd92fb7e..4758f601c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.13.0.1 +Version: 0.13.0.2 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531")), diff --git a/NEWS.md b/NEWS.md index c28ecc63f..e7214ee58 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # datawizard (development) +CHANGES + + + BUG FIXES * `describe_distribution()` no longer errors if the sample was too sparse to compute diff --git 
a/R/extract_column_names.R b/R/extract_column_names.R index c245f4bab..a3d120d3f 100644 --- a/R/extract_column_names.R +++ b/R/extract_column_names.R @@ -9,8 +9,10 @@ #' tasks. Can be either #' #' - a variable specified as a literal variable name (e.g., `column_name`), -#' - a string with the variable name (e.g., `"column_name"`), or a character -#' vector of variable names (e.g., `c("col1", "col2", "col3")`), +#' - a string with the variable name (e.g., `"column_name"`), a character +#' vector of variable names (e.g., `c("col1", "col2", "col3")`), or a +#' character vector of variable names including ranges specified via `:` +#' (e.g., `c("col1:col3", "col5")`), #' - a formula with variable names (e.g., `~column_1 + column_2`), #' - a vector of positive integers, giving the positions counting from the left #' (e.g. `1` or `c(1, 3, 5)`), @@ -116,7 +118,7 @@ #' ``` #' #' @examples -#' # Find columns names by pattern +#' # Find column names by pattern #' extract_column_names(iris, starts_with("Sepal")) #' extract_column_names(iris, ends_with("Width")) #' extract_column_names(iris, regex("\\.")) @@ -129,6 +131,9 @@ #' numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 #' extract_column_names(iris, numeric_mean_35) #' +#' # find a range of column names, using a character vector +#' extract_column_names(mtcars, c("cyl:hp", "wt")) +#' #' # rename returned columns for "data_select()" #' head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl"))) #' @export diff --git a/R/select_nse.R b/R/select_nse.R index 8f9eba096..63b9a548d 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -146,31 +146,63 @@ # use colnames because names() doesn't work for matrices columns <- colnames(data) if (isTRUE(regex)) { + # string is a regular expression grep(x, columns) } else if (length(x) == 1L && x == "all") { + # string is "all" - select all columns seq_along(data) + } else if (any(grepl(":", x, fixed = TRUE))) { + # special pattern, as string 
(e.g., select = c("cyl:hp", "am")). However, + # this will first go into `.eval_call()` and thus only single elements + # are passed in `x` - we never have a character *vector* here + # check for valid names + colon_vars <- unlist(strsplit(x, ":", fixed = TRUE)) + colon_match <- match(colon_vars, columns) + if (anyNA(colon_match)) { + .warn_not_found(colon_vars, columns, colon_match, verbose) + matches <- NA + } else { + start <- match(colon_vars[1], columns) + end <- match(colon_vars[2], columns) + if (!is.na(start) && !is.na(end)) { + matches <- start:end + } else { + matches <- NA + } + } + matches[!is.na(matches)] } else if (isTRUE(ignore_case)) { + # find columns, case insensitive matches <- match(toupper(x), toupper(columns)) matches[!is.na(matches)] } else { + # find columns, case sensitive matches <- match(x, columns) - if (anyNA(matches) && verbose) { - insight::format_warning( - paste0( - "Following variable(s) were not found: ", - toString(x[is.na(matches)]) - ), - .misspelled_string( - columns, - x[is.na(matches)], - default_message = "Possibly misspelled?" - ) - ) + if (anyNA(matches)) { + .warn_not_found(x, columns, matches, verbose) } matches[!is.na(matches)] } } +# small helper, to avoid duplicated code +.warn_not_found <- function(x, columns, matches, verbose = TRUE) { + if (verbose) { + insight::format_warning( + paste0( + "Following variable(s) were not found: ", + toString(x[is.na(matches)]) + ), + .misspelled_string( + columns, + x[is.na(matches)], + default_message = "Possibly misspelled?" + ) + ) + } +} + + # 3 types of symbols: # - unquoted variables # - objects that need to be evaluated, e.g data_find(iris, i) where diff --git a/man/adjust.Rd b/man/adjust.Rd index 64e50d9d3..48b321b8f 100644 --- a/man/adjust.Rd +++ b/man/adjust.Rd @@ -43,8 +43,10 @@ out). If \code{NULL} (the default), all variables will be selected.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/assign_labels.Rd b/man/assign_labels.Rd index cca14cc85..e6fd24252 100644 --- a/man/assign_labels.Rd +++ b/man/assign_labels.Rd @@ -38,8 +38,10 @@ labels are omitted.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/categorize.Rd b/man/categorize.Rd index ca013ce2b..dbecbf5e6 100644 --- a/man/categorize.Rd +++ b/man/categorize.Rd @@ -89,8 +89,10 @@ group. See 'Examples'.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/center.Rd b/man/center.Rd index f143f64b2..4774020ab 100644 --- a/man/center.Rd +++ b/man/center.Rd @@ -72,8 +72,10 @@ against the names of the selected variables.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/convert_na_to.Rd b/man/convert_na_to.Rd index 91121ff94..702e0eb2e 100644 --- a/man/convert_na_to.Rd +++ b/man/convert_na_to.Rd @@ -41,8 +41,10 @@ replace \code{NA}.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/convert_to_na.Rd b/man/convert_to_na.Rd index 2529294b7..fe308d61e 100644 --- a/man/convert_to_na.Rd +++ b/man/convert_to_na.Rd @@ -44,8 +44,10 @@ by \code{NA}, should unused levels be dropped?} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_codebook.Rd b/man/data_codebook.Rd index 4c0f935e7..319f4e5b6 100644 --- a/man/data_codebook.Rd +++ b/man/data_codebook.Rd @@ -34,8 +34,10 @@ data_codebook( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_duplicated.Rd b/man/data_duplicated.Rd index 73c3e8de1..88624c8c8 100644 --- a/man/data_duplicated.Rd +++ b/man/data_duplicated.Rd @@ -20,8 +20,10 @@ data_duplicated( tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_extract.Rd b/man/data_extract.Rd index a0cd4e402..0b544e710 100644 --- a/man/data_extract.Rd +++ b/man/data_extract.Rd @@ -27,8 +27,10 @@ and data frame extensions (e.g., tibbles).} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_group.Rd b/man/data_group.Rd index 56f5f314e..9cb55de5d 100644 --- a/man/data_group.Rd +++ b/man/data_group.Rd @@ -24,8 +24,10 @@ data_ungroup(data, verbose = TRUE, ...) tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_peek.Rd b/man/data_peek.Rd index 4f3f88e8a..9524c70ec 100644 --- a/man/data_peek.Rd +++ b/man/data_peek.Rd @@ -27,8 +27,10 @@ data_peek(x, ...) tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_relocate.Rd b/man/data_relocate.Rd index 30e4dbbfe..9949b5d27 100644 --- a/man/data_relocate.Rd +++ b/man/data_relocate.Rd @@ -44,8 +44,10 @@ data_remove( tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_rename.Rd b/man/data_rename.Rd index a45095805..2ff779c21 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -56,8 +56,10 @@ will be ignored then).} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_replicate.Rd b/man/data_replicate.Rd index 35448155d..5a427d570 100644 --- a/man/data_replicate.Rd +++ b/man/data_replicate.Rd @@ -27,8 +27,10 @@ column. Note that the variable indicated by \code{expand} must be an integer vec tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_separate.Rd b/man/data_separate.Rd index 37528d46e..7c951f81c 100644 --- a/man/data_separate.Rd +++ b/man/data_separate.Rd @@ -30,8 +30,10 @@ data_separate( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 2feadf3a9..032c0b989 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -79,8 +79,10 @@ for printing.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index 741725d25..73b54219b 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -45,8 +45,10 @@ rows and fewer columns after the operation.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_unique.Rd b/man/data_unique.Rd index 8a45bfc21..a0a70b92a 100644 --- a/man/data_unique.Rd +++ b/man/data_unique.Rd @@ -21,8 +21,10 @@ data_unique( tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_unite.Rd b/man/data_unite.Rd index ba7710a8a..369fd33d8 100644 --- a/man/data_unite.Rd +++ b/man/data_unite.Rd @@ -27,8 +27,10 @@ data_unite( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/describe_distribution.Rd b/man/describe_distribution.Rd index 369bd9ef6..80b69e115 100644 --- a/man/describe_distribution.Rd +++ b/man/describe_distribution.Rd @@ -86,8 +86,10 @@ vector before the mean is computed.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/extract_column_names.Rd b/man/extract_column_names.Rd index 6e658ab33..3ea5da7dc 100644 --- a/man/extract_column_names.Rd +++ b/man/extract_column_names.Rd @@ -43,8 +43,10 @@ find_columns( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -152,7 +154,7 @@ outer(iris, starts_with("Sep")) }\if{html}{\out{}} } \examples{ -# Find columns names by pattern +# Find column names by pattern extract_column_names(iris, starts_with("Sepal")) extract_column_names(iris, ends_with("Width")) extract_column_names(iris, regex("\\\\.")) @@ -165,6 +167,9 @@ extract_column_names(iris, starts_with("Sepal"), exclude = contains("Width")) numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 extract_column_names(iris, numeric_mean_35) +# find a range of column names, using a character vector +extract_column_names(mtcars, c("cyl:hp", "wt")) + # rename returned columns for "data_select()" head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl"))) } diff --git a/man/labels_to_levels.Rd b/man/labels_to_levels.Rd index 8024eb2d3..163eb0eaa 100644 --- a/man/labels_to_levels.Rd +++ b/man/labels_to_levels.Rd @@ -33,8 +33,10 @@ allowed.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd index ba2a7d0c8..6c06ac3b1 100644 --- a/man/means_by_group.Rd +++ b/man/means_by_group.Rd @@ -51,8 +51,10 @@ point when rounding estimates and values.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/normalize.Rd b/man/normalize.Rd index 4a9a61a68..c325e98fe 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -71,8 +71,10 @@ the normalized vectors are rescaled to a range from \code{0 + include_bounds} to tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/ranktransform.Rd b/man/ranktransform.Rd index c23105735..7046db2b5 100644 --- a/man/ranktransform.Rd +++ b/man/ranktransform.Rd @@ -39,8 +39,10 @@ details.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/recode_values.Rd b/man/recode_values.Rd index baa7afda9..dece902f7 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -60,8 +60,10 @@ default value.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/rescale.Rd b/man/rescale.Rd index 016a6f841..490964777 100644 --- a/man/rescale.Rd +++ b/man/rescale.Rd @@ -67,8 +67,10 @@ the input vector (\code{range(x)}).} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/reverse.Rd b/man/reverse.Rd index 6304dffc6..5767908ff 100644 --- a/man/reverse.Rd +++ b/man/reverse.Rd @@ -45,8 +45,10 @@ usually only makes sense when factor levels are numeric, not characters.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/row_means.Rd b/man/row_means.Rd index c347fc6f1..4d61e8cf2 100644 --- a/man/row_means.Rd +++ b/man/row_means.Rd @@ -23,8 +23,10 @@ row_means( tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/slide.Rd b/man/slide.Rd index ccc6bd7e9..c26943116 100644 --- a/man/slide.Rd +++ b/man/slide.Rd @@ -34,8 +34,10 @@ factors or character vectors to numeric values.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/standardize.Rd b/man/standardize.Rd index 4041f2dc0..fcc8c6ae7 100644 --- a/man/standardize.Rd +++ b/man/standardize.Rd @@ -145,8 +145,10 @@ vectors as well.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/to_factor.Rd b/man/to_factor.Rd index e035769ec..d544bdaae 100644 --- a/man/to_factor.Rd +++ b/man/to_factor.Rd @@ -36,8 +36,10 @@ the values of \code{x} (i.e. as if using \code{as.factor()}).} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd index 634906e4a..39f04c3a9 100644 --- a/man/to_numeric.Rd +++ b/man/to_numeric.Rd @@ -30,8 +30,10 @@ to_numeric(x, ...) tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. 
\code{1} or \code{c(1, 3, 5)}),
diff --git a/tests/testthat/test-select_nse.R b/tests/testthat/test-select_nse.R
index c0195ad94..c7c28fd22 100644
--- a/tests/testthat/test-select_nse.R
+++ b/tests/testthat/test-select_nse.R
@@ -138,3 +138,22 @@ test_that(".select_nse: works with function and namespace", {
   out <- fun(insight::find_predictors(model, effects = "fixed", flatten = TRUE))
   expect_identical(out, iris["Petal.Width"])
 })
+
+test_that(".select_nse: allow character vector with :", {
+  data(mtcars)
+  out <- data_select(mtcars, c("cyl:hp", "wt", "vs:gear"))
+  expect_named(out, c("cyl", "disp", "hp", "wt", "vs", "am", "gear"))
+  out <- data_select(mtcars, c("cyl:hp", "wta", "vs:gear"))
+  expect_named(out, c("cyl", "disp", "hp", "vs", "am", "gear"))
+  out <- data_select(mtcars, c("cyl:hq", "wt", "vs:gear"))
+  expect_named(out, c("wt", "vs", "am", "gear"))
+
+  expect_warning(
+    center(mtcars, c("cyl:hp", "wta", "vs:gear"), verbose = TRUE),
+    regexp = "Did you mean \"wt\""
+  )
+  expect_warning(
+    center(mtcars, c("cyl:hq", "wt", "vs:gear"), verbose = TRUE),
+    regexp = "Did you mean one of \"hp\""
+  )
+})