diff --git a/R/seek_variables.R b/R/seek_variables.R index 61dddf323..73379d7a7 100644 --- a/R/seek_variables.R +++ b/R/seek_variables.R @@ -17,7 +17,7 @@ #' May also be a character vector of length > 1. `pattern` is searched for in #' column names, variable label and value labels attributes, or factor levels of #' variables in `data`. -#' @param source Character vector, indicating where `pattern` is sought. Use one +#' @param search Character vector, indicating where `pattern` is sought. Use one #' of more of the following options: #' #' - `"name"`: searches in column names. @@ -45,30 +45,30 @@ #' # variable names and labels only, so no match #' seek_variables(efc, "female") #' # when we seek in all sources, we find the variable "e16sex" -#' seek_variables(efc, "female", source = "all") +#' seek_variables(efc, "female", search = "all") #' #' # typo, no match #' seek_variables(iris, "Lenght") #' # typo, fuzzy match #' seek_variables(iris, "Lenght", fuzzy = TRUE) #' @export -seek_variables <- function(data, pattern, source = c("names", "labels"), fuzzy = FALSE) { +seek_variables <- function(data, pattern, search = c("names", "labels"), fuzzy = FALSE) { # check valid args if (!is.data.frame(data)) { insight::format_error("`data` must be a data frame.") } # check valid args - source <- intersect(source, c("names", "labels", "values", "levels", "column_names", "columns", "all")) - if (is.null(source) || !length(source)) { - insight::format_error("`source` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".") + search <- intersect(search, c("names", "labels", "values", "levels", "column_names", "columns", "all")) + if (is.null(search) || !length(search)) { + insight::format_error("`search` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".") } pos1 <- pos2 <- pos3 <- NULL pos <- unlist(lapply(pattern, function(search_pattern) { # search in variable names? - if (any(source %in% c("names", "columns", "column_names", "all"))) { + if (any(search %in% c("names", "columns", "column_names", "all"))) { pos1 <- which(grepl(search_pattern, colnames(data))) # find in near distance? if (fuzzy) { @@ -77,7 +77,7 @@ seek_variables <- function(data, pattern, source = c("names", "labels"), fuzzy = } # search in variable labels? - if (any(source %in% c("labels", "all"))) { + if (any(search %in% c("labels", "all"))) { labels <- insight::compact_character(unlist(lapply(data, attr, which = "label", exact = TRUE))) if (!is.null(labels) && length(labels)) { found <- grepl(search_pattern, labels) @@ -93,7 +93,7 @@ seek_variables <- function(data, pattern, source = c("names", "labels"), fuzzy = } # search for pattern in value labels or levels? - if (any(source %in% c("values", "levels", "all"))) { + if (any(search %in% c("values", "levels", "all"))) { values <- insight::compact_list(lapply(data, function(i) { l <- attr(i, "labels", exact = TRUE) if (is.null(l) && is.factor(i)) { diff --git a/man/describe_distribution.Rd b/man/describe_distribution.Rd index a23069eea..fd229567d 100644 --- a/man/describe_distribution.Rd +++ b/man/describe_distribution.Rd @@ -50,9 +50,14 @@ describe_distribution(x, ...) \item{...}{Additional arguments to be passed to or from methods.} -\item{centrality}{The point-estimates (centrality indices) to compute. Character (vector) or list with one or more of these options: \code{"median"}, \code{"mean"}, \code{"MAP"} or \code{"all"}.} +\item{centrality}{The point-estimates (centrality indices) to compute. Character +(vector) or list with one or more of these options: \code{"median"}, \code{"mean"}, \code{"MAP"} +(see \code{\link[bayestestR:map_estimate]{map_estimate()}}), \code{"trimmed"} (which is just \code{mean(x, trim = threshold)}), +\code{"mode"} or \code{"all"}.} -\item{dispersion}{Logical, if \code{TRUE}, computes indices of dispersion related to the estimate(s) (\code{SD} and \code{MAD} for \code{mean} and \code{median}, respectively).} +\item{dispersion}{Logical, if \code{TRUE}, computes indices of dispersion related +to the estimate(s) (\code{SD} and \code{MAD} for \code{mean} and \code{median}, respectively). +Dispersion is not available for \code{"MAP"} or \code{"mode"} centrality indices.} \item{iqr}{Logical, if \code{TRUE}, the interquartile range is calculated (based on \code{\link[stats:IQR]{stats::IQR()}}, using \code{type = 6}).} @@ -71,7 +76,9 @@ the first centrality index (which is typically the median).} \item{iterations}{The number of bootstrap replicates for computing confidence intervals. Only applies when \code{ci} is not \code{NULL}.} -\item{threshold}{For \code{centrality = "trimmed"} (i.e. trimmed mean), indicates the fraction (0 to 0.5) of observations to be trimmed from each end of the vector before the mean is computed.} +\item{threshold}{For \code{centrality = "trimmed"} (i.e. trimmed mean), indicates +the fraction (0 to 0.5) of observations to be trimmed from each end of the +vector before the mean is computed.} \item{verbose}{Toggle warnings and messages.} diff --git a/man/seek_variables.Rd b/man/seek_variables.Rd index 928ccc48f..b1493e10a 100644 --- a/man/seek_variables.Rd +++ b/man/seek_variables.Rd @@ -4,7 +4,7 @@ \alias{seek_variables} \title{Find variables by its name, variable or value labels} \usage{ -seek_variables(data, pattern, source = c("names", "labels"), fuzzy = FALSE) +seek_variables(data, pattern, search = c("names", "labels"), fuzzy = FALSE) } \arguments{ \item{data}{A data frame.} @@ -14,7 +14,7 @@ May also be a character vector of length > 1. \code{pattern} is searched for in column names, variable label and value labels attributes, or factor levels of variables in \code{data}.} -\item{source}{Character vector, indicating where \code{pattern} is sought. Use one +\item{search}{Character vector, indicating where \code{pattern} is sought. Use one of more of the following options: \itemize{ \item \code{"name"}: searches in column names. @@ -57,7 +57,7 @@ seek_variables(efc, "dependency") # variable names and labels only, so no match seek_variables(efc, "female") # when we seek in all sources, we find the variable "e16sex" -seek_variables(efc, "female", source = "all") +seek_variables(efc, "female", search = "all") # typo, no match seek_variables(iris, "Lenght") diff --git a/tests/testthat/test-seek_data.R b/tests/testthat/test-seek_data.R index 45d7bdc35..5d3183906 100644 --- a/tests/testthat/test-seek_data.R +++ b/tests/testthat/test-seek_data.R @@ -16,7 +16,7 @@ test_that("seek_variables - search label attribute", { data(efc) out <- seek_variables(efc, "female") expect_identical(nrow(out), 0L) - out <- seek_variables(efc, "female", source = "all") + out <- seek_variables(efc, "female", search = "all") expect_identical(out$index, which(colnames(efc) == out$column)) expect_identical(out$labels, "elder's gender") }) @@ -32,8 +32,41 @@ test_that("seek_variables - fuzzy match", { test_that("seek_variables - fuzzy match, value labels", { data(efc) - out <- seek_variables(efc, "femlae", source = "all", fuzzy = TRUE) + out <- seek_variables(efc, "femlae", search = "all", fuzzy = TRUE) expect_identical(nrow(out), 1L) expect_identical(out$index, which(colnames(efc) %in% out$column)) expect_identical(out$labels, "elder's gender") }) + +test_that("seek_variables - multiple pattern", { + data(efc) + out <- seek_variables(efc, c("e16", "e42")) + expect_identical(nrow(out), 2L) + expect_identical(out$index, which(colnames(efc) %in% out$column)) + expect_identical(out$labels, c("elder's gender", "elder's dependency")) + # only one match, typo + out <- seek_variables(efc, c("femlae", "dependency")) + expect_identical(nrow(out), 1L) + expect_identical(out$index, which(colnames(efc) %in% out$column)) + expect_identical(out$labels, "elder's dependency") + # only one match, not searching in value labels + out <- seek_variables(efc, c("female", "dependency")) + expect_identical(nrow(out), 1L) + expect_identical(out$index, which(colnames(efc) %in% out$column)) + expect_identical(out$labels, "elder's dependency") + # two matches + out <- seek_variables(efc, c("female", "dependency"), search = "all") + expect_identical(nrow(out), 2L) + expect_identical(out$index, which(colnames(efc) %in% out$column)) + expect_identical(out$labels, c("elder's gender", "elder's dependency")) + # only one match, typo + out <- seek_variables(efc, c("femlae", "dependency"), search = "all") + expect_identical(nrow(out), 1L) + expect_identical(out$index, which(colnames(efc) %in% out$column)) + expect_identical(out$labels, "elder's dependency") + # two matches, despite typo + out <- seek_variables(efc, c("femlae", "dependency"), search = "all", fuzzy = TRUE) + expect_identical(nrow(out), 2L) + expect_identical(out$index, which(colnames(efc) %in% out$column)) + expect_identical(out$labels, c("elder's gender", "elder's dependency")) +})