Skip to content

Commit

Permalink
source -> search
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Sep 12, 2023
1 parent a18bdbd commit 1043c74
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 17 deletions.
18 changes: 9 additions & 9 deletions R/seek_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#' May also be a character vector of length > 1. `pattern` is searched for in
#' column names, variable label and value labels attributes, or factor levels of
#' variables in `data`.
#' @param source Character vector, indicating where `pattern` is sought. Use one
#' @param search Character vector, indicating where `pattern` is sought. Use one
#' of more of the following options:
#'
#' - `"name"`: searches in column names.
Expand Down Expand Up @@ -45,30 +45,30 @@
#' # variable names and labels only, so no match
#' seek_variables(efc, "female")
#' # when we seek in all sources, we find the variable "e16sex"
#' seek_variables(efc, "female", source = "all")
#' seek_variables(efc, "female", search = "all")
#'
#' # typo, no match
#' seek_variables(iris, "Lenght")
#' # typo, fuzzy match
#' seek_variables(iris, "Lenght", fuzzy = TRUE)
#' @export
seek_variables <- function(data, pattern, source = c("names", "labels"), fuzzy = FALSE) {
seek_variables <- function(data, pattern, search = c("names", "labels"), fuzzy = FALSE) {
# check valid args
if (!is.data.frame(data)) {
insight::format_error("`data` must be a data frame.")
}

# check valid args
source <- intersect(source, c("names", "labels", "values", "levels", "column_names", "columns", "all"))
if (is.null(source) || !length(source)) {
insight::format_error("`source` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".")
search <- intersect(search, c("names", "labels", "values", "levels", "column_names", "columns", "all"))
if (is.null(search) || !length(search)) {
insight::format_error("`search` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".")
}

pos1 <- pos2 <- pos3 <- NULL

pos <- unlist(lapply(pattern, function(search_pattern) {
# search in variable names?
if (any(source %in% c("names", "columns", "column_names", "all"))) {
if (any(search %in% c("names", "columns", "column_names", "all"))) {
pos1 <- which(grepl(search_pattern, colnames(data)))
# find in near distance?
if (fuzzy) {
Expand All @@ -77,7 +77,7 @@ seek_variables <- function(data, pattern, source = c("names", "labels"), fuzzy =
}

# search in variable labels?
if (any(source %in% c("labels", "all"))) {
if (any(search %in% c("labels", "all"))) {
labels <- insight::compact_character(unlist(lapply(data, attr, which = "label", exact = TRUE)))
if (!is.null(labels) && length(labels)) {
found <- grepl(search_pattern, labels)
Expand All @@ -93,7 +93,7 @@ seek_variables <- function(data, pattern, source = c("names", "labels"), fuzzy =
}

# search for pattern in value labels or levels?
if (any(source %in% c("values", "levels", "all"))) {
if (any(search %in% c("values", "levels", "all"))) {
values <- insight::compact_list(lapply(data, function(i) {
l <- attr(i, "labels", exact = TRUE)
if (is.null(l) && is.factor(i)) {
Expand Down
13 changes: 10 additions & 3 deletions man/describe_distribution.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/seek_variables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 35 additions & 2 deletions tests/testthat/test-seek_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ test_that("seek_variables - search label attribute", {
data(efc)
out <- seek_variables(efc, "female")
expect_identical(nrow(out), 0L)
out <- seek_variables(efc, "female", source = "all")
out <- seek_variables(efc, "female", search = "all")
expect_identical(out$index, which(colnames(efc) == out$column))
expect_identical(out$labels, "elder's gender")
})
Expand All @@ -32,8 +32,41 @@ test_that("seek_variables - fuzzy match", {

test_that("seek_variables - fuzzy match, value labels", {
data(efc)
out <- seek_variables(efc, "femlae", source = "all", fuzzy = TRUE)
out <- seek_variables(efc, "femlae", search = "all", fuzzy = TRUE)
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's gender")
})

test_that("seek_variables - multiple pattern", {
data(efc)
out <- seek_variables(efc, c("e16", "e42"))
expect_identical(nrow(out), 2L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, c("elder's gender", "elder's dependency"))
# only one match, typo
out <- seek_variables(efc, c("femlae", "dependency"))
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's dependency")
# only one match, not searching in value labels
out <- seek_variables(efc, c("female", "dependency"))
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's dependency")
# two matches
out <- seek_variables(efc, c("female", "dependency"), search = "all")
expect_identical(nrow(out), 2L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, c("elder's gender", "elder's dependency"))
# only one match, typo
out <- seek_variables(efc, c("femlae", "dependency"), search = "all")
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's dependency")
# two matches, despite typo
out <- seek_variables(efc, c("femlae", "dependency"), search = "all", fuzzy = TRUE)
expect_identical(nrow(out), 2L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, c("elder's gender", "elder's dependency"))
})

0 comments on commit 1043c74

Please sign in to comment.