easystats · strengejacke · Dec 19, 2023 · Dec 18, 2023 · Dec 18, 2023 · Dec 18, 2023
diff --git a/R/data_rotate.R b/R/data_rotate.R
@@ -56,7 +56,7 @@ data_rotate <- function(data, rownames = NULL, colnames = FALSE, verbose = TRUE)
 
   # warning after possible removal of columns
   if (verbose && insight::n_unique(vapply(data, typeof, FUN.VALUE = character(1L))) > 1L) {
-    insight::format_warning("Your data frame contains mixed types of data. After transposition, all variables will be transformed into characters.")
+    insight::format_warning("Your data frame contains mixed types of data. After transposition, all variables will be transformed into characters.") # nolint
   }
 
   # rotate data frame by 90 degrees

diff --git a/R/data_seek.R b/R/data_seek.R
@@ -63,15 +63,15 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE)
   # check valid args
   seek <- intersect(seek, c("names", "labels", "values", "levels", "column_names", "columns", "all"))
   if (is.null(seek) || !length(seek)) {
-    insight::format_error("`seek` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".")
+    insight::format_error("`seek` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".") # nolint
   }
 
   pos1 <- pos2 <- pos3 <- NULL
 
   pos <- unlist(lapply(pattern, function(search_pattern) {
     # search in variable names?
     if (any(seek %in% c("names", "columns", "column_names", "all"))) {
-      pos1 <- which(grepl(search_pattern, colnames(data)))
+      pos1 <- grep(search_pattern, colnames(data))
       # find in near distance?
       if (fuzzy) {
         pos1 <- c(pos1, .fuzzy_grep(x = colnames(data), pattern = search_pattern))
@@ -80,15 +80,15 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE)
 
     # search in variable labels?
     if (any(seek %in% c("labels", "all"))) {
-      labels <- insight::compact_character(unlist(lapply(data, attr, which = "label", exact = TRUE)))
-      if (!is.null(labels) && length(labels)) {
-        found <- grepl(search_pattern, labels)
-        pos2 <- match(names(labels)[found], colnames(data))
+      var_labels <- insight::compact_character(unlist(lapply(data, attr, which = "label", exact = TRUE)))
+      if (!is.null(var_labels) && length(var_labels)) {
+        found <- grepl(search_pattern, var_labels)
+        pos2 <- match(names(var_labels)[found], colnames(data))
         # find in near distanc?
         if (fuzzy) {
-          found <- .fuzzy_grep(x = labels, pattern = search_pattern)
+          found <- .fuzzy_grep(x = var_labels, pattern = search_pattern)
           if (length(found)) {
-            pos2 <- c(pos2, match(names(labels)[found], colnames(data)))
+            pos2 <- c(pos2, match(names(var_labels)[found], colnames(data)))
           }
         }
       }
@@ -129,7 +129,7 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE)
   pos <- unique(pos)
 
   # variable labels of matching variables
-  labels <- vapply(
+  var_labels <- vapply(
     colnames(data[pos]),
     function(i) {
       l <- attr(data[[i]], "label", exact = TRUE)
@@ -145,7 +145,7 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE)
   out <- data.frame(
     index = pos,
     column = colnames(data)[pos],
-    labels = labels,
+    labels = var_labels,
     stringsAsFactors = FALSE
   )
   # no row names

diff --git a/R/data_separate.R b/R/data_separate.R
@@ -254,7 +254,7 @@ data_separate <- function(data,
     # catch error
     if (is.null(separated_columns)) {
       insight::format_error(
-        "Something went wrong. Probably the number of provided column names did not match number of newly created columns?"
+        "Something went wrong. Probably the number of provided column names did not match number of newly created columns?" # nolint
       )
     }
 
@@ -264,14 +264,12 @@ data_separate <- function(data,
     # if no column names provided, use standard names
     if (is.null(new_columns[[sep_column]])) {
       new_column_names <- paste0(sep_column, "_", seq_along(out))
-    } else {
+    } else if (make_unique_colnames) {
       # if we have multiple columns that were separated, we avoid duplicated
       # column names of created variables by appending name of original column
-      if (make_unique_colnames) {
-        new_column_names <- paste0(sep_column, "_", new_columns[[sep_column]])
-      } else {
-        new_column_names <- new_columns[[sep_column]]
-      }
+      new_column_names <- paste0(sep_column, "_", new_columns[[sep_column]])
+    } else {
+      new_column_names <- new_columns[[sep_column]]
     }
 
     colnames(out) <- new_column_names
@@ -338,29 +336,27 @@ data_separate <- function(data,
       out <- rep(NA_character_, times = n_cols)
     } else if (n_values > n_cols) {
       # we have more values than required - drop extra columns
-      if (extra == "drop_left") {
-        out <- i[(n_values - n_cols + 1):n_values]
-      } else if (extra == "drop_right") {
-        out <- i[1:n_cols]
-      } else if (extra == "merge_left") {
-        out <- paste(i[1:(n_values - n_cols + 1)], collapse = " ")
-        out <- c(out, i[(n_values - n_cols + 2):n_values])
-      } else {
-        out <- i[1:(n_cols - 1)]
-        out <- c(out, paste(i[n_cols:n_values], collapse = " "))
-      }
+      out <- switch(extra,
+        drop_left = i[(n_values - n_cols + 1):n_values],
+        drop_right = i[1:n_cols],
+        merge_left = {
+          tmp <- paste(i[1:(n_values - n_cols + 1)], collapse = " ")
+          c(tmp, i[(n_values - n_cols + 2):n_values])
+        },
+        {
+          tmp <- i[1:(n_cols - 1)]
+          c(tmp, paste(i[n_cols:n_values], collapse = " "))
+        }
+      )
       warn_extra <- TRUE
     } else if (n_values < n_cols) {
       # we have fewer values than required - fill columns
-      if (fill == "left") {
-        out <- c(rep(NA_character_, times = n_cols - n_values), i)
-      } else if (fill == "right") {
-        out <- c(i, rep(NA_character_, times = n_cols - n_values))
-      } else if (fill == "value_left") {
-        out <- c(rep(i[1], times = n_cols - n_values), i)
-      } else {
-        out <- c(i, rep(i[length(i)], times = n_cols - n_values))
-      }
+      out <- switch(fill,
+        left = c(rep(NA_character_, times = n_cols - n_values), i),
+        right = c(i, rep(NA_character_, times = n_cols - n_values)),
+        value_left = c(rep(i[1], times = n_cols - n_values), i),
+        c(i, rep(i[length(i)], times = n_cols - n_values))
+      )
       warn_fill <- TRUE
     } else {
       out <- i

diff --git a/R/describe_distribution.R b/R/describe_distribution.R
@@ -82,18 +82,18 @@
   num_el <- which(vapply(x, is.numeric, FUN.VALUE = logical(1L)))
 
   # get elements names as is
-  # ex: list(mtcars$mpg, mtcars$cyl) -> c("mtcars$mpg", "mtcars$cyl")
+  # ex: `list(mtcars$mpg, mtcars$cyl) -> c("mtcars$mpg", "mtcars$cyl")`
   nm <- vapply(sys.call()[[2]], insight::safe_deparse, FUN.VALUE = character(1L))[-1]
 
-  if (!isTRUE(include_factors)) {
-    x <- x[num_el]
+  if (isTRUE(include_factors)) {
+    x <- x[c(num_el, factor_el)]
     if (length(nm) != 0) {
-      nm <- nm[num_el]
+      nm <- nm[c(num_el, factor_el)]
     }
   } else {
-    x <- x[c(num_el, factor_el)]
+    x <- x[num_el]
     if (length(nm) != 0) {
-      nm <- nm[c(num_el, factor_el)]
+      nm <- nm[num_el]
     }
   }
 
@@ -123,12 +123,12 @@
   }))
 
 
-  if (!is.null(names(x))) {
-    empty_names <- which(names(x) == "")
+  if (is.null(names(x))) {
+    new_names <- nm
+  } else {
+    empty_names <- which(!nzchar(names(x), keepNA = TRUE))
     new_names <- names(x)
     new_names[empty_names] <- nm[empty_names]
-  } else {
-    new_names <- nm
   }
 
   out$Variable <- new_names

diff --git a/R/labels_to_levels.R b/R/labels_to_levels.R
@@ -79,7 +79,7 @@ labels_to_levels.data.frame <- function(x,
   # create the new variables and updates "select", so new variables are processed
   if (!isFALSE(append)) {
     # process arguments
-    args <- .process_append(
+    arguments <- .process_append(
       x,
       select,
       append,
@@ -89,8 +89,8 @@ labels_to_levels.data.frame <- function(x,
       keep_character = FALSE
     )
     # update processed arguments
-    x <- args$x
-    select <- args$select
+    x <- arguments$x
+    select <- arguments$select
   }
 
   x[select] <- lapply(

diff --git a/R/to_numeric.R b/R/to_numeric.R
@@ -17,6 +17,12 @@
 #' @inheritParams find_columns
 #' @inheritParams categorize
 #'
+#' @note By default, `to_numeric()` converts factors into "binary" dummies, i.e.
+#' each factor level is converted into a separate column filled with a binary
+#' 0-1 value. If only one column is required, use `dummy_factors = FALSE`. If
+#' you want to preserve the original factor levels (in case these represent
+#' numeric values), use `preserve_levels = TRUE`.
+#'
 #' @section Selection of variables - `select` argument:
 #' For most functions that have a `select` argument the complete input data
 #' frame is returned, even when `select` only selects a range of variables.
@@ -34,6 +40,8 @@
 #' x <- as.factor(mtcars$gear)
 #' to_numeric(x, dummy_factors = FALSE)
 #' to_numeric(x, dummy_factors = FALSE, preserve_levels = TRUE)
+#' # same as:
+#' coerce_to_numeric(x)
 #'
 #' @return A data frame of numeric variables.
 #'
@@ -211,15 +219,13 @@ to_numeric.factor <- function(x,
         # if the first observation was missing, add NA row and bind data frame
         if (i == 1 && na_values[i] == 1) {
           out <- rbind(NA, out)
-        } else {
+        } else if (na_values[i] == rows_x) {
           # if the last observation was NA, add NA row to data frame
-          if (na_values[i] == rows_x) {
-            out <- rbind(out, NA)
-          } else {
-            # else, pick rows from beginning to current NA value, add NA,
-            # and rbind the remaining rows
-            out <- rbind(out[1:(na_values[i] - 1), ], NA, out[na_values[i]:nrow(out), ])
-          }
+          out <- rbind(out, NA)
+        } else {
+          # else, pick rows from beginning to current NA value, add NA,
+          # and rbind the remaining rows
+          out <- rbind(out[1:(na_values[i] - 1), ], NA, out[na_values[i]:nrow(out), ])
         }
       }
       rownames(out) <- NULL

diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd
diff --git a/tests/testthat/_snaps/describe_distribution.md b/tests/testthat/_snaps/describe_distribution.md
@@ -16,3 +16,12 @@
       -----------------------------------------------------------
            |    | [VC, OJ] |        0 |    -2.07 | 60 |         0
 
+# describe_distribution formatting
+
+    Code
+      format(x)
+    Output
+      Mean |   SD |  IQR |        Range |  Quartiles | Skewness | Kurtosis |   n | n_Missing
+      --------------------------------------------------------------------------------------
+      3.06 | 0.44 | 0.52 | [2.00, 4.40] | 2.80, 3.30 |     0.32 |     0.23 | 150 |         0
+