diff --git a/R/row_count.R b/R/row_count.R index b4ac2f385..a7dd509ca 100644 --- a/R/row_count.R +++ b/R/row_count.R @@ -1,7 +1,7 @@ #' @title Count specific values row-wise #' @name row_count #' @description `row_count()` mimics base R's `rowSums()`, with sums for a -#' specific value indicated by `count`. Hence, it is equivalent to +#' specific value indicated by `count`. Hence, it is similar to #' `rowSums(x == count, na.rm = TRUE)`, but offers some more options, including #' strict comparisons. Comparisons using `==` coerce values to atomic vectors, #' thus both `2 == 2` and `"2" == 2` are `TRUE`. In `row_count()`, it is also @@ -14,9 +14,10 @@ #' numeric value, a character string (for factors or character vectors), `NA` or #' `Inf`. #' @param allow_coercion Logical. If `FALSE`, `count` matches only values of same -#' type (i.e. when `count = 2`, the value `"2"` is not counted and vice versa). -#' By default, when `allow_coercion = TRUE`, `count = 2` also matches `"2"`. -#' See 'Examples'. +#' class (i.e. when `count = 2`, the value `"2"` is not counted and vice versa). +#' By default, when `allow_coercion = TRUE`, `count = 2` also matches `"2"`. In +#' order to count factor levels in the data, use `count = factor("level")`. See +#' 'Examples'. #' #' @inheritParams extract_column_names #' @inheritParams row_means @@ -45,7 +46,18 @@ #' # count all 2s and "2"s per row #' row_count(dat, count = 2) #' # only count 2s, but not "2"s -#' row_count(dat, count = 2, allow_coercion = TRUE) +#' row_count(dat, count = 2, allow_coercion = FALSE) +#' +#' dat <- data.frame( +#' c1 = factor(c("1", "2", NA, "3")), +#' c2 = c("2", "1", NA, "3"), +#' c3 = c(NA, 4, NA, NA), +#' c4 = c(2, 3, 7, Inf) +#' ) +#' # find only character "2"s +#' row_count(dat, count = "2", allow_coercion = FALSE) +#' # find only factor level "2"s +#' row_count(dat, count = factor("2"), allow_coercion = FALSE) #' #' @export row_count <- function(data, @@ -84,23 +96,29 @@ row_count <- function(data, if (ncol(data) < 2) { insight::format_error("`data` must be a data frame with at least two numeric columns.") } - # special case: count missing if (is.na(count)) { rowSums(is.na(data)) } else { # comparisons in R using == coerce values into a atomic vector, i.e. # 2 == "2" is TRUE. If `allow_coercion = FALSE`, we only want 2 == 2 or - # "2" == "2". to achieve this, we simply compute the comparison on numeric - # or non-numeric columns only + # "2" == "2" (i.e. we want exact types to be compared only) if (isFALSE(allow_coercion)) { - numeric_columns <- vapply(data, is.numeric, TRUE) - if (is.numeric(count)) { - data <- data[numeric_columns] - } else { - data <- data[!numeric_columns] + # we need the "type" of the count-value - we use class() instead of typeof(), + # because the latter sometimes returns unsuitable classes/types. compare + # typeof(as.Date("2020-01-01")), which returns "double". + count_type <- class(count)[1] + valid_columns <- vapply(data, function(i) identical(class(i)[1], count_type), TRUE) + # check if any columns left? + if (!any(valid_columns)) { + insight::format_error("No column has same type as the value provided in `count`. Set `allow_coercion = TRUE` or specify a valid value for `count`.") # nolint } + data <- data[valid_columns] } + # coerce - we have only valid columns anyway, and we need to coerce factors + # to vectors, else comparison with `==` errors. + count <- as.vector(count) + # finally, count rowSums(data == count, na.rm = TRUE) } } diff --git a/man/row_count.Rd b/man/row_count.Rd index 3bd2a0281..d24404f5f 100644 --- a/man/row_count.Rd +++ b/man/row_count.Rd @@ -62,9 +62,10 @@ numeric value, a character string (for factors or character vectors), \code{NA} \code{Inf}.} \item{allow_coercion}{Logical. If \code{FALSE}, \code{count} matches only values of same -type (i.e. when \code{count = 2}, the value \code{"2"} is not counted and vice versa). -By default, when \code{allow_coercion = TRUE}, \code{count = 2} also matches \code{"2"}. -See 'Examples'.} +class (i.e. when \code{count = 2}, the value \code{"2"} is not counted and vice versa). +By default, when \code{allow_coercion = TRUE}, \code{count = 2} also matches \code{"2"}. In +order to count factor levels in the data, use \code{count = factor("level")}. See +'Examples'.} \item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or a regular expression is used in \code{select}, ignores lower/upper case in the @@ -86,7 +87,7 @@ A vector with row-wise counts of values specified in \code{count}. } \description{ \code{row_count()} mimics base R's \code{rowSums()}, with sums for a -specific value indicated by \code{count}. Hence, it is equivalent to +specific value indicated by \code{count}. Hence, it is similar to \code{rowSums(x == count, na.rm = TRUE)}, but offers some more options, including strict comparisons. Comparisons using \code{==} coerce values to atomic vectors, thus both \code{2 == 2} and \code{"2" == 2} are \code{TRUE}. In \code{row_count()}, it is also @@ -115,6 +116,17 @@ dat <- data.frame( # count all 2s and "2"s per row row_count(dat, count = 2) # only count 2s, but not "2"s -row_count(dat, count = 2, allow_coercion = TRUE) +row_count(dat, count = 2, allow_coercion = FALSE) + +dat <- data.frame( + c1 = factor(c("1", "2", NA, "3")), + c2 = c("2", "1", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf) +) +# find only character "2"s +row_count(dat, count = "2", allow_coercion = FALSE) +# find only factor level "2"s +row_count(dat, count = factor("2"), allow_coercion = FALSE) } diff --git a/tests/testthat/test-row_count.R b/tests/testthat/test-row_count.R index 46125587b..0c7d67691 100644 --- a/tests/testthat/test-row_count.R +++ b/tests/testthat/test-row_count.R @@ -37,4 +37,21 @@ test_that("row_count, allow_coercion match", { expect_identical(row_count(d_mn, count = 2, allow_coercion = TRUE), c(1, 2, 0, 0)) expect_identical(row_count(d_mn, count = 2, allow_coercion = FALSE), c(1, 0, 0, 0)) expect_identical(row_count(d_mn, count = "2", allow_coercion = FALSE), c(0, 2, 0, 0)) + expect_identical(row_count(d_mn, count = factor("2"), allow_coercion = TRUE), c(1, 2, 0, 0)) + expect_error(row_count(d_mn, count = factor("2"), allow_coercion = FALSE), regex = "No column has") + + # mix character / factor + d_mn <- data.frame( + c1 = factor(c("1", "2", NA, "3")), + c2 = c("2", "1", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf), + stringsAsFactors = FALSE + ) + expect_identical(row_count(d_mn, count = 2, allow_coercion = TRUE), c(2, 1, 0, 0)) + expect_identical(row_count(d_mn, count = 2, allow_coercion = FALSE), c(1, 0, 0, 0)) + expect_identical(row_count(d_mn, count = "2", allow_coercion = FALSE), c(1, 0, 0, 0)) + expect_identical(row_count(d_mn, count = "2", allow_coercion = TRUE), c(2, 1, 0, 0)) + expect_identical(row_count(d_mn, count = factor("2"), allow_coercion = FALSE), c(0, 1, 0, 0)) + expect_identical(row_count(d_mn, count = factor("2"), allow_coercion = TRUE), c(2, 1, 0, 0)) })