diff --git a/R/row_count.R b/R/row_count.R index 7ece22514..f6e946952 100644 --- a/R/row_count.R +++ b/R/row_count.R @@ -2,13 +2,22 @@ #' @name row_count #' @description `row_count()` mimics base R's `rowSums()`, with sums for a #' specific value indicated by `count`. Hence, it is equivalent to -#' `rowSums(x == count, na.rm = TRUE)`. +#' `rowSums(x == count, na.rm = TRUE)`, but offers some more options, including +#' strict comparisons: Comparisons using `==` coerce values to atomic vectors, +#' thus both `2 == 2` and `"2" == 2` are `TRUE`. In `row_count()`, it is also +#' possible to make "type safe" comparisons using the `exact` argument, where +#' `"2" == 2` is not treated as identical. #' #' @param data A data frame with at least two columns, where number of specific #' values are counted row-wise. #' @param count The value for which the row sum should be computed. May be a #' numeric value, a character string (for factors or character vectors), `NA` or #' `Inf`. +#' @param exact Logical, if `TRUE`, `count` matches only values of same type +#' (i.e. when `count = 2`, the value `"2"` is not counted and vice versa). +#' By default, when `exact = FALSE`, `count = 2` also matches `"2"`. See +#' 'Examples'. 
+#' #' @inheritParams extract_column_names #' @inheritParams row_means #' @@ -27,11 +36,23 @@ #' # count all missing values per row #' row_count(dat, count = NA) #' +#' dat <- data.frame( +#' c1 = c("1", "2", NA, "3"), +#' c2 = c(NA, "2", NA, "3"), +#' c3 = c(NA, 4, NA, NA), +#' c4 = c(2, 3, 7, Inf) +#' ) +#' # count all 2s and "2"s per row +#' row_count(dat, count = 2) +#' # only count 2s, but not "2"s +#' row_count(dat, count = 2, exact = TRUE) +#' #' @export row_count <- function(data, select = NULL, exclude = NULL, count = NULL, + exact = FALSE, ignore_case = FALSE, regex = FALSE, verbose = TRUE) { @@ -63,6 +84,18 @@ row_count <- function(data, if (is.na(count)) { rowSums(is.na(data)) } else { + # comparisons in R using == coerce values into an atomic vector, i.e. + # 2 == "2" is TRUE. If `exact = TRUE`, we only want 2 == 2 or "2" == "2". + # to achieve this, we simply compute the comparison on numeric or non-numeric + # columns only + if (isTRUE(exact)) { + numeric_columns <- vapply(data, is.numeric, TRUE) + if (is.numeric(count)) { + data <- data[numeric_columns] + } else { + data <- data[!numeric_columns] + } + } rowSums(data == count, na.rm = TRUE) } } diff --git a/man/row_count.Rd b/man/row_count.Rd index 820baad8c..02389e781 100644 --- a/man/row_count.Rd +++ b/man/row_count.Rd @@ -9,6 +9,7 @@ row_count( select = NULL, exclude = NULL, count = NULL, + exact = FALSE, ignore_case = FALSE, regex = FALSE, verbose = TRUE @@ -60,6 +61,11 @@ excludes no columns.} numeric value, a character string (for factors or character vectors), \code{NA} or \code{Inf}.} +\item{exact}{Logical, if \code{TRUE}, \code{count} matches only values of same type +(i.e. when \code{count = 2}, the value \code{"2"} is not counted and vice versa). +By default, when \code{exact = FALSE}, \code{count = 2} also matches \code{"2"}. 
See +'Examples'.} + \item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or a regular expression is used in \code{select}, ignores lower/upper case in the search pattern when matching against variable names.} @@ -81,7 +87,11 @@ A vector with row-wise counts of values specified in \code{count}. \description{ \code{row_count()} mimics base R's \code{rowSums()}, with sums for a specific value indicated by \code{count}. Hence, it is equivalent to -\code{rowSums(x == count, na.rm = TRUE)}. +\code{rowSums(x == count, na.rm = TRUE)}, but offers some more options, including +strict comparisons: Comparisons using \code{==} coerce values to atomic vectors, +thus both \code{2 == 2} and \code{"2" == 2} are \code{TRUE}. In \code{row_count()}, it is also +possible to make "type safe" comparisons using the \code{exact} argument, where +\code{"2" == 2} is not treated as identical. } \examples{ dat <- data.frame( @@ -96,4 +106,15 @@ row_count(dat, count = 2) # count all missing values per row row_count(dat, count = NA) +dat <- data.frame( + c1 = c("1", "2", NA, "3"), + c2 = c(NA, "2", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf) +) +# count all 2s and "2"s per row +row_count(dat, count = 2) +# only count 2s, but not "2"s +row_count(dat, count = 2, exact = TRUE) + } diff --git a/tests/testthat/test-row_count.R b/tests/testthat/test-row_count.R index f40c7f69b..741d7fc41 100644 --- a/tests/testthat/test-row_count.R +++ b/tests/testthat/test-row_count.R @@ -17,9 +17,21 @@ test_that("row_count", { expect_identical(row_count(d_mn, count = Inf), c(0, 0, 0, 1)) }) -test_that("row_means, errors or messages", { +test_that("row_count, errors or messages", { data(iris) expect_error(expect_warning(row_count(iris, select = "abc")), regex = "must be a valid") - expect_error(expect_warning(row_count(iris, select = "abc", count = 3)), regex = "no columns") + expect_error(expect_warning(row_count(iris, select = "abc", count = 3)), regex = "No columns") 
expect_error(row_count(iris[1], count = 3), regex = "with at least") }) + +test_that("row_count, exact match", { + d_mn <- data.frame( + c1 = c("1", "2", NA, "3"), + c2 = c(NA, "2", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf) + ) + expect_identical(row_count(d_mn, count = 2, exact = FALSE), c(1, 2, 0, 0)) + expect_identical(row_count(d_mn, count = 2, exact = TRUE), c(1, 0, 0, 0)) + expect_identical(row_count(d_mn, count = "2", exact = TRUE), c(0, 2, 0, 0)) +})