From e2d6091ecec3be3857910f21d2576e0375e3ddbb Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sat, 22 Jun 2024 15:53:38 +0200
Subject: [PATCH 1/6] Unexpected missing values in `data_tabulate()` Fixes #514

---
 R/data_summary.R                    | 14 +++++++-------
 R/data_tabulate.R                   | 26 ++++++++++++-------------
 R/data_xtabulate.R                  |  6 +++---
 man/data_summary.Rd                 |  4 ++--
 man/data_tabulate.Rd                | 12 ++++++------
 tests/testthat/test-data_summary.R  |  2 +-
 tests/testthat/test-data_tabulate.R | 30 ++++++++++++++---------------
 7 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/R/data_summary.R b/R/data_summary.R
index 8d15f8483..61cb3a04c 100644
--- a/R/data_summary.R
+++ b/R/data_summary.R
@@ -8,7 +8,7 @@
 #' @param by Optional character string, indicating the name of a variable in `x`.
 #' If supplied, the data will be split by this variable and summary statistics
 #' will be computed for each group.
-#' @param include_na Logical. If `TRUE`, missing values are included as a level
+#' @param remove_na Logical. If `TRUE`, missing values are included as a level
 #' in the grouping variable. If `FALSE`, missing values are omitted from the
 #' grouping variable.
 #' @param ... One or more named expressions that define the new variable name
@@ -57,8 +57,8 @@ data_summary <- function(x, ...) {
 
 
 #' @export
-data_summary.matrix <- function(x, ..., by = NULL, include_na = TRUE) {
-  data_summary(as.data.frame(x), ..., by = by, include_na = include_na)
+data_summary.matrix <- function(x, ..., by = NULL, remove_na = FALSE) {
+  data_summary(as.data.frame(x), ..., by = by, remove_na = remove_na)
 }
 
 
@@ -70,7 +70,7 @@ data_summary.default <- function(x, ...) {
 
 #' @rdname data_summary
 #' @export
-data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
+data_summary.data.frame <- function(x, ..., by = NULL, remove_na = FALSE) {
   dots <- eval(substitute(alist(...)))
 
   # do we have any expression at all?
@@ -103,7 +103,7 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
     }
     # split data, add NA levels, if requested
     l <- lapply(x[by], function(i) {
-      if (include_na && anyNA(i)) {
+      if (remove_na && anyNA(i)) {
         addNA(i)
       } else {
         i
@@ -137,7 +137,7 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
 
 
 #' @export
-data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) {
+data_summary.grouped_df <- function(x, ..., by = NULL, remove_na = FALSE) {
   # extract group variables
   grps <- attr(x, "groups", exact = TRUE)
   group_variables <- data_remove(grps, ".rows")
@@ -148,7 +148,7 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) {
   # remove information specific to grouped df's
   attr(x, "groups") <- NULL
   class(x) <- "data.frame"
-  data_summary(x, ..., by = by, include_na = include_na)
+  data_summary(x, ..., by = by, remove_na = remove_na)
 }
 
 
diff --git a/R/data_tabulate.R b/R/data_tabulate.R
index 74f4f2e03..08b03e706 100644
--- a/R/data_tabulate.R
+++ b/R/data_tabulate.R
@@ -15,7 +15,7 @@
 #' factor levels are dropped from the frequency table.
 #' @param name Optional character string, which includes the name that is used
 #' for printing.
-#' @param include_na Logical, if `TRUE`, missing values are included in the
+#' @param remove_na Logical, if `TRUE`, missing values are included in the
 #' frequency or crosstable, else missing values are omitted.
 #' @param collapse Logical, if `TRUE` collapses multiple tables into one larger
 #' table for printing. This affects only printing, not the returned object.
@@ -40,7 +40,7 @@
 #' (missing) values by default. The first column indicates values of `x`, the
 #' first row indicates values of `by` (including missing values). The last row
 #' and column contain the total frequencies for each row and column, respectively.
-#' Setting `include_na = FALSE` will omit missing values from the crosstable.
+#' Setting `remove_na = FALSE` will omit missing values from the crosstable.
 #' Setting `proportions` to `"row"` or `"column"` will add row or column
 #' percentages. Setting `proportions` to `"full"` will add relative frequencies
 #' for the full table.
@@ -62,7 +62,7 @@
 #' data_tabulate(efc$c172code)
 #'
 #' # drop missing values
-#' data_tabulate(efc$c172code, include_na = FALSE)
+#' data_tabulate(efc$c172code, remove_na = FALSE)
 #'
 #' # data frame
 #' data_tabulate(efc, c("e42dep", "c172code"))
@@ -109,7 +109,7 @@
 #'   efc$c172code,
 #'   by = efc$e16sex,
 #'   proportions = "column",
-#'   include_na = FALSE
+#'   remove_na = FALSE
 #' )
 #'
 #' # round percentages
@@ -133,7 +133,7 @@ data_tabulate.default <- function(x,
                                   by = NULL,
                                   drop_levels = FALSE,
                                   weights = NULL,
-                                  include_na = TRUE,
+                                  remove_na = FALSE,
                                   proportions = NULL,
                                   name = NULL,
                                   verbose = TRUE,
@@ -163,7 +163,7 @@ data_tabulate.default <- function(x,
       x,
       by = by,
       weights = weights,
-      include_na = include_na,
+      remove_na = remove_na,
       proportions = proportions,
       obj_name = obj_name,
       group_variable = group_variable
@@ -172,12 +172,12 @@ data_tabulate.default <- function(x,
 
   # frequency table
   if (is.null(weights)) {
-    if (include_na) {
+    if (remove_na) {
       freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL)
     } else {
       freq_table <- tryCatch(table(x), error = function(e) NULL)
     }
-  } else if (include_na) {
+  } else if (remove_na) {
     # weighted frequency table, including NA
     freq_table <- tryCatch(
       stats::xtabs(
@@ -218,7 +218,7 @@ data_tabulate.default <- function(x,
 
   out$`Raw %` <- 100 * out$N / sum(out$N)
   # if we have missing values, we add a row with NA
-  if (include_na) {
+  if (remove_na) {
     out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA)
     valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE)
   } else {
@@ -271,7 +271,7 @@ data_tabulate.data.frame <- function(x,
                                      by = NULL,
                                      drop_levels = FALSE,
                                      weights = NULL,
-                                     include_na = TRUE,
+                                     remove_na = FALSE,
                                      proportions = NULL,
                                      collapse = FALSE,
                                      verbose = TRUE,
@@ -297,7 +297,7 @@ data_tabulate.data.frame <- function(x,
       proportions = proportions,
       drop_levels = drop_levels,
       weights = weights,
-      include_na = include_na,
+      remove_na = remove_na,
       name = i,
       verbose = verbose,
       ...
@@ -326,7 +326,7 @@ data_tabulate.grouped_df <- function(x,
                                      proportions = NULL,
                                      drop_levels = FALSE,
                                      weights = NULL,
-                                     include_na = TRUE,
+                                     remove_na = FALSE,
                                      collapse = FALSE,
                                      verbose = TRUE,
                                      ...) {
@@ -362,7 +362,7 @@ data_tabulate.grouped_df <- function(x,
       verbose = verbose,
       drop_levels = drop_levels,
       weights = weights,
-      include_na = include_na,
+      remove_na = remove_na,
       by = by,
       proportions = proportions,
       group_variable = group_variable,
diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R
index 5c387ff95..81ba26c59 100644
--- a/R/data_xtabulate.R
+++ b/R/data_xtabulate.R
@@ -3,7 +3,7 @@
 .crosstable <- function(x,
                         by,
                         weights = NULL,
-                        include_na = TRUE,
+                        remove_na = FALSE,
                         proportions = NULL,
                         obj_name = NULL,
                         group_variable = NULL) {
@@ -12,12 +12,12 @@
   }
   # frequency table
   if (is.null(weights)) {
-    if (include_na) {
+    if (remove_na) {
       x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL)
     } else {
       x_table <- tryCatch(table(x, by), error = function(e) NULL)
     }
-  } else if (include_na) {
+  } else if (remove_na) {
     # weighted frequency table, including NA
     x_table <- tryCatch(
       stats::xtabs(
diff --git a/man/data_summary.Rd b/man/data_summary.Rd
index ccbf4c524..70d2e2576 100644
--- a/man/data_summary.Rd
+++ b/man/data_summary.Rd
@@ -7,7 +7,7 @@
 \usage{
 data_summary(x, ...)
 
-\method{data_summary}{data.frame}(x, ..., by = NULL, include_na = TRUE)
+\method{data_summary}{data.frame}(x, ..., by = NULL, remove_na = FALSE)
 }
 \arguments{
 \item{x}{A (grouped) data frame.}
@@ -22,7 +22,7 @@ summary function \code{n()} can be used to count the number of observations.}
 If supplied, the data will be split by this variable and summary statistics
 will be computed for each group.}
 
-\item{include_na}{Logical. If \code{TRUE}, missing values are included as a level
+\item{remove_na}{Logical. If \code{TRUE}, missing values are included as a level
 in the grouping variable. If \code{FALSE}, missing values are omitted from the
 grouping variable.}
 }
diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd
index 3f17bb21c..58a0c2954 100644
--- a/man/data_tabulate.Rd
+++ b/man/data_tabulate.Rd
@@ -14,7 +14,7 @@ data_tabulate(x, ...)
   by = NULL,
   drop_levels = FALSE,
   weights = NULL,
-  include_na = TRUE,
+  remove_na = FALSE,
   proportions = NULL,
   name = NULL,
   verbose = TRUE,
@@ -30,7 +30,7 @@ data_tabulate(x, ...)
   by = NULL,
   drop_levels = FALSE,
   weights = NULL,
-  include_na = TRUE,
+  remove_na = FALSE,
   proportions = NULL,
   collapse = FALSE,
   verbose = TRUE,
@@ -62,7 +62,7 @@ factor levels are dropped from the frequency table.}
 \item{weights}{Optional numeric vector of weights. Must be of the same length
 as \code{x}. If \code{weights} is supplied, weighted frequencies are calculated.}
 
-\item{include_na}{Logical, if \code{TRUE}, missing values are included in the
+\item{remove_na}{Logical, if \code{TRUE}, missing values are included in the
 frequency or crosstable, else missing values are omitted.}
 
 \item{proportions}{Optional character string, indicating the type of
@@ -173,7 +173,7 @@ If \code{by} is supplied, a crosstable is created. The crosstable includes \verb
 (missing) values by default. The first column indicates values of \code{x}, the
 first row indicates values of \code{by} (including missing values). The last row
 and column contain the total frequencies for each row and column, respectively.
-Setting \code{include_na = FALSE} will omit missing values from the crosstable.
+Setting \code{remove_na = FALSE} will omit missing values from the crosstable.
 Setting \code{proportions} to \code{"row"} or \code{"column"} will add row or column
 percentages. Setting \code{proportions} to \code{"full"} will add relative frequencies
 for the full table.
@@ -189,7 +189,7 @@ data(efc)
 data_tabulate(efc$c172code)
 
 # drop missing values
-data_tabulate(efc$c172code, include_na = FALSE)
+data_tabulate(efc$c172code, remove_na = FALSE)
 
 # data frame
 data_tabulate(efc, c("e42dep", "c172code"))
@@ -236,7 +236,7 @@ data_tabulate(
   efc$c172code,
   by = efc$e16sex,
   proportions = "column",
-  include_na = FALSE
+  remove_na = FALSE
 )
 
 # round percentages
diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R
index 746d4c51a..c8fac4a88 100644
--- a/tests/testthat/test-data_summary.R
+++ b/tests/testthat/test-data_summary.R
@@ -175,7 +175,7 @@ test_that("data_summary, with NA", {
   data(efc, package = "datawizard")
   out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code")
   expect_snapshot(print(out))
-  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", include_na = FALSE)
+  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", remove_na = FALSE)
   expect_snapshot(print(out))
   # sorting for multiple groups
   out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code"))
diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R
index 5108e29c2..6c443a860 100644
--- a/tests/testthat/test-data_tabulate.R
+++ b/tests/testthat/test-data_tabulate.R
@@ -287,11 +287,11 @@ test_that("data_tabulate exclude/include missing values", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
   out <- data_tabulate(efc$c172code)
   expect_identical(out$N, c(8L, 66L, 16L, 10L))
-  out <- data_tabulate(efc$c172code, include_na = FALSE)
+  out <- data_tabulate(efc$c172code, remove_na = FALSE)
   expect_identical(out$N, c(8L, 66L, 16L))
   out <- data_tabulate(efc$c172code, weights = efc$weights)
   expect_identical(out$N, c(10, 67, 15, 13))
-  out <- data_tabulate(efc$c172code, include_na = FALSE, weights = efc$weights)
+  out <- data_tabulate(efc$c172code, remove_na = FALSE, weights = efc$weights)
   expect_identical(out$N, c(10, 67, 15))
 })
 
@@ -305,17 +305,17 @@ test_that("data_tabulate, cross tables", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")))
-  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE)))
   expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)))
-  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE, weights = efc$weights))) # nolint
   expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = FALSE)))
   expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", weights = efc$weights)))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = FALSE, weights = efc$weights))) # nolint
   expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = FALSE)))
   expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", weights = "weights")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE, weights = "weights"))) # nolint
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = FALSE, weights = "weights"))) # nolint
 })
 
 test_that("data_tabulate, cross tables, HTML", {
@@ -326,11 +326,11 @@ test_that("data_tabulate, cross tables, HTML", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")), "gt_tbl")
-  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE)), "gt_tbl") # nolint
   expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)), "gt_tbl") # nolint
-  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
   expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")), "gt_tbl")
-  expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
 })
 
 test_that("data_tabulate, cross tables, grouped df", {
@@ -377,9 +377,9 @@ test_that("data_tabulate, cross tables, markdown", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")))
-  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)))
+  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE)))
   expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)))
-  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE, weights = efc$weights))) # nolint
 })
 
 
@@ -389,12 +389,12 @@ test_that("data_tabulate, validate against table", {
   data(mtcars)
   # frequency table
   out1 <- as.data.frame(table(mtcars$cyl))
-  out2 <- data_tabulate(mtcars$cyl, include_na = FALSE)
+  out2 <- data_tabulate(mtcars$cyl, remove_na = FALSE)
   expect_identical(out1$Freq, out2$N)
   # crosstable
   out1 <- data_arrange(as.data.frame(table(mtcars$cyl, mtcars$gear)), c("Var1", "Var2"))
   out2 <- data_rename(data_to_long(
-    as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, include_na = FALSE)), 2:4,
+    as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, remove_na = FALSE)), 2:4,
     names_to = "Var2", values_to = "Freq"
   ), "mtcars$cyl", "Var1")
   out1[[2]] <- as.character(out1[[2]])

From f9aa53e10ebacce86235b83e4a939145befed2d6 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sat, 22 Jun 2024 15:57:34 +0200
Subject: [PATCH 2/6] reverse usage

---
 R/data_summary.R                          |  6 +--
 R/data_tabulate.R                         | 30 +++++++-------
 R/data_xtabulate.R                        | 20 ++++-----
 man/data_tabulate.Rd                      |  4 +-
 tests/testthat/_snaps/data_summary.new.md | 50 +++++++++++++++++++++++
 5 files changed, 80 insertions(+), 30 deletions(-)
 create mode 100644 tests/testthat/_snaps/data_summary.new.md

diff --git a/R/data_summary.R b/R/data_summary.R
index 61cb3a04c..f77ebe013 100644
--- a/R/data_summary.R
+++ b/R/data_summary.R
@@ -103,10 +103,10 @@ data_summary.data.frame <- function(x, ..., by = NULL, remove_na = FALSE) {
     }
     # split data, add NA levels, if requested
     l <- lapply(x[by], function(i) {
-      if (remove_na && anyNA(i)) {
-        addNA(i)
-      } else {
+      if (remove_na || !anyNA(i)) {
         i
+      } else {
+        addNA(i)
       }
     })
     split_data <- split(x, l, drop = TRUE)
diff --git a/R/data_tabulate.R b/R/data_tabulate.R
index 08b03e706..4c0aa10b3 100644
--- a/R/data_tabulate.R
+++ b/R/data_tabulate.R
@@ -62,7 +62,7 @@
 #' data_tabulate(efc$c172code)
 #'
 #' # drop missing values
-#' data_tabulate(efc$c172code, remove_na = FALSE)
+#' data_tabulate(efc$c172code, remove_na = TRUE)
 #'
 #' # data frame
 #' data_tabulate(efc, c("e42dep", "c172code"))
@@ -109,7 +109,7 @@
 #'   efc$c172code,
 #'   by = efc$e16sex,
 #'   proportions = "column",
-#'   remove_na = FALSE
+#'   remove_na = TRUE
 #' )
 #'
 #' # round percentages
@@ -173,29 +173,29 @@ data_tabulate.default <- function(x,
   # frequency table
   if (is.null(weights)) {
     if (remove_na) {
-      freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL)
-    } else {
       freq_table <- tryCatch(table(x), error = function(e) NULL)
+    } else {
+      freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL)
     }
   } else if (remove_na) {
-    # weighted frequency table, including NA
+    # weighted frequency table, excluding NA
     freq_table <- tryCatch(
       stats::xtabs(
         weights ~ x,
-        data = data.frame(weights = weights, x = addNA(x)),
-        na.action = stats::na.pass,
-        addNA = TRUE
+        data = data.frame(weights = weights, x = x),
+        na.action = stats::na.omit,
+        addNA = FALSE
       ),
       error = function(e) NULL
     )
   } else {
-    # weighted frequency table, excluding NA
+    # weighted frequency table, including NA
     freq_table <- tryCatch(
       stats::xtabs(
         weights ~ x,
-        data = data.frame(weights = weights, x = x),
-        na.action = stats::na.omit,
-        addNA = FALSE
+        data = data.frame(weights = weights, x = addNA(x)),
+        na.action = stats::na.pass,
+        addNA = TRUE
       ),
       error = function(e) NULL
     )
@@ -219,11 +219,11 @@ data_tabulate.default <- function(x,
   out$`Raw %` <- 100 * out$N / sum(out$N)
   # if we have missing values, we add a row with NA
   if (remove_na) {
-    out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA)
-    valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE)
-  } else {
     out$`Valid %` <- 100 * out$N / sum(out$N)
     valid_n <- sum(out$N, na.rm = TRUE)
+  } else {
+    out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA)
+    valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE)
   }
   out$`Cumulative %` <- cumsum(out$`Valid %`)
 
diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R
index 81ba26c59..7943cbab9 100644
--- a/R/data_xtabulate.R
+++ b/R/data_xtabulate.R
@@ -13,29 +13,29 @@
   # frequency table
   if (is.null(weights)) {
     if (remove_na) {
-      x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL)
-    } else {
       x_table <- tryCatch(table(x, by), error = function(e) NULL)
+    } else {
+      x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL)
     }
   } else if (remove_na) {
-    # weighted frequency table, including NA
+    # weighted frequency table, excluding NA
     x_table <- tryCatch(
       stats::xtabs(
         weights ~ x + by,
-        data = data.frame(weights = weights, x = addNA(x), by = addNA(by)),
-        na.action = stats::na.pass,
-        addNA = TRUE
+        data = data.frame(weights = weights, x = x, by = by),
+        na.action = stats::na.omit,
+        addNA = FALSE
       ),
       error = function(e) NULL
     )
   } else {
-    # weighted frequency table, excluding NA
+    # weighted frequency table, including NA
     x_table <- tryCatch(
       stats::xtabs(
         weights ~ x + by,
-        data = data.frame(weights = weights, x = x, by = by),
-        na.action = stats::na.omit,
-        addNA = FALSE
+        data = data.frame(weights = weights, x = addNA(x), by = addNA(by)),
+        na.action = stats::na.pass,
+        addNA = TRUE
       ),
       error = function(e) NULL
     )
diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd
index 58a0c2954..6f572a05a 100644
--- a/man/data_tabulate.Rd
+++ b/man/data_tabulate.Rd
@@ -189,7 +189,7 @@ data(efc)
 data_tabulate(efc$c172code)
 
 # drop missing values
-data_tabulate(efc$c172code, remove_na = FALSE)
+data_tabulate(efc$c172code, remove_na = TRUE)
 
 # data frame
 data_tabulate(efc, c("e42dep", "c172code"))
@@ -236,7 +236,7 @@ data_tabulate(
   efc$c172code,
   by = efc$e16sex,
   proportions = "column",
-  remove_na = FALSE
+  remove_na = TRUE
 )
 
 # round percentages
diff --git a/tests/testthat/_snaps/data_summary.new.md b/tests/testthat/_snaps/data_summary.new.md
new file mode 100644
index 000000000..070c31ce8
--- /dev/null
+++ b/tests/testthat/_snaps/data_summary.new.md
@@ -0,0 +1,50 @@
+# data_summary, print
+
+    Code
+      print(out)
+    Output
+      am | gear |    MW |   SD
+      ------------------------
+       0 |    3 | 16.11 | 3.37
+       0 |    4 | 21.05 | 3.07
+       1 |    4 | 26.27 | 5.41
+       1 |    5 | 21.38 | 6.66
+
+# data_summary, with NA
+
+    Code
+      print(out)
+    Output
+      c172code |    MW
+      ----------------
+             1 | 87.12
+             2 | 94.05
+             3 | 75.00
+
+---
+
+    Code
+      print(out)
+    Output
+      c172code |    MW
+      ----------------
+             1 | 87.12
+             2 | 94.05
+             3 | 75.00
+
+---
+
+    Code
+      print(out)
+    Output
+      e42dep | c172code |     MW
+      --------------------------
+      1      |        2 |  17.00
+      2      |        2 |  34.25
+      3      |        1 |  39.50
+      3      |        2 |  52.44
+      3      |        3 |  52.00
+      4      |        1 | 134.75
+      4      |        2 | 119.26
+      4      |        3 |  88.80
+

From 388d9a037a65a495dcb55701a5a0ea7c9093dbd2 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sat, 22 Jun 2024 16:00:26 +0200
Subject: [PATCH 3/6] fix tests (FALSE -> TRUE)

---
 tests/testthat/_snaps/data_summary.new.md | 50 -----------------------
 tests/testthat/_snaps/data_tabulate.md    | 16 ++++----
 tests/testthat/test-data_summary.R        |  2 +-
 tests/testthat/test-data_tabulate.R       | 30 +++++++-------
 4 files changed, 24 insertions(+), 74 deletions(-)
 delete mode 100644 tests/testthat/_snaps/data_summary.new.md

diff --git a/tests/testthat/_snaps/data_summary.new.md b/tests/testthat/_snaps/data_summary.new.md
deleted file mode 100644
index 070c31ce8..000000000
--- a/tests/testthat/_snaps/data_summary.new.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# data_summary, print
-
-    Code
-      print(out)
-    Output
-      am | gear |    MW |   SD
-      ------------------------
-       0 |    3 | 16.11 | 3.37
-       0 |    4 | 21.05 | 3.07
-       1 |    4 | 26.27 | 5.41
-       1 |    5 | 21.38 | 6.66
-
-# data_summary, with NA
-
-    Code
-      print(out)
-    Output
-      c172code |    MW
-      ----------------
-             1 | 87.12
-             2 | 94.05
-             3 | 75.00
-
----
-
-    Code
-      print(out)
-    Output
-      c172code |    MW
-      ----------------
-             1 | 87.12
-             2 | 94.05
-             3 | 75.00
-
----
-
-    Code
-      print(out)
-    Output
-      e42dep | c172code |     MW
-      --------------------------
-      1      |        2 |  17.00
-      2      |        2 |  34.25
-      3      |        1 |  39.50
-      3      |        2 |  52.44
-      3      |        3 |  52.00
-      4      |        1 | 134.75
-      4      |        2 | 119.26
-      4      |        3 |  88.80
-
diff --git a/tests/testthat/_snaps/data_tabulate.md b/tests/testthat/_snaps/data_tabulate.md
index 59a20dc01..ffde63088 100644
--- a/tests/testthat/_snaps/data_tabulate.md
+++ b/tests/testthat/_snaps/data_tabulate.md
@@ -259,7 +259,7 @@
 
     Code
       print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE))
+      remove_na = TRUE))
     Output
       efc$c172code |       male |     female | Total
       -------------+------------+------------+------
@@ -288,7 +288,7 @@
 
     Code
       print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE, weights = efc$weights))
+      remove_na = TRUE, weights = efc$weights))
     Output
       efc$c172code |       male |     female | Total
       -------------+------------+------------+------
@@ -317,7 +317,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row",
-      include_na = FALSE))
+      remove_na = TRUE))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -348,7 +348,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row",
-      include_na = FALSE, weights = efc$weights))
+      remove_na = TRUE, weights = efc$weights))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -378,7 +378,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column",
-        include_na = FALSE))
+        remove_na = TRUE))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -409,7 +409,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column",
-        include_na = FALSE, weights = "weights"))
+        remove_na = TRUE, weights = "weights"))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -497,7 +497,7 @@
 
     Code
       print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE))
+      remove_na = TRUE))
     Output
       [1] "|efc$c172code |       male|     female| Total|"
       [2] "|:------------|----------:|----------:|-----:|"
@@ -534,7 +534,7 @@
 
     Code
       print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE, weights = efc$weights))
+      remove_na = TRUE, weights = efc$weights))
     Output
       [1] "|efc$c172code |       male|     female| Total|"
       [2] "|:------------|----------:|----------:|-----:|"
diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R
index c8fac4a88..c60b142d2 100644
--- a/tests/testthat/test-data_summary.R
+++ b/tests/testthat/test-data_summary.R
@@ -175,7 +175,7 @@ test_that("data_summary, with NA", {
   data(efc, package = "datawizard")
   out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code")
   expect_snapshot(print(out))
-  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", remove_na = FALSE)
+  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", remove_na = TRUE)
   expect_snapshot(print(out))
   # sorting for multiple groups
   out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code"))
diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R
index 6c443a860..9848d42b9 100644
--- a/tests/testthat/test-data_tabulate.R
+++ b/tests/testthat/test-data_tabulate.R
@@ -287,11 +287,11 @@ test_that("data_tabulate exclude/include missing values", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
   out <- data_tabulate(efc$c172code)
   expect_identical(out$N, c(8L, 66L, 16L, 10L))
-  out <- data_tabulate(efc$c172code, remove_na = FALSE)
+  out <- data_tabulate(efc$c172code, remove_na = TRUE)
   expect_identical(out$N, c(8L, 66L, 16L))
   out <- data_tabulate(efc$c172code, weights = efc$weights)
   expect_identical(out$N, c(10, 67, 15, 13))
-  out <- data_tabulate(efc$c172code, remove_na = FALSE, weights = efc$weights)
+  out <- data_tabulate(efc$c172code, remove_na = TRUE, weights = efc$weights)
   expect_identical(out$N, c(10, 67, 15))
 })
 
@@ -305,17 +305,17 @@ test_that("data_tabulate, cross tables", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")))
-  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)))
   expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)))
-  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint
   expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE)))
   expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", weights = efc$weights)))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights))) # nolint
   expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE)))
   expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", weights = "weights")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = FALSE, weights = "weights"))) # nolint
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE, weights = "weights"))) # nolint
 })
 
 test_that("data_tabulate, cross tables, HTML", {
@@ -326,11 +326,11 @@ test_that("data_tabulate, cross tables, HTML", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")), "gt_tbl")
-  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)), "gt_tbl") # nolint
   expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)), "gt_tbl") # nolint
-  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint
   expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")), "gt_tbl")
-  expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint
 })
 
 test_that("data_tabulate, cross tables, grouped df", {
@@ -377,9 +377,9 @@ test_that("data_tabulate, cross tables, markdown", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")))
-  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE)))
+  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)))
   expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)))
-  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint
 })
 
 
@@ -389,12 +389,12 @@ test_that("data_tabulate, validate against table", {
   data(mtcars)
   # frequency table
   out1 <- as.data.frame(table(mtcars$cyl))
-  out2 <- data_tabulate(mtcars$cyl, remove_na = FALSE)
+  out2 <- data_tabulate(mtcars$cyl, remove_na = TRUE)
   expect_identical(out1$Freq, out2$N)
   # crosstable
   out1 <- data_arrange(as.data.frame(table(mtcars$cyl, mtcars$gear)), c("Var1", "Var2"))
   out2 <- data_rename(data_to_long(
-    as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, remove_na = FALSE)), 2:4,
+    as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, remove_na = TRUE)), 2:4,
     names_to = "Var2", values_to = "Freq"
   ), "mtcars$cyl", "Var1")
   out1[[2]] <- as.character(out1[[2]])

From acf8cc45e1bdd375ab9ababcc9da5fcba478568b Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 23 Jun 2024 10:02:20 +0200
Subject: [PATCH 4/6] docs, news

---
 DESCRIPTION          | 2 +-
 NEWS.md              | 5 +++++
 R/data_summary.R     | 6 +++---
 R/data_tabulate.R    | 2 +-
 man/data_summary.Rd  | 6 +++---
 man/data_tabulate.Rd | 2 +-
 6 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0a7fb9594..c56e3f9dd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.11.0.3
+Version: 0.11.0.4
 Authors@R: c(
     person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
diff --git a/NEWS.md b/NEWS.md
index ada1f08ea..8e5001f77 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,11 @@
 
 BREAKING CHANGES
 
+* The argument `include_na` in `data_tabulate()` and `data_summary()` has been
+  renamed into `remove_na`. Consequently, to mimic former behaviour, `FALSE` and
+  `TRUE` need to be switched (i.e. `remove_na = TRUE` is equivalent to the former
+  `include_na 0 FALSE`).
+
 * Class names for objects returned by `data_tabulate()` have been changed to
   `datawizard_table` and `datawizard_crosstable` (resp. the plural forms,
   `*_tables`), to provide a clearer and more consistent naming scheme.
diff --git a/R/data_summary.R b/R/data_summary.R
index f77ebe013..7662d0c94 100644
--- a/R/data_summary.R
+++ b/R/data_summary.R
@@ -8,9 +8,9 @@
 #' @param by Optional character string, indicating the name of a variable in `x`.
 #' If supplied, the data will be split by this variable and summary statistics
 #' will be computed for each group.
-#' @param remove_na Logical. If `TRUE`, missing values are included as a level
-#' in the grouping variable. If `FALSE`, missing values are omitted from the
-#' grouping variable.
+#' @param remove_na Logical. If `TRUE`, missing values are omitted from the
+#' grouping variable. If `FALSE` (default), missing values are included as a
+#' level in the grouping variable.
 #' @param ... One or more named expressions that define the new variable name
 #' and the function to compute the summary statistic. Example:
 #' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided
diff --git a/R/data_tabulate.R b/R/data_tabulate.R
index 4c0aa10b3..952f921d9 100644
--- a/R/data_tabulate.R
+++ b/R/data_tabulate.R
@@ -15,7 +15,7 @@
 #' factor levels are dropped from the frequency table.
 #' @param name Optional character string, which includes the name that is used
 #' for printing.
-#' @param remove_na Logical, if `TRUE`, missing values are included in the
+#' @param remove_na Logical, if `FALSE`, missing values are included in the
 #' frequency or crosstable, else missing values are omitted.
 #' @param collapse Logical, if `TRUE` collapses multiple tables into one larger
 #' table for printing. This affects only printing, not the returned object.
diff --git a/man/data_summary.Rd b/man/data_summary.Rd
index 70d2e2576..24cfa1a9f 100644
--- a/man/data_summary.Rd
+++ b/man/data_summary.Rd
@@ -22,9 +22,9 @@ summary function \code{n()} can be used to count the number of observations.}
 If supplied, the data will be split by this variable and summary statistics
 will be computed for each group.}
 
-\item{remove_na}{Logical. If \code{TRUE}, missing values are included as a level
-in the grouping variable. If \code{FALSE}, missing values are omitted from the
-grouping variable.}
+\item{remove_na}{Logical. If \code{TRUE}, missing values are omitted from the
+grouping variable. If \code{FALSE} (default), missing values are included as a
+level in the grouping variable.}
 }
 \value{
 A data frame with the requested summary statistics.
diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd
index 6f572a05a..2feadf3a9 100644
--- a/man/data_tabulate.Rd
+++ b/man/data_tabulate.Rd
@@ -62,7 +62,7 @@ factor levels are dropped from the frequency table.}
 \item{weights}{Optional numeric vector of weights. Must be of the same length
 as \code{x}. If \code{weights} is supplied, weighted frequencies are calculated.}
 
-\item{remove_na}{Logical, if \code{TRUE}, missing values are included in the
+\item{remove_na}{Logical, if \code{FALSE}, missing values are included in the
 frequency or crosstable, else missing values are omitted.}
 
 \item{proportions}{Optional character string, indicating the type of

From 51c8b5f588e3697f71d49e25f30aab61fd4c1597 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 23 Jun 2024 11:17:25 +0200
Subject: [PATCH 5/6] Update NEWS.md

Co-authored-by: Indrajeet Patil <patilindrajeet.science@gmail.com>
---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 8e5001f77..0954f1214 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,7 +5,7 @@ BREAKING CHANGES
 * The argument `include_na` in `data_tabulate()` and `data_summary()` has been
   renamed into `remove_na`. Consequently, to mimic former behaviour, `FALSE` and
   `TRUE` need to be switched (i.e. `remove_na = TRUE` is equivalent to the former
-  `include_na 0 FALSE`).
+  `include_na = FALSE`).
 
 * Class names for objects returned by `data_tabulate()` have been changed to
   `datawizard_table` and `datawizard_crosstable` (resp. the plural forms,

From 17f8736c0c3354c20cf98adf60d9622e9d1b229b Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 23 Jun 2024 12:42:50 +0200
Subject: [PATCH 6/6] add  comment

---
 R/data_tabulate.R  | 4 ++++
 R/data_xtabulate.R | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/R/data_tabulate.R b/R/data_tabulate.R
index 952f921d9..e94fc5d55 100644
--- a/R/data_tabulate.R
+++ b/R/data_tabulate.R
@@ -173,6 +173,10 @@ data_tabulate.default <- function(x,
   # frequency table
   if (is.null(weights)) {
     if (remove_na) {
+      # we have a `.default` and a `.data.frame` method for `data_tabulate()`.
+      # since this is the default, `x` can be an object which cannot be used
+      # with `table()`, that's why we add `tryCatch()` here. Below we give an
+      # informative error message for non-supported objects.
       freq_table <- tryCatch(table(x), error = function(e) NULL)
     } else {
       freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL)
diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R
index 7943cbab9..08be1eeca 100644
--- a/R/data_xtabulate.R
+++ b/R/data_xtabulate.R
@@ -12,6 +12,10 @@
   }
   # frequency table
   if (is.null(weights)) {
+    # we have a `.default` and a `.data.frame` method for `data_tabulate()`.
+    # since this is the default, `x` can be an object which cannot be used
+    # with `table()`, that's why we add `tryCatch()` here. Below we give an
+    # informative error message for non-supported objects.
     if (remove_na) {
       x_table <- tryCatch(table(x, by), error = function(e) NULL)
     } else {