From c5fdc834f68d2b3ee8668712df05b1f191a77195 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 13 Feb 2024 14:05:35 +0100 Subject: [PATCH] fixes update tests --- R/data_tabulate.R | 9 +- R/data_xtabulate.R | 43 ++++- tests/testthat/_snaps/data_tabulate.md | 234 +++++++++++++++++++++++++ tests/testthat/test-data_tabulate.R | 71 ++++++++ 4 files changed, 349 insertions(+), 8 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 2f083e698..82e479489 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -91,10 +91,8 @@ data_tabulate.default <- function(x, x <- droplevels(x) } - # check for correct length of weights - must be equal to "x" - if (!is.null(weights) && length(weights) != length(x)) { - insight::format_error("Length of `weights` must be equal to length of `x`.") - } + # validate "weights" + weights <- .validate_tableweights(weights, x) # we go into another function for crosstables here... if (!is.null(by)) { @@ -225,8 +223,11 @@ data_tabulate.data.frame <- function(x, regex = regex, verbose = verbose ) + # validate "by" by <- .validate_by(by, x) + # validate "weights" + weights <- .validate_tableweights(weights, x) out <- lapply(select, function(i) { data_tabulate( diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 5dd7b239e..7599672b2 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -202,13 +202,13 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { # If "by" is a character string, must be of length 1 if (length(by) > 1) { insight::format_error( - "If argument `by` is a character indicating a variable name, `by` must be of length 1.", + "If `by` is a string indicating a variable name, `by` must be of length 1.", "You may use `data_group()` to group by multiple variables, then call `data_tabulate()`." ) } # if "by" is a character, "x" must be a data frame if (!is.data.frame(x)) { - insight::format_error("If argument `by` is a character indicating a variable name, `x` must be a data frame.") + insight::format_error("If `by` is a string indicating a variable name, `x` must be a data frame.") } # is "by" a column in "x"? if (!by %in% colnames(x)) { @@ -221,10 +221,10 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { } # is "by" of same length as "x"? if (is.data.frame(x) && length(by) != nrow(x)) { - insight::format_error("The variable specified in `by` must have the same length as rows in `x`.") # nolint + insight::format_error("Length of `by` must be equal to number of rows in `x`.") # nolint } if (!is.data.frame(x) && length(by) != length(x)) { - insight::format_error("The variable specified in `by` must have the same length as `x`.") # nolint + insight::format_error("Length of `by` must be equal to length of `x`.") # nolint } if (!is.factor(by)) { # coerce "by" to factor, including labels @@ -234,3 +234,38 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { by } + + +.validate_tableweights <- function(weights, x) { + if (!is.null(weights)) { + if (is.character(weights)) { + # If "weights" is a character string, must be of length 1 + if (length(weights) > 1) { + insight::format_error( + "If `weights` is a string indicating a variable name, `weights` must be of length 1." + ) + } + # if "weights" is a character, "x" must be a data frame + if (!is.data.frame(x)) { + insight::format_error("If `weights` is a string indicating a variable name, `x` must be a data frame.") # nolint + } + # is "by" a column in "x"? + if (!weights %in% colnames(x)) { + insight::format_error(sprintf( + "The variable specified in `weights` was not found in `x`. %s", + .misspelled_string(names(x), weights, "Possibly misspelled?") + )) + } + weights <- x[[weights]] + } + # is "by" of same length as "x"? + if (is.data.frame(x) && length(weights) != nrow(x)) { + insight::format_error("Length of `weights` must be equal to number of rows in `x`.") # nolint + } + if (!is.data.frame(x) && length(weights) != length(x)) { + insight::format_error("Length of `weights` must be equal to length of `x`.") # nolint + } + } + + weights +} diff --git a/tests/testthat/_snaps/data_tabulate.md b/tests/testthat/_snaps/data_tabulate.md index 6fcf1d93b..2564735ce 100644 --- a/tests/testthat/_snaps/data_tabulate.md +++ b/tests/testthat/_snaps/data_tabulate.md @@ -241,3 +241,237 @@ | | | 2 | 3.70 | | ------------------------------------------------------------------- +# data_tabulate, cross tables + + Code + print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell")) + Output + efc$c172code | male | female | NA | Total + -------------+------------+------------+----------+------ + 1 | 5 (5.0%) | 2 (2.0%) | 1 (1.0%) | 8 + 2 | 31 (31.0%) | 33 (33.0%) | 2 (2.0%) | 66 + 3 | 4 (4.0%) | 11 (11.0%) | 1 (1.0%) | 16 + | 5 (5.0%) | 4 (4.0%) | 1 (1.0%) | 10 + -------------+------------+------------+----------+------ + Total | 45 | 50 | 5 | 100 + +--- + + Code + print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", + include_na = FALSE)) + Output + efc$c172code | male | female | Total + -------------+------------+------------+------ + 1 | 5 (5.8%) | 2 (2.3%) | 7 + 2 | 31 (36.0%) | 33 (38.4%) | 64 + 3 | 4 (4.7%) | 11 (12.8%) | 15 + -------------+------------+------------+------ + Total | 40 | 46 | 86 + +--- + + Code + print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", + weights = efc$weights)) + Output + efc$c172code | male | female | NA | Total + -------------+------------+------------+----------+------ + 1 | 5 (4.8%) | 3 (2.9%) | 2 (1.9%) | 10 + 2 | 32 (30.5%) | 32 (30.5%) | 3 (2.9%) | 67 + 3 | 3 (2.9%) | 11 (10.5%) | 1 (1.0%) | 15 + | 8 (7.6%) | 5 (4.8%) | 1 (1.0%) | 14 + -------------+------------+------------+----------+------ + Total | 48 | 51 | 7 | 105 + +--- + + Code + print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", + include_na = FALSE, weights = efc$weights)) + Output + efc$c172code | male | female | Total + -------------+------------+------------+------ + 1 | 5 (5.8%) | 3 (3.5%) | 8 + 2 | 32 (37.2%) | 32 (37.2%) | 64 + 3 | 3 (3.5%) | 11 (12.8%) | 14 + -------------+------------+------------+------ + Total | 40 | 46 | 86 + +--- + + Code + print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")) + Output + c172code | male | female | NA | Total + ---------+------------+------------+-----------+------ + 1 | 5 (62.5%) | 2 (25.0%) | 1 (12.5%) | 8 + 2 | 31 (47.0%) | 33 (50.0%) | 2 (3.0%) | 66 + 3 | 4 (25.0%) | 11 (68.8%) | 1 (6.2%) | 16 + | 5 (50.0%) | 4 (40.0%) | 1 (10.0%) | 10 + ---------+------------+------------+-----------+------ + Total | 45 | 50 | 5 | 100 + + +--- + + Code + print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", + include_na = FALSE)) + Output + c172code | male | female | Total + ---------+------------+------------+------ + 1 | 5 (71.4%) | 2 (28.6%) | 7 + 2 | 31 (48.4%) | 33 (51.6%) | 64 + 3 | 4 (26.7%) | 11 (73.3%) | 15 + ---------+------------+------------+------ + Total | 40 | 46 | 86 + + +--- + + Code + print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", + weights = efc$weights)) + Output + c172code | male | female | NA | Total + ---------+------------+------------+-----------+------ + 1 | 5 (50.0%) | 3 (30.0%) | 2 (20.0%) | 10 + 2 | 32 (47.8%) | 32 (47.8%) | 3 (4.5%) | 67 + 3 | 3 (20.0%) | 11 (73.3%) | 1 (6.7%) | 15 + | 8 (57.1%) | 5 (35.7%) | 1 (7.1%) | 14 + ---------+------------+------------+-----------+------ + Total | 48 | 51 | 7 | 105 + + +--- + + Code + print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", + include_na = FALSE, weights = efc$weights)) + Output + c172code | male | female | Total + ---------+------------+------------+------ + 1 | 5 (62.5%) | 3 (37.5%) | 8 + 2 | 32 (50.0%) | 32 (50.0%) | 64 + 3 | 3 (21.4%) | 11 (78.6%) | 14 + ---------+------------+------------+------ + Total | 40 | 46 | 86 + + +--- + + Code + print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")) + Output + c172code | male | female | NA | Total + ---------+------------+------------+-----------+------ + 1 | 5 (11.1%) | 2 (4.0%) | 1 (20.0%) | 8 + 2 | 31 (68.9%) | 33 (66.0%) | 2 (40.0%) | 66 + 3 | 4 (8.9%) | 11 (22.0%) | 1 (20.0%) | 16 + | 5 (11.1%) | 4 (8.0%) | 1 (20.0%) | 10 + ---------+------------+------------+-----------+------ + Total | 45 | 50 | 5 | 100 + + +--- + + Code + print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", + include_na = FALSE)) + Output + c172code | male | female | Total + ---------+------------+------------+------ + 1 | 5 (12.5%) | 2 (4.3%) | 7 + 2 | 31 (77.5%) | 33 (71.7%) | 64 + 3 | 4 (10.0%) | 11 (23.9%) | 15 + ---------+------------+------------+------ + Total | 40 | 46 | 86 + + +--- + + Code + print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", + weights = "weights")) + Output + c172code | male | female | NA | Total + ---------+------------+------------+-----------+------ + 1 | 5 (10.4%) | 3 (5.9%) | 2 (28.6%) | 10 + 2 | 32 (66.7%) | 32 (62.7%) | 3 (42.9%) | 67 + 3 | 3 (6.2%) | 11 (21.6%) | 1 (14.3%) | 15 + | 8 (16.7%) | 5 (9.8%) | 1 (14.3%) | 14 + ---------+------------+------------+-----------+------ + Total | 48 | 51 | 7 | 105 + + +--- + + Code + print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", + include_na = FALSE, weights = "weights")) + Output + c172code | male | female | Total + ---------+------------+------------+------ + 1 | 5 (12.5%) | 3 (6.5%) | 8 + 2 | 32 (80.0%) | 32 (69.6%) | 64 + 3 | 3 (7.5%) | 11 (23.9%) | 14 + ---------+------------+------------+------ + Total | 40 | 46 | 86 + + +# data_tabulate, cross tables, grouped df + + Code + print(data_tabulate(grp, "c172code", by = "e16sex", proportions = "row")) + Output + Grouped by e42dep (1) + + c172code | male | NA | Total + ---------+------------+----------+------ + 2 | 2 (100.0%) | 0 (0.0%) | 2 + | 0 (NaN%) | 0 (NaN%) | 0 + ---------+------------+----------+------ + Total | 2 | 0 | 2 + + Grouped by e42dep (2) + + c172code | male | female | NA | Total + ---------+-----------+-----------+----------+------ + 2 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4 + | 0 (NaN%) | 0 (NaN%) | 0 (NaN%) | 0 + ---------+-----------+-----------+----------+------ + Total | 2 | 2 | 0 | 4 + + Grouped by e42dep (3) + + c172code | male | female | NA | Total + ---------+-----------+------------+-----------+------ + 1 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4 + 2 | 4 (25.0%) | 11 (68.8%) | 1 (6.2%) | 16 + 3 | 1 (16.7%) | 5 (83.3%) | 0 (0.0%) | 6 + | 1 (50.0%) | 0 (0.0%) | 1 (50.0%) | 2 + ---------+-----------+------------+-----------+------ + Total | 8 | 18 | 2 | 28 + + Grouped by e42dep (4) + + c172code | male | female | NA | Total + ---------+------------+------------+-----------+------ + 1 | 3 (75.0%) | 0 (0.0%) | 1 (25.0%) | 4 + 2 | 23 (54.8%) | 18 (42.9%) | 1 (2.4%) | 42 + 3 | 3 (30.0%) | 6 (60.0%) | 1 (10.0%) | 10 + | 3 (42.9%) | 4 (57.1%) | 0 (0.0%) | 7 + ---------+------------+------------+-----------+------ + Total | 32 | 28 | 3 | 63 + + Grouped by e42dep (NA) + + c172code | male | female | NA | Total + ---------+------------+------------+----------+------ + 2 | 0 (0.0%) | 2 (100.0%) | 0 (0.0%) | 2 + | 1 (100.0%) | 0 (0.0%) | 0 (0.0%) | 1 + ---------+------------+------------+----------+------ + Total | 1 | 2 | 0 | 3 + + diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R index cd6e6df3f..7ed76e7c9 100644 --- a/tests/testthat/test-data_tabulate.R +++ b/tests/testthat/test-data_tabulate.R @@ -252,3 +252,74 @@ test_that("data_tabulate regex", { data_tabulate(mtcars, select = "carb") ) }) + + +test_that("data_tabulate exclude/include missing values", { + data(efc, package = "datawizard") + set.seed(123) + efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5)) + efc$e16sex[sample.int(nrow(efc), 5)] <- NA + out <- data_tabulate(efc$c172code) + expect_identical(out$N, c(8L, 66L, 16L, 10L)) + out <- data_tabulate(efc$c172code, include_na = FALSE) + expect_identical(out$N, c(8L, 66L, 16L)) + out <- data_tabulate(efc$c172code, weights = efc$weights) + expect_identical(out$N, c(10, 67, 15, 13)) + out <- data_tabulate(efc$c172code, include_na = FALSE, weights = efc$weights) + expect_identical(out$N, c(10, 67, 15)) +}) + + +# cross tables ------------------------------ +test_that("data_tabulate, cross tables", { + data(efc, package = "datawizard") + set.seed(123) + efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5)) + efc$e16sex[sample.int(nrow(efc), 5)] <- NA + + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell"))) + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", weights = efc$weights))) + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row"))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", weights = efc$weights))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column"))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", weights = "weights"))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE, weights = "weights"))) # nolint +}) + +test_that("data_tabulate, cross tables, grouped df", { + data(efc, package = "datawizard") + set.seed(123) + efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5)) + efc$e16sex[sample.int(nrow(efc), 5)] <- NA + grp <- data_group(efc, "e42dep") + expect_snapshot(print(data_tabulate(grp, "c172code", by = "e16sex", proportions = "row"))) +}) + +test_that("data_tabulate, cross tables, errors", { + data(efc, package = "datawizard") + set.seed(123) + efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5)) + efc$e16sex[sample.int(nrow(efc), 5)] <- NA + expect_error(data_tabulate(efc$c172code, by = "e16sex"), regex = "If `by` is a string") + expect_error(data_tabulate(efc$c172code, by = efc$e16sex[-1]), regex = "Length of `by`") + expect_error(data_tabulate(efc, "c172code", by = efc$e16sex[-1]), regex = "Length of `by`") + expect_error(data_tabulate(efc, "c172code", by = "c16sex"), regex = "not found") + expect_error(data_tabulate(efc, "c172code", by = c("e16sex", "e42dep")), regex = "You may use") +}) + +test_that("data_tabulate, cross tables, errors", { + data(efc, package = "datawizard") + set.seed(123) + efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5)) + efc$e16sex[sample.int(nrow(efc), 5)] <- NA + expect_error(data_tabulate(efc$c172code, weights = "weights"), regex = "If `weights`") + expect_error(data_tabulate(efc$c172code, weights = efc$weights[-1]), regex = "Length of `weights`") + expect_error(data_tabulate(efc, "c172code", weights = efc$weights[-1]), regex = "Length of `weights`") + expect_error(data_tabulate(efc, "c172code", weights = "weigths"), regex = "not found") + expect_error(data_tabulate(efc, "c172code", weights = c("e16sex", "e42dep")), regex = "length 1") +})