Skip to content

Commit

Permalink
fixes update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Feb 13, 2024
1 parent 5de775d commit c5fdc83
Show file tree
Hide file tree
Showing 4 changed files with 349 additions and 8 deletions.
9 changes: 5 additions & 4 deletions R/data_tabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,8 @@ data_tabulate.default <- function(x,
x <- droplevels(x)
}

# check for correct length of weights - must be equal to "x"
if (!is.null(weights) && length(weights) != length(x)) {
insight::format_error("Length of `weights` must be equal to length of `x`.")
}
# validate "weights"
weights <- .validate_tableweights(weights, x)

# we go into another function for crosstables here...
if (!is.null(by)) {
Expand Down Expand Up @@ -225,8 +223,11 @@ data_tabulate.data.frame <- function(x,
regex = regex,
verbose = verbose
)

# validate "by"
by <- .validate_by(by, x)
# validate "weights"
weights <- .validate_tableweights(weights, x)

out <- lapply(select, function(i) {
data_tabulate(
Expand Down
43 changes: 39 additions & 4 deletions R/data_xtabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,13 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {
# If "by" is a character string, must be of length 1
if (length(by) > 1) {
insight::format_error(
"If argument `by` is a character indicating a variable name, `by` must be of length 1.",
"If `by` is a string indicating a variable name, `by` must be of length 1.",
"You may use `data_group()` to group by multiple variables, then call `data_tabulate()`."
)
}
# if "by" is a character, "x" must be a data frame
if (!is.data.frame(x)) {
insight::format_error("If argument `by` is a character indicating a variable name, `x` must be a data frame.")
insight::format_error("If `by` is a string indicating a variable name, `x` must be a data frame.")
}
# is "by" a column in "x"?
if (!by %in% colnames(x)) {
Expand All @@ -221,10 +221,10 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {
}
# is "by" of same length as "x"?
if (is.data.frame(x) && length(by) != nrow(x)) {
insight::format_error("The variable specified in `by` must have the same length as rows in `x`.") # nolint
insight::format_error("Length of `by` must be equal to number of rows in `x`.") # nolint
}
if (!is.data.frame(x) && length(by) != length(x)) {
insight::format_error("The variable specified in `by` must have the same length as `x`.") # nolint
insight::format_error("Length of `by` must be equal to length of `x`.") # nolint
}
if (!is.factor(by)) {
# coerce "by" to factor, including labels
Expand All @@ -234,3 +234,38 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {

by
}


.validate_tableweights <- function(weights, x) {
if (!is.null(weights)) {
if (is.character(weights)) {
# If "weights" is a character string, must be of length 1
if (length(weights) > 1) {
insight::format_error(
"If `weights` is a string indicating a variable name, `weights` must be of length 1."
)
}
# if "weights" is a character, "x" must be a data frame
if (!is.data.frame(x)) {
insight::format_error("If `weights` is a string indicating a variable name, `x` must be a data frame.") # nolint
}
# is "by" a column in "x"?
if (!weights %in% colnames(x)) {
insight::format_error(sprintf(
"The variable specified in `weights` was not found in `x`. %s",
.misspelled_string(names(x), weights, "Possibly misspelled?")
))
}
weights <- x[[weights]]
}
# is "by" of same length as "x"?
if (is.data.frame(x) && length(weights) != nrow(x)) {
insight::format_error("Length of `weights` must be equal to number of rows in `x`.") # nolint
}
if (!is.data.frame(x) && length(weights) != length(x)) {
insight::format_error("Length of `weights` must be equal to length of `x`.") # nolint
}
}

weights
}
234 changes: 234 additions & 0 deletions tests/testthat/_snaps/data_tabulate.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,237 @@
| | <NA> | 2 | 3.70 | <NA> | <NA>
-------------------------------------------------------------------

# data_tabulate, cross tables

Code
print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell"))
Output
efc$c172code | male | female | NA | Total
-------------+------------+------------+----------+------
1 | 5 (5.0%) | 2 (2.0%) | 1 (1.0%) | 8
2 | 31 (31.0%) | 33 (33.0%) | 2 (2.0%) | 66
3 | 4 (4.0%) | 11 (11.0%) | 1 (1.0%) | 16
<NA> | 5 (5.0%) | 4 (4.0%) | 1 (1.0%) | 10
-------------+------------+------------+----------+------
Total | 45 | 50 | 5 | 100

---

Code
print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell",
include_na = FALSE))
Output
efc$c172code | male | female | Total
-------------+------------+------------+------
1 | 5 (5.8%) | 2 (2.3%) | 7
2 | 31 (36.0%) | 33 (38.4%) | 64
3 | 4 (4.7%) | 11 (12.8%) | 15
-------------+------------+------------+------
Total | 40 | 46 | 86

---

Code
print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell",
weights = efc$weights))
Output
efc$c172code | male | female | NA | Total
-------------+------------+------------+----------+------
1 | 5 (4.8%) | 3 (2.9%) | 2 (1.9%) | 10
2 | 32 (30.5%) | 32 (30.5%) | 3 (2.9%) | 67
3 | 3 (2.9%) | 11 (10.5%) | 1 (1.0%) | 15
<NA> | 8 (7.6%) | 5 (4.8%) | 1 (1.0%) | 14
-------------+------------+------------+----------+------
Total | 48 | 51 | 7 | 105

---

Code
print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "cell",
include_na = FALSE, weights = efc$weights))
Output
efc$c172code | male | female | Total
-------------+------------+------------+------
1 | 5 (5.8%) | 3 (3.5%) | 8
2 | 32 (37.2%) | 32 (37.2%) | 64
3 | 3 (3.5%) | 11 (12.8%) | 14
-------------+------------+------------+------
Total | 40 | 46 | 86

---

Code
print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row"))
Output
c172code | male | female | NA | Total
---------+------------+------------+-----------+------
1 | 5 (62.5%) | 2 (25.0%) | 1 (12.5%) | 8
2 | 31 (47.0%) | 33 (50.0%) | 2 (3.0%) | 66
3 | 4 (25.0%) | 11 (68.8%) | 1 (6.2%) | 16
<NA> | 5 (50.0%) | 4 (40.0%) | 1 (10.0%) | 10
---------+------------+------------+-----------+------
Total | 45 | 50 | 5 | 100

---

Code
print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row",
include_na = FALSE))
Output
c172code | male | female | Total
---------+------------+------------+------
1 | 5 (71.4%) | 2 (28.6%) | 7
2 | 31 (48.4%) | 33 (51.6%) | 64
3 | 4 (26.7%) | 11 (73.3%) | 15
---------+------------+------------+------
Total | 40 | 46 | 86

---

Code
print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row",
weights = efc$weights))
Output
c172code | male | female | NA | Total
---------+------------+------------+-----------+------
1 | 5 (50.0%) | 3 (30.0%) | 2 (20.0%) | 10
2 | 32 (47.8%) | 32 (47.8%) | 3 (4.5%) | 67
3 | 3 (20.0%) | 11 (73.3%) | 1 (6.7%) | 15
<NA> | 8 (57.1%) | 5 (35.7%) | 1 (7.1%) | 14
---------+------------+------------+-----------+------
Total | 48 | 51 | 7 | 105

---

Code
print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row",
include_na = FALSE, weights = efc$weights))
Output
c172code | male | female | Total
---------+------------+------------+------
1 | 5 (62.5%) | 3 (37.5%) | 8
2 | 32 (50.0%) | 32 (50.0%) | 64
3 | 3 (21.4%) | 11 (78.6%) | 14
---------+------------+------------+------
Total | 40 | 46 | 86

---

Code
print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column"))
Output
c172code | male | female | NA | Total
---------+------------+------------+-----------+------
1 | 5 (11.1%) | 2 (4.0%) | 1 (20.0%) | 8
2 | 31 (68.9%) | 33 (66.0%) | 2 (40.0%) | 66
3 | 4 (8.9%) | 11 (22.0%) | 1 (20.0%) | 16
<NA> | 5 (11.1%) | 4 (8.0%) | 1 (20.0%) | 10
---------+------------+------------+-----------+------
Total | 45 | 50 | 5 | 100

---

Code
print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column",
include_na = FALSE))
Output
c172code | male | female | Total
---------+------------+------------+------
1 | 5 (12.5%) | 2 (4.3%) | 7
2 | 31 (77.5%) | 33 (71.7%) | 64
3 | 4 (10.0%) | 11 (23.9%) | 15
---------+------------+------------+------
Total | 40 | 46 | 86

---

Code
print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column",
weights = "weights"))
Output
c172code | male | female | NA | Total
---------+------------+------------+-----------+------
1 | 5 (10.4%) | 3 (5.9%) | 2 (28.6%) | 10
2 | 32 (66.7%) | 32 (62.7%) | 3 (42.9%) | 67
3 | 3 (6.2%) | 11 (21.6%) | 1 (14.3%) | 15
<NA> | 8 (16.7%) | 5 (9.8%) | 1 (14.3%) | 14
---------+------------+------------+-----------+------
Total | 48 | 51 | 7 | 105

---

Code
print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column",
include_na = FALSE, weights = "weights"))
Output
c172code | male | female | Total
---------+------------+------------+------
1 | 5 (12.5%) | 3 (6.5%) | 8
2 | 32 (80.0%) | 32 (69.6%) | 64
3 | 3 (7.5%) | 11 (23.9%) | 14
---------+------------+------------+------
Total | 40 | 46 | 86

# data_tabulate, cross tables, grouped df

Code
print(data_tabulate(grp, "c172code", by = "e16sex", proportions = "row"))
Output
Grouped by e42dep (1)
c172code | male | NA | Total
---------+------------+----------+------
2 | 2 (100.0%) | 0 (0.0%) | 2
<NA> | 0 (NaN%) | 0 (NaN%) | 0
---------+------------+----------+------
Total | 2 | 0 | 2
Grouped by e42dep (2)
c172code | male | female | NA | Total
---------+-----------+-----------+----------+------
2 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4
<NA> | 0 (NaN%) | 0 (NaN%) | 0 (NaN%) | 0
---------+-----------+-----------+----------+------
Total | 2 | 2 | 0 | 4
Grouped by e42dep (3)
c172code | male | female | NA | Total
---------+-----------+------------+-----------+------
1 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4
2 | 4 (25.0%) | 11 (68.8%) | 1 (6.2%) | 16
3 | 1 (16.7%) | 5 (83.3%) | 0 (0.0%) | 6
<NA> | 1 (50.0%) | 0 (0.0%) | 1 (50.0%) | 2
---------+-----------+------------+-----------+------
Total | 8 | 18 | 2 | 28
Grouped by e42dep (4)
c172code | male | female | NA | Total
---------+------------+------------+-----------+------
1 | 3 (75.0%) | 0 (0.0%) | 1 (25.0%) | 4
2 | 23 (54.8%) | 18 (42.9%) | 1 (2.4%) | 42
3 | 3 (30.0%) | 6 (60.0%) | 1 (10.0%) | 10
<NA> | 3 (42.9%) | 4 (57.1%) | 0 (0.0%) | 7
---------+------------+------------+-----------+------
Total | 32 | 28 | 3 | 63
Grouped by e42dep (NA)
c172code | male | female | NA | Total
---------+------------+------------+----------+------
2 | 0 (0.0%) | 2 (100.0%) | 0 (0.0%) | 2
<NA> | 1 (100.0%) | 0 (0.0%) | 0 (0.0%) | 1
---------+------------+------------+----------+------
Total | 1 | 2 | 0 | 3

Loading

0 comments on commit c5fdc83

Please sign in to comment.