Skip to content

Commit

Permalink
include NA, sort output
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Mar 3, 2024
1 parent b3c7628 commit ce578d5
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 9 deletions.
29 changes: 21 additions & 8 deletions R/data_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#' @param by Optional character string, indicating the name of a variable in `x`.
#' If supplied, the data will be split by this variable and summary statistics
#' will be computed for each group.
#' @param include_na Logical, if `TRUE`, missing values are included as a level
#' in the grouping variable. If `FALSE`, missing values are omitted from the
#' grouping variable.
#' @param ... One or more named expressions that define the new variable name
#' and the function to compute the summary statistic. Example:
#' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided
Expand Down Expand Up @@ -42,8 +45,8 @@ data_summary <- function(x, ...) {


#' @export
data_summary.matrix <- function(x, ..., by = NULL) {
data_summary(as.data.frame(x), ..., by = by)
data_summary.matrix <- function(x, ..., by = NULL, include_na = TRUE) {
data_summary(as.data.frame(x), ..., by = by, include_na = include_na)

Check warning on line 49 in R/data_summary.R

View check run for this annotation

Codecov / codecov/patch

R/data_summary.R#L49

Added line #L49 was not covered by tests
}


Expand All @@ -55,7 +58,7 @@ data_summary.default <- function(x, ...) {

#' @rdname data_summary
#' @export
data_summary.data.frame <- function(x, ..., by = NULL) {
data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
dots <- eval(substitute(alist(...)))

# do we have any expression at all?
Expand All @@ -81,8 +84,15 @@ data_summary.data.frame <- function(x, ..., by = NULL) {
.misspelled_string(colnames(x), by_not_found, "Possibly misspelled?")
)
}
# split data
split_data <- split(x, x[by])
# split data, add NA levels, if requested
l <- lapply(x[by], function(i) {
if (include_na) {
addNA(i)
} else {
i
}
})
split_data <- split(x, l, drop = TRUE)
out <- lapply(split_data, function(s) {
# no data for combination? Return NULL
if (nrow(s) == 0) {
Expand All @@ -100,14 +110,17 @@ data_summary.data.frame <- function(x, ..., by = NULL) {
})
out <- do.call(rbind, out)
}
# sort data
out <- data_arrange(out, select = by)
# data attributes
class(out) <- c("dw_data_summary", "data.frame")
rownames(out) <- NULL
out
}


#' @export
data_summary.grouped_df <- function(x, ..., by = NULL) {
data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) {
# extract group variables
grps <- attr(x, "groups", exact = TRUE)
group_variables <- data_remove(grps, ".rows")
Expand All @@ -118,7 +131,7 @@ data_summary.grouped_df <- function(x, ..., by = NULL) {
# remove information specific to grouped df's
attr(x, "groups") <- NULL
class(x) <- "data.frame"
data_summary(x, ..., by = by)
data_summary(x, ..., by = by, include_na = include_na)
}


Expand Down Expand Up @@ -172,6 +185,6 @@ print.dw_data_summary <- function(x, ...) {
if (nrow(x) == 0) {
cat("No matches found.\n")

Check warning on line 186 in R/data_summary.R

View check run for this annotation

Codecov / codecov/patch

R/data_summary.R#L186

Added line #L186 was not covered by tests
} else {
cat(insight::export_table(x, ...))
cat(insight::export_table(x, missing = "<NA>", ...))
}
}
6 changes: 5 additions & 1 deletion man/data_summary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions tests/testthat/_snaps/data_summary.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,46 @@
1 | 4 | 26.27 | 5.41
1 | 5 | 21.38 | 6.66

# data_summary, with NA

Code
print(out)
Output
c172code | MW
----------------
1 | 87.12
2 | 94.05
3 | 75.00
<NA> | 47.80

---

Code
print(out)
Output
c172code | MW
----------------
1 | 87.12
2 | 94.05
3 | 75.00

---

Code
print(out)
Output
e42dep | c172code | MW
--------------------------
1 | 2 | 17.00
2 | 2 | 34.25
3 | 1 | 39.50
3 | 2 | 52.44
3 | 3 | 52.00
3 | <NA> | 84.00
4 | 1 | 134.75
4 | 2 | 119.26
4 | 3 | 88.80
4 | <NA> | 43.29
<NA> | 2 | <NA>
<NA> | <NA> | 7.00

12 changes: 12 additions & 0 deletions tests/testthat/test-data_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,18 @@ test_that("data_summary, print", {
})


test_that("data_summary, with NA", {
data(efc, package = "datawizard")
out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code")
expect_snapshot(print(out))
out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", include_na = FALSE)
expect_snapshot(print(out))
# sorting for multiple groups
out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code"))
expect_snapshot(print(out))
})


test_that("data_summary, inside functions", {
foo1 <- function(x, ...) {
datawizard::data_summary(x, ..., by = "Species")
Expand Down

0 comments on commit ce578d5

Please sign in to comment.