Skip to content

Commit

Permalink
Fix data tab (#577)
Browse files Browse the repository at this point in the history
* Fix `data_tabulate()` for multiple `by` in HTML

* add

* news

* fix

* comments

* fix

* fix

* styler

* styler

* ...

* fix

* fix

* add test

* fix
  • Loading branch information
strengejacke authored Dec 23, 2024
1 parent 3533bba commit a58a031
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 131 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.13.0.19
Version: 0.13.0.20
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ S3method(print_html,datawizard_tables)
S3method(print_html,dw_data_peek)
S3method(print_md,data_codebook)
S3method(print_md,datawizard_crosstab)
S3method(print_md,datawizard_crosstabs)
S3method(print_md,datawizard_table)
S3method(print_md,datawizard_tables)
S3method(print_md,dw_data_peek)
Expand Down
9 changes: 7 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@ BREAKING CHANGES AND DEPRECATIONS

* *datawizard* now requires R >= 4.0 (#515).

* Argument `drop_na` in `data_match()` is deprecated now. Please use
* Argument `drop_na` in `data_match()` is deprecated now. Please use
`remove_na` instead.

* In `data_rename()` (#567):
- argument `pattern` is deprecated. Use `select` instead.
- argument `safe` is deprecated. The function now errors when `select`
- argument `safe` is deprecated. The function now errors when `select`
contains unknown column names.
- when `replacement` is `NULL`, an error is now thrown (previously, column
indices were used as new names).
- if `select` (previously `pattern`) is a named vector, then all elements
must be named, e.g. `c(length = "Sepal.Length", "Sepal.Width")` errors.

* `print()` methods for `data_tabulate()` with multiple sub-tables (i.e. when
length of `by` was > 1) were revised. Now, an integrated table instead of
multiple tables is returned. Furthermore, `print_html()` did not work, which
was also fixed now.

CHANGES

* The `select` argument, which is available in different functions to select
Expand Down
235 changes: 159 additions & 76 deletions R/data_xtabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,18 +90,23 @@


#' @export
format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark = NULL, ...) {
format.datawizard_crosstab <- function(x,
format = "text",
digits = 1,
big_mark = NULL,
include_total_row = TRUE,
...) {
# convert to character manually, else, for large numbers,
# format_table() returns scientific notation
x <- as.data.frame(x)

# remove group variable
x$Group <- NULL
# find numeric columns, only for these we need row/column sums
numeric_columns <- vapply(x, is.numeric, logical(1))

# compute total N for rows and colummns
# compute total N for rows and columns
total_n <- attributes(x)$total_n
total_column <- rowSums(x[, -1], na.rm = TRUE)
total_row <- c(colSums(x[, -1], na.rm = TRUE), total_n)
total_column <- rowSums(x[numeric_columns], na.rm = TRUE)
total_row <- c(colSums(x[numeric_columns], na.rm = TRUE), total_n)

# proportions?
props <- attributes(x)$proportions
Expand All @@ -113,16 +118,16 @@ format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark
tmp <- x
if (identical(props, "row")) {
for (i in seq_len(nrow(x))) {
row_sum <- sum(x[i, -1], na.rm = TRUE)
row_sum <- sum(x[i, numeric_columns], na.rm = TRUE)
if (row_sum == 0) {
row_sum_string <- "(0%)"
} else {
row_sum_string <- sprintf("(%.*f%%)", digits, 100 * x[i, -1] / row_sum)
row_sum_string <- sprintf("(%.*f%%)", digits, 100 * x[i, numeric_columns] / row_sum)
}
tmp[i, -1] <- paste(format(x[i, -1]), format(row_sum_string, justify = "right"))
tmp[i, numeric_columns] <- paste(format(x[i, numeric_columns]), format(row_sum_string, justify = "right"))
}
} else if (identical(props, "column")) {
for (i in seq_len(ncol(x))[-1]) {
for (i in seq_len(ncol(x))[numeric_columns]) {
col_sum <- sum(x[, i], na.rm = TRUE)
if (col_sum == 0) {
col_sum_string <- "(0%)"
Expand All @@ -132,7 +137,7 @@ format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark
tmp[, i] <- paste(format(x[, i]), format(col_sum_string, justify = "right"))
}
} else if (identical(props, "full")) {
for (i in seq_len(ncol(x))[-1]) {
for (i in seq_len(ncol(x))[numeric_columns]) {
tmp[, i] <- paste(
format(x[, i]),
format(sprintf("(%.*f%%)", digits, 100 * x[, i] / total_n), justify = "right")
Expand All @@ -154,22 +159,29 @@ format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark
})
# Remove ".00" from numbers
ftab$Total <- gsub("\\.00$", "", as.character(total_column))
# for text format, insert "empty row" before last total row
if (identical(format, "text") || identical(format, "markdown")) {
empty_row <- as.data.frame(t(data.frame(
rep("", ncol(ftab)),
c("Total", as.character(total_row)),
stringsAsFactors = FALSE
)))
} else {
empty_row <- as.data.frame(t(data.frame(
c("Total", as.character(total_row)),
stringsAsFactors = FALSE
)))

# add final total row to each sub-table. For multiple, collapsed table
# (i.e. when length of `by` > 1), we don't want multiple total rows in the
# table, so we would set include_total_row = FALSE for objects of class
# `datawizard_crosstabs` (note plural s!)
if (include_total_row) {
# for text format, insert "empty row" before last total row
if (identical(format, "text") || identical(format, "markdown")) {
empty_row <- as.data.frame(t(data.frame(
rep("", ncol(ftab)),
c("Total", as.character(total_row)),
stringsAsFactors = FALSE
)))
} else {
empty_row <- as.data.frame(t(data.frame(
c("Total", as.character(total_row)),
stringsAsFactors = FALSE
)))
}
colnames(empty_row) <- colnames(ftab)
ftab <- rbind(ftab, empty_row)
ftab[nrow(ftab), ] <- gsub("\\.00$", "", ftab[nrow(ftab), ])
}
colnames(empty_row) <- colnames(ftab)
ftab <- rbind(ftab, empty_row)
ftab[nrow(ftab), ] <- gsub("\\.00$", "", ftab[nrow(ftab), ])

# insert big marks?
ftab$Total <- .add_commas_in_numbers(ftab$Total, big_mark)
Expand All @@ -182,31 +194,30 @@ format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark
}



# print, datawizard_crosstab ---------------------


#' @export
print.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (is.null(x[["Group"]])) {
caption <- NULL
} else {
caption <- paste0("Grouped by ", x[["Group"]][1])
x$Group <- NULL
}

# print table
cat(insight::export_table(
format(x, big_mark = big_mark, ...),
cross = "+",
missing = "<NA>",
caption = caption,
empty_line = "-",
...
))
.print_text_table(x, big_mark, format = "text", ...)
invisible(x)
}


#' @export
print_md.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
.print_text_table(x, big_mark, format = "markdown", ...)
}


#' @export
print_html.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
.print_text_table(x, big_mark, format = "html", ...)
}


.print_text_table <- function(x, big_mark = NULL, format = "text", ...) {
# grouped data? if yes, add information on grouping factor
if (is.null(x[["Group"]])) {
caption <- NULL
Expand All @@ -215,75 +226,147 @@ print_md.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
x$Group <- NULL
}

# print table
insight::export_table(
format(x, format = "markdown", big_mark = big_mark, ...),
cross = "+",
missing = "<NA>",
# prepare table arguments
fun_args <- list(
format(x, big_mark = big_mark, format = format, ...),
caption = caption,
empty_line = "-",
format = "markdown"
format = format
)
}


#' @export
print_html.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (!is.null(x[["Group"]])) {
x$groups <- paste0("Grouped by ", x[["Group"]][1])
x$Group <- NULL
if (format != "html") {
fun_args$cross <- "+"
fun_args$empty_line <- "-"
}
if (format == "text") {
fun_args$missing <- "<NA>"
} else {
fun_args$missing <- "(NA)"
}
out <- do.call(insight::export_table, c(fun_args, list(...)))

# print table
insight::export_table(
format(x, big_mark = big_mark, format = "html", ...),
missing = "(NA)",
format = "html",
by = "groups"
)
if (identical(format, "text")) {
cat(out)
} else {
out
}
}


# print, datawizard_crosstabs ---------------------


#' @export
print.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
for (i in seq_along(x)) {
print(x[[i]], big_mark = big_mark, ...)
cat("\n")
}
.print_text_tables(x, big_mark, format = "text", ...)
invisible(x)
}


#' @export
print_md.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
.print_text_tables(x, big_mark, format = "markdown", ...)
}


#' @export
print_html.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
.print_text_tables(x, big_mark, format = "html", ...)
}


.print_text_tables <- function(x, big_mark = NULL, format = "text", ...) {
if (length(x) == 1) {
print_html(x[[1]], big_mark = big_mark, ...)
.print_text_table(x[[1]], big_mark = big_mark, format = format, ...)
} else {
x <- lapply(x, function(i) {
# grouped data? if yes, add information on grouping factor
if (!is.null(i[["Group"]])) {
i$groups <- paste0("Grouped by ", i[["Group"]][1])
i$Group <- NULL
}
format(i, format = "html", big_mark = big_mark, ...)
# if we don't have the gt-grouping variable "groups" yet, we use it now
# for grouping. Else, we use a new column named "Variable", to avoid
# overwriting the groups-variable from grouped data frames
if (is.null(i$groups) && identical(format, "html")) {
grp_variable <- "groups"
} else {
grp_variable <- "Variable"
}
# first variable differs for each data frame, so we harmonize it here
i[[grp_variable]] <- colnames(i)[1]
colnames(i)[1] <- "Value"
# move column to first position
i <- data_relocate(i, select = grp_variable, before = 1)
# format data frame
format(i, format = format, big_mark = big_mark, include_total_row = FALSE, ...)
})
# now bind, but we need to check for equal number of columns
if (all(lengths(x) == max(length(x)))) {
out <- do.call(rbind, x)
} else {
# if not all tables have identical columns, we can use "data_merge()",
# which safely row-binds all data frames. However, the column order can be
# messed up, so we save column order here and restore it later
col_order <- colnames(x[[which.max(lengths(x))]])
out <- data_merge(x, join = "bind")[col_order]
}

out <- do.call(rbind, x)
# split tables for grouped data frames
if (!is.null(out$groups)) {
out <- split(out, out$groups)
out <- lapply(out, function(subtable) {
# for text and markdown, if we split tables, we remove the "groups"
# variable. we need to keep it for HTML tables.
if (!identical(format, "html")) {
attr(subtable, "table_caption") <- c(unique(subtable$groups), "blue")
subtable$groups <- NULL
}
# remove duplicated names
for (grpvars in c("Variable", "Group")) {
if (!is.null(subtable[[grpvars]])) {
subtable[[grpvars]][duplicated(subtable[[grpvars]])] <- ""
}
}
subtable
})
# no splitting of grouped data frames into list for HTML format,
# because splitting is done by the `by` argument later
if (identical(format, "html")) {
out <- do.call(rbind, out)
}
}

# print table
insight::export_table(
# prepare table arguments
fun_args <- list(
out,
missing = "(NA)",
format = "html",
format = format,
by = "groups"
)
if (format != "html") {
fun_args$cross <- "+"
fun_args$empty_line <- "-"
}
if (format == "text") {
fun_args$missing <- "<NA>"
} else {
fun_args$missing <- "(NA)"
}
out <- do.call(insight::export_table, c(fun_args, list(...)))

# print table
if (identical(format, "text")) {
cat(out)
} else {
out
}
}
}



# helper ---------------------


.validate_by <- function(by, x) {
if (!is.null(by)) {
if (is.character(by)) {
Expand Down
12 changes: 5 additions & 7 deletions R/select_nse.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,11 @@
}

# small helper, to avoid duplicated code
.action_if_not_found <- function(
x,
columns,
matches,
verbose,
ifnotfound
) {
.action_if_not_found <- function(x,
columns,
matches,
verbose,
ifnotfound) {
msg <- paste0(
"Following variable(s) were not found: ",
toString(x[is.na(matches)])
Expand Down
Loading

0 comments on commit a58a031

Please sign in to comment.