diff --git a/DESCRIPTION b/DESCRIPTION index 2ed1150ae..0a7fb9594 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.11.0.2 +Version: 0.11.0.3 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index 1e49e91a4..69c228291 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,11 @@ * `data_select()` can directly rename selected variables when a named vector is provided in `select`, e.g. `data_select(mtcars, c(new1 = "mpg", new2 = "cyl"))`. +* `data_tabulate()` gains an `as.data.frame()` method, to return the frequency + table as a data frame. The structure of the returned object is a nested data + frame, where the first column contains name of the variable for which + frequencies were calculated, and the second column contains the frequency table. + # datawizard 0.11.0 BREAKING CHANGES diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 2319b5933..18b7c0a8d 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -25,10 +25,19 @@ #' percentages to be calculated. Only applies to crosstables, i.e. when `by` is #' not `NULL`. Can be `"row"` (row percentages), `"column"` (column percentages) #' or `"full"` (to calculate relative frequencies for the full table). -#' @param add_total Add total. +#' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and +#' column with the total N values are added to the data frame. `add_total` has +#' no effect in `as.data.frame()` for simple frequency tables. #' @param ... not used. #' @inheritParams extract_column_names #' +#' @details +#' There is an `as.data.frame()` method, to return the frequency tables as a +#' data frame. 
The structure of the returned object is a nested data frame, +#' where the first column contains the name of the variable for which frequencies +#' were calculated, and the second column is a list column that contains the +#' frequency tables as data frames. See 'Examples'. +#' #' @section Crosstables: #' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>` #' (missing) values by default. The first column indicates values of `x`, the @@ -109,6 +118,12 @@ #' # round percentages #' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") #' print(out, digits = 0) +#' +#' # coerce to data frames +#' result <- data_tabulate(efc, "c172code", by = "e16sex") +#' as.data.frame(result) +#' as.data.frame(result)$table +#' as.data.frame(result, add_total = TRUE)$table #' @export data_tabulate <- function(x, ...) { UseMethod("data_tabulate") @@ -388,7 +403,7 @@ as.data.frame.datawizard_tables <- function(x, add_total = FALSE, ...) { selected_vars <- unlist(lapply(x, function(i) attributes(i)$varname)) # coerce to data frame, remove rownames data_frames <- lapply(x, function(i) { - class(i) <- "data.frame" + # class(i) <- "data.frame" if (add_total) { out <- as.data.frame(format(i)) for (cols in 2:ncol(out)) { diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 7c2657278..3020c8c35 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -120,7 +120,9 @@ functions (see 'Details'), this argument may be used as workaround.} \item{collapse}{Logical, if \code{TRUE} collapses multiple tables into one larger table for printing. This affects only printing, not the returned object.} -\item{add_total}{Add total.} +\item{add_total}{For crosstables (i.e. when \code{by} is not \code{NULL}), a row and +column with the total N values are added to the data frame.
\code{add_total} has +no effect in \code{as.data.frame()} for simple frequency tables.} } \value{ A data frame, or a list of data frames, with one frequency table @@ -132,6 +134,13 @@ including the number of levels/values as well as the distribution of raw, valid and cumulative percentages. For crosstables, row, column and cell percentages can be calculated. } +\details{ +There is an \code{as.data.frame()} method, to return the frequency tables as a +data frame. The structure of the returned object is a nested data frame, +where the first column contains the name of the variable for which frequencies +were calculated, and the second column is a list column that contains the +frequency tables as data frames. See 'Examples'. +} \note{ There are \code{print_html()} and \code{print_md()} methods available for printing frequency or crosstables in HTML and markdown format, e.g. @@ -212,5 +221,11 @@ data_tabulate( # round percentages out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") print(out, digits = 0) + +# coerce to data frames +result <- data_tabulate(efc, "c172code", by = "e16sex") +as.data.frame(result) +as.data.frame(result)$table +as.data.frame(result, add_total = TRUE)$table \dontshow{\}) # examplesIf} }