From 071d72d931095cf11eb0f9b935aaea89be34ac8e Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 21 May 2024 18:30:17 +0200 Subject: [PATCH] update docs --- R/data_to_long.R | 39 ++++++++++++++++++++++++++++++++++----- man/coef_var.Rd | 4 ++++ man/data_to_long.Rd | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/R/data_to_long.R b/R/data_to_long.R index e4ca2119e..45020744f 100644 --- a/R/data_to_long.R +++ b/R/data_to_long.R @@ -8,12 +8,15 @@ #' rows and fewer columns after the operation. #' @param names_to The name of the new column (variable) that will contain the #' _names_ from columns in `select` as values, to identify the source of the -#' values. +#' values. `names_to` can be a character vector with more than one column name, +#' in which case `names_sep` or `names_pattern` must be provided in order to +#' identify which parts of the column names go into newly created columns. +#' See also 'Examples'. #' @param names_prefix A regular expression used to remove matching text from #' the start of each variable name. #' @param names_sep,names_pattern If `names_to` contains multiple values, this -#' argument controls how the column name is broken up. -#' `names_pattern` takes a regular expression containing matching groups, i.e. "()". +#' argument controls how the column name is broken up. `names_pattern` takes a +#' regular expression containing matching groups, i.e. "()". #' @param values_to The name of the new column that will contain the _values_ of #' the columns in `select`. #' @param values_drop_na If `TRUE`, will drop rows that contain only `NA` in the @@ -41,13 +44,16 @@ #' - The columns that contain the repeated measurements (`select`). #' - The name of the newly created column that will contain the names of the #' columns in `select` (`names_to`), to identify the source of the values. 
+#' `names_to` can also be a character vector with more than one column name, +#' in which case `names_sep` or `names_pattern` must be provided to specify +#' which parts of the column names go into the newly created columns. #' - The name of the newly created column that contains the values of the #' columns in `select` (`values_to`). #' #' In other words: repeated measurements that are spread across several columns #' will be gathered into a single column (`values_to`), with the original column -#' names, that identify the source of the gathered values, stored in a new column -#' (`names_to`). +#' names, that identify the source of the gathered values, stored in one or more +#' new columns (`names_to`). #' #' @return If a tibble was provided as input, `reshape_longer()` also returns a #' tibble. Otherwise, it returns a data frame. @@ -72,6 +78,29 @@ #' values_to = "Score" #' ) #' +#' # Reshape multiple columns into long format. +#' mydat <- data.frame( +#' age = c(20, 30, 40), +#' sex = c("Female", "Male", "Male"), +#' score_t1 = c(30, 35, 32), +#' score_t2 = c(33, 34, 37), +#' score_t3 = c(36, 35, 38), +#' speed_t1 = c(2, 3, 1), +#' speed_t2 = c(3, 4, 5), +#' speed_t3 = c(1, 8, 6) +#' ) +#' # The column names are split into two columns: "type" and "time". The +#' # pattern for splitting column names is provided in `names_pattern`. Values +#' # of all "score_*" and "speed_*" columns are gathered into a single column +#' # named "count". +#' data_to_long( +#' mydat, +#' select = 3:8, +#' names_to = c("type", "time"), +#' names_pattern = "(score|speed)_t(\\d+)", +#' values_to = "count" +#' ) +#' #' # Full example #' # ------------------ #' data <- psych::bfi # Wide format with one row per participant's personality test diff --git a/man/coef_var.Rd b/man/coef_var.Rd index 92274ca59..0f0965076 100644 --- a/man/coef_var.Rd +++ b/man/coef_var.Rd @@ -79,10 +79,14 @@ This means that CV is \strong{NOT} invariant to shifting, but it is to scaling: \if{html}{\out{
}}\preformatted{sandwiches <- c(0, 4, 15, 0, 0, 5, 2, 7) coef_var(sandwiches) #> [1] 1.239094 +}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ coef_var(sandwiches / 2) # same #> [1] 1.239094 +}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ coef_var(sandwiches + 4) # different! 0 is no longer meaningful! #> [1] 0.6290784 }\if{html}{\out{
}} diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index 3de75fe00..7256a9280 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -75,14 +75,17 @@ will just return \code{"Species"}.} \item{names_to}{The name of the new column (variable) that will contain the \emph{names} from columns in \code{select} as values, to identify the source of the -values.} +values. \code{names_to} can be a character vector with more than one column name, +in which case \code{names_sep} or \code{names_pattern} must be provided in order to +identify which parts of the column names go into newly created columns. +See also 'Examples'.} \item{names_prefix}{A regular expression used to remove matching text from the start of each variable name.} \item{names_sep, names_pattern}{If \code{names_to} contains multiple values, this -argument controls how the column name is broken up. -\code{names_pattern} takes a regular expression containing matching groups, i.e. "()".} +argument controls how the column name is broken up. \code{names_pattern} takes a +regular expression containing matching groups, i.e. "()".} \item{values_to}{The name of the new column that will contain the \emph{values} of the columns in \code{select}.} @@ -136,14 +139,17 @@ The necessary information for \code{data_to_long()} is: \item The columns that contain the repeated measurements (\code{select}). \item The name of the newly created column that will contain the names of the columns in \code{select} (\code{names_to}), to identify the source of the values. +\code{names_to} can also be a character vector with more than one column name, +in which case \code{names_sep} or \code{names_pattern} must be provided to specify +which parts of the column names go into the newly created columns. \item The name of the newly created column that contains the values of the columns in \code{select} (\code{values_to}). 
} In other words: repeated measurements that are spread across several columns will be gathered into a single column (\code{values_to}), with the original column -names, that identify the source of the gathered values, stored in a new column -(\code{names_to}). +names, that identify the source of the gathered values, stored in one or more +new columns (\code{names_to}). } \examples{ \dontshow{if (requireNamespace("psych") && requireNamespace("tidyr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} @@ -166,6 +172,29 @@ data_to_long( values_to = "Score" ) +# Reshape multiple columns into long format. +mydat <- data.frame( + age = c(20, 30, 40), + sex = c("Female", "Male", "Male"), + score_t1 = c(30, 35, 32), + score_t2 = c(33, 34, 37), + score_t3 = c(36, 35, 38), + speed_t1 = c(2, 3, 1), + speed_t2 = c(3, 4, 5), + speed_t3 = c(1, 8, 6) +) +# The column names are split into two columns: "type" and "time". The +# pattern for splitting column names is provided in `names_pattern`. Values +# of all "score_*" and "speed_*" columns are gathered into a single column +# named "count". +data_to_long( + mydat, + select = 3:8, + names_to = c("type", "time"), + names_pattern = "(score|speed)_t(\\\\d+)", + values_to = "count" +) + # Full example # ------------------ data <- psych::bfi # Wide format with one row per participant's personality test