From ad303358eed36a4f4350a20baeb9565143d35202 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 16 May 2024 10:47:57 +0200 Subject: [PATCH] fix rescale_weights --- NEWS.md | 1 + R/rescale_weights.R | 32 ++++++++++++++---------- man/rescale_weights.Rd | 12 +++++---- tests/testthat/_snaps/rescale_weights.md | 2 +- tests/testthat/test-rescale_weights.R | 18 +++++++------ 5 files changed, 38 insertions(+), 27 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9abe08e93..9ea8245bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ BREAKING CHANGES * `data_partition()` * `demean()` and `degroup()` * `means_by_group()` + * `rescale_weights()` CHANGES diff --git a/R/rescale_weights.R b/R/rescale_weights.R index 6f82acdad..9622e8599 100644 --- a/R/rescale_weights.R +++ b/R/rescale_weights.R @@ -10,16 +10,17 @@ #' models, which then can be used for multilevel modelling. #' #' @param data A data frame. -#' @param group Variable names (as character vector, or as formula), indicating +#' @param by Variable names (as character vector, or as formula), indicating #' the grouping structure (strata) of the survey data (level-2-cluster #' variable). It is also possible to create weights for multiple group #' variables; in such cases, each created weighting variable will be suffixed #' by the name of the group variable. #' @param probability_weights Variable indicating the probability (design or #' sampling) weights of the survey data (level-1-weight). -#' @param nest Logical, if `TRUE` and `group` indicates at least two +#' @param nest Logical, if `TRUE` and `by` indicates at least two #' group variables, then groups are "nested", i.e. groups are now a -#' combination from each group level of the variables in `group`. +#' combination from each group level of the variables in `by`. +#' @param group Deprecated. Use `by` instead. #' #' @return `data`, including the new weighting variables: `pweights_a` #' and `pweights_b`, which represent the rescaled design weights to use @@ -71,7 +72,7 @@ #' # or nested structures. #' x <- rescale_weights( #' data = nhanes_sample, -#' group = c("SDMVSTRA", "SDMVPSU"), +#' by = c("SDMVSTRA", "SDMVPSU"), #' probability_weights = "WTINT2YR", #' nest = TRUE #' ) @@ -87,9 +88,14 @@ #' ) #' } #' @export -rescale_weights <- function(data, group, probability_weights, nest = FALSE) { - if (inherits(group, "formula")) { - group <- all.vars(group) +rescale_weights <- function(data, by, probability_weights, nest = FALSE, group = NULL) { + ## TODO: deprecate later + if (!is.null(group)) { + by <- group + } + + if (inherits(by, "formula")) { + by <- all.vars(by) } # check if weight has missings. we need to remove them first, @@ -107,22 +113,22 @@ rescale_weights <- function(data, group, probability_weights, nest = FALSE) { # sort id data_tmp$.bamboozled <- seq_len(nrow(data_tmp)) - if (nest && length(group) < 2) { + if (nest && length(by) < 2) { insight::format_warning( sprintf( - "Only one group variable selected, no nested structure possible. Rescaling weights for grout '%s' now.", - group + "Only one group variable selected in `by`, no nested structure possible. Rescaling weights for grout '%s' now.", + by ) ) nest <- FALSE } if (nest) { - out <- .rescale_weights_nested(data_tmp, group, probability_weights, nrow(data), weight_non_na) + out <- .rescale_weights_nested(data_tmp, group = by, probability_weights, nrow(data), weight_non_na) } else { - out <- lapply(group, function(i) { + out <- lapply(by, function(i) { x <- .rescale_weights(data_tmp, i, probability_weights, nrow(data), weight_non_na) - if (length(group) > 1) { + if (length(by) > 1) { colnames(x) <- sprintf(c("pweight_a_%s", "pweight_b_%s"), i) } x diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd index 4a005eb99..4a67d4100 100644 --- a/man/rescale_weights.Rd +++ b/man/rescale_weights.Rd @@ -4,12 +4,12 @@ \alias{rescale_weights} \title{Rescale design weights for multilevel analysis} \usage{ -rescale_weights(data, group, probability_weights, nest = FALSE) +rescale_weights(data, by, probability_weights, nest = FALSE, group = NULL) } \arguments{ \item{data}{A data frame.} -\item{group}{Variable names (as character vector, or as formula), indicating +\item{by}{Variable names (as character vector, or as formula), indicating the grouping structure (strata) of the survey data (level-2-cluster variable). It is also possible to create weights for multiple group variables; in such cases, each created weighting variable will be suffixed @@ -18,9 +18,11 @@ by the name of the group variable.} \item{probability_weights}{Variable indicating the probability (design or sampling) weights of the survey data (level-1-weight).} -\item{nest}{Logical, if \code{TRUE} and \code{group} indicates at least two +\item{nest}{Logical, if \code{TRUE} and \code{by} indicates at least two group variables, then groups are "nested", i.e. groups are now a -combination from each group level of the variables in \code{group}.} +combination from each group level of the variables in \code{by}.} + +\item{group}{Deprecated. Use \code{by} instead.} } \value{ \code{data}, including the new weighting variables: \code{pweights_a} @@ -73,7 +75,7 @@ if (require("lme4")) { # or nested structures. x <- rescale_weights( data = nhanes_sample, - group = c("SDMVSTRA", "SDMVPSU"), + by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE ) diff --git a/tests/testthat/_snaps/rescale_weights.md b/tests/testthat/_snaps/rescale_weights.md index d158070a8..5de6d489a 100644 --- a/tests/testthat/_snaps/rescale_weights.md +++ b/tests/testthat/_snaps/rescale_weights.md @@ -34,7 +34,7 @@ # rescale_weights nested works as expected Code - rescale_weights(data = head(nhanes_sample, n = 30), group = c("SDMVSTRA", + rescale_weights(data = head(nhanes_sample, n = 30), by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE) Output total age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweights_a diff --git a/tests/testthat/test-rescale_weights.R b/tests/testthat/test-rescale_weights.R index bcd279355..504157180 100644 --- a/tests/testthat/test-rescale_weights.R +++ b/tests/testthat/test-rescale_weights.R @@ -13,19 +13,21 @@ test_that("rescale_weights nested works as expected", { expect_snapshot( rescale_weights( data = head(nhanes_sample, n = 30), - group = c("SDMVSTRA", "SDMVPSU"), + by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE ) ) expect_warning( - x <- rescale_weights( - data = head(nhanes_sample), - group = "SDMVPSU", - probability_weights = "WTINT2YR", - nest = TRUE - ), + { + x <- rescale_weights( + data = head(nhanes_sample), + by = "SDMVPSU", + probability_weights = "WTINT2YR", + nest = TRUE + ) + }, "Only one group variable selected" ) @@ -33,7 +35,7 @@ test_that("rescale_weights nested works as expected", { x, rescale_weights( data = head(nhanes_sample), - group = "SDMVPSU", + by = "SDMVPSU", probability_weights = "WTINT2YR" ) )