From ad303358eed36a4f4350a20baeb9565143d35202 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Thu, 16 May 2024 10:47:57 +0200
Subject: [PATCH] fix rescale_weights

---
 NEWS.md                                  |  1 +
 R/rescale_weights.R                      | 32 ++++++++++++++----------
 man/rescale_weights.Rd                   | 12 +++++----
 tests/testthat/_snaps/rescale_weights.md |  2 +-
 tests/testthat/test-rescale_weights.R    | 18 +++++++------
 5 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 9abe08e93..9ea8245bc 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,6 +8,7 @@ BREAKING CHANGES
   * `data_partition()`
   * `demean()` and `degroup()`
   * `means_by_group()`
+  * `rescale_weights()`
 
 CHANGES
 
diff --git a/R/rescale_weights.R b/R/rescale_weights.R
index 6f82acdad..9622e8599 100644
--- a/R/rescale_weights.R
+++ b/R/rescale_weights.R
@@ -10,16 +10,17 @@
 #'   models, which then can be used for multilevel modelling.
 #'
 #' @param data A data frame.
-#' @param group Variable names (as character vector, or as formula), indicating
+#' @param by Variable names (as character vector, or as formula), indicating
 #'   the grouping structure (strata) of the survey data (level-2-cluster
 #'   variable). It is also possible to create weights for multiple group
 #'   variables; in such cases, each created weighting variable will be suffixed
 #'   by the name of the group variable.
 #' @param probability_weights Variable indicating the probability (design or
 #'   sampling) weights of the survey data (level-1-weight).
-#' @param nest Logical, if `TRUE` and `group` indicates at least two
+#' @param nest Logical, if `TRUE` and `by` indicates at least two
 #'   group variables, then groups are "nested", i.e. groups are now a
-#'   combination from each group level of the variables in `group`.
+#'   combination from each group level of the variables in `by`.
+#' @param group Deprecated. Use `by` instead.
 #'
 #' @return `data`, including the new weighting variables: `pweights_a`
 #'   and `pweights_b`, which represent the rescaled design weights to use
@@ -71,7 +72,7 @@
 #'   # or nested structures.
 #'   x <- rescale_weights(
 #'     data = nhanes_sample,
-#'     group = c("SDMVSTRA", "SDMVPSU"),
+#'     by = c("SDMVSTRA", "SDMVPSU"),
 #'     probability_weights = "WTINT2YR",
 #'     nest = TRUE
 #'   )
@@ -87,9 +88,14 @@
 #'   )
 #' }
 #' @export
-rescale_weights <- function(data, group, probability_weights, nest = FALSE) {
-  if (inherits(group, "formula")) {
-    group <- all.vars(group)
+rescale_weights <- function(data, by, probability_weights, nest = FALSE, group = NULL) {
+  ## TODO: deprecate later
+  if (!is.null(group)) {
+    by <- group
+  }
+
+  if (inherits(by, "formula")) {
+    by <- all.vars(by)
   }
 
   # check if weight has missings. we need to remove them first,
@@ -107,22 +113,22 @@ rescale_weights <- function(data, group, probability_weights, nest = FALSE) {
   # sort id
   data_tmp$.bamboozled <- seq_len(nrow(data_tmp))
 
-  if (nest && length(group) < 2) {
+  if (nest && length(by) < 2) {
     insight::format_warning(
       sprintf(
-        "Only one group variable selected, no nested structure possible. Rescaling weights for grout '%s' now.",
-        group
+        "Only one group variable selected in `by`, no nested structure possible. Rescaling weights for grout '%s' now.",
+        by
       )
     )
     nest <- FALSE
   }
 
   if (nest) {
-    out <- .rescale_weights_nested(data_tmp, group, probability_weights, nrow(data), weight_non_na)
+    out <- .rescale_weights_nested(data_tmp, group = by, probability_weights, nrow(data), weight_non_na)
   } else {
-    out <- lapply(group, function(i) {
+    out <- lapply(by, function(i) {
       x <- .rescale_weights(data_tmp, i, probability_weights, nrow(data), weight_non_na)
-      if (length(group) > 1) {
+      if (length(by) > 1) {
         colnames(x) <- sprintf(c("pweight_a_%s", "pweight_b_%s"), i)
       }
       x
diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd
index 4a005eb99..4a67d4100 100644
--- a/man/rescale_weights.Rd
+++ b/man/rescale_weights.Rd
@@ -4,12 +4,12 @@
 \alias{rescale_weights}
 \title{Rescale design weights for multilevel analysis}
 \usage{
-rescale_weights(data, group, probability_weights, nest = FALSE)
+rescale_weights(data, by, probability_weights, nest = FALSE, group = NULL)
 }
 \arguments{
 \item{data}{A data frame.}
 
-\item{group}{Variable names (as character vector, or as formula), indicating
+\item{by}{Variable names (as character vector, or as formula), indicating
 the grouping structure (strata) of the survey data (level-2-cluster
 variable). It is also possible to create weights for multiple group
 variables; in such cases, each created weighting variable will be suffixed
@@ -18,9 +18,11 @@ by the name of the group variable.}
 \item{probability_weights}{Variable indicating the probability (design or
 sampling) weights of the survey data (level-1-weight).}
 
-\item{nest}{Logical, if \code{TRUE} and \code{group} indicates at least two
+\item{nest}{Logical, if \code{TRUE} and \code{by} indicates at least two
 group variables, then groups are "nested", i.e. groups are now a
-combination from each group level of the variables in \code{group}.}
+combination from each group level of the variables in \code{by}.}
+
+\item{group}{Deprecated. Use \code{by} instead.}
 }
 \value{
 \code{data}, including the new weighting variables: \code{pweights_a}
@@ -73,7 +75,7 @@ if (require("lme4")) {
   # or nested structures.
   x <- rescale_weights(
     data = nhanes_sample,
-    group = c("SDMVSTRA", "SDMVPSU"),
+    by = c("SDMVSTRA", "SDMVPSU"),
     probability_weights = "WTINT2YR",
     nest = TRUE
   )
diff --git a/tests/testthat/_snaps/rescale_weights.md b/tests/testthat/_snaps/rescale_weights.md
index d158070a8..5de6d489a 100644
--- a/tests/testthat/_snaps/rescale_weights.md
+++ b/tests/testthat/_snaps/rescale_weights.md
@@ -34,7 +34,7 @@
 # rescale_weights nested works as expected
 
     Code
-      rescale_weights(data = head(nhanes_sample, n = 30), group = c("SDMVSTRA",
+      rescale_weights(data = head(nhanes_sample, n = 30), by = c("SDMVSTRA",
         "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE)
     Output
          total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA  WTINT2YR pweights_a
diff --git a/tests/testthat/test-rescale_weights.R b/tests/testthat/test-rescale_weights.R
index bcd279355..504157180 100644
--- a/tests/testthat/test-rescale_weights.R
+++ b/tests/testthat/test-rescale_weights.R
@@ -13,19 +13,21 @@ test_that("rescale_weights nested works as expected", {
   expect_snapshot(
     rescale_weights(
       data = head(nhanes_sample, n = 30),
-      group = c("SDMVSTRA", "SDMVPSU"),
+      by = c("SDMVSTRA", "SDMVPSU"),
       probability_weights = "WTINT2YR",
       nest = TRUE
     )
   )
 
   expect_warning(
-    x <- rescale_weights(
-      data = head(nhanes_sample),
-      group = "SDMVPSU",
-      probability_weights = "WTINT2YR",
-      nest = TRUE
-    ),
+    {
+      x <- rescale_weights(
+        data = head(nhanes_sample),
+        by = "SDMVPSU",
+        probability_weights = "WTINT2YR",
+        nest = TRUE
+      )
+    },
     "Only one group variable selected"
   )
 
@@ -33,7 +35,7 @@ test_that("rescale_weights nested works as expected", {
     x,
     rescale_weights(
       data = head(nhanes_sample),
-      group = "SDMVPSU",
+      by = "SDMVPSU",
       probability_weights = "WTINT2YR"
     )
   )