fix

easystats · Dec 18, 2024 · 8214df0 · 8214df0
1 parent f466642
commit 8214df0
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 14 deletions.
diff --git a/R/rescale_weights.R b/R/rescale_weights.R
@@ -15,13 +15,15 @@
 #' the grouping structure (strata) of the survey data (level-2-cluster
 #' variable). It is also possible to create weights for multiple group
 #' variables; in such cases, each created weighting variable will be suffixed
-#' by the name of the group variable.
+#' by the name of the group variable. Argument `by` only applies to the default
+#' rescaling-method (`method = "carle"`), not to `method = "kish"`.
 #' @param probability_weights Variable indicating the probability (design or
 #' sampling) weights of the survey data (level-1-weight).
 #' @param nest Logical, if `TRUE` and `by` indicates at least two
 #' group variables, then groups are "nested", i.e. groups are now a
 #' combination from each group level of the variables in `by`.
-#' @param method `"carle"` or `"kish"`.
+#' @param method String, indicating which rescale-method is used for rescaling
+#' weights. Can be either `"carle"` (default) or `"kish"`. See 'Details'.
 #'
 #' @return `data`, including the new weighting variable(s). For
 #' `method = "carle"`, new columns `pweights_a` and `pweights_b` are returned,
@@ -137,10 +139,16 @@ rescale_weights <- function(data,
   }
 
   # check for existing variable names
-  if (any(c("pweights_a", "pweights_b", "pweights") %in% colnames(data))) {
+  if ((method == "carle" && any(c("pweights_a", "pweights_b") %in% colnames(data))) ||
+    (method == "kish" && "pweights" %in% colnames(data))) {
     insight::format_warning("The variable name for the rescaled weights already exists in the data. Returned columns will be renamed into unique names.") # nolint
   }
 
+  # need probability_weights
+  if (is.null(probability_weights)) {
+    insight::format_error("The argument `probability_weights` is missing, but required to rescale weights.")
+  }
+
   # check if weight has missings. we need to remove them first,
   # and add back weights to correct cases later
 
@@ -172,6 +180,10 @@ rescale_weights <- function(data,
 # rescale weights, method Kish ----------------------------
 
 .rescale_weights_kish <- function(nest, probability_weights, data_tmp, data, by, weight_non_na) {
+  # check argument
+  if (!is.null(by)) {
+    insight::format_warning("The `by` argument is not used for `method = \"kish\" and will be ignored.")
+  }
   p_weights <- data_tmp[[probability_weights]]
   # design effect according to Kish
   deff <- mean(p_weights^2) / (mean(p_weights)^2)

diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd
diff --git a/tests/testthat/_snaps/rescale_weights.md b/tests/testthat/_snaps/rescale_weights.md
@@ -36,15 +36,13 @@
     Code
       head(rescale_weights(nhanes_sample, probability_weights = "WTINT2YR", method = "kish"))
     Output
-      # A tibble: 6 x 8
-        total   age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweights
-        <dbl> <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>
-      1     1  2.2         1        3       2       31   97594.    1.40 
-      2     7  2.08        2        3       1       29   39599.    0.566
-      3     3  1.48        2        1       2       42   26620.    0.381
-      4     4  1.32        2        4       2       33   34999.    0.500
-      5     1  2           2        1       1       41   14746.    0.211
-      6     6  2.2         2        4       1       38   28232.    0.404
+        total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR  pweights
+      1     1 2.20        1        3       2       31 97593.68 1.3952529
+      2     7 2.08        2        3       1       29 39599.36 0.5661343
+      3     3 1.48        2        1       2       42 26619.83 0.3805718
+      4     4 1.32        2        4       2       33 34998.53 0.5003582
+      5     1 2.00        2        1       1       41 14746.45 0.2108234
+      6     6 2.20        2        4       1       38 28232.10 0.4036216
 
 # rescale_weights nested works as expected
 

diff --git a/tests/testthat/test-rescale_weights.R b/tests/testthat/test-rescale_weights.R
@@ -1,5 +1,7 @@
 test_that("rescale_weights works as expected", {
   data(nhanes_sample)
+  # convert tibble into data frame, so check-hard GHA works
+  nhanes_sample <- as.data.frame(nhanes_sample)
 
   expect_snapshot(head(rescale_weights(nhanes_sample, "SDMVSTRA", "WTINT2YR")))
 
@@ -17,6 +19,8 @@ test_that("rescale_weights works as expected", {
 
 test_that("rescale_weights nested works as expected", {
   data(nhanes_sample)
+  # convert tibble into data frame, so check-hard GHA works
+  nhanes_sample <- as.data.frame(nhanes_sample)
 
   expect_snapshot(
     rescale_weights(
@@ -60,6 +64,14 @@ test_that("rescale_weights errors and warnings", {
     ),
     regex = "The following"
   )
+  expect_error(
+    rescale_weights(
+      data = head(nhanes_sample, n = 30),
+      by = "SDMVSTRA",
+      probability_weights = NULL
+    ),
+    regex = "is missing, but required"
+  )
   expect_error(
     rescale_weights(
       data = head(nhanes_sample, n = 30),
@@ -68,6 +80,16 @@ test_that("rescale_weights errors and warnings", {
     ),
     regex = "must be specified"
   )
+  expect_warning(
+    rescale_weights(
+      data = head(nhanes_sample, n = 30),
+      by = "SDMVSTRA",
+      probability_weights = "WTINT2YR",
+      method = "kish"
+    ),
+    regex = "is not used"
+  )
+
   nhanes_sample$pweights_a <- 1
   expect_warning(
     {