diff --git a/R/rescale_weights.R b/R/rescale_weights.R index bc10ca249..5113a281e 100644 --- a/R/rescale_weights.R +++ b/R/rescale_weights.R @@ -15,13 +15,15 @@ #' the grouping structure (strata) of the survey data (level-2-cluster #' variable). It is also possible to create weights for multiple group #' variables; in such cases, each created weighting variable will be suffixed -#' by the name of the group variable. +#' by the name of the group variable. Argument `by` only applies to the default +#' rescaling-method (`method = "carle"`), not to `method = "kish"`. #' @param probability_weights Variable indicating the probability (design or #' sampling) weights of the survey data (level-1-weight). #' @param nest Logical, if `TRUE` and `by` indicates at least two #' group variables, then groups are "nested", i.e. groups are now a #' combination from each group level of the variables in `by`. -#' @param method `"carle"` or `"kish"`. +#' @param method String, indicating which rescale-method is used for rescaling +#' weights. Can be either `"carle"` (default) or `"kish"`. See 'Details'. #' #' @return `data`, including the new weighting variable(s). For #' `method = "carle"`, new columns `pweights_a` and `pweights_b` are returned, @@ -137,10 +139,16 @@ rescale_weights <- function(data, } # check for existing variable names - if (any(c("pweights_a", "pweights_b", "pweights") %in% colnames(data))) { + if ((method == "carle" && any(c("pweights_a", "pweights_b") %in% colnames(data))) || + (method == "kish" && "pweights" %in% colnames(data))) { insight::format_warning("The variable name for the rescaled weights already exists in the data. Returned columns will be renamed into unique names.") # nolint } + # need probability_weights + if (is.null(probability_weights)) { + insight::format_error("The argument `probability_weights` is missing, but required to rescale weights.") + } + # check if weight has missings. we need to remove them first, # and add back weights to correct cases later @@ -172,6 +180,10 @@ rescale_weights <- function(data, # rescale weights, method Kish ---------------------------- .rescale_weights_kish <- function(nest, probability_weights, data_tmp, data, by, weight_non_na) { + # check argument + if (!is.null(by)) { + insight::format_warning("The `by` argument is not used for `method = \"kish\" and will be ignored.") + } p_weights <- data_tmp[[probability_weights]] # design effect according to Kish deff <- mean(p_weights^2) / (mean(p_weights)^2) diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd index 45dd460b1..41e932990 100644 --- a/man/rescale_weights.Rd +++ b/man/rescale_weights.Rd @@ -19,7 +19,8 @@ rescale_weights( the grouping structure (strata) of the survey data (level-2-cluster variable). It is also possible to create weights for multiple group variables; in such cases, each created weighting variable will be suffixed -by the name of the group variable.} +by the name of the group variable. Argument \code{by} only applies to the default +rescaling-method (\code{method = "carle"}), not to \code{method = "kish"}.} \item{probability_weights}{Variable indicating the probability (design or sampling) weights of the survey data (level-1-weight).} @@ -28,7 +29,8 @@ sampling) weights of the survey data (level-1-weight).} group variables, then groups are "nested", i.e. groups are now a combination from each group level of the variables in \code{by}.} -\item{method}{\code{"carle"} or \code{"kish"}.} +\item{method}{String, indicating which rescale-method is used for rescaling +weights. Can be either \code{"carle"} (default) or \code{"kish"}. See 'Details'.} } \value{ \code{data}, including the new weighting variable(s). For diff --git a/tests/testthat/_snaps/rescale_weights.md b/tests/testthat/_snaps/rescale_weights.md index 48185e7cc..ecdefdd06 100644 --- a/tests/testthat/_snaps/rescale_weights.md +++ b/tests/testthat/_snaps/rescale_weights.md @@ -36,15 +36,13 @@ Code head(rescale_weights(nhanes_sample, probability_weights = "WTINT2YR", method = "kish")) Output - # A tibble: 6 x 8 - total age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweights - - 1 1 2.2 1 3 2 31 97594. 1.40 - 2 7 2.08 2 3 1 29 39599. 0.566 - 3 3 1.48 2 1 2 42 26620. 0.381 - 4 4 1.32 2 4 2 33 34999. 0.500 - 5 1 2 2 1 1 41 14746. 0.211 - 6 6 2.2 2 4 1 38 28232. 0.404 + total age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweights + 1 1 2.20 1 3 2 31 97593.68 1.3952529 + 2 7 2.08 2 3 1 29 39599.36 0.5661343 + 3 3 1.48 2 1 2 42 26619.83 0.3805718 + 4 4 1.32 2 4 2 33 34998.53 0.5003582 + 5 1 2.00 2 1 1 41 14746.45 0.2108234 + 6 6 2.20 2 4 1 38 28232.10 0.4036216 # rescale_weights nested works as expected diff --git a/tests/testthat/test-rescale_weights.R b/tests/testthat/test-rescale_weights.R index c604c7dc5..9c2415626 100644 --- a/tests/testthat/test-rescale_weights.R +++ b/tests/testthat/test-rescale_weights.R @@ -1,5 +1,7 @@ test_that("rescale_weights works as expected", { data(nhanes_sample) + # convert tibble into data frame, so check-hard GHA works + nhanes_sample <- as.data.frame(nhanes_sample) expect_snapshot(head(rescale_weights(nhanes_sample, "SDMVSTRA", "WTINT2YR"))) @@ -17,6 +19,8 @@ test_that("rescale_weights works as expected", { test_that("rescale_weights nested works as expected", { data(nhanes_sample) + # convert tibble into data frame, so check-hard GHA works + nhanes_sample <- as.data.frame(nhanes_sample) expect_snapshot( rescale_weights( @@ -60,6 +64,14 @@ test_that("rescale_weights errors and warnings", { ), regex = "The following" ) + expect_error( + rescale_weights( + data = head(nhanes_sample, n = 30), + by = "SDMVSTRA", + probability_weights = NULL + ), + regex = "is missing, but required" + ) expect_error( rescale_weights( data = head(nhanes_sample, n = 30), @@ -68,6 +80,16 @@ test_that("rescale_weights errors and warnings", { ), regex = "must be specified" ) + expect_warning( + rescale_weights( + data = head(nhanes_sample, n = 30), + by = "SDMVSTRA", + probability_weights = "WTINT2YR", + method = "kish" + ), + regex = "is not used" + ) + nhanes_sample$pweights_a <- 1 expect_warning( {