Skip to content

Commit

Permalink
fix rescale_weights
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed May 16, 2024
1 parent 7eca077 commit ad30335
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 27 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ BREAKING CHANGES
* `data_partition()`
* `demean()` and `degroup()`
* `means_by_group()`
* `rescale_weights()`

CHANGES

Expand Down
32 changes: 19 additions & 13 deletions R/rescale_weights.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,17 @@
#' models, which then can be used for multilevel modelling.
#'
#' @param data A data frame.
#' @param group Variable names (as character vector, or as formula), indicating
#' @param by Variable names (as character vector, or as formula), indicating
#' the grouping structure (strata) of the survey data (level-2-cluster
#' variable). It is also possible to create weights for multiple group
#' variables; in such cases, each created weighting variable will be suffixed
#' by the name of the group variable.
#' @param probability_weights Variable indicating the probability (design or
#' sampling) weights of the survey data (level-1-weight).
#' @param nest Logical, if `TRUE` and `group` indicates at least two
#' @param nest Logical, if `TRUE` and `by` indicates at least two
#' group variables, then groups are "nested", i.e. groups are now a
#' combination from each group level of the variables in `group`.
#' combination from each group level of the variables in `by`.
#' @param group Deprecated. Use `by` instead.
#'
#' @return `data`, including the new weighting variables: `pweights_a`
#' and `pweights_b`, which represent the rescaled design weights to use
Expand Down Expand Up @@ -71,7 +72,7 @@
#' # or nested structures.
#' x <- rescale_weights(
#' data = nhanes_sample,
#' group = c("SDMVSTRA", "SDMVPSU"),
#' by = c("SDMVSTRA", "SDMVPSU"),
#' probability_weights = "WTINT2YR",
#' nest = TRUE
#' )
Expand All @@ -87,9 +88,14 @@
#' )
#' }
#' @export
rescale_weights <- function(data, group, probability_weights, nest = FALSE) {
if (inherits(group, "formula")) {
group <- all.vars(group)
rescale_weights <- function(data, by, probability_weights, nest = FALSE, group = NULL) {
## TODO: deprecate later
if (!is.null(group)) {
by <- group
}

if (inherits(by, "formula")) {
by <- all.vars(by)
}

# check if weight has missings. we need to remove them first,
Expand All @@ -107,22 +113,22 @@ rescale_weights <- function(data, group, probability_weights, nest = FALSE) {
# sort id
data_tmp$.bamboozled <- seq_len(nrow(data_tmp))

if (nest && length(group) < 2) {
if (nest && length(by) < 2) {
insight::format_warning(
sprintf(
"Only one group variable selected, no nested structure possible. Rescaling weights for grout '%s' now.",
group
"Only one group variable selected in `by`, no nested structure possible. Rescaling weights for grout '%s' now.",
by
)
)
nest <- FALSE
}

if (nest) {
out <- .rescale_weights_nested(data_tmp, group, probability_weights, nrow(data), weight_non_na)
out <- .rescale_weights_nested(data_tmp, group = by, probability_weights, nrow(data), weight_non_na)
} else {
out <- lapply(group, function(i) {
out <- lapply(by, function(i) {
x <- .rescale_weights(data_tmp, i, probability_weights, nrow(data), weight_non_na)
if (length(group) > 1) {
if (length(by) > 1) {
colnames(x) <- sprintf(c("pweight_a_%s", "pweight_b_%s"), i)
}
x
Expand Down
12 changes: 7 additions & 5 deletions man/rescale_weights.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/_snaps/rescale_weights.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# rescale_weights nested works as expected

Code
rescale_weights(data = head(nhanes_sample, n = 30), group = c("SDMVSTRA",
rescale_weights(data = head(nhanes_sample, n = 30), by = c("SDMVSTRA",
"SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE)
Output
total age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweights_a
Expand Down
18 changes: 10 additions & 8 deletions tests/testthat/test-rescale_weights.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,29 @@ test_that("rescale_weights nested works as expected", {
expect_snapshot(
rescale_weights(
data = head(nhanes_sample, n = 30),
group = c("SDMVSTRA", "SDMVPSU"),
by = c("SDMVSTRA", "SDMVPSU"),
probability_weights = "WTINT2YR",
nest = TRUE
)
)

expect_warning(
x <- rescale_weights(
data = head(nhanes_sample),
group = "SDMVPSU",
probability_weights = "WTINT2YR",
nest = TRUE
),
{
x <- rescale_weights(
data = head(nhanes_sample),
by = "SDMVPSU",
probability_weights = "WTINT2YR",
nest = TRUE
)
},
"Only one group variable selected"
)

expect_identical(
x,
rescale_weights(
data = head(nhanes_sample),
group = "SDMVPSU",
by = "SDMVPSU",
probability_weights = "WTINT2YR"
)
)
Expand Down

0 comments on commit ad30335

Please sign in to comment.