From 2a5db3ec2efa4ed148c042184ca0869cffe905d8 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 4 Jan 2024 12:27:52 +0100 Subject: [PATCH] pool_parameters df_error Infinity (#935) Fixes #934 --- R/pool_parameters.R | 40 ++++++++++++++++++--------- man/pool_parameters.Rd | 40 +++++++++++++++++++-------- tests/testthat/test-pool_parameters.R | 18 ++++++++++++ 3 files changed, 73 insertions(+), 25 deletions(-) diff --git a/R/pool_parameters.R b/R/pool_parameters.R index 8e278ddf6..f985a84af 100644 --- a/R/pool_parameters.R +++ b/R/pool_parameters.R @@ -8,7 +8,9 @@ #' @param x A list of `parameters_model` objects, as returned by #' [model_parameters()], or a list of model-objects that is supported by #' `model_parameters()`. -#' @param ... Currently not used. +#' @param ... Arguments passed down to `model_parameters()`, if `x` is a list +#' of model-objects. Can be used, for instance, to specify arguments like +#' `ci` or `ci_method` etc. #' @inheritParams model_parameters.default #' @inheritParams bootstrap_model #' @inheritParams model_parameters.merMod @@ -35,20 +37,32 @@ #' multiple imputation. Biometrika, 86, 948-955. Rubin, D.B. (1987). Multiple #' Imputation for Nonresponse in Surveys. New York: John Wiley and Sons. #' -#' @examples +#' @examplesIf require("mice") && require("datawizard") #' # example for multiple imputed datasets -#' if (require("mice")) { -#' data("nhanes2") -#' imp <- mice(nhanes2, printFlag = FALSE) -#' models <- lapply(1:5, function(i) { -#' lm(bmi ~ age + hyp + chl, data = complete(imp, action = i)) -#' }) -#' pool_parameters(models) +#' data("nhanes2", package = "mice") +#' imp <- mice::mice(nhanes2, printFlag = FALSE) +#' models <- lapply(1:5, function(i) { +#' lm(bmi ~ age + hyp + chl, data = mice::complete(imp, action = i)) +#' }) +#' pool_parameters(models) #' -#' # should be identical to: -#' m <- with(data = imp, exp = lm(bmi ~ age + hyp + chl)) -#' summary(pool(m)) -#' } +#' # should be identical to: +#' m <- with(data = imp, exp = lm(bmi ~ age + hyp + chl)) +#' summary(mice::pool(m)) +#' +#' # For glm, mice used residual df, while `pool_parameters()` uses `Inf` +#' nhanes2$hyp <- datawizard::slide(as.numeric(nhanes2$hyp)) +#' imp <- mice::mice(nhanes2, printFlag = FALSE) +#' models <- lapply(1:5, function(i) { +#' glm(hyp ~ age + chl, family = binomial, data = mice::complete(imp, action = i)) +#' }) +#' m <- with(data = imp, exp = glm(hyp ~ age + chl, family = binomial)) +#' # residual df +#' summary(mice::pool(m))$df +#' # df = Inf +#' pool_parameters(models)$df_error +#' # use residual df instead +#' pool_parameters(models, ci_method = "residual")$df_error #' @return A data frame of indices related to the model's parameters. #' @export pool_parameters <- function(x, diff --git a/man/pool_parameters.Rd b/man/pool_parameters.Rd index 87d6a9c42..d677cd968 100644 --- a/man/pool_parameters.Rd +++ b/man/pool_parameters.Rd @@ -44,7 +44,9 @@ may be one of \code{"conditional"}, \code{"zi"}, \code{"zero-inflated"}, \code{" \item{verbose}{Toggle warnings and messages.} -\item{...}{Currently not used.} +\item{...}{Arguments passed down to \code{model_parameters()}, if \code{x} is a list +of model-objects. Can be used, for instance, to specify arguments like +\code{ci} or \code{ci_method} etc.} } \value{ A data frame of indices related to the model's parameters. @@ -74,19 +76,33 @@ Some model objects do not return standard errors (e.g. objects of class are returned. } \examples{ +\dontshow{if (require("mice") && require("datawizard")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # example for multiple imputed datasets -if (require("mice")) { - data("nhanes2") - imp <- mice(nhanes2, printFlag = FALSE) - models <- lapply(1:5, function(i) { - lm(bmi ~ age + hyp + chl, data = complete(imp, action = i)) - }) - pool_parameters(models) +data("nhanes2", package = "mice") +imp <- mice::mice(nhanes2, printFlag = FALSE) +models <- lapply(1:5, function(i) { + lm(bmi ~ age + hyp + chl, data = mice::complete(imp, action = i)) +}) +pool_parameters(models) - # should be identical to: - m <- with(data = imp, exp = lm(bmi ~ age + hyp + chl)) - summary(pool(m)) -} +# should be identical to: +m <- with(data = imp, exp = lm(bmi ~ age + hyp + chl)) +summary(mice::pool(m)) + +# For glm, mice used residual df, while `pool_parameters()` uses `Inf` +nhanes2$hyp <- datawizard::slide(as.numeric(nhanes2$hyp)) +imp <- mice::mice(nhanes2, printFlag = FALSE) +models <- lapply(1:5, function(i) { + glm(hyp ~ age + chl, family = binomial, data = mice::complete(imp, action = i)) +}) +m <- with(data = imp, exp = glm(hyp ~ age + chl, family = binomial)) +# residual df +summary(mice::pool(m))$df +# df = Inf +pool_parameters(models)$df_error +# use residual df instead +pool_parameters(models, ci_method = "residual")$df_error +\dontshow{\}) # examplesIf} } \references{ Barnard, J. and Rubin, D.B. (1999). Small sample degrees of freedom with diff --git a/tests/testthat/test-pool_parameters.R b/tests/testthat/test-pool_parameters.R index e08a76b56..0feb24009 100644 --- a/tests/testthat/test-pool_parameters.R +++ b/tests/testthat/test-pool_parameters.R @@ -10,3 +10,21 @@ test_that("pooled parameters", { expect_equal(pp$df_error, c(9.2225, 8.1903, 3.6727, 10.264, 6.4385), tolerance = 1e-3) expect_snapshot(print(pp)) }) + +test_that("pooled parameters", { + skip_if_not_installed("mice") + skip_if_not_installed("datawizard") + data("nhanes2", package = "mice") + nhanes2$hyp <- datawizard::slide(as.numeric(nhanes2$hyp)) + set.seed(123) + imp <- mice::mice(nhanes2, printFlag = FALSE) + models <- lapply(1:5, function(i) { + glm(hyp ~ age + chl, family = binomial, data = mice::complete(imp, action = i)) + }) + pp1 <- pool_parameters(models) + expect_equal(pp1$df_error, c(Inf, Inf, Inf, Inf), tolerance = 1e-3) + pp2 <- pool_parameters(models, ci_method = "residual") + m_mice <- with(data = imp, exp = glm(hyp ~ age + chl, family = binomial)) + pp3 <- summary(mice::pool(m_mice)) + expect_equal(pp2$df_error, pp3$df, tolerance = 1e-3) +})