fix ZI

easystats · Mar 16, 2024 · 3575e6a · 3575e6a
1 parent 6d7ee1c
commit 3575e6a
Show file tree

Hide file tree

Showing 6 changed files with 115 additions and 92 deletions.
diff --git a/R/check_overdispersion.R b/R/check_overdispersion.R
@@ -27,16 +27,20 @@
 #' For Poisson models, the overdispersion test is based on the code from
 #' _Gelman and Hill (2007), page 115_.
 #'
+#' @section Overdispersion in Negative Binomial or Zero-Inflated Models:
+#' For negative binomial (mixed) models or models with zero-inflation component,
+#' the overdispersion test is based simulated residuals (see [`simulate_residuals()`]).
+#'
 #' @section Overdispersion in Mixed Models:
 #' For `merMod`- and `glmmTMB`-objects, `check_overdispersion()`
 #' is based on the code in the
 #' [GLMM FAQ](http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html),
 #' section *How can I deal with overdispersion in GLMMs?*. Note that this
 #' function only returns an *approximate* estimate of an overdispersion
-#' parameter, and is probably inaccurate for zero-inflated mixed models (fitted
-#' with `glmmTMB`). In such cases, it is recommended to use `simulate_residuals()`
-#' first, followed by `check_overdispersion()` to check for overdispersion, e.g.:
-#' `check_overdispersion(simulate_residuals(model))`.
+#' parameter. Using this approach would be inaccurate for zero-inflated or
+#' negative binomial mixed models (fitted with `glmmTMB`), thus, in such cases,
+#' the overdispersion test is based on [`simulate_residuals()`] (which is identical
+#' to `check_overdispersion(simulate_residuals(model))`).
 #'
 #' @section How to fix Overdispersion:
 #' Overdispersion can be fixed by either modeling the dispersion parameter, or
@@ -154,30 +158,14 @@ print.check_overdisp <- function(x, digits = 3, ...) {
 
 #' @export
 check_overdispersion.glm <- function(x, verbose = TRUE, ...) {
-  # for warning message
-  model_name <- insight::safe_deparse(substitute(x))
-
-  # check if we have poisson
+  # model info
   info <- insight::model_info(x)
-  if (!info$is_count && !info$is_binomial) {
-    insight::format_error(paste0(
-      "Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1. ",
-      "You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
-      "`check_overdispersion(simulate_residuals(", model_name, "))`."
-    ))
-  }
 
-  # check for Bernoulli
-  if (info$is_bernoulli) {
-    insight::format_error(paste0(
-      "Overdispersion checks cannot be used for Bernoulli models. ",
-      "You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
-      "`check_overdispersion(simulate_residuals(", model_name, "))`."
-    ))
-  }
+  # for certain distributions, simulated residuals are more accurate
+  use_simulated <- info$is_bernoulli || info$is_binomial || (!info$is_count && !info$is_binomial) || info$is_negbin
 
-  if (info$is_binomial) {
-    return(check_overdispersion.merMod(x, ...))
+  if (use_simulated) {
+    return(check_overdispersion(simulate_residuals(x, ...), ...))
   }
 
   yhat <- stats::fitted(x)
@@ -239,42 +227,28 @@ check_overdispersion.model_fit <- check_overdispersion.poissonmfx
 
 #' @export
 check_overdispersion.merMod <- function(x, ...) {
-  # for warning message
-  model_name <- insight::safe_deparse(substitute(x))
-
-  # check if we have poisson or binomial
+  # for certain distributions, simulated residuals are more accurate
   info <- insight::model_info(x)
-  if (!info$is_count && !info$is_binomial) {
-    insight::format_error(paste0(
-      "Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1. ",
-      "You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
-      "`check_overdispersion(simulate_residuals(", model_name, "))`."
-    ))
-  }
 
-  # check for Bernoulli
-  if (info$is_bernoulli) {
-    insight::format_error(paste0(
-      "Overdispersion checks cannot be used for Bernoulli models. ",
-      "You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
-      "`check_overdispersion(simulate_residuals(", model_name, "))`."
-    ))
+  # for certain distributions, simulated residuals are more accurate
+  use_simulated <- info$is_zero_inflated || info$is_bernoulli || info$is_binomial || (!info$is_count && !info$is_binomial) || info$is_negbin
+
+  if (use_simulated) {
+    return(check_overdispersion(simulate_residuals(x, ...), ...))
   }
 
   rdf <- stats::df.residual(x)
   rp <- insight::get_residuals(x, type = "pearson")
+
+  # check if pearson residuals are available
   if (insight::is_empty_object(rp)) {
-    insight::format_error(paste0(
-      "Cannot test for overdispersion, because pearson residuals are not implemented for models with zero-inflation or variable dispersion. ", # nolint
-      "You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
-      "`check_overdispersion(simulate_residuals(", model_name, "))`."
-    ))
-  } else {
-    Pearson.chisq <- sum(rp^2)
-    prat <- Pearson.chisq / rdf
-    pval <- stats::pchisq(Pearson.chisq, df = rdf, lower.tail = FALSE)
+    return(check_overdispersion(simulate_residuals(x, ...), ...))
   }
 
+  Pearson.chisq <- sum(rp^2)
+  prat <- Pearson.chisq / rdf
+  pval <- stats::pchisq(Pearson.chisq, df = rdf, lower.tail = FALSE)
+
   out <- list(
     chisq_statistic = Pearson.chisq,
     dispersion_ratio = prat,
@@ -311,7 +285,7 @@ check_overdispersion.performance_simres <- function(x, alternative = c("two.side
   result <- .simres_statistics(x, statistic_fun = dispersion, alternative = alternative)
 
   out <- list(
-    dispersion_ratio = result$observed / mean(result$simulated),
+    dispersion_ratio = mean(result$simulated) / result$observed,
     p_value = result$p
   )
 

diff --git a/R/check_zeroinflation.R b/R/check_zeroinflation.R
@@ -24,12 +24,9 @@
 #' negative binomial or zero-inflated models.
 #'
 #' In case of negative binomial models, models with zero-inflation component,
-#' or hurdle models, the results from `check_zeroinflation()` are likely to be
-#' unreliable. In such cases, it is recommended to use `simulate_residuals()`
-#' first, followed by `check_zeroinflation()` to check for zero-inflation,
-#' e.g.: `check_zeroinflation(simulate_residuals(model))`. Usually, such models
-#' are detected automatically and `check_zeroinflation()` internally calls
-#' `simulate_residuals()` if necessary.
+#' or hurdle models, the results from `check_zeroinflation()` are based on
+#' [`simulate_residuals()`], i.e. `check_zeroinflation(simulate_residuals(model))`
+#' is internally called if necessary.
 #'
 #' @family functions to check model assumptions and and assess model quality
 #'
@@ -71,9 +68,9 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
     return(NULL)
   }
 
-  # for glmmTMB models with zero-inflation component or nbinom families,
+  # for models with zero-inflation component or negative binomial families,
   # we use simulated_residuals()
-  if (inherits(x, "glmmTMB") && (model_info$is_zero_inflated || model_info$is_negbin)) {
+  if (model_info$is_zero_inflated || model_info$is_negbin) {
     if (missing(tolerance)) {
       tolerance <- 0.1
     }
@@ -82,26 +79,8 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
 
   # get predictions of outcome
   mu <- stats::fitted(x)
-
-  # get overdispersion parameters
-  if (model_info$is_negbin) {
-    if (methods::is(x, "glmmTMB")) {
-      theta <- stats::sigma(x)
-    } else if (methods::is(x, "glmerMod")) {
-      theta <- environment(x@resp$family$aic)[[".Theta"]]
-    } else {
-      theta <- x$theta
-    }
-  } else {
-    theta <- NULL
-  }
-
   # get predicted zero-counts
-  if (is.null(theta)) {
-    pred.zero <- round(sum(stats::dpois(x = 0, lambda = mu)))
-  } else {
-    pred.zero <- round(sum(stats::dnbinom(x = 0, size = theta, mu = mu)))
-  }
+  pred.zero <- round(sum(stats::dpois(x = 0, lambda = mu)))
 
   # proportion
   structure(

diff --git a/man/check_overdispersion.Rd b/man/check_overdispersion.Rd
diff --git a/man/check_zeroinflation.Rd b/man/check_zeroinflation.Rd
diff --git a/tests/testthat/test-check_overdispersion.R b/tests/testthat/test-check_overdispersion.R
@@ -45,3 +45,67 @@ test_that("check_overdispersion", {
     tolerance = 1e-3
   )
 })
+
+
+test_that("check_overdispersion, zero-inflated and negbin", {
+  skip_if_not_installed("glmmTMB")
+  skip_if_not_installed("DHARMa")
+  skip_if_not(getRversion() >= "4.0.0")
+  data(Salamanders, package = "glmmTMB")
+
+  m1 <- glmmTMB::glmmTMB(
+    count ~ spp + mined,
+    ziformula = ~ spp + mined,
+    family = poisson,
+    data = Salamanders
+  )
+  m2 <- glmmTMB::glmmTMB(
+    count ~ spp + mined,
+    family = poisson,
+    data = Salamanders
+  )
+  m3 <- glmmTMB::glmmTMB(
+    count ~ spp + mined,
+    family = glmmTMB::nbinom1(),
+    data = Salamanders
+  )
+  expect_equal(
+    check_overdispersion(m1),
+    structure(
+      list(
+        dispersion_ratio = 0.504903379544268,
+        p_value = 0
+      ),
+      class = c("check_overdisp", "see_check_overdisp")
+    ),
+    tolerance = 1e-4,
+    ignore_attr = TRUE
+  )
+  expect_equal(
+    check_overdispersion(m2),
+    structure(
+      list(
+        chisq_statistic = 1873.7105986433,
+        dispersion_ratio = 2.94608584692342,
+        residual_df = 636L,
+        p_value = 3.26556213101505e-122
+      ),
+      class = c("check_overdisp", "see_check_overdisp"),
+      object_name = "m1"
+    ),
+    tolerance = 1e-4,
+    ignore_attr = TRUE
+  )
+  expect_equal(
+    check_overdispersion(m1),
+    structure(
+      list(
+        dispersion_ratio = 0.504903379544268,
+        p_value = 0
+      ),
+      class = c("check_overdisp", "see_check_overdisp")
+    ),
+    tolerance = 1e-4,
+    ignore_attr = TRUE
+  )
+})
diff --git a/tests/testthat/test-check_zeroinflation.R b/tests/testthat/test-check_zeroinflation.R
@@ -93,8 +93,11 @@ test_that("check_zeroinflation, glmer.nb", {
     check_zeroinflation(m),
     structure(
       list(
-        predicted.zeros = 153, observed.zeros = 155L,
-        ratio = 0.987096774193548, tolerance = 0.05
+        predicted.zeros = 153,
+        observed.zeros = 155L,
+        ratio = 0.987329032258065,
+        tolerance = 0.1,
+        p.value = 0.944
       ),
       class = "check_zi"
     ),