Skip to content

Commit

Permalink
fix ZI
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Mar 16, 2024
1 parent 6d7ee1c commit 3575e6a
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 92 deletions.
80 changes: 27 additions & 53 deletions R/check_overdispersion.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,20 @@
#' For Poisson models, the overdispersion test is based on the code from
#' _Gelman and Hill (2007), page 115_.
#'
#' @section Overdispersion in Negative Binomial or Zero-Inflated Models:
#' For negative binomial (mixed) models or models with zero-inflation component,
#' the overdispersion test is based simulated residuals (see [`simulate_residuals()`]).
#'
#' @section Overdispersion in Mixed Models:
#' For `merMod`- and `glmmTMB`-objects, `check_overdispersion()`
#' is based on the code in the
#' [GLMM FAQ](http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html),
#' section *How can I deal with overdispersion in GLMMs?*. Note that this
#' function only returns an *approximate* estimate of an overdispersion
#' parameter, and is probably inaccurate for zero-inflated mixed models (fitted
#' with `glmmTMB`). In such cases, it is recommended to use `simulate_residuals()`
#' first, followed by `check_overdispersion()` to check for overdispersion, e.g.:
#' `check_overdispersion(simulate_residuals(model))`.
#' parameter. Using this approach would be inaccurate for zero-inflated or
#' negative binomial mixed models (fitted with `glmmTMB`), thus, in such cases,
#' the overdispersion test is based on [`simulate_residuals()`] (which is identical
#' to `check_overdispersion(simulate_residuals(model))`).
#'
#' @section How to fix Overdispersion:
#' Overdispersion can be fixed by either modeling the dispersion parameter, or
Expand Down Expand Up @@ -154,30 +158,14 @@ print.check_overdisp <- function(x, digits = 3, ...) {

#' @export
check_overdispersion.glm <- function(x, verbose = TRUE, ...) {
# for warning message
model_name <- insight::safe_deparse(substitute(x))

# check if we have poisson
# model info
info <- insight::model_info(x)
if (!info$is_count && !info$is_binomial) {
insight::format_error(paste0(
"Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
}

# check for Bernoulli
if (info$is_bernoulli) {
insight::format_error(paste0(
"Overdispersion checks cannot be used for Bernoulli models. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
}
# for certain distributions, simulated residuals are more accurate
use_simulated <- info$is_bernoulli || info$is_binomial || (!info$is_count && !info$is_binomial) || info$is_negbin

if (info$is_binomial) {
return(check_overdispersion.merMod(x, ...))
if (use_simulated) {
return(check_overdispersion(simulate_residuals(x, ...), ...))
}

yhat <- stats::fitted(x)
Expand Down Expand Up @@ -239,42 +227,28 @@ check_overdispersion.model_fit <- check_overdispersion.poissonmfx

#' @export
check_overdispersion.merMod <- function(x, ...) {
# for warning message
model_name <- insight::safe_deparse(substitute(x))

# check if we have poisson or binomial
# for certain distributions, simulated residuals are more accurate
info <- insight::model_info(x)
if (!info$is_count && !info$is_binomial) {
insight::format_error(paste0(
"Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
}

# check for Bernoulli
if (info$is_bernoulli) {
insight::format_error(paste0(
"Overdispersion checks cannot be used for Bernoulli models. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
# for certain distributions, simulated residuals are more accurate
use_simulated <- info$is_zero_inflated || info$is_bernoulli || info$is_binomial || (!info$is_count && !info$is_binomial) || info$is_negbin

Check warning on line 234 in R/check_overdispersion.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/check_overdispersion.R,line=234,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 140 characters.

if (use_simulated) {
return(check_overdispersion(simulate_residuals(x, ...), ...))
}

rdf <- stats::df.residual(x)
rp <- insight::get_residuals(x, type = "pearson")

# check if pearson residuals are available
if (insight::is_empty_object(rp)) {
insight::format_error(paste0(
"Cannot test for overdispersion, because pearson residuals are not implemented for models with zero-inflation or variable dispersion. ", # nolint
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
} else {
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq / rdf
pval <- stats::pchisq(Pearson.chisq, df = rdf, lower.tail = FALSE)
return(check_overdispersion(simulate_residuals(x, ...), ...))
}

Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq / rdf
pval <- stats::pchisq(Pearson.chisq, df = rdf, lower.tail = FALSE)

out <- list(
chisq_statistic = Pearson.chisq,
dispersion_ratio = prat,
Expand Down Expand Up @@ -311,7 +285,7 @@ check_overdispersion.performance_simres <- function(x, alternative = c("two.side
result <- .simres_statistics(x, statistic_fun = dispersion, alternative = alternative)

out <- list(
dispersion_ratio = result$observed / mean(result$simulated),
dispersion_ratio = mean(result$simulated) / result$observed,
p_value = result$p
)

Expand Down
33 changes: 6 additions & 27 deletions R/check_zeroinflation.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,9 @@
#' negative binomial or zero-inflated models.
#'
#' In case of negative binomial models, models with zero-inflation component,
#' or hurdle models, the results from `check_zeroinflation()` are likely to be
#' unreliable. In such cases, it is recommended to use `simulate_residuals()`
#' first, followed by `check_zeroinflation()` to check for zero-inflation,
#' e.g.: `check_zeroinflation(simulate_residuals(model))`. Usually, such models
#' are detected automatically and `check_zeroinflation()` internally calls
#' `simulate_residuals()` if necessary.
#' or hurdle models, the results from `check_zeroinflation()` are based on
#' [`simulate_residuals()`], i.e. `check_zeroinflation(simulate_residuals(model))`
#' is internally called if necessary.
#'
#' @family functions to check model assumptions and and assess model quality
#'
Expand Down Expand Up @@ -71,9 +68,9 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
return(NULL)
}

# for glmmTMB models with zero-inflation component or nbinom families,
# for models with zero-inflation component or negative binomial families,
# we use simulated_residuals()
if (inherits(x, "glmmTMB") && (model_info$is_zero_inflated || model_info$is_negbin)) {
if (model_info$is_zero_inflated || model_info$is_negbin) {
if (missing(tolerance)) {
tolerance <- 0.1
}
Expand All @@ -82,26 +79,8 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {

# get predictions of outcome
mu <- stats::fitted(x)

# get overdispersion parameters
if (model_info$is_negbin) {
if (methods::is(x, "glmmTMB")) {
theta <- stats::sigma(x)
} else if (methods::is(x, "glmerMod")) {
theta <- environment(x@resp$family$aic)[[".Theta"]]
} else {
theta <- x$theta
}
} else {
theta <- NULL
}

# get predicted zero-counts
if (is.null(theta)) {
pred.zero <- round(sum(stats::dpois(x = 0, lambda = mu)))
} else {
pred.zero <- round(sum(stats::dnbinom(x = 0, size = theta, mu = mu)))
}
pred.zero <- round(sum(stats::dpois(x = 0, lambda = mu)))

# proportion
structure(
Expand Down
14 changes: 10 additions & 4 deletions man/check_overdispersion.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 3 additions & 6 deletions man/check_zeroinflation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions tests/testthat/test-check_overdispersion.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,67 @@ test_that("check_overdispersion", {
tolerance = 1e-3
)
})


test_that("check_overdispersion, zero-inflated and negbin", {
skip_if_not_installed("glmmTMB")
skip_if_not_installed("DHARMa")
skip_if_not(getRversion() >= "4.0.0")
data(Salamanders, package = "glmmTMB")

m1 <- glmmTMB::glmmTMB(
count ~ spp + mined,
ziformula = ~ spp + mined,
family = poisson,
data = Salamanders
)
m2 <- glmmTMB::glmmTMB(
count ~ spp + mined,
family = poisson,
data = Salamanders
)
m3 <- glmmTMB::glmmTMB(
count ~ spp + mined,
family = glmmTMB::nbinom1(),
data = Salamanders
)
expect_equal(
check_overdispersion(m1),
structure(
list(
dispersion_ratio = 0.504903379544268,
p_value = 0
),
class = c("check_overdisp", "see_check_overdisp")
),
tolerance = 1e-4,
ignore_attr = TRUE
)
expect_equal(
check_overdispersion(m2),
structure(
list(
chisq_statistic = 1873.7105986433,
dispersion_ratio = 2.94608584692342,
residual_df = 636L,
p_value = 3.26556213101505e-122
),
class = c("check_overdisp", "see_check_overdisp"),
object_name = "m1"
),
tolerance = 1e-4,
ignore_attr = TRUE
)
expect_equal(
check_overdispersion(m1),
structure(
list(
dispersion_ratio = 0.504903379544268,
p_value = 0
),
class = c("check_overdisp", "see_check_overdisp")
),
tolerance = 1e-4,
ignore_attr = TRUE
)
})
7 changes: 5 additions & 2 deletions tests/testthat/test-check_zeroinflation.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,11 @@ test_that("check_zeroinflation, glmer.nb", {
check_zeroinflation(m),
structure(
list(
predicted.zeros = 153, observed.zeros = 155L,
ratio = 0.987096774193548, tolerance = 0.05
predicted.zeros = 153,
observed.zeros = 155L,
ratio = 0.987329032258065,
tolerance = 0.1,
p.value = 0.944
),
class = "check_zi"
),
Expand Down

0 comments on commit 3575e6a

Please sign in to comment.