Skip to content

Commit

Permalink
check_overdispersion method
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Mar 15, 2024
1 parent 5842c15 commit 3d0663e
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 43 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ S3method(check_outliers,rma.uni)
S3method(check_outliers,rq)
S3method(check_outliers,rqs)
S3method(check_outliers,rqss)
S3method(check_overdispersion,DHARMa)
S3method(check_overdispersion,default)
S3method(check_overdispersion,fixest)
S3method(check_overdispersion,fixest_multi)
Expand All @@ -103,6 +104,7 @@ S3method(check_overdispersion,model_fit)
S3method(check_overdispersion,negbin)
S3method(check_overdispersion,negbinirr)
S3method(check_overdispersion,negbinmfx)
S3method(check_overdispersion,performance_simres)
S3method(check_overdispersion,poissonirr)
S3method(check_overdispersion,poissonmfx)
S3method(check_predictions,BFBayesFactor)
Expand Down
100 changes: 75 additions & 25 deletions R/check_overdispersion.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' models for overdispersion.
#'
#' @param x Fitted model of class `merMod`, `glmmTMB`, `glm`, or `glm.nb`
#' (package **MASS**).
#' (package **MASS**), or an object returned by `simulate_residuals()`.
#' @param ... Currently not used.
#'
#' @return A list with results from the overdispersion test, like chi-squared
Expand Down Expand Up @@ -113,7 +113,9 @@ print.check_overdisp <- function(x, digits = 3, ...) {
orig_x <- x

x$dispersion_ratio <- sprintf("%.*f", digits, x$dispersion_ratio)
x$chisq_statistic <- sprintf("%.*f", digits, x$chisq_statistic)
if (!is.null(x$chisq_statistic)) {
x$chisq_statistic <- sprintf("%.*f", digits, x$chisq_statistic)
}

x$p_value <- pval <- round(x$p_value, digits = digits)
if (x$p_value < 0.001) x$p_value <- "< 0.001"
Expand All @@ -125,9 +127,14 @@ print.check_overdisp <- function(x, digits = 3, ...) {
)

insight::print_color("# Overdispersion test\n\n", "blue")
cat(sprintf(" dispersion ratio = %s\n", format(x$dispersion_ratio, justify = "right", width = maxlen)))
cat(sprintf(" Pearson's Chi-Squared = %s\n", format(x$chisq_statistic, justify = "right", width = maxlen)))
cat(sprintf(" p-value = %s\n\n", format(x$p_value, justify = "right", width = maxlen)))
if (is.null(x$chisq_statistic)) {
cat(sprintf(" dispersion ratio = %s\n", format(x$dispersion_ratio, justify = "right", width = maxlen)))
cat(sprintf(" p-value = %s\n\n", format(x$p_value, justify = "right", width = maxlen)))
} else {
cat(sprintf(" dispersion ratio = %s\n", format(x$dispersion_ratio, justify = "right", width = maxlen)))
cat(sprintf(" Pearson's Chi-Squared = %s\n", format(x$chisq_statistic, justify = "right", width = maxlen)))
cat(sprintf(" p-value = %s\n\n", format(x$p_value, justify = "right", width = maxlen)))
}

if (pval > 0.05) {
message("No overdispersion detected.")
Expand All @@ -147,18 +154,24 @@ check_overdispersion.glm <- function(x, verbose = TRUE, ...) {
# check if we have poisson
info <- insight::model_info(x)
if (!info$is_count && !info$is_binomial) {
insight::format_error(
"Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1."
)
insight::format_error(paste0(
"Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
}

# check for Bernoulli
if (info$is_bernoulli) {
insight::format_error("Overdispersion checks cannot be used for Bernoulli models.")
insight::format_error(paste0(
"Overdispersion checks cannot be used for Bernoulli models. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
}

if (info$is_binomial) {
return(check_overdispersion.merMod(x, verbose = verbose, ...))
return(check_overdispersion.merMod(x, ...))
}

yhat <- stats::fitted(x)
Expand Down Expand Up @@ -219,33 +232,37 @@ check_overdispersion.model_fit <- check_overdispersion.poissonmfx
# Overdispersion for mixed models ---------------------------

#' @export
check_overdispersion.merMod <- function(x, verbose = TRUE, ...) {
check_overdispersion.merMod <- function(x, ...) {
# for warning message
model_name <- insight::safe_deparse(substitute(x))

# check if we have poisson or binomial
info <- insight::model_info(x)
if (!info$is_count && !info$is_binomial) {
insight::format_error(
"Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1."
)
insight::format_error(paste0(
"Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
}

# check for Bernoulli
if (info$is_bernoulli) {
insight::format_error("Overdispersion checks cannot be used for Bernoulli models.")
insight::format_error(paste0(
"Overdispersion checks cannot be used for Bernoulli models. ",
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
}

rdf <- stats::df.residual(x)
rp <- insight::get_residuals(x, type = "pearson")
if (insight::is_empty_object(rp)) {
Pearson.chisq <- NA
prat <- NA
pval <- NA
rp <- NA
if (isTRUE(verbose)) {
insight::format_alert(
"Cannot test for overdispersion, because pearson residuals are not implemented for models with zero-inflation or variable dispersion.",
"Only the visual inspection using `plot(check_overdispersion(model))` is possible."
)
}
insight::format_error(paste0(
"Cannot test for overdispersion, because pearson residuals are not implemented for models with zero-inflation or variable dispersion. ", # nolint
"You may try to use `check_overdispersion()` on `simulated_residuals()`, e.g. ",
"`check_overdispersion(simulate_residuals(", model_name, "))`."
))
} else {
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq / rdf
Expand All @@ -270,3 +287,36 @@ check_overdispersion.negbin <- check_overdispersion.merMod

#' @export
check_overdispersion.glmmTMB <- check_overdispersion.merMod


# simulated residuals -----------------------------

#' @rdname check_overdispersion
#' @export
check_overdispersion.performance_simres <- function(x,
tolerance = 0.1,
alternative = c("two.sided", "less", "greater"),
...) {
# match arguments
alternative <- match.arg(alternative)

# statistics function
variance <- stats::sd(x$simulatedResponse)^2
dispersion <- function(i) var(i - x$fittedPredictedResponse) / variance

# compute test results
.simres_statistics(x, statistic_fun = dispersion, alternative = alternative)

out <- list(
dispersion_ratio = .simres_statistics$observed / mean(.simres_statistics$simulated),
p_value = .simres_statistics$p
)

class(out) <- c("check_overdisp", "see_check_overdisp")
attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(x))

out
}

#' @export
check_overdispersion.DHARMa <- check_overdispersion.performance_simres
24 changes: 7 additions & 17 deletions R/check_zeroinflation.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,6 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
insight::format_error("Model must be from Poisson-family.")
}

# for warning message
model_name <- insight::safe_deparse(substitute(x))

# get actual zero of response
obs.zero <- sum(insight::get_response(x, verbose = FALSE) == 0L)

Expand Down Expand Up @@ -127,25 +124,18 @@ check_zeroinflation.performance_simres <- function(x,
...) {
# match arguments
alternative <- match.arg(alternative)
# count observed and simulated zeros
observed <- sum(x$observedResponse == 0)
simulated <- apply(x$simulatedResponse, 2, function(i) sum(i == 0))
# p is simply ratio of simulated zeros to observed zeros
p <- switch(
alternative,
greater = mean(simulated >= observed),
less = mean(simulated <= observed),
min(min(mean(simulated <= observed), mean(simulated >= observed)) * 2, 1)
)

# compute test results
.simres_statistics(x, statistic_fun = function(i) sum(i == 0), alternative = alternative)

structure(
class = "check_zi",
list(
predicted.zeros = round(mean(simulated)),
observed.zeros = observed,
ratio = mean(simulated) / observed,
predicted.zeros = round(mean(.simres_statistics$simulated)),
observed.zeros = .simres_statistics$observed,
ratio = mean(.simres_statistics$simulated) / .simres_statistics$observed,
tolerance = tolerance,
p.value = p
p.value = .simres_statistics$p
)
)
}
Expand Down
17 changes: 17 additions & 0 deletions R/simulate_residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ simulate_residuals <- function(x, iterations = 250, ...) {
out
}


# methods ------------------------------

#' @export
Expand All @@ -72,3 +73,19 @@ plot.performance_simres <- function(x, ...) {
insight::check_if_installed("see", "for residual plots")
NextMethod()
}


# helper functions ---------------------

.simres_statistics <- function(x, statistic_fun, alternative = "two.sided") {
# count observed and simulated zeros
observed <- statistic_fun(x$observedResponse)
simulated <- apply(x$simulatedResponse, 2, statistic_fun)
# p is simply ratio of simulated zeros to observed zeros
p <- switch(alternative,
greater = mean(simulated >= observed),
less = mean(simulated <= observed),
min(min(mean(simulated <= observed), mean(simulated >= observed)) * 2, 1)
)
list(observed = observed, simulated = simulated, p = p)
}
10 changes: 9 additions & 1 deletion man/check_overdispersion.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3d0663e

Please sign in to comment.