diff --git a/DESCRIPTION b/DESCRIPTION index 2dcc2ae85..0f23a585c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.10.9.9 +Version: 0.10.9.10 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_model.R b/R/check_model.R index 6be17be22..f75a103a8 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -217,9 +217,9 @@ check_model.default <- function(x, if (minfo$is_bayesian) { suppressWarnings(.check_assumptions_stan(x, ...)) } else if (minfo$is_linear) { - suppressWarnings(.check_assumptions_linear(x, minfo, residual_type, verbose, ...)) + suppressWarnings(.check_assumptions_linear(x, minfo, check, residual_type, verbose, ...)) } else { - suppressWarnings(.check_assumptions_glm(x, minfo, residual_type, verbose, ...)) + suppressWarnings(.check_assumptions_glm(x, minfo, check, residual_type, verbose, ...)) }, error = function(e) { e @@ -236,6 +236,15 @@ check_model.default <- function(x, ) } + # did the requested Q-Q plot fail with simulated residuals? (QQ is also NULL when "qq" was not requested) + if (verbose && any(c("all", "qq") %in% check) && is.null(assumptions_data$QQ) && residual_type == "simulated") { + insight::format_warning(paste0( + "Cannot simulate residuals for models of class `", + class(x)[1], + "`. Please try `check_model(..., residual_type = \"normal\")` instead." + )) + } + # try to find sensible default for "type" argument suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) # nolint if (missing(type) && suggest_dots) { @@ -411,26 +420,57 @@ check_model.DHARMa <- check_model.performance_simres # compile plots for checks of linear models ------------------------ -.check_assumptions_linear <- function(model, model_info, residual_type = "normal", verbose = TRUE, ...) { +.check_assumptions_linear <- function(model, model_info, check = "all", residual_type = "normal", verbose = TRUE, ...) 
{ dat <- list() - dat$VIF <- .diag_vif(model, verbose = verbose) - dat$QQ <- switch(residual_type, - simulated = simulate_residuals(model, ...), - .diag_qq(model, model_info = model_info, verbose = verbose) - ) - dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) - dat$NORM <- .diag_norm(model, verbose = verbose) - dat$NCV <- .diag_ncv(model, verbose = verbose) - dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) - dat$OUTLIERS <- .safe(check_outliers(model, method = "cook")) - if (is.null(dat$OUTLIERS)) { - threshold <- NULL - } else { - threshold <- attributes(dat$OUTLIERS)$threshold$cook + # multicollinearity -------------- + if (any(c("all", "vif") %in% check)) { + dat$VIF <- .diag_vif(model, verbose = verbose) + } + + # Q-Q plot (normality/uniformity of residuals) -------------- + if (any(c("all", "qq") %in% check)) { + dat$QQ <- switch(residual_type, + simulated = .safe(simulate_residuals(model, ...)), + .diag_qq(model, model_info = model_info, verbose = verbose) + ) + } + + # Random Effects Q-Q plot (normality of BLUPs) -------------- + if (any(c("all", "reqq") %in% check)) { + dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) + } + + # normal-curve plot (normality of residuals) -------------- + if (any(c("all", "normality") %in% check)) { + dat$NORM <- .diag_norm(model, verbose = verbose) + } + + # non-constant variance (heteroskedasticity, linearity) -------------- + if (any(c("all", "ncv", "linearity") %in% check)) { + dat$NCV <- .diag_ncv(model, verbose = verbose) + } + + # homogeneity of variance -------------- + if (any(c("all", "homogeneity") %in% check)) { + dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) + } + + # outliers -------------- + if (any(c("all", "outliers") %in% check)) { + dat$OUTLIERS <- .safe(check_outliers(model, method = "cook")) + if (is.null(dat$OUTLIERS)) { + threshold <- NULL + } else { + threshold <- 
attributes(dat$OUTLIERS)$threshold$cook + } + dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) + } + + # posterior predictive checks -------------- + if (any(c("all", "pp_check") %in% check)) { + dat$PP_CHECK <- .safe(check_predictions(model, ...)) } - dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) - dat$PP_CHECK <- .safe(check_predictions(model, ...)) dat <- insight::compact_list(dat) class(dat) <- c("check_model", "see_check_model") @@ -441,28 +481,55 @@ check_model.DHARMa <- check_model.performance_simres # compile plots for checks of generalized linear models ------------------------ -.check_assumptions_glm <- function(model, model_info, residual_type = "simulated", verbose = TRUE, ...) { +.check_assumptions_glm <- function(model, model_info, check = "all", residual_type = "simulated", verbose = TRUE, ...) { dat <- list() - dat$VIF <- .diag_vif(model, verbose = verbose) - dat$QQ <- switch(residual_type, - simulated = simulate_residuals(model, ...), - .diag_qq(model, model_info = model_info, verbose = verbose) - ) - dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) - dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) - dat$OUTLIERS <- .safe(check_outliers(model, method = "cook")) - if (is.null(dat$OUTLIERS)) { - threshold <- NULL - } else { - threshold <- attributes(dat$OUTLIERS)$threshold$cook + # multicollinearity -------------- + if (any(c("all", "vif") %in% check)) { + dat$VIF <- .diag_vif(model, verbose = verbose) + } + + # Q-Q plot (normality/uniformity of residuals) -------------- + if (any(c("all", "qq") %in% check)) { + dat$QQ <- switch(residual_type, + simulated = .safe(simulate_residuals(model, ...)), + .diag_qq(model, model_info = model_info, verbose = verbose) + ) + } + + # homogeneity of variance -------------- + if (any(c("all", "homogeneity") %in% check)) { + dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) + } + + # Random Effects Q-Q plot 
(normality of BLUPs) -------------- + if (any(c("all", "reqq") %in% check)) { + dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) + } + + # outliers -------------- + if (any(c("all", "outliers") %in% check)) { + dat$OUTLIERS <- .safe(check_outliers(model, method = "cook")) + if (is.null(dat$OUTLIERS)) { + threshold <- NULL + } else { + threshold <- attributes(dat$OUTLIERS)$threshold$cook + } + dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) } - dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) - dat$PP_CHECK <- .safe(check_predictions(model, ...)) - if (isTRUE(model_info$is_binomial)) { + + # posterior predictive checks -------------- + if (any(c("all", "pp_check") %in% check)) { + dat$PP_CHECK <- .safe(check_predictions(model, ...)) + } + + # binned residuals for bernoulli/binomial -------------- + if (isTRUE(model_info$is_binomial) && any(c("all", "binned_residuals") %in% check)) { dat$BINNED_RESID <- .safe(binned_residuals(model, verbose = verbose, ...)) } - if (isTRUE(model_info$is_count)) { + + # misspecified dispersion and zero-inflation -------------- + if (isTRUE(model_info$is_count) && any(c("all", "overdispersion") %in% check)) { dat$OVERDISPERSION <- .diag_overdispersion(model) } diff --git a/R/check_zeroinflation.R b/R/check_zeroinflation.R index 63badc5d4..aaa5e7641 100644 --- a/R/check_zeroinflation.R +++ b/R/check_zeroinflation.R @@ -30,18 +30,19 @@ #' #' @section Tests based on simulated residuals: #' For certain models, resp. model from certain families, tests are based on -#' [`simulated_residuals()`]. These are usually more accurate for tests than the -#' traditionally used Pearson residuals. However, when simulating from more -#' complex model, such as mixed models or models with zero-inflation, there are -#' several important considerations. 
Arguments specified in `...` are passed to -#' [`simulate_residuals()`], which relies on [`DHARMa::simulateResiduals()`] (and -#' therefore, arguments in `...` are passed further down to _DHARMa_). The -#' defaults in DHARMa are set on the most conservative option that works for -#' all models. However, in many cases, the help advises to use different settings -#' in particular situations or for particular models. It is recommended to read -#' the 'Details' in `?DHARMa::simulateResiduals` closely to understand the -#' implications of the simulation process and which arguments should be modified -#' to get the most accurate results. +#' simulated residuals (see [`simulate_residuals()`]). These are usually more +#' accurate for testing such models than the traditionally used Pearson residuals. +#' However, when simulating from more complex models, such as mixed models or +#' models with zero-inflation, there are several important considerations. +#' Arguments specified in `...` are passed to [`simulate_residuals()`], which +#' relies on [`DHARMa::simulateResiduals()`] (and therefore, arguments in `...` +#' are passed further down to _DHARMa_). The defaults in DHARMa are set on the +#' most conservative option that works for all models. However, in many cases, +#' the help advises to use different settings in particular situations or for +#' particular models. It is recommended to read the 'Details' in +#' `?DHARMa::simulateResiduals` closely to understand the implications of the +#' simulation process and which arguments should be modified to get the most +#' accurate results. #' #' @family functions to check model assumptions and and assess model quality #' @@ -87,7 +88,7 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) 
{ not_supported <- c("fixest", "glmx") # for models with zero-inflation component or negative binomial families, - # we use simulated_residuals() + # we use simulate_residuals() if (!inherits(x, not_supported) && (model_info$is_zero_inflated || model_info$is_negbin || model_info$family == "genpois")) { # nolint if (missing(tolerance)) { tolerance <- 0.1 diff --git a/R/simulate_residuals.R b/R/simulate_residuals.R index 207b660db..de9ee0f4d 100644 --- a/R/simulate_residuals.R +++ b/R/simulate_residuals.R @@ -24,9 +24,9 @@ #' @section Tests based on simulated residuals: #' For certain models, resp. model from certain families, tests like #' [`check_zeroinflation()`] or [`check_overdispersion()`] are based on -#' `simulated_residuals()`. These are usually more accurate for such tests than +#' simulated residuals. These are usually more accurate for such tests than #' the traditionally used Pearson residuals. However, when simulating from more -#' complex model, such as mixed models or models with zero-inflation, there are +#' complex models, such as mixed models or models with zero-inflation, there are #' several important considerations. `simulate_residuals()` relies on #' [`DHARMa::simulateResiduals()`], and additional arguments specified in `...` #' are passed further down to that function. The defaults in DHARMa are set on @@ -79,7 +79,7 @@ print.performance_simres <- function(x, ...) { msg <- paste0( "Simulated residuals from a model of class `", class(x$fittedModel)[1], "` based on ", x$nSim, " simulations. Use `check_residuals()` to check ", - "uniformity of residuals. It is recommended to refer to `?DHARMa::simulateReisudals`", + "uniformity of residuals. 
It is recommended to refer to `?DHARMa::simulateResiduals`", " and `vignette(\"DHARMa\")` for more information about different settings", " in particular situations or for particular models.\n" ) diff --git a/man/check_overdispersion.Rd b/man/check_overdispersion.Rd index 19c957323..8586e7833 100644 --- a/man/check_overdispersion.Rd +++ b/man/check_overdispersion.Rd @@ -79,18 +79,19 @@ negative binomial, see \emph{Gelman and Hill (2007), pages 115-116}). \section{Tests based on simulated residuals}{ For certain models, resp. model from certain families, tests are based on -\code{\link[=simulated_residuals]{simulated_residuals()}}. These are usually more accurate for tests than the -traditionally used Pearson residuals. However, when simulating from more -complex model, such as mixed models or models with zero-inflation, there are -several important considerations. Arguments specified in \code{...} are passed to -\code{\link[=simulate_residuals]{simulate_residuals()}}, which relies on \code{\link[DHARMa:simulateResiduals]{DHARMa::simulateResiduals()}} (and -therefore, arguments in \code{...} are passed further down to \emph{DHARMa}). The -defaults in DHARMa are set on the most conservative option that works for -all models. However, in many cases, the help advises to use different settings -in particular situations or for particular models. It is recommended to read -the 'Details' in \code{?DHARMa::simulateResiduals} closely to understand the -implications of the simulation process and which arguments should be modified -to get the most accurate results. +simulated residuals (see \code{\link[=simulate_residuals]{simulate_residuals()}}). These are usually more +accurate for testing such models than the traditionally used Pearson residuals. +However, when simulating from more complex models, such as mixed models or +models with zero-inflation, there are several important considerations. 
+Arguments specified in \code{...} are passed to \code{\link[=simulate_residuals]{simulate_residuals()}}, which +relies on \code{\link[DHARMa:simulateResiduals]{DHARMa::simulateResiduals()}} (and therefore, arguments in \code{...} +are passed further down to \emph{DHARMa}). The defaults in DHARMa are set on the +most conservative option that works for all models. However, in many cases, +the help advises to use different settings in particular situations or for +particular models. It is recommended to read the 'Details' in +\code{?DHARMa::simulateResiduals} closely to understand the implications of the +simulation process and which arguments should be modified to get the most +accurate results. } \examples{ diff --git a/man/check_residuals.Rd b/man/check_residuals.Rd index dfb56ff83..2c5445686 100644 --- a/man/check_residuals.Rd +++ b/man/check_residuals.Rd @@ -38,9 +38,9 @@ the \code{check_residuals()} function has similar goals like \code{\link[=check_ For certain models, resp. model from certain families, tests like \code{\link[=check_zeroinflation]{check_zeroinflation()}} or \code{\link[=check_overdispersion]{check_overdispersion()}} are based on -\code{simulated_residuals()}. These are usually more accurate for such tests than +simulated residuals. These are usually more accurate for such tests than the traditionally used Pearson residuals. However, when simulating from more -complex model, such as mixed models or models with zero-inflation, there are +complex models, such as mixed models or models with zero-inflation, there are several important considerations. \code{simulate_residuals()} relies on \code{\link[DHARMa:simulateResiduals]{DHARMa::simulateResiduals()}}, and additional arguments specified in \code{...} are passed further down to that function. 
The defaults in DHARMa are set on diff --git a/man/check_zeroinflation.Rd b/man/check_zeroinflation.Rd index 9de6c1f5c..a4ddc7135 100644 --- a/man/check_zeroinflation.Rd +++ b/man/check_zeroinflation.Rd @@ -54,18 +54,19 @@ is internally called if necessary. \section{Tests based on simulated residuals}{ For certain models, resp. model from certain families, tests are based on -\code{\link[=simulated_residuals]{simulated_residuals()}}. These are usually more accurate for tests than the -traditionally used Pearson residuals. However, when simulating from more -complex model, such as mixed models or models with zero-inflation, there are -several important considerations. Arguments specified in \code{...} are passed to -\code{\link[=simulate_residuals]{simulate_residuals()}}, which relies on \code{\link[DHARMa:simulateResiduals]{DHARMa::simulateResiduals()}} (and -therefore, arguments in \code{...} are passed further down to \emph{DHARMa}). The -defaults in DHARMa are set on the most conservative option that works for -all models. However, in many cases, the help advises to use different settings -in particular situations or for particular models. It is recommended to read -the 'Details' in \code{?DHARMa::simulateResiduals} closely to understand the -implications of the simulation process and which arguments should be modified -to get the most accurate results. +simulated residuals (see \code{\link[=simulate_residuals]{simulate_residuals()}}). These are usually more +accurate for testing such models than the traditionally used Pearson residuals. +However, when simulating from more complex models, such as mixed models or +models with zero-inflation, there are several important considerations. +Arguments specified in \code{...} are passed to \code{\link[=simulate_residuals]{simulate_residuals()}}, which +relies on \code{\link[DHARMa:simulateResiduals]{DHARMa::simulateResiduals()}} (and therefore, arguments in \code{...} +are passed further down to \emph{DHARMa}). 
The defaults in DHARMa are set on the +most conservative option that works for all models. However, in many cases, +the help advises to use different settings in particular situations or for +particular models. It is recommended to read the 'Details' in +\code{?DHARMa::simulateResiduals} closely to understand the implications of the +simulation process and which arguments should be modified to get the most +accurate results. } \examples{ diff --git a/man/simulate_residuals.Rd b/man/simulate_residuals.Rd index 030e69501..fd647f411 100644 --- a/man/simulate_residuals.Rd +++ b/man/simulate_residuals.Rd @@ -34,9 +34,9 @@ functions in the \strong{see} package. See also \code{vignette("DHARMa")}. There For certain models, resp. model from certain families, tests like \code{\link[=check_zeroinflation]{check_zeroinflation()}} or \code{\link[=check_overdispersion]{check_overdispersion()}} are based on -\code{simulated_residuals()}. These are usually more accurate for such tests than +simulated residuals. These are usually more accurate for such tests than the traditionally used Pearson residuals. However, when simulating from more -complex model, such as mixed models or models with zero-inflation, there are +complex models, such as mixed models or models with zero-inflation, there are several important considerations. \code{simulate_residuals()} relies on \code{\link[DHARMa:simulateResiduals]{DHARMa::simulateResiduals()}}, and additional arguments specified in \code{...} are passed further down to that function. 
The defaults in DHARMa are set on diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R index b008658dc..a0aa2304c 100644 --- a/tests/testthat/test-check_model.R +++ b/tests/testthat/test-check_model.R @@ -69,3 +69,14 @@ test_that("`check_model()` warnings for tweedie", { ) ) }) + + +test_that("`check_model()` warnings for zero-infl", { + skip_if_not_installed("pscl") + data(bioChemists, package = "pscl") + model <- pscl::zeroinfl( + art ~ fem + mar + kid5 + ment | kid5 + phd, + data = bioChemists + ) + expect_message(expect_warning(check_model(model, verbose = TRUE), regexp = "Cannot simulate"), regexp = "Homogeneity") +})