Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Catch error #699

Merged
merged 4 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: performance
Title: Assessment of Regression Models Performance
Version: 0.10.9.9
Version: 0.10.9.10
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
139 changes: 103 additions & 36 deletions R/check_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
#'
#' @details For Bayesian models from packages **rstanarm** or **brms**,
#' models will be "converted" to their frequentist counterpart, using
#' [`bayestestR::bayesian_as_frequentist`](https://easystats.github.io/bayestestR/reference/convert_bayesian_as_frequentist.html).

Check warning on line 62 in R/check_model.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/check_model.R,line=62,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 130 characters.
#' A more advanced model-check for Bayesian models will be implemented at a
#' later stage.
#'
Expand Down Expand Up @@ -87,7 +87,7 @@
#' plots are helpful to check model assumptions, they do not necessarily indicate
#' so-called "lack of fit", e.g. missed non-linear relationships or interactions.
#' Thus, it is always recommended to also look at
#' [effect plots, including partial residuals](https://strengejacke.github.io/ggeffects/articles/introduction_partial_residuals.html).

Check warning on line 90 in R/check_model.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/check_model.R,line=90,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 134 characters.
#'
#' @section Homogeneity of Variance:
#' This plot checks the assumption of equal variance (homoscedasticity). The
Expand Down Expand Up @@ -217,9 +217,9 @@
if (minfo$is_bayesian) {
suppressWarnings(.check_assumptions_stan(x, ...))
} else if (minfo$is_linear) {
suppressWarnings(.check_assumptions_linear(x, minfo, residual_type, verbose, ...))
suppressWarnings(.check_assumptions_linear(x, minfo, check, residual_type, verbose, ...))
} else {
suppressWarnings(.check_assumptions_glm(x, minfo, residual_type, verbose, ...))
suppressWarnings(.check_assumptions_glm(x, minfo, check, residual_type, verbose, ...))
},
error = function(e) {
e
Expand All @@ -236,6 +236,15 @@
)
}

# did Q-Q plot work with simulated residuals?
if (verbose && is.null(assumptions_data$QQ) && residual_type == "simulated") {
insight::format_warning(paste0(
"Cannot simulate residuals for models of class `",
class(x)[1],
"`. Please try `check_model(..., residual_type = \"normal\")` instead."
))
}

# try to find sensible default for "type" argument
suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) # nolint
if (missing(type) && suggest_dots) {
Expand Down Expand Up @@ -411,26 +420,57 @@

# compile plots for checks of linear models ------------------------

.check_assumptions_linear <- function(model, model_info, residual_type = "normal", verbose = TRUE, ...) {
.check_assumptions_linear <- function(model, model_info, check = "all", residual_type = "normal", verbose = TRUE, ...) {
dat <- list()

dat$VIF <- .diag_vif(model, verbose = verbose)
dat$QQ <- switch(residual_type,
simulated = simulate_residuals(model, ...),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
dat$NORM <- .diag_norm(model, verbose = verbose)
dat$NCV <- .diag_ncv(model, verbose = verbose)
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook
# multicollinearity --------------
if (any(c("all", "vif") %in% check)) {
dat$VIF <- .diag_vif(model, verbose = verbose)
}

# Q-Q plot (normality/uniformity of residuals) --------------
if (any(c("all", "qq") %in% check)) {
dat$QQ <- switch(residual_type,
simulated = .safe(simulate_residuals(model, ...)),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
}

# Random Effects Q-Q plot (normality of BLUPs) --------------
if (any(c("all", "reqq") %in% check)) {
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
}

# normal-curve plot (normality of residuals) --------------
if (any(c("all", "normality") %in% check)) {
dat$NORM <- .diag_norm(model, verbose = verbose)
}

# non-constant variance (heteroskedasticity, linearity) --------------
if (any(c("all", "ncv", "linearity") %in% check)) {
dat$NCV <- .diag_ncv(model, verbose = verbose)
}

# homogeneity of variance --------------
if (any(c("all", "homogeneity") %in% check)) {
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
}

# outliers --------------
if (any(c("all", "outliers") %in% check)) {
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook

Check warning on line 465 in R/check_model.R

View check run for this annotation

Codecov / codecov/patch

R/check_model.R#L465

Added line #L465 was not covered by tests
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
}

# posterior predictive checks --------------
if (any(c("all", "pp_check") %in% check)) {
dat$PP_CHECK <- .safe(check_predictions(model, ...))
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
dat$PP_CHECK <- .safe(check_predictions(model, ...))

dat <- insight::compact_list(dat)
class(dat) <- c("check_model", "see_check_model")
Expand All @@ -441,28 +481,55 @@

# compile plots for checks of generalized linear models ------------------------

.check_assumptions_glm <- function(model, model_info, residual_type = "simulated", verbose = TRUE, ...) {
.check_assumptions_glm <- function(model, model_info, check = "all", residual_type = "simulated", verbose = TRUE, ...) {
dat <- list()

dat$VIF <- .diag_vif(model, verbose = verbose)
dat$QQ <- switch(residual_type,
simulated = simulate_residuals(model, ...),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook
# multicollinearity --------------
if (any(c("all", "vif") %in% check)) {
dat$VIF <- .diag_vif(model, verbose = verbose)
}

# Q-Q plot (normality/uniformity of residuals) --------------
if (any(c("all", "qq") %in% check)) {
dat$QQ <- switch(residual_type,
simulated = .safe(simulate_residuals(model, ...)),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
}

# homogeneity of variance --------------
if (any(c("all", "homogeneity") %in% check)) {
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
}

# Random Effects Q-Q plot (normality of BLUPs) --------------
if (any(c("all", "reqq") %in% check)) {
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
}

# outliers --------------
if (any(c("all", "outliers") %in% check)) {
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
dat$PP_CHECK <- .safe(check_predictions(model, ...))
if (isTRUE(model_info$is_binomial)) {

# posterior predictive checks --------------
if (any(c("all", "pp_check") %in% check)) {
dat$PP_CHECK <- .safe(check_predictions(model, ...))
}

# binned residuals for bernoulli/binomial --------------
if (isTRUE(model_info$is_binomial) && any(c("all", "binned_residuals") %in% check)) {
dat$BINNED_RESID <- .safe(binned_residuals(model, verbose = verbose, ...))
}
if (isTRUE(model_info$is_count)) {

# misspecified dispersion and zero-inflation --------------
if (isTRUE(model_info$is_count) && any(c("all", "overdispersion") %in% check)) {
dat$OVERDISPERSION <- .diag_overdispersion(model)
}

Expand Down
27 changes: 14 additions & 13 deletions R/check_zeroinflation.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,19 @@
#'
#' @section Tests based on simulated residuals:
#' For certain models, resp. model from certain families, tests are based on
#' [`simulated_residuals()`]. These are usually more accurate for tests than the
#' traditionally used Pearson residuals. However, when simulating from more
#' complex model, such as mixed models or models with zero-inflation, there are
#' several important considerations. Arguments specified in `...` are passed to
#' [`simulate_residuals()`], which relies on [`DHARMa::simulateResiduals()`] (and
#' therefore, arguments in `...` are passed further down to _DHARMa_). The
#' defaults in DHARMa are set on the most conservative option that works for
#' all models. However, in many cases, the help advises to use different settings
#' in particular situations or for particular models. It is recommended to read
#' the 'Details' in `?DHARMa::simulateResiduals` closely to understand the
#' implications of the simulation process and which arguments should be modified
#' to get the most accurate results.
#' simulated residuals (see [`simulate_residuals()`]). These are usually more
#' accurate for testing such models than the traditionally used Pearson residuals.
#' However, when simulating from more complex models, such as mixed models or
#' models with zero-inflation, there are several important considerations.
#' Arguments specified in `...` are passed to [`simulate_residuals()`], which
#' relies on [`DHARMa::simulateResiduals()`] (and therefore, arguments in `...`
#' are passed further down to _DHARMa_). The defaults in DHARMa are set on the
#' most conservative option that works for all models. However, in many cases,
#' the help advises to use different settings in particular situations or for
#' particular models. It is recommended to read the 'Details' in
#' `?DHARMa::simulateResiduals` closely to understand the implications of the
#' simulation process and which arguments should be modified to get the most
#' accurate results.
#'
#' @family functions to check model assumptions and and assess model quality
#'
Expand Down Expand Up @@ -87,7 +88,7 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
not_supported <- c("fixest", "glmx")

# for models with zero-inflation component or negative binomial families,
# we use simulated_residuals()
# we use simulate_residuals()
if (!inherits(x, not_supported) && (model_info$is_zero_inflated || model_info$is_negbin || model_info$family == "genpois")) { # nolint
if (missing(tolerance)) {
tolerance <- 0.1
Expand Down
6 changes: 3 additions & 3 deletions R/simulate_residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
#' @section Tests based on simulated residuals:
#' For certain models, resp. model from certain families, tests like
#' [`check_zeroinflation()`] or [`check_overdispersion()`] are based on
#' `simulated_residuals()`. These are usually more accurate for such tests than
#' simulated residuals. These are usually more accurate for such tests than
#' the traditionally used Pearson residuals. However, when simulating from more
#' complex model, such as mixed models or models with zero-inflation, there are
#' complex models, such as mixed models or models with zero-inflation, there are
#' several important considerations. `simulate_residuals()` relies on
#' [`DHARMa::simulateResiduals()`], and additional arguments specified in `...`
#' are passed further down to that function. The defaults in DHARMa are set on
Expand Down Expand Up @@ -79,7 +79,7 @@
msg <- paste0(
"Simulated residuals from a model of class `", class(x$fittedModel)[1],
"` based on ", x$nSim, " simulations. Use `check_residuals()` to check ",
"uniformity of residuals. It is recommended to refer to `?DHARMa::simulateReisudals`",
"uniformity of residuals. It is recommended to refer to `?DHARMa::simulateResiduals`",

Check warning on line 82 in R/simulate_residuals.R

View check run for this annotation

Codecov / codecov/patch

R/simulate_residuals.R#L82

Added line #L82 was not covered by tests
" and `vignette(\"DHARMa\")` for more information about different settings",
" in particular situations or for particular models.\n"
)
Expand Down
25 changes: 13 additions & 12 deletions man/check_overdispersion.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/check_residuals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 13 additions & 12 deletions man/check_zeroinflation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/simulate_residuals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/test-check_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,14 @@ test_that("`check_model()` warnings for tweedie", {
)
)
})


test_that("`check_model()` warnings for zero-infl", {
skip_if_not_installed("pscl")
data(bioChemists, package = "pscl")
model <- pscl::zeroinfl(
art ~ fem + mar + kid5 + ment | kid5 + phd,
data = bioChemists
)
expect_message(expect_warning(check_model(model, verbose = TRUE), regex = "Cannot simulate"), regex = "Homogeneity")
})
Loading