diff --git a/NAMESPACE b/NAMESPACE index f9252ee3f..24daed512 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,6 +73,7 @@ S3method(check_normality,lmerModLmerTest) S3method(check_normality,merMod) S3method(check_normality,numeric) S3method(check_outliers,BFBayesFactor) +S3method(check_outliers,DHARMa) S3method(check_outliers,character) S3method(check_outliers,data.frame) S3method(check_outliers,default) @@ -89,6 +90,7 @@ S3method(check_outliers,meta) S3method(check_outliers,metabin) S3method(check_outliers,metagen) S3method(check_outliers,numeric) +S3method(check_outliers,performance_simres) S3method(check_outliers,rma) S3method(check_outliers,rma.uni) S3method(check_outliers,rq) @@ -295,6 +297,7 @@ S3method(print,check_normality_binom) S3method(print,check_outliers) S3method(print,check_outliers_metafor) S3method(print,check_outliers_metagen) +S3method(print,check_outliers_simres) S3method(print,check_overdisp) S3method(print,check_residuals) S3method(print,check_sphericity) diff --git a/NEWS.md b/NEWS.md index bc3638554..6fb76ca53 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,25 @@ ## New functions * `simulate_residuals()` and `check_residuals()`, to simulate and check residuals - from generalized linear (mixed) models. + from generalized linear (mixed) models. Simulating residuals is based on the + DHARMa package, and objects returned by `simulate_residuals()` inherit from + the `DHARMa` class, and thus can be used with any functions from the *DHARMa* + package. However, there are also implementations in the *performance* package, + such as `check_overdispersion()`, `check_zeroinflation()`, `check_outliers()` + or `check_model()`. + +* Plots for `check_model()` have been improved. The Q-Q plots are now based + on simulated residuals from the DHARMa package for non-Gaussian models, thus + providing more accurate and informative plots. The half-normal QQ plot for + generalized linear models can still be obtained by setting the new argument + `residual_type = "normal"`. + +* Following functions now support simulated residuals (from `simulate_residuals()`) + resp. objects returned from `DHARMa::simulateResiduals()`: + - `check_overdispersion()` + - `check_zeroinflation()` + - `check_outliers()` + - `check_model()` ## General diff --git a/R/check_outliers.R b/R/check_outliers.R index 2017d4cc5..96ef06f12 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -12,7 +12,8 @@ #' by at least half of the methods). See the **Details** section below #' for a description of the methods. #' -#' @param x A model or a data.frame object. +#' @param x A model, a data.frame, a `performance_simres` [`simulate_residuals()`] +#' or a `DHARMa` object. #' @param method The outlier detection method(s). Can be `"all"` or some of #' `"cook"`, `"pareto"`, `"zscore"`, `"zscore_robust"`, `"iqr"`, `"ci"`, `"eti"`, #' `"hdi"`, `"bci"`, `"mahalanobis"`, `"mahalanobis_robust"`, `"mcd"`, `"ics"`, @@ -27,7 +28,11 @@ #' @param ... When `method = "ics"`, further arguments in `...` are passed #' down to [ICSOutlier::ics.outlier()]. When `method = "mahalanobis"`, #' they are passed down to [stats::mahalanobis()]. `percentage_central` can -#' be specified when `method = "mcd"`. +#' be specified when `method = "mcd"`. For objects of class `performance_simres` +#' or `DHARMa`, further arguments are passed down to `DHARMa::testOutliers()`. +#' +#' @inheritParams check_zeroinflation +#' @inheritParams simulate_residuals #' #' @return A logical vector of the detected outliers with a nice printing #' method: a check (message) on whether outliers were detected or not. The @@ -785,6 +790,28 @@ plot.check_outliers <- function(x, ...) { NextMethod() } +#' @export +print.check_outliers_simres <- function(x, digits = 2, ...) { + result <- paste0( + insight::format_value(100 * x$Coefficient, digits = digits, ...), + "%, ", + insight::format_ci(100 * x$CI_low, 100 * x$CI_high, digits = digits, ...) + ) + insight::print_color("# Outliers detection\n\n", "blue") + cat(sprintf(" Proportion of expected outliers: %.*f%%\n", digits, 100 * x$Expected)) + cat(sprintf(" Proportion of observed outliers: %s\n\n", result)) + + p_string <- paste0(" (", insight::format_p(x$p_value), ")") + + if (x$p_value < 0.05) { + message("Outliers were detected", p_string, ".") + } else { + message("No outliers were detected", p_string, ".") + } + + invisible(x) +} + # other classes ------------------------- @@ -1438,6 +1465,30 @@ check_outliers.meta <- check_outliers.metagen check_outliers.metabin <- check_outliers.metagen +#' @rdname check_outliers +#' @export +check_outliers.performance_simres <- function(x, type = "default", iterations = 100, alternative = "two.sided", ...) { + type <- match.arg(type, c("default", "binomial", "bootstrap")) + alternative <- match.arg(alternative, c("two.sided", "greater", "less")) + + insight::check_if_installed("DHARMa") + result <- DHARMa::testOutliers(x, type = type, nBoot = iterations, alternative = alternative, plot = FALSE, ...) + + outlier <- list( + Coefficient = as.vector(result$estimate), + Expected = as.numeric(gsub("(.*)\\(expected: (\\d.*)\\)", "\\2", names(result$estimate))), + CI_low = result$conf.int[1], + CI_high = result$conf.int[2], + p_value = result$p.value + ) + class(outlier) <- c("check_outliers_simres", class(outlier)) + outlier +} + +#' @export +check_outliers.DHARMa <- check_outliers.performance_simres + + # Thresholds -------------------------------------------------------------- diff --git a/R/simulate_residuals.R b/R/simulate_residuals.R index 0dee1e045..a88d0deb9 100644 --- a/R/simulate_residuals.R +++ b/R/simulate_residuals.R @@ -96,9 +96,17 @@ plot.performance_simres <- function(x, ...) { # helper functions --------------------- .simres_statistics <- function(x, statistic_fun, alternative = "two.sided") { - # count observed and simulated zeros - observed <- statistic_fun(x$observedResponse) - simulated <- apply(x$simulatedResponse, 2, statistic_fun) + # summarize the observed and simulated residuals + if (!is.null(statistic_fun)) { + # either apply a function to observed and simulated residusls, + # to calcualte a summary statistic + observed <- statistic_fun(x$observedResponse) + simulated <- apply(x$simulatedResponse, 2, statistic_fun) + } else { + # or we pass the values to compute the p-value directly (for "check_outliers()") + observed <- x + simulated <- statistic_fun + } # p is simply ratio of simulated zeros to observed zeros p <- switch(alternative, greater = mean(simulated >= observed),