Skip to content

Commit

Permalink
check_outliers for DHARMa
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Mar 18, 2024
1 parent 50735e3 commit 0180abd
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 6 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ S3method(check_normality,lmerModLmerTest)
S3method(check_normality,merMod)
S3method(check_normality,numeric)
S3method(check_outliers,BFBayesFactor)
S3method(check_outliers,DHARMa)
S3method(check_outliers,character)
S3method(check_outliers,data.frame)
S3method(check_outliers,default)
Expand All @@ -89,6 +90,7 @@ S3method(check_outliers,meta)
S3method(check_outliers,metabin)
S3method(check_outliers,metagen)
S3method(check_outliers,numeric)
S3method(check_outliers,performance_simres)
S3method(check_outliers,rma)
S3method(check_outliers,rma.uni)
S3method(check_outliers,rq)
Expand Down Expand Up @@ -295,6 +297,7 @@ S3method(print,check_normality_binom)
S3method(print,check_outliers)
S3method(print,check_outliers_metafor)
S3method(print,check_outliers_metagen)
S3method(print,check_outliers_simres)
S3method(print,check_overdisp)
S3method(print,check_residuals)
S3method(print,check_sphericity)
Expand Down
20 changes: 19 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,25 @@
## New functions

* `simulate_residuals()` and `check_residuals()`, to simulate and check residuals
from generalized linear (mixed) models.
from generalized linear (mixed) models. Simulating residuals is based on the
DHARMa package, and objects returned by `simulate_residuals()` inherit from
the `DHARMa` class, and thus can be used with any functions from the *DHARMa*
package. However, there are also implementations in the *performance* package,
such as `check_overdispersion()`, `check_zeroinflation()`, `check_outliers()`
or `check_model()`.

* Plots for `check_model()` have been improved. The Q-Q plots are now based
on simulated residuals from the DHARMa package for non-Gaussian models, thus
providing more accurate and informative plots. The half-normal QQ plot for
generalized linear models can still be obtained by setting the new argument
`residual_type = "normal"`.

* Following functions now support simulated residuals (from `simulate_residuals()`)
resp. objects returned from `DHARMa::simulateResiduals()`:
- `check_overdispersion()`
- `check_zeroinflation()`
- `check_outliers()`
- `check_model()`

## General

Expand Down
55 changes: 53 additions & 2 deletions R/check_outliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
#' by at least half of the methods). See the **Details** section below
#' for a description of the methods.
#'
#' @param x A model or a data.frame object.
#' @param x A model, a data.frame, a `performance_simres` [`simulate_residuals()`]
#' or a `DHARMa` object.
#' @param method The outlier detection method(s). Can be `"all"` or some of
#' `"cook"`, `"pareto"`, `"zscore"`, `"zscore_robust"`, `"iqr"`, `"ci"`, `"eti"`,
#' `"hdi"`, `"bci"`, `"mahalanobis"`, `"mahalanobis_robust"`, `"mcd"`, `"ics"`,
Expand All @@ -27,7 +28,11 @@
#' @param ... When `method = "ics"`, further arguments in `...` are passed
#' down to [ICSOutlier::ics.outlier()]. When `method = "mahalanobis"`,
#' they are passed down to [stats::mahalanobis()]. `percentage_central` can
#' be specified when `method = "mcd"`.
#' be specified when `method = "mcd"`. For objects of class `performance_simres`
#' or `DHARMa`, further arguments are passed down to `DHARMa::testOutliers()`.
#'
#' @inheritParams check_zeroinflation
#' @inheritParams simulate_residuals
#'
#' @return A logical vector of the detected outliers with a nice printing
#' method: a check (message) on whether outliers were detected or not. The
Expand Down Expand Up @@ -785,6 +790,28 @@ plot.check_outliers <- function(x, ...) {
NextMethod()
}

#' @export
print.check_outliers_simres <- function(x, digits = 2, ...) {
result <- paste0(
insight::format_value(100 * x$Coefficient, digits = digits, ...),
"%, ",
insight::format_ci(100 * x$CI_low, 100 * x$CI_high, digits = digits, ...)
)
insight::print_color("# Outliers detection\n\n", "blue")
cat(sprintf(" Proportion of expected outliers: %.*f%%\n", digits, 100 * x$Expected))
cat(sprintf(" Proportion of observed outliers: %s\n\n", result))

p_string <- paste0(" (", insight::format_p(x$p_value), ")")

if (x$p_value < 0.05) {
message("Outliers were detected", p_string, ".")
} else {
message("No outliers were detected", p_string, ".")
}

invisible(x)
}



# other classes -------------------------
Expand Down Expand Up @@ -1438,6 +1465,30 @@ check_outliers.meta <- check_outliers.metagen
check_outliers.metabin <- check_outliers.metagen


#' @rdname check_outliers
#' @export
check_outliers.performance_simres <- function(x, type = "default", iterations = 100, alternative = "two.sided", ...) {
type <- match.arg(type, c("default", "binomial", "bootstrap"))
alternative <- match.arg(alternative, c("two.sided", "greater", "less"))

insight::check_if_installed("DHARMa")
result <- DHARMa::testOutliers(x, type = type, nBoot = iterations, alternative = alternative, plot = FALSE, ...)

outlier <- list(
Coefficient = as.vector(result$estimate),
Expected = as.numeric(gsub("(.*)\\(expected: (\\d.*)\\)", "\\2", names(result$estimate))),
CI_low = result$conf.int[1],
CI_high = result$conf.int[2],
p_value = result$p.value
)
class(outlier) <- c("check_outliers_simres", class(outlier))
outlier
}

#' @export
check_outliers.DHARMa <- check_outliers.performance_simres



# Thresholds --------------------------------------------------------------

Expand Down
14 changes: 11 additions & 3 deletions R/simulate_residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,17 @@ plot.performance_simres <- function(x, ...) {
# helper functions ---------------------

.simres_statistics <- function(x, statistic_fun, alternative = "two.sided") {
# count observed and simulated zeros
observed <- statistic_fun(x$observedResponse)
simulated <- apply(x$simulatedResponse, 2, statistic_fun)
# summarize the observed and simulated residuals
if (!is.null(statistic_fun)) {

Check warning on line 100 in R/simulate_residuals.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/simulate_residuals.R,line=100,col=7,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
# either apply a function to observed and simulated residusls,
# to calcualte a summary statistic
observed <- statistic_fun(x$observedResponse)
simulated <- apply(x$simulatedResponse, 2, statistic_fun)
} else {
# or we pass the values to compute the p-value directly (for "check_outliers()")
observed <- x
simulated <- statistic_fun
}
# p is simply ratio of simulated zeros to observed zeros
p <- switch(alternative,
greater = mean(simulated >= observed),
Expand Down

0 comments on commit 0180abd

Please sign in to comment.