Skip to content

Commit

Permalink
Check for influential observations of GLM w/o numeric variables (#779)
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke authored Nov 24, 2024
1 parent b72cf51 commit ce079b7
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 31 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: performance
Title: Assessment of Regression Models Performance
Version: 0.12.4.7
Version: 0.12.4.8
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@

* Increased accuracy for `check_convergence()` for *glmmTMB* models.

## Bug fixes

* `check_outliers()` did not warn that no numeric variables were found when the
response variable was numeric but none of the relevant predictors were.

# performance 0.12.4

## Changes
Expand Down
50 changes: 20 additions & 30 deletions R/check_outliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -378,42 +378,23 @@ check_outliers.default <- function(x,
# Check args
if (all(method == "all")) {
method <- c(
"zscore_robust",
"iqr",
"ci",
"cook",
"pareto",
"mahalanobis",
"mahalanobis_robust",
"mcd",
"ics",
"optics",
"lof"
"zscore_robust", "iqr", "ci", "cook", "pareto", "mahalanobis",
"mahalanobis_robust", "mcd", "ics", "optics", "lof"
)
}

method <- match.arg(
method,
c(
"zscore",
"zscore_robust",
"iqr",
"ci",
"hdi",
"eti",
"bci",
"cook",
"pareto",
"mahalanobis",
"mahalanobis_robust",
"mcd",
"ics",
"optics",
"lof"
"zscore", "zscore_robust", "iqr", "ci", "hdi", "eti", "bci", "cook",
"pareto", "mahalanobis", "mahalanobis_robust", "mcd", "ics", "optics", "lof"
),
several.ok = TRUE
)

# Get model information
m_info <- insight::model_info(x)

# Get data
my_data <- insight::get_data(x, verbose = FALSE)

Expand All @@ -427,8 +408,17 @@ check_outliers.default <- function(x,
)
}

# Remove non-numerics
my_data <- datawizard::data_select(my_data, select = is.numeric, verbose = FALSE)
# Remove non-numerics, but in case of binomial, only check predictors
if (m_info$is_binomial) {
model_predictors <- unique(insight::find_predictors(x, flatten = TRUE))
} else {
model_predictors <- colnames(my_data)
}
my_data <- datawizard::data_select(
my_data[model_predictors],
select = is.numeric,
verbose = FALSE
)

# check if any data left
if (is.null(my_data) || ncol(my_data) == 0) {
Expand Down Expand Up @@ -468,7 +458,7 @@ check_outliers.default <- function(x,
}

# Cook
if ("cook" %in% method && !insight::model_info(x)$is_bayesian && !inherits(x, "bife")) {
if ("cook" %in% method && !m_info$is_bayesian && !inherits(x, "bife")) {
data_cook <- .check_outliers_cook(
x,
threshold = thresholds$cook
Expand Down Expand Up @@ -508,7 +498,7 @@ check_outliers.default <- function(x,
}

# Pareto
if ("pareto" %in% method && insight::model_info(x)$is_bayesian) {
if ("pareto" %in% method && m_info$is_bayesian) {
data_pareto <- .check_outliers_pareto(
x,
threshold = thresholds$pareto
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-check_outliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,16 @@ test_that("check_outliers with invald data", {
})


test_that("check_outliers on numeric data only", {
  # Regression test: a binomial GLM whose response (`vs`) is numeric while all
  # predictors are factors. `check_outliers()` is expected to fail with a
  # message matching "No numeric" for such a model.
  data(mtcars)
  # all predictors categorical
  mtcars$wt <- as.factor(mtcars$wt)
  mtcars$mpg <- as.factor(mtcars$mpg)
  model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
  # Spell out `regexp` in full rather than relying on partial argument
  # matching of `regex`.
  expect_error(check_outliers(model), regexp = "No numeric")
})


test_that("check_outliers with DHARMa", {
skip_if_not_installed("DHARMa")
mt1 <- mtcars[, c(1, 3, 4)]
Expand Down

0 comments on commit ce079b7

Please sign in to comment.