From 637e87506d51c49dcf1e4c684f1eaca0eea49c0c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 17:42:20 +0100 Subject: [PATCH] Check for influential observations of GLM w/o numeric variables Fixes #735 --- DESCRIPTION | 2 +- NEWS.md | 5 ++++ R/check_outliers.R | 39 ++++++++-------------------- tests/testthat/test-check_outliers.R | 10 +++++++ 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b1ce379bf..b6f58b233 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.4.7 +Version: 0.12.4.8 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 1bddce2ce..a181937d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,11 @@ * Increased accuracy for `check_convergence()` for *glmmTMB* models. +## Bug fixes + +* `check_outliers()` did not warn that no numeric variables were found when only + the response variable was numeric, but all relevant predictors were not. + # performance 0.12.4 ## Changes diff --git a/R/check_outliers.R b/R/check_outliers.R index 7729f9931..73b3f6d61 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -378,38 +378,16 @@ check_outliers.default <- function(x, # Check args if (all(method == "all")) { method <- c( - "zscore_robust", - "iqr", - "ci", - "cook", - "pareto", - "mahalanobis", - "mahalanobis_robust", - "mcd", - "ics", - "optics", - "lof" + "zscore_robust", "iqr", "ci", "cook", "pareto", "mahalanobis", + "mahalanobis_robust", "mcd", "ics", "optics", "lof" ) } method <- match.arg( method, c( - "zscore", - "zscore_robust", - "iqr", - "ci", - "hdi", - "eti", - "bci", - "cook", - "pareto", - "mahalanobis", - "mahalanobis_robust", - "mcd", - "ics", - "optics", - "lof" + "zscore", "zscore_robust", "iqr", "ci", "hdi", "eti", "bci", "cook", + "pareto", "mahalanobis", "mahalanobis_robust", "mcd", "ics", "optics", "lof" ), several.ok = TRUE ) @@ -427,8 +405,13 @@ check_outliers.default <- function(x, ) } - # Remove non-numerics - my_data <- datawizard::data_select(my_data, select = is.numeric, verbose = FALSE) + # Remove non-numerics, but only check predictors + model_predictors <- unique(insight::find_predictors(model, flatten = TRUE)) + my_data <- datawizard::data_select( + my_data[model_predictors], + select = is.numeric, + verbose = FALSE + ) # check if any data left if (is.null(my_data) || ncol(my_data) == 0) { diff --git a/tests/testthat/test-check_outliers.R b/tests/testthat/test-check_outliers.R index 6aa64516f..f037950c8 100644 --- a/tests/testthat/test-check_outliers.R +++ b/tests/testthat/test-check_outliers.R @@ -361,6 +361,16 @@ test_that("check_outliers with invald data", { }) +test_that("check_outliers on numeric data only", { + data(mtcars) + # all predictors categorical + mtcars$wt <- as.factor(mtcars$wt) + mtcars$mpg <- as.factor(mtcars$mpg) + model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") + expect_error(check_outliers(model), regex = "No numeric") +}) + + test_that("check_outliers with DHARMa", { skip_if_not_installed("DHARMa") mt1 <- mtcars[, c(1, 3, 4)]