From 637e87506d51c49dcf1e4c684f1eaca0eea49c0c Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 24 Nov 2024 17:42:20 +0100
Subject: [PATCH] Check for influential observations of GLM w/o numeric
 variables. Fixes #735

---
 DESCRIPTION                          |  2 +-
 NEWS.md                              |  5 ++++
 R/check_outliers.R                   | 39 ++++++++--------------------
 tests/testthat/test-check_outliers.R | 10 +++++++
 4 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b1ce379bf..b6f58b233 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.12.4.7
+Version: 0.12.4.8
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",
diff --git a/NEWS.md b/NEWS.md
index 1bddce2ce..a181937d8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -13,6 +13,11 @@
 
 * Increased accuracy for `check_convergence()` for *glmmTMB* models.
 
+## Bug fixes
+
+* `check_outliers()` did not warn that no numeric variables were found when only
+  the response variable was numeric and none of the relevant predictors were.
+
 # performance 0.12.4
 
 ## Changes
diff --git a/R/check_outliers.R b/R/check_outliers.R
index 7729f9931..73b3f6d61 100644
--- a/R/check_outliers.R
+++ b/R/check_outliers.R
@@ -378,38 +378,16 @@ check_outliers.default <- function(x,
   # Check args
   if (all(method == "all")) {
     method <- c(
-      "zscore_robust",
-      "iqr",
-      "ci",
-      "cook",
-      "pareto",
-      "mahalanobis",
-      "mahalanobis_robust",
-      "mcd",
-      "ics",
-      "optics",
-      "lof"
+      "zscore_robust", "iqr", "ci", "cook", "pareto", "mahalanobis",
+      "mahalanobis_robust", "mcd", "ics", "optics", "lof"
     )
   }
 
   method <- match.arg(
     method,
     c(
-      "zscore",
-      "zscore_robust",
-      "iqr",
-      "ci",
-      "hdi",
-      "eti",
-      "bci",
-      "cook",
-      "pareto",
-      "mahalanobis",
-      "mahalanobis_robust",
-      "mcd",
-      "ics",
-      "optics",
-      "lof"
+      "zscore", "zscore_robust", "iqr", "ci", "hdi", "eti", "bci", "cook",
+      "pareto", "mahalanobis", "mahalanobis_robust", "mcd", "ics", "optics", "lof"
     ),
     several.ok = TRUE
   )
@@ -427,8 +405,13 @@ check_outliers.default <- function(x,
     )
   }
 
-  # Remove non-numerics
-  my_data <- datawizard::data_select(my_data, select = is.numeric, verbose = FALSE)
+  # Remove non-numerics, but only check predictors: a numeric response alone
+  # must not count as data to check for outliers
+  model_predictors <- unique(insight::find_predictors(x, flatten = TRUE))
+  my_data <- datawizard::data_select(
+    my_data[model_predictors],
+    select = is.numeric, verbose = FALSE
+  )
 
   # check if any data left
   if (is.null(my_data) || ncol(my_data) == 0) {
diff --git a/tests/testthat/test-check_outliers.R b/tests/testthat/test-check_outliers.R
index 6aa64516f..f037950c8 100644
--- a/tests/testthat/test-check_outliers.R
+++ b/tests/testthat/test-check_outliers.R
@@ -361,6 +361,16 @@ test_that("check_outliers with invald data", {
 })
 
 
+test_that("check_outliers errors when no predictor is numeric", {
+  data(mtcars)
+  # numeric response (vs), but all predictors categorical
+  mtcars$wt <- as.factor(mtcars$wt)
+  mtcars$mpg <- as.factor(mtcars$mpg)
+  model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
+  expect_error(check_outliers(model), regexp = "No numeric")
+})
+
+
 test_that("check_outliers with DHARMa", {
   skip_if_not_installed("DHARMa")
   mt1 <- mtcars[, c(1, 3, 4)]