From 637e87506d51c49dcf1e4c684f1eaca0eea49c0c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 17:42:20 +0100 Subject: [PATCH 1/4] Check for influential observations of GLM w/o numeric variables Fixes #735 --- DESCRIPTION | 2 +- NEWS.md | 5 ++++ R/check_outliers.R | 39 ++++++++-------------------- tests/testthat/test-check_outliers.R | 10 +++++++ 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b1ce379bf..b6f58b233 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.4.7 +Version: 0.12.4.8 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 1bddce2ce..a181937d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,11 @@ * Increased accuracy for `check_convergence()` for *glmmTMB* models. +## Bug fixes + +* `check_outliers()` did not warn that no numeric variables were found when only + the response variable was numeric and none of the relevant predictors were.
+ # performance 0.12.4 ## Changes diff --git a/R/check_outliers.R b/R/check_outliers.R index 7729f9931..73b3f6d61 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -378,38 +378,16 @@ check_outliers.default <- function(x, # Check args if (all(method == "all")) { method <- c( - "zscore_robust", - "iqr", - "ci", - "cook", - "pareto", - "mahalanobis", - "mahalanobis_robust", - "mcd", - "ics", - "optics", - "lof" + "zscore_robust", "iqr", "ci", "cook", "pareto", "mahalanobis", + "mahalanobis_robust", "mcd", "ics", "optics", "lof" ) } method <- match.arg( method, c( - "zscore", - "zscore_robust", - "iqr", - "ci", - "hdi", - "eti", - "bci", - "cook", - "pareto", - "mahalanobis", - "mahalanobis_robust", - "mcd", - "ics", - "optics", - "lof" + "zscore", "zscore_robust", "iqr", "ci", "hdi", "eti", "bci", "cook", + "pareto", "mahalanobis", "mahalanobis_robust", "mcd", "ics", "optics", "lof" ), several.ok = TRUE ) @@ -427,8 +405,13 @@ check_outliers.default <- function(x, ) } - # Remove non-numerics - my_data <- datawizard::data_select(my_data, select = is.numeric, verbose = FALSE) + # Remove non-numerics, but only check predictors + model_predictors <- unique(insight::find_predictors(model, flatten = TRUE)) + my_data <- datawizard::data_select( + my_data[model_predictors], + select = is.numeric, + verbose = FALSE + ) # check if any data left if (is.null(my_data) || ncol(my_data) == 0) { diff --git a/tests/testthat/test-check_outliers.R b/tests/testthat/test-check_outliers.R index 6aa64516f..f037950c8 100644 --- a/tests/testthat/test-check_outliers.R +++ b/tests/testthat/test-check_outliers.R @@ -361,6 +361,16 @@ test_that("check_outliers with invald data", { }) +test_that("check_outliers on numeric data only", { + data(mtcars) + # all predictors categorical + mtcars$wt <- as.factor(mtcars$wt) + mtcars$mpg <- as.factor(mtcars$mpg) + model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") + expect_error(check_outliers(model), regex = "No numeric") +}) + + 
 test_that("check_outliers with DHARMa", { skip_if_not_installed("DHARMa") mt1 <- mtcars[, c(1, 3, 4)] From e0e83d918dc2f2c45c49a594817fcaa1da55ee2b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 17:58:11 +0100 Subject: [PATCH 2/4] Pass `x` instead of `model` to find_predictors() in check_outliers.default --- R/check_outliers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check_outliers.R b/R/check_outliers.R index 73b3f6d61..9234796fb 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -406,7 +406,7 @@ check_outliers.default <- function(x, } # Remove non-numerics, but only check predictors - model_predictors <- unique(insight::find_predictors(model, flatten = TRUE)) + model_predictors <- unique(insight::find_predictors(x, flatten = TRUE)) my_data <- datawizard::data_select( my_data[model_predictors], select = is.numeric, From a5ca7167512ff16c8e4f4332c5d6663173d4d965 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 18:38:24 +0100 Subject: [PATCH 3/4] Only restrict numeric check to predictors for binomial models; reuse model_info() --- R/check_outliers.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/R/check_outliers.R b/R/check_outliers.R index 9234796fb..161bc65e7 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -392,6 +392,9 @@ check_outliers.default <- function(x, several.ok = TRUE ) + # Get model information + m_info <- insight::model_info(x) + # Get data my_data <- insight::get_data(x, verbose = FALSE) @@ -405,8 +408,12 @@ check_outliers.default <- function(x, ) } - # Remove non-numerics, but only check predictors - model_predictors <- unique(insight::find_predictors(x, flatten = TRUE)) + # Remove non-numerics, but in case of binomial, only check predictors + if (m_info$is_binomial) { + model_predictors <- unique(insight::find_predictors(x, flatten = TRUE)) + } else { + model_predictors <- colnames(my_data) + } my_data <- datawizard::data_select( my_data[model_predictors], select = is.numeric, @@ -451,7 +458,7 @@ check_outliers.default <- function(x, } # Cook - if ("cook" %in% method &&
!insight::model_info(x)$is_bayesian && !inherits(x, "bife")) { + if ("cook" %in% method && !m_info$is_bayesian && !inherits(x, "bife")) { data_cook <- .check_outliers_cook( x, threshold = thresholds$cook @@ -491,7 +498,7 @@ check_outliers.default <- function(x, } # Pareto - if ("pareto" %in% method && insight::model_info(x)$is_bayesian) { + if ("pareto" %in% method && m_info(x)$is_bayesian) { data_pareto <- .check_outliers_pareto( x, threshold = thresholds$pareto From 1c2d62879353104e4e17dcb58421a344b49c1c13 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 18:49:33 +0100 Subject: [PATCH 4/4] Access `m_info$is_bayesian` instead of calling `m_info(x)` in Pareto check --- R/check_outliers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check_outliers.R b/R/check_outliers.R index 161bc65e7..ffb2d633f 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -498,7 +498,7 @@ check_outliers.default <- function(x, } # Pareto - if ("pareto" %in% method && m_info(x)$is_bayesian) { + if ("pareto" %in% method && m_info$is_bayesian) { data_pareto <- .check_outliers_pareto( x, threshold = thresholds$pareto