From 6f153beaf04f1f907ee1c8f6baf8bcd9d4ec99c8 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Feb 2024 21:46:16 +0100 Subject: [PATCH 1/6] difficult-to-diagnose errors using "difftime" response in a linear model Fixes #678 --- DESCRIPTION | 2 +- NEWS.md | 3 +++ R/check_collinearity.R | 19 ++++++++++++++----- R/check_outliers.R | 16 ++++++++++++++-- man/check_outliers.Rd | 3 +++ tests/testthat/test-check_collinearity.R | 6 ++++++ tests/testthat/test-check_outliers.R | 13 +++++++++++++ 7 files changed, 54 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9391ecf34..864dad20b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.10.8.11 +Version: 0.10.8.12 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 0cc49b785..d9696e082 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,6 +26,9 @@ * `check_outliers()` now properly accept the `percentage_central` argument when using the `"mcd"` method. +* Fixed edge cases in `check_collinearity()` and `check_outliers()` for models + with response variables of classes `Date`, `POSIXct`, `POSIXlt` or `difftime`. + # performance 0.10.8 ## Changes diff --git a/R/check_collinearity.R b/R/check_collinearity.R index 2add49126..2a5437688 100644 --- a/R/check_collinearity.R +++ b/R/check_collinearity.R @@ -404,7 +404,18 @@ check_collinearity.zerocount <- function(x, .check_collinearity <- function(x, component, ci = 0.95, verbose = TRUE) { - v <- insight::get_varcov(x, component = component, verbose = FALSE) + v <- .safe(insight::get_varcov(x, component = component, verbose = FALSE)) + + # sanity check + if (is.null(v)) { + if (isTRUE(verbose)) { + insight::format_alert( + sprintf("Could not extract the variance-covariance matrix for the %s component of the model.", component) + ) + } + return(NULL) + } + term_assign <- .term_assignments(x, component, verbose = verbose) # any assignment found? @@ -432,10 +443,8 @@ check_collinearity.zerocount <- function(x, if (insight::has_intercept(x)) { v <- v[-1, -1] term_assign <- term_assign[-1] - } else { - if (isTRUE(verbose)) { - insight::format_alert("Model has no intercept. VIFs may not be sensible.") - } + } else if (isTRUE(verbose)) { + insight::format_alert("Model has no intercept. VIFs may not be sensible.") } f <- insight::find_formula(x) diff --git a/R/check_outliers.R b/R/check_outliers.R index f3bf7f5a9..40ec737e8 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -23,6 +23,7 @@ #' 'Details'). If a numeric value is given, it will be used as the threshold #' for any of the method run. #' @param ID Optional, to report an ID column along with the row number. +#' @param verbose Toggle warnings. #' @param ... When `method = "ics"`, further arguments in `...` are passed #' down to [ICSOutlier::ics.outlier()]. When `method = "mahalanobis"`, #' they are passed down to [stats::mahalanobis()]. `percentage_central` can @@ -348,6 +349,7 @@ check_outliers.default <- function(x, method = c("cook", "pareto"), threshold = NULL, ID = NULL, + verbose = TRUE, ...) { # Check args if (all(method == "all")) { @@ -391,8 +393,18 @@ check_outliers.default <- function(x, # Get data my_data <- insight::get_data(x, verbose = FALSE) + # sanity check for date, POSIXt and difftime variables + if (any(vapply(my_data, inherits, FUN.VALUE = logical(1), what = c("Date", "POSIXt", "difftime"))) && verbose) { + insight::format_alert("Date variables are not supported for outliers detection. These will be ignored.") + } + # Remove non-numerics - my_data <- datawizard::data_select(my_data, select = is.numeric) + my_data <- datawizard::data_select(my_data, select = is.numeric, verbose = FALSE) + + # check if any data left + if (is.null(my_data) || ncol(my_data) == 0) { + insight::format_error("No numeric variables found. No data to check for outliers.") + } # Thresholds if (is.null(threshold)) { @@ -409,7 +421,7 @@ check_outliers.default <- function(x, ) } - if (!missing(ID)) { + if (!missing(ID) && verbose) { insight::format_warning(paste0("ID argument not supported for model objects of class `", class(x)[1], "`.")) } diff --git a/man/check_outliers.Rd b/man/check_outliers.Rd index 8dc0326bb..22b88228c 100644 --- a/man/check_outliers.Rd +++ b/man/check_outliers.Rd @@ -14,6 +14,7 @@ check_outliers(x, ...) method = c("cook", "pareto"), threshold = NULL, ID = NULL, + verbose = TRUE, ... ) @@ -41,6 +42,8 @@ considered as outlier. If \code{NULL}, default values will be used (see for any of the method run.} \item{ID}{Optional, to report an ID column along with the row number.} + +\item{verbose}{Toggle warnings.} } \value{ A logical vector of the detected outliers with a nice printing diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index 3d68b87ac..6864ebcaf 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -211,3 +211,9 @@ test_that("check_collinearity, hurdle/zi models w/o zi-formula", { ) expect_equal(out$VIF, c(1.05772, 1.05772, 1.06587, 1.06587), tolerance = 1e-4) }) + +test_that("check_collinearity, invalid data", { + dd <- data.frame(y = as.difftime(0:5, units = "days")) + m1 <- lm(y ~ 1, data = dd) + expect_message(expect_null(check_collinearity(m1)), "Could not extract") +}) diff --git a/tests/testthat/test-check_outliers.R b/tests/testthat/test-check_outliers.R index 259f3eac9..8840e26bf 100644 --- a/tests/testthat/test-check_outliers.R +++ b/tests/testthat/test-check_outliers.R @@ -329,3 +329,16 @@ test_that("cook multiple methods which", { c("setosa", "versicolor", "virginica") ) }) + + +test_that("check_outliers with invald data", { + dd <- data.frame(y = as.difftime(0:5, units = "days")) + m1 <- lm(y ~ 1, data = dd) + expect_error( + expect_message( + check_outliers(m1), + regex = "Date variables are not supported" + ), + regex = "No numeric variables found" + ) +}) From 8d9299ea9d9eb6d14a81b011eed4c6e6956f1937 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Feb 2024 23:44:32 +0100 Subject: [PATCH 2/6] clarify msg --- R/check_collinearity.R | 5 ++++- R/check_model.R | 9 ++++++--- R/check_outliers.R | 7 ++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/R/check_collinearity.R b/R/check_collinearity.R index 2a5437688..ab0a6e464 100644 --- a/R/check_collinearity.R +++ b/R/check_collinearity.R @@ -410,7 +410,10 @@ check_collinearity.zerocount <- function(x, if (is.null(v)) { if (isTRUE(verbose)) { insight::format_alert( - sprintf("Could not extract the variance-covariance matrix for the %s component of the model.", component) + paste( + sprintf("Could not extract the variance-covariance matrix for the %s component of the model.", component), + "Please try to run `vcov(model)`, which may help identifying the problem." + ) ) } return(NULL) diff --git a/R/check_model.R b/R/check_model.R index 2a8672823..7e8125681 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -192,12 +192,15 @@ check_model.default <- function(x, suppressWarnings(.check_assumptions_glm(x, minfo, verbose, ...)) }, error = function(e) { - NULL + e } ) - if (is.null(ca)) { - insight::format_error(paste0("`check_model()` not implemented for models of class `", class(x)[1], "` yet.")) + if (inherits(ca, c("error", "simpleError"))) { + insight::format_error( + paste("\n`check_model()` returned following error:", ca$message), + paste0("\nIf the error message does not help identifying your problem, another reason why `check_model()` failed might be that models of class `", class(x)[1], "` are not yet supported.") # nolint + ) } # try to find sensible default for "type" argument diff --git a/R/check_outliers.R b/R/check_outliers.R index 40ec737e8..574201454 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -395,7 +395,12 @@ check_outliers.default <- function(x, # sanity check for date, POSIXt and difftime variables if (any(vapply(my_data, inherits, FUN.VALUE = logical(1), what = c("Date", "POSIXt", "difftime"))) && verbose) { - insight::format_alert("Date variables are not supported for outliers detection. These will be ignored.") + insight::format_alert( + paste( + "Date variables are not supported for outliers detection. These will be ignored.", + "Make sure any date variables are converted to numeric or factor {.b before} fitting the model." + ) + ) } # Remove non-numerics From a2b5e71ef75939e527a408fb7dc6c947c5a6c686 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Feb 2024 23:45:54 +0100 Subject: [PATCH 3/6] add test --- tests/testthat/test-check_model.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R index 6b4ce4db2..1aae35d77 100644 --- a/tests/testthat/test-check_model.R +++ b/tests/testthat/test-check_model.R @@ -36,3 +36,15 @@ test_that("`check_outliers()` works if convergence issues", { x <- check_outliers(m, verbose = FALSE) expect_s3_class(x, "check_outliers") }) + +test_that("`check_model()` for invalid models", { + dd <- data.frame(y = as.difftime(0:5, units = "days")) + m1 <- lm(y ~ 1, data = dd) + expect_error( + expect_message( + check_model(m1), + regex = "Date variables are not supported" + ), + regex = "No numeric variables found" + ) +}) From ec6eeb19002b89da2eb586c0f751971a230a7822 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Feb 2024 00:18:50 +0100 Subject: [PATCH 4/6] fix msg --- R/check_collinearity.R | 2 +- R/check_model.R | 5 ++++- tests/testthat/test-check_collinearity.R | 2 +- tests/testthat/test-check_model.R | 8 +------- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/R/check_collinearity.R b/R/check_collinearity.R index ab0a6e464..90909d364 100644 --- a/R/check_collinearity.R +++ b/R/check_collinearity.R @@ -404,7 +404,7 @@ check_collinearity.zerocount <- function(x, .check_collinearity <- function(x, component, ci = 0.95, verbose = TRUE) { - v <- .safe(insight::get_varcov(x, component = component, verbose = FALSE)) + v <- insight::get_varcov(x, component = component, verbose = FALSE) # sanity check if (is.null(v)) { diff --git a/R/check_model.R b/R/check_model.R index 7e8125681..ace81d275 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -197,8 +197,11 @@ check_model.default <- function(x, ) if (inherits(ca, c("error", "simpleError"))) { + pattern <- "(\n|\\s{2,})" + replacement <- " " + cleaned_string <- gsub(pattern, replacement, ca$message) insight::format_error( - paste("\n`check_model()` returned following error:", ca$message), + paste("`check_model()` returned following error:", cleaned_string), paste0("\nIf the error message does not help identifying your problem, another reason why `check_model()` failed might be that models of class `", class(x)[1], "` are not yet supported.") # nolint ) } diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index 6864ebcaf..b2b8ae740 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -215,5 +215,5 @@ test_that("check_collinearity, hurdle/zi models w/o zi-formula", { test_that("check_collinearity, invalid data", { dd <- data.frame(y = as.difftime(0:5, units = "days")) m1 <- lm(y ~ 1, data = dd) - expect_message(expect_null(check_collinearity(m1)), "Could not extract") + expect_message(expect_null(check_collinearity(m1)), "Can't extract variance-covariance matrix") }) diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R index 1aae35d77..c225bc140 100644 --- a/tests/testthat/test-check_model.R +++ b/tests/testthat/test-check_model.R @@ -40,11 +40,5 @@ test_that("`check_outliers()` works if convergence issues", { test_that("`check_model()` for invalid models", { dd <- data.frame(y = as.difftime(0:5, units = "days")) m1 <- lm(y ~ 1, data = dd) - expect_error( - expect_message( - check_model(m1), - regex = "Date variables are not supported" - ), - regex = "No numeric variables found" - ) + expect_error(check_model(m1)) }) From ed4af986d6bd38e52bea8e1c61bb72cbc7d3bcaf Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Feb 2024 00:27:25 +0100 Subject: [PATCH 5/6] run tests only on latest insight --- tests/testthat/test-check_collinearity.R | 1 + tests/testthat/test-check_model.R | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index b2b8ae740..02e3d5d06 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -213,6 +213,7 @@ test_that("check_collinearity, hurdle/zi models w/o zi-formula", { }) test_that("check_collinearity, invalid data", { + skip_if(packageVersion("insight") < "0.19.8.2") dd <- data.frame(y = as.difftime(0:5, units = "days")) m1 <- lm(y ~ 1, data = dd) expect_message(expect_null(check_collinearity(m1)), "Can't extract variance-covariance matrix") diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R index c225bc140..cc512ed78 100644 --- a/tests/testthat/test-check_model.R +++ b/tests/testthat/test-check_model.R @@ -38,6 +38,7 @@ test_that("`check_outliers()` works if convergence issues", { }) test_that("`check_model()` for invalid models", { + skip_if(packageVersion("insight") < "0.19.8.2") dd <- data.frame(y = as.difftime(0:5, units = "days")) m1 <- lm(y ~ 1, data = dd) expect_error(check_model(m1)) From 0a60642926f1b278913f7476ba6de91bf7425f31 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Feb 2024 06:54:30 +0100 Subject: [PATCH 6/6] Update test-check_collinearity.R --- tests/testthat/test-check_collinearity.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index 02e3d5d06..ea41af513 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -216,5 +216,5 @@ test_that("check_collinearity, invalid data", { skip_if(packageVersion("insight") < "0.19.8.2") dd <- data.frame(y = as.difftime(0:5, units = "days")) m1 <- lm(y ~ 1, data = dd) - expect_message(expect_null(check_collinearity(m1)), "Can't extract variance-covariance matrix") + expect_error(check_collinearity(m1), "Can't extract variance-covariance matrix") })