easystats · strengejacke · May 18, 2024 · May 16, 2024 · May 16, 2024 · May 16, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.11.0.8
+Version: 0.11.0.9
 Authors@R: 
     c(person(given = "Daniel",
              family = "Lüdecke",
@@ -154,3 +154,4 @@ Config/Needs/website:
     r-lib/pkgdown,
     easystats/easystatstemplate
 Config/rcmdcheck/ignore-inconsequential-notes: true
+Remotes: easystats/insight
diff --git a/NEWS.md b/NEWS.md
@@ -5,6 +5,10 @@
 * Aliases `posterior_predictive_check()` and `check_posterior_predictions()` for
   `check_predictions()` are deprecated.
 
+* Arguments named `group` or `group_by` will be deprecated in a future release.
+  Please use `by` instead. This affects `check_heterogeneity_bias()` in
+  *performance*.
+
 ## General
 
 * Improved documentation and new vignettes added.

diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R
@@ -8,9 +8,10 @@
 #' @param select Character vector (or formula) with names of variables to select
 #'   that should be checked. If `x` is a mixed model object, this argument
 #'   will be ignored.
-#' @param group Character vector (or formula) with the name of the variable that
+#' @param by Character vector (or formula) with the name of the variable that
 #'   indicates the group- or cluster-ID. If `x` is a model object, this
 #'   argument will be ignored.
+#' @param group Deprecated. Use `by` instead.
 #'
 #' @seealso
 #' For further details, read the vignette
@@ -25,31 +26,38 @@
 #' @examples
 #' data(iris)
 #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID
-#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID")
+#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
 #' @export
-check_heterogeneity_bias <- function(x, select = NULL, group = NULL) {
+check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) {
+  ## TODO: deprecate later
+  if (!is.null(group)) {
+    insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint
+    by <- group
+  }
   if (insight::is_model(x)) {
-    group <- insight::find_random(x, split_nested = TRUE, flatten = TRUE)
-    if (is.null(group)) {
-      insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `group`.") # nolint
+    by <- insight::find_random(x, split_nested = TRUE, flatten = TRUE)
+    if (is.null(by)) {
+      insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `by`.") # nolint
     }
     my_data <- insight::get_data(x, source = "mf", verbose = FALSE)
     select <- insight::find_predictors(x, effects = "fixed", component = "conditional", flatten = TRUE)
   } else {
     if (inherits(select, "formula")) {
       select <- all.vars(select)
     }
-    if (inherits(group, "formula")) {
-      group <- all.vars(group)
+    if (inherits(by, "formula")) {
+      by <- all.vars(by)
     }
     my_data <- x
   }
 
-  unique_groups <- .n_unique(my_data[[group]])
-  combinations <- expand.grid(select, group)
+  unique_groups <- .n_unique(my_data[[by]])
+  combinations <- expand.grid(select, by)
 
   result <- Map(function(predictor, id) {
     # demean predictor
+
+    ## FIXME: update argument name later!
     d <- datawizard::demean(my_data, select = predictor, group = id, verbose = FALSE)
 
     # get new names

diff --git a/R/performance_cv.R b/R/performance_cv.R
@@ -69,7 +69,7 @@
       test_pred <- insight::get_predicted(model, ci = NULL, data = data)
       test_resd <- test_resp - test_pred
     } else if (method == "holdout") {
-      train_i <- sample(seq_len(nrow(model_data)), size = round((1 - prop) * nrow(model_data)), replace = FALSE)
+      train_i <- sample.int(nrow(model_data), size = round((1 - prop) * nrow(model_data)), replace = FALSE)
       model_upd <- stats::update(model, data = model_data[train_i, ])
       test_resp <- model_data[-train_i, resp.name]
       test_pred <- insight::get_predicted(model_upd, ci = NULL, data = model_data[-train_i, ])
@@ -105,7 +105,7 @@
      models_upd <- lapply(cv_folds, function(.x) {
        stats::update(model, data = model_data[.x$train, ])
      })
      test_pred <- mapply(function(.x, .y) {
        insight::get_predicted(.y, ci = NULL, data = model_data[.x$test, ])
      }, cv_folds, models_upd, SIMPLIFY = FALSE)
      test_resp <- lapply(cv_folds, function(.x) {
@@ -124,7 +124,7 @@
    R2 <- 1 - MSE / mean((test_resp - mean(test_resp, na.rm = TRUE))^2, na.rm = TRUE)
    out <- data.frame(MSE = MSE, RMSE = RMSE, R2 = R2)
  } else {
    test_resd <- mapply(function(.x, .y) {
      .x - .y
    }, test_resp, test_pred, SIMPLIFY = FALSE)
    MSEs <- sapply(test_resd, function(x) mean(x^2, na.rm = TRUE))
@@ -148,7 +148,7 @@
      paste0(
        "Metric",
        ifelse(length(missing_metrics) > 1, "s '", " '"),
        paste0(missing_metrics, collapse = "', '"),
        "' not yet supported."
      )
    ), colour = "red"))
@@ -158,7 +158,7 @@
 }

 # TODO: implement performance::log_lik() function for deviance/elpd metrics
 #   - When given a model, it should pass it to insight::get_loglikelihood, stats4::logLik, stats::logLik, or rstantools::log_lik
 #   - When given a model and new data, it should pass to rstantools::log_lik if stan
 #     or compute a df like this:
 #       df <- list(residuals = cv_residuals); class(df) <- class(model)

diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd
diff --git a/tests/testthat/test-check_heterogeneity_bias.R b/tests/testthat/test-check_heterogeneity_bias.R
@@ -2,29 +2,29 @@ test_that("check_heterogeneity_bias", {
   data(iris)
   set.seed(123)
   iris$ID <- sample.int(4, nrow(iris), replace = TRUE) # fake-ID
-  out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID")
+  out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
   expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE)
   expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length")
 
-  out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, group = ~ID)
+  out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, by = ~ID)
   expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE)
   expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length")
 
   m <- lm(Sepal.Length ~ Petal.Length + Petal.Width + Species + ID, data = iris)
   expect_error(
-    check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID"),
+    check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID"),
     regex = "no mixed model"
   )
 
   skip_if_not_installed("lme4")
   m <- lme4::lmer(Sepal.Length ~ Petal.Length + Petal.Width + Species + (1 | ID), data = iris)
-  out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID")
+  out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID")
   expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE)
   expect_output(
     print(out),
     "Possible heterogeneity bias due to following predictors: Petal\\.Length, Petal\\.Width, Species"
   )
-  out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, group = ~ID)
+  out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, by = ~ID)
   expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE)
   expect_output(
     print(out),