From 74cb000b7fbb89939b858e334759bce287b90ced Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 25 Aug 2024 15:08:03 +0200
Subject: [PATCH 1/3] update `check_heterogeneity_bias()`

---
 DESCRIPTION                     |  2 +-
 NEWS.md                         |  6 ++++++
 R/check_heterogeneity_bias.R    | 34 +++++++++++++++++++++++++++------
 man/check_heterogeneity_bias.Rd | 31 +++++++++++++++++++++++++++---
 4 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c0345d5ec..a40d601bd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.12.2.11
+Version: 0.12.2.12
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",
diff --git a/NEWS.md b/NEWS.md
index fa8d7451d..efe1a9561 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,12 @@
 
 * `check_dag()`, to check DAGs for correct adjustment sets.
 
+## Changes
+
+* `check_heterogeneity_bias()` gets a `nested` argument. Furthermore, `by` can
+  specifiy more than one variable, meaning that nested or cross-classified
+  model designs can also be tested for heterogeneity bias.
+
 # performance 0.12.2
 
 Patch release, to ensure that _performance_ runs with older version of
diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R
index 424bf8b5b..3c9b502ce 100644
--- a/R/check_heterogeneity_bias.R
+++ b/R/check_heterogeneity_bias.R
@@ -9,8 +9,24 @@
 #'   that should be checked. If `x` is a mixed model object, this argument
 #'   will be ignored.
 #' @param by Character vector (or formula) with the name of the variable that
-#'   indicates the group- or cluster-ID. If `x` is a model object, this
-#'   argument will be ignored.
+#'   indicates the group- or cluster-ID. For cross-classified or nested designs,
+#'   `by` can also identify two or more variables as group- or cluster-IDs. If
+#'   the data is nested and should be treated as such, set `nested = TRUE`. Else,
+#'   if `by` defines two or more variables and `nested = FALSE`, a cross-classified
+#'   design is assumed. If `x` is a model object, this argument will be ignored.
+#'
+#'   For nested designs, `by` can be:
+#'   - a character vector with the names of the variables that indicate the
+#'     levels, ordered from *highest* level to *lowest* (e.g.
+#'     `by = c("L4", "L3", "L2")`).
+#'   - a character vector with variable names in the format `by = "L4/L3/L2"`,
+#'     where the levels are separated by `/`.
+#'
+#'   See also section _De-meaning for cross-classified designs_ and
+#'   _De-meaning for nested designs_ below.
+#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`,
+#'   the data is treated as cross-classified. Only applies if `by` contains more
+#'   than one variable.
 #' @param group Deprecated. Use `by` instead.
 #'
 #' @seealso
@@ -28,7 +44,7 @@
 #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID
 #' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
 #' @export
-check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) {
+check_heterogeneity_bias <- function(x, select = NULL, by = NULL, nested = FALSE, group = NULL) {
   insight::check_if_installed("datawizard", minimum_version = "0.12.0")
 
   ## TODO: deprecate later
@@ -54,8 +70,14 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL)
     my_data <- x
   }
 
-  unique_groups <- .n_unique(my_data[[by]])
-  combinations <- expand.grid(select, by)
+  # for nested designs?
+  if (nested) {
+    # separate level-indicators with "/", as supported by datawizard
+    by <- paste(by, collapse = "/")
+  }
+
+  # create all combinations that should be checked
+  combinations <- expand.grid(select, by[1])
 
   result <- Map(function(predictor, id) {
     # demean predictor
@@ -72,7 +94,7 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL)
     } else {
       NULL
     }
-  }, as.character(combinations[[1]]), as.character(combinations[[2]]))
+  }, as.character(combinations[[1]]), by)
 
   out <- unlist(insight::compact_list(result), use.names = FALSE)
 
diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd
index 46f9f70a5..228d26510 100644
--- a/man/check_heterogeneity_bias.Rd
+++ b/man/check_heterogeneity_bias.Rd
@@ -4,7 +4,13 @@
 \alias{check_heterogeneity_bias}
 \title{Check model predictor for heterogeneity bias}
 \usage{
-check_heterogeneity_bias(x, select = NULL, by = NULL, group = NULL)
+check_heterogeneity_bias(
+  x,
+  select = NULL,
+  by = NULL,
+  nested = FALSE,
+  group = NULL
+)
 }
 \arguments{
 \item{x}{A data frame or a mixed model object.}
@@ -14,8 +20,27 @@ that should be checked. If \code{x} is a mixed model object, this argument
 will be ignored.}
 
 \item{by}{Character vector (or formula) with the name of the variable that
-indicates the group- or cluster-ID. If \code{x} is a model object, this
-argument will be ignored.}
+indicates the group- or cluster-ID. For cross-classified or nested designs,
+\code{by} can also identify two or more variables as group- or cluster-IDs. If
+the data is nested and should be treated as such, set \code{nested = TRUE}. Else,
+if \code{by} defines two or more variables and \code{nested = FALSE}, a cross-classified
+design is assumed. If \code{x} is a model object, this argument will be ignored.
+
+For nested designs, \code{by} can be:
+\itemize{
+\item a character vector with the names of the variables that indicate the
+levels, ordered from \emph{highest} level to \emph{lowest} (e.g.
+\code{by = c("L4", "L3", "L2")}).
+\item a character vector with variable names in the format \code{by = "L4/L3/L2"},
+where the levels are separated by \code{/}.
+}
+
+See also section \emph{De-meaning for cross-classified designs} and
+\emph{De-meaning for nested designs} below.}
+
+\item{nested}{Logical, if \code{TRUE}, the data is treated as nested. If \code{FALSE},
+the data is treated as cross-classified. Only applies if \code{by} contains more
+than one variable.}
 
 \item{group}{Deprecated. Use \code{by} instead.}
 }

From 4c2cff09d3469c0d73898548b853e099cea94476 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 25 Aug 2024 16:53:13 +0200
Subject: [PATCH 2/3] Update DESCRIPTION

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a40d601bd..ee591ae47 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -156,4 +156,4 @@ Config/Needs/website:
     r-lib/pkgdown,
     easystats/easystatstemplate
 Config/rcmdcheck/ignore-inconsequential-notes: true
-Remotes: easystats/see
+Remotes: easystats/see, easystats/insight

From 3d3e4e493d39bb652e8ff6b9ce2cbe2ac6728603 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 25 Aug 2024 17:25:09 +0200
Subject: [PATCH 3/3] Update NEWS.md

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index efe1a9561..7be029839 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,7 +7,7 @@
 ## Changes
 
 * `check_heterogeneity_bias()` gets a `nested` argument. Furthermore, `by` can
-  specifiy more than one variable, meaning that nested or cross-classified
+  specify more than one variable, meaning that nested or cross-classified
   model designs can also be tested for heterogeneity bias.
 
 # performance 0.12.2