From 3f7af632ee824acf196e2d6d5889422dafb3551b Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 4 Aug 2024 10:00:21 +0200
Subject: [PATCH] update print, docs

---
 DESCRIPTION                        |   2 +-
 R/check_dag.R                      | 103 +++++++++++++++++++----------
 man/check_dag.Rd                   |  54 ++++++++++++---
 tests/testthat/_snaps/check_dag.md |  78 ++++++++++++----------
 tests/testthat/test-check_dag.R    |   9 +++
 5 files changed, 166 insertions(+), 80 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9158e5179..494c87973 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.12.2.5
+Version: 0.12.2.6
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",
diff --git a/R/check_dag.R b/R/check_dag.R
index 842508f10..451499de1 100644
--- a/R/check_dag.R
+++ b/R/check_dag.R
@@ -1,14 +1,18 @@
 #' @title Check correct model adjustment for identifying causal effects
 #' @name check_dag
 #'
-#' @description `check_dag()` checks if a model is correctly adjusted for
-#' identifying causal effects. It returns a **dagitty** object that can be
-#' visualized with `plot()`. `check_dag()` is a convenient wrapper around
-#' `ggdag::dagify()`, which used `dagitty::adjustmentSets()` and
-#' `dagitty::adjustedNodes()` to check if the model is correctly adjusted for
-#' identifying causal (direct and total) effects of a given exposure on the
-#' outcome. `as.dag()` is a small convenient function to return the dagitty-string,
-#' which can be used for the online-tool from the dagitty-website.
+#' @description The purpose of `check_dag()` is to build, check and visualize
+#' your model based on directed acyclic graphs (DAG). The function checks if a
+#' model is correctly adjusted for identifying specific relationships of
+#' variables, especially directed (maybe also "causal") effects for given
+#' exposures on an outcome. It returns a **dagitty** object that can be
+#' visualized with `plot()`.
+#'
+#' `check_dag()` is a convenient wrapper around `ggdag::dagify()`,
+#' `dagitty::adjustmentSets()` and `dagitty::adjustedNodes()` to check correct
+#' adjustment sets. `as.dag()` is a small convenient function to return the
+#' dagitty-string, which can be used for the online-tool from the
+#' dagitty-website.
 #'
 #' @param ... One or more formulas, which are converted into **dagitty** syntax.
 #' First element may also be model object. If a model objects is provided, its
@@ -29,7 +33,8 @@
 #' `"all"` (default), `"total"`, or `"direct"`.
 #' @param x An object of class `check_dag`, as returned by `check_dag()`.
 #'
-#' @details
+#' @section Specifying the DAG formulas:
+#'
 #' The formulas have following syntax:
 #'
 #' - One-directed paths: On the *left-hand-side* is the name of the variables
@@ -40,12 +45,36 @@
 #'
 #' - Bi-directed paths: Use `~~` to indicate bi-directed paths. For example,
 #'   `Y ~~ X` indicates that the path between `Y` and `X` is bi-directed, and
-#'   the arrow points in both directions.
+#'   the arrow points in both directions. Bi-directed paths often indicate
+#'   unmeasured cause, or unmeasured confounding, of the two involved variables.
+#'
+#' @section Why are DAGs important - the Table 2 fallacy:
+#'
+#' Correctly thinking about and identifying the relationships between variables
+#' is important when it comes to reporting coefficients from regression models
+#' that mutually adjust for "confounders" or include covariates. Different
+#' coefficients might have different interpretations, depending on their
+#' relationship to other variables in the model. Sometimes, a regression
+#' coefficient represents the direct effect of an exposure on an outcome, but
+#' sometimes it must be interpreted as total effect, due to the involvement
+#' of mediating effects. This problem is also called "Table 2 fallacy"
+#' (_Westreich and Greenland 2013_). DAGs help to visualize and thereby focus
+#' on the relationships of variables in a regression model to detect missing
+#' adjustments or over-adjustment.
 #'
 #' @return An object of class `check_dag`, which can be visualized with `plot()`.
 #' The returned object also inherits from class `dagitty` and thus can be used
 #' with all functions from the **ggdag** and **dagitty** packages.
 #'
+#' @references
+#' - Rohrer, J. M. (2018). Thinking clearly about correlations and causation:
+#'   Graphical causal models for observational data. Advances in Methods and
+#'   Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629}
+#'
+#' - Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and
+#'   Interpreting Confounder and Modifier Coefficients. American Journal of
+#'   Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412}
+#'
 #' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)
 #' # no adjustment needed
 #' check_dag(
@@ -203,6 +232,7 @@ check_dag.default <- function(...,
   attr(dag, "outcome") <- outcome
   attr(dag, "exposure") <- exposure
   attr(dag, "adjusted") <- adjusted
+  attr(dag, "adjustment_sets") <- checks[[1]]$current_adjustments
   attr(dag, "check_direct") <- insight::compact_list(checks[[1]])
   attr(dag, "check_total") <- insight::compact_list(checks[[2]])
 
@@ -236,6 +266,29 @@ as.dag <- function(x, ...) {
 print.check_dag <- function(x, ...) {
   effect <- attributes(x)$effect
 
+  # header
+  cat(insight::print_color("# Check for correct adjustment sets", "blue"))
+
+  # model specification
+  exposure_outcome_text <- paste0(
+    "\n- Outcome: ", attributes(x)$outcome,
+    "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""),
+    ": ", datawizard::text_concatenate(attributes(x)$exposure)
+  )
+
+  # add information on adjustments
+  if (!is.null(attributes(x)$adjustment_sets)) {
+    exposure_outcome_text <- paste0(
+      exposure_outcome_text,
+      "\n- Adjustment",
+      ifelse(length(attributes(x)$adjustment_sets) > 1, "s", ""),
+      ": ", datawizard::text_concatenate(attributes(x)$adjustment_sets)
+    )
+  }
+
+  cat(exposure_outcome_text)
+  cat("\n\n")
+
   for (i in c("direct", "total")) {
     if (i == "direct") {
       out <- attributes(x)$check_direct
@@ -243,30 +296,13 @@ print.check_dag <- function(x, ...) {
       out <- attributes(x)$check_total
     }
 
-    exposure_outcome_text <- paste0(
-      "\n- Outcome: ", attributes(x)$outcome,
-      "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""),
-      ": ", datawizard::text_concatenate(attributes(x)$exposure)
-    )
-
-    # add information on adjustments
-    if (!is.null(out$current_adjustments)) {
-      exposure_outcome_text <- paste0(
-        exposure_outcome_text,
-        "\n- Adjustment",
-        ifelse(length(out$current_adjustments) > 1, "s", ""),
-        ": ", datawizard::text_concatenate(out$current_adjustments)
-      )
-    }
-
     # build message with check results for effects -----------------------
 
     if (isTRUE(out$adjustment_not_needed)) {
       # Scenario 1: no adjustment needed
       msg <- paste0(
         insight::color_text("Model is correctly specified.", "green"),
-        exposure_outcome_text,
-        "\n\nNo adjustment needed to estimate the ", i, " effect of ",
+        "\nNo adjustment needed to estimate the ", i, " effect of ",
         datawizard::text_concatenate(attributes(x)$exposure, enclose = "`"),
         " on `",
         attributes(x)$outcome,
@@ -276,8 +312,7 @@ print.check_dag <- function(x, ...) {
       # Scenario 2: incorrectly adjusted, adjustments where none is allowed
       msg <- paste0(
         insight::color_text("Incorrectly adjusted!", "red"),
-        exposure_outcome_text,
-        "\n\nTo estimate the ", i, " effect, do ",
+        "\nTo estimate the ", i, " effect, do ",
         insight::color_text("not", "italic"),
         " adjust for ",
         datawizard::text_concatenate(out$current_adjustments, enclose = "`"),
@@ -287,8 +322,7 @@ print.check_dag <- function(x, ...) {
       # Scenario 3: missing adjustments
       msg <- paste0(
         insight::color_text("Incorrectly adjusted!", "red"),
-        exposure_outcome_text,
-        "\n\nTo estimate the ", i, " effect, ",
+        "\nTo estimate the ", i, " effect, ",
         insight::color_text("also", "italic"),
         " adjust for ",
         insight::color_text(datawizard::text_concatenate(out$minimal_adjustments, enclose = "`"), "yellow"),
@@ -306,14 +340,13 @@ print.check_dag <- function(x, ...) {
       # Scenario 4: correct adjustment
       msg <- paste0(
         insight::color_text("Model is correctly specified.", "green"),
-        exposure_outcome_text,
-        "\n\nAll minimal sufficient adjustments to estimate the ", i, " effect were done."
+        "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done."
       )
     }
 
     if (effect %in% c("all", i)) {
       cat(insight::print_color(insight::format_message(
-        paste0("# Correct adjustments for identifying {.i ", i, "} effects\n\n")
+        paste0("Identification of {.i ", i, "} effects\n\n")
       ), "blue"))
       cat(msg)
       cat("\n\n")
diff --git a/man/check_dag.Rd b/man/check_dag.Rd
index 281a62ef9..2b259f478 100644
--- a/man/check_dag.Rd
+++ b/man/check_dag.Rd
@@ -48,16 +48,22 @@ The returned object also inherits from class \code{dagitty} and thus can be used
 with all functions from the \strong{ggdag} and \strong{dagitty} packages.
 }
 \description{
-\code{check_dag()} checks if a model is correctly adjusted for
-identifying causal effects. It returns a \strong{dagitty} object that can be
-visualized with \code{plot()}. \code{check_dag()} is a convenient wrapper around
-\code{ggdag::dagify()}, which used \code{dagitty::adjustmentSets()} and
-\code{dagitty::adjustedNodes()} to check if the model is correctly adjusted for
-identifying causal (direct and total) effects of a given exposure on the
-outcome. \code{as.dag()} is a small convenient function to return the dagitty-string,
-which can be used for the online-tool from the dagitty-website.
+The purpose of \code{check_dag()} is to build, check and visualize
+your model based on directed acyclic graphs (DAG). The function checks if a
+model is correctly adjusted for identifying specific relationships of
+variables, especially directed (maybe also "causal") effects for given
+exposures on an outcome. It returns a \strong{dagitty} object that can be
+visualized with \code{plot()}.
+
+\code{check_dag()} is a convenient wrapper around \code{ggdag::dagify()},
+\code{dagitty::adjustmentSets()} and \code{dagitty::adjustedNodes()} to check correct
+adjustment sets. \code{as.dag()} is a small convenient function to return the
+dagitty-string, which can be used for the online-tool from the
+dagitty-website.
 }
-\details{
+\section{Specifying the DAG formulas}{
+
+
 The formulas have following syntax:
 \itemize{
 \item One-directed paths: On the \emph{left-hand-side} is the name of the variables
@@ -67,9 +73,27 @@ to come from. For example, the formula \code{Y ~ X1 + X2}, paths directed from
 both \code{X1} and \code{X2} to \code{Y} are assumed.
 \item Bi-directed paths: Use \verb{~~} to indicate bi-directed paths. For example,
 \code{Y ~~ X} indicates that the path between \code{Y} and \code{X} is bi-directed, and
-the arrow points in both directions.
+the arrow points in both directions. Bi-directed paths often indicate
+unmeasured cause, or unmeasured confounding, of the two involved variables.
 }
 }
+
+\section{Why are DAGs important - the Table 2 fallacy}{
+
+
+Correctly thinking about and identifying the relationships between variables
+is important when it comes to reporting coefficients from regression models
+that mutually adjust for "confounders" or include covariates. Different
+coefficients might have different interpretations, depending on their
+relationship to other variables in the model. Sometimes, a regression
+coefficient represents the direct effect of an exposure on an outcome, but
+sometimes it must be interpreted as total effect, due to the involvement
+of mediating effects. This problem is also called "Table 2 fallacy"
+(\emph{Westreich and Greenland 2013}). DAGs help to visualize and thereby focus
+on the relationships of variables in a regression model to detect missing
+adjustments or over-adjustment.
+}
+
 \examples{
 \dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # no adjustment needed
@@ -115,3 +139,13 @@ dag
 plot(dag)
 \dontshow{\}) # examplesIf}
 }
+\references{
+\itemize{
+\item Rohrer, J. M. (2018). Thinking clearly about correlations and causation:
+Graphical causal models for observational data. Advances in Methods and
+Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629}
+\item Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and
+Interpreting Confounder and Modifier Coefficients. American Journal of
+Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412}
+}
+}
diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md
index 16432d033..d3b44a397 100644
--- a/tests/testthat/_snaps/check_dag.md
+++ b/tests/testthat/_snaps/check_dag.md
@@ -3,20 +3,18 @@
     Code
       print(dag)
     Output
-      # Correct adjustments for identifying direct effects
-      
-      Model is correctly specified.
+      # Check for correct adjustment sets
       - Outcome: y
       - Exposure: x
       
+      Identification of direct effects
+      
+      Model is correctly specified.
       No adjustment needed to estimate the direct effect of `x` on `y`.
       
-      # Correct adjustments for identifying total effects
+      Identification of total effects
       
       Model is correctly specified.
-      - Outcome: y
-      - Exposure: x
-      
       No adjustment needed to estimate the total effect of `x` on `y`.
       
 
@@ -25,22 +23,19 @@
     Code
       print(dag)
     Output
-      # Correct adjustments for identifying direct effects
-      
-      Model is correctly specified.
+      # Check for correct adjustment sets
       - Outcome: y
       - Exposure: x
       - Adjustment: b
       
+      Identification of direct effects
+      
+      Model is correctly specified.
       All minimal sufficient adjustments to estimate the direct effect were done.
       
-      # Correct adjustments for identifying total effects
+      Identification of total effects
       
       Model is correctly specified.
-      - Outcome: y
-      - Exposure: x
-      - Adjustment: b
-      
       All minimal sufficient adjustments to estimate the total effect were done.
       
 
@@ -49,21 +44,19 @@
     Code
       print(dag)
     Output
-      # Correct adjustments for identifying direct effects
-      
-      Incorrectly adjusted!
+      # Check for correct adjustment sets
       - Outcome: y
       - Exposure: x
       
+      Identification of direct effects
+      
+      Incorrectly adjusted!
       To estimate the direct effect, also adjust for `b`.
       Currently, the model does not adjust for any variables.
       
-      # Correct adjustments for identifying total effects
+      Identification of total effects
       
       Incorrectly adjusted!
-      - Outcome: y
-      - Exposure: x
-      
       To estimate the total effect, also adjust for `b`.
       Currently, the model does not adjust for any variables.
       
@@ -73,23 +66,43 @@
     Code
       print(dag)
     Output
-      # Correct adjustments for identifying direct effects
-      
-      Incorrectly adjusted!
+      # Check for correct adjustment sets
       - Outcome: y
       - Exposure: x
       - Adjustment: c
       
+      Identification of direct effects
+      
+      Incorrectly adjusted!
       To estimate the direct effect, also adjust for `b` and `c`.
       Currently, the model currently only adjusts for `c`.
       
-      # Correct adjustments for identifying total effects
+      Identification of total effects
       
       Incorrectly adjusted!
+      To estimate the total effect, also adjust for `b` and `c`.
+      Currently, the model currently only adjusts for `c`.
+      
+
+---
+
+    Code
+      print(dag)
+    Output
+      # Check for correct adjustment sets
       - Outcome: y
       - Exposure: x
       - Adjustment: c
       
+      Identification of direct effects
+      
+      Incorrectly adjusted!
+      To estimate the direct effect, also adjust for `b` and `c`.
+      Currently, the model currently only adjusts for `c`.
+      
+      Identification of total effects
+      
+      Incorrectly adjusted!
       To estimate the total effect, also adjust for `b` and `c`.
       Currently, the model currently only adjusts for `c`.
       
@@ -99,22 +112,19 @@
     Code
       print(dag)
     Output
-      # Correct adjustments for identifying direct effects
-      
-      Model is correctly specified.
+      # Check for correct adjustment sets
       - Outcome: mpg
       - Exposure: wt
       - Adjustments: cyl, disp and gear
       
+      Identification of direct effects
+      
+      Model is correctly specified.
       All minimal sufficient adjustments to estimate the direct effect were done.
       
-      # Correct adjustments for identifying total effects
+      Identification of total effects
       
       Model is correctly specified.
-      - Outcome: mpg
-      - Exposure: wt
-      - Adjustments: cyl, disp and gear
-      
       All minimal sufficient adjustments to estimate the total effect were done.
       
 
diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R
index efc7c95bf..7d834b60a 100644
--- a/tests/testthat/test-check_dag.R
+++ b/tests/testthat/test-check_dag.R
@@ -31,6 +31,15 @@ test_that("check_dag", {
     adjusted = "c"
   )
   expect_snapshot(print(dag))
+  dag <- check_dag(
+    y ~ x + b + c + d,
+    x ~ b,
+    x ~ c,
+    outcome = "y",
+    exposure = "x",
+    adjusted = "c"
+  )
+  expect_snapshot(print(dag))
   data(mtcars)
   m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars)
   dag <- check_dag(