From 3f7af632ee824acf196e2d6d5889422dafb3551b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 10:00:21 +0200 Subject: [PATCH] update print, docs --- DESCRIPTION | 2 +- R/check_dag.R | 103 +++++++++++++++++++---------- man/check_dag.Rd | 54 ++++++++++++--- tests/testthat/_snaps/check_dag.md | 78 ++++++++++++---------- tests/testthat/test-check_dag.R | 9 +++ 5 files changed, 166 insertions(+), 80 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9158e5179..494c87973 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.5 +Version: 0.12.2.6 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index 842508f10..451499de1 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -1,14 +1,18 @@ #' @title Check correct model adjustment for identifying causal effects #' @name check_dag #' -#' @description `check_dag()` checks if a model is correctly adjusted for -#' identifying causal effects. It returns a **dagitty** object that can be -#' visualized with `plot()`. `check_dag()` is a convenient wrapper around -#' `ggdag::dagify()`, which used `dagitty::adjustmentSets()` and -#' `dagitty::adjustedNodes()` to check if the model is correctly adjusted for -#' identifying causal (direct and total) effects of a given exposure on the -#' outcome. `as.dag()` is a small convenient function to return the dagitty-string, -#' which can be used for the online-tool from the dagitty-website. +#' @description The purpose of `check_dag()` is to build, check and visualize +#' your model based on directed acyclic graphs (DAG). The function checks if a +#' model is correctly adjusted for identifying specific relationships of +#' variables, especially directed (maybe also "causal") effects for given +#' exposures on an outcome. It returns a **dagitty** object that can be +#' visualized with `plot()`. 
+#' +#' `check_dag()` is a convenient wrapper around `ggdag::dagify()`, +#' `dagitty::adjustmentSets()` and `dagitty::adjustedNodes()` to check correct +#' adjustment sets. `as.dag()` is a small convenient function to return the +#' dagitty-string, which can be used for the online-tool from the +#' dagitty-website. #' #' @param ... One or more formulas, which are converted into **dagitty** syntax. #' First element may also be model object. If a model objects is provided, its @@ -29,7 +33,8 @@ #' `"all"` (default), `"total"`, or `"direct"`. #' @param x An object of class `check_dag`, as returned by `check_dag()`. #' -#' @details +#' @section Specifying the DAG formulas: +#' #' The formulas have following syntax: #' #' - One-directed paths: On the *left-hand-side* is the name of the variables @@ -40,12 +45,36 @@ #' #' - Bi-directed paths: Use `~~` to indicate bi-directed paths. For example, #' `Y ~~ X` indicates that the path between `Y` and `X` is bi-directed, and -#' the arrow points in both directions. +#' the arrow points in both directions. Bi-directed paths often indicate +#' unmeasured cause, or unmeasured confounding, of the two involved variables. +#' +#' @section Why are DAGs important - the Table 2 fallacy: +#' +#' Correctly thinking about and identifying the relationships between variables +#' is important when it comes to reporting coefficients from regression models +#' that mutually adjust for "confounders" or include covariates. Different +#' coefficients might have different interpretations, depending on their +#' relationship to other variables in the model. Sometimes, a regression +#' coefficient represents the direct effect of an exposure on an outcome, but +#' sometimes it must be interpreted as total effect, due to the involvement +#' of mediating effects. This problem is also called "Table 2 fallacy" +#' (_Westreich and Greenland 2013_). 
DAGs help visualizing and thereby focusing +#' on the relationships of variables in a regression model to detect missing +#' adjustments or over-adjustment. #' #' @return An object of class `check_dag`, which can be visualized with `plot()`. #' The returned object also inherits from class `dagitty` and thus can be used #' with all functions from the **ggdag** and **dagitty** packages. #' +#' @references +#' - Rohrer, J. M. (2018). Thinking clearly about correlations and causation: +#' Graphical causal models for observational data. Advances in Methods and +#' Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629} +#' +#' - Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and +#' Interpreting Confounder and Modifier Coefficients. American Journal of +#' Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412} +#' #' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) #' # no adjustment needed #' check_dag( @@ -203,6 +232,7 @@ check_dag.default <- function(..., attr(dag, "outcome") <- outcome attr(dag, "exposure") <- exposure attr(dag, "adjusted") <- adjusted + attr(dag, "adjustment_sets") <- checks[[1]]$current_adjustments attr(dag, "check_direct") <- insight::compact_list(checks[[1]]) attr(dag, "check_total") <- insight::compact_list(checks[[2]]) @@ -236,6 +266,29 @@ as.dag <- function(x, ...) { print.check_dag <- function(x, ...) 
{ effect <- attributes(x)$effect + # header + cat(insight::print_color("# Check for correct adjustment sets", "blue")) + + # model specification + exposure_outcome_text <- paste0( + "\n- Outcome: ", attributes(x)$outcome, + "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""), + ": ", datawizard::text_concatenate(attributes(x)$exposure) + ) + + # add information on adjustments + if (!is.null(attributes(x)$adjustment_sets)) { + exposure_outcome_text <- paste0( + exposure_outcome_text, + "\n- Adjustment", + ifelse(length(attributes(x)$adjustment_sets) > 1, "s", ""), + ": ", datawizard::text_concatenate(attributes(x)$adjustment_sets) + ) + } + + cat(exposure_outcome_text) + cat("\n\n") + for (i in c("direct", "total")) { if (i == "direct") { out <- attributes(x)$check_direct @@ -243,30 +296,13 @@ print.check_dag <- function(x, ...) { out <- attributes(x)$check_total } - exposure_outcome_text <- paste0( - "\n- Outcome: ", attributes(x)$outcome, - "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""), - ": ", datawizard::text_concatenate(attributes(x)$exposure) - ) - - # add information on adjustments - if (!is.null(out$current_adjustments)) { - exposure_outcome_text <- paste0( - exposure_outcome_text, - "\n- Adjustment", - ifelse(length(out$current_adjustments) > 1, "s", ""), - ": ", datawizard::text_concatenate(out$current_adjustments) - ) - } - # build message with check results for effects ----------------------- if (isTRUE(out$adjustment_not_needed)) { # Scenario 1: no adjustment needed msg <- paste0( insight::color_text("Model is correctly specified.", "green"), - exposure_outcome_text, - "\n\nNo adjustment needed to estimate the ", i, " effect of ", + "\nNo adjustment needed to estimate the ", i, " effect of ", datawizard::text_concatenate(attributes(x)$exposure, enclose = "`"), " on `", attributes(x)$outcome, @@ -276,8 +312,7 @@ print.check_dag <- function(x, ...) 
{ # Scenario 2: incorrectly adjusted, adjustments where none is allowed msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), - exposure_outcome_text, - "\n\nTo estimate the ", i, " effect, do ", + "\nTo estimate the ", i, " effect, do ", insight::color_text("not", "italic"), " adjust for ", datawizard::text_concatenate(out$current_adjustments, enclose = "`"), @@ -287,8 +322,7 @@ print.check_dag <- function(x, ...) { # Scenario 3: missing adjustments msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), - exposure_outcome_text, - "\n\nTo estimate the ", i, " effect, ", + "\nTo estimate the ", i, " effect, ", insight::color_text("also", "italic"), " adjust for ", insight::color_text(datawizard::text_concatenate(out$minimal_adjustments, enclose = "`"), "yellow"), @@ -306,14 +340,13 @@ print.check_dag <- function(x, ...) { # Scenario 4: correct adjustment msg <- paste0( insight::color_text("Model is correctly specified.", "green"), - exposure_outcome_text, - "\n\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." + "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." ) } if (effect %in% c("all", i)) { cat(insight::print_color(insight::format_message( - paste0("# Correct adjustments for identifying {.i ", i, "} effects\n\n") + paste0("Identification of {.i ", i, "} effects\n\n") ), "blue")) cat(msg) cat("\n\n") diff --git a/man/check_dag.Rd b/man/check_dag.Rd index 281a62ef9..2b259f478 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -48,16 +48,22 @@ The returned object also inherits from class \code{dagitty} and thus can be used with all functions from the \strong{ggdag} and \strong{dagitty} packages. } \description{ -\code{check_dag()} checks if a model is correctly adjusted for -identifying causal effects. It returns a \strong{dagitty} object that can be -visualized with \code{plot()}. 
\code{check_dag()} is a convenient wrapper around -\code{ggdag::dagify()}, which used \code{dagitty::adjustmentSets()} and -\code{dagitty::adjustedNodes()} to check if the model is correctly adjusted for -identifying causal (direct and total) effects of a given exposure on the -outcome. \code{as.dag()} is a small convenient function to return the dagitty-string, -which can be used for the online-tool from the dagitty-website. +The purpose of \code{check_dag()} is to build, check and visualize +your model based on directed acyclic graphs (DAG). The function checks if a +model is correctly adjusted for identifying specific relationships of +variables, especially directed (maybe also "causal") effects for given +exposures on an outcome. It returns a \strong{dagitty} object that can be +visualized with \code{plot()}. + +\code{check_dag()} is a convenient wrapper around \code{ggdag::dagify()}, +\code{dagitty::adjustmentSets()} and \code{dagitty::adjustedNodes()} to check correct +adjustment sets. \code{as.dag()} is a small convenient function to return the +dagitty-string, which can be used for the online-tool from the +dagitty-website. } -\details{ +\section{Specifying the DAG formulas}{ + + The formulas have following syntax: \itemize{ \item One-directed paths: On the \emph{left-hand-side} is the name of the variables @@ -67,9 +73,27 @@ to come from. For example, the formula \code{Y ~ X1 + X2}, paths directed from both \code{X1} and \code{X2} to \code{Y} are assumed. \item Bi-directed paths: Use \verb{~~} to indicate bi-directed paths. For example, \code{Y ~~ X} indicates that the path between \code{Y} and \code{X} is bi-directed, and -the arrow points in both directions. +the arrow points in both directions. Bi-directed paths often indicate +unmeasured cause, or unmeasured confounding, of the two involved variables. 
} } + +\section{Why are DAGs important - the Table 2 fallacy}{ + + +Correctly thinking about and identifying the relationships between variables +is important when it comes to reporting coefficients from regression models +that mutually adjust for "confounders" or include covariates. Different +coefficients might have different interpretations, depending on their +relationship to other variables in the model. Sometimes, a regression +coefficient represents the direct effect of an exposure on an outcome, but +sometimes it must be interpreted as total effect, due to the involvement +of mediating effects. This problem is also called "Table 2 fallacy" +(\emph{Westreich and Greenland 2013}). DAGs help visualizing and thereby focusing +on the relationships of variables in a regression model to detect missing +adjustments or over-adjustment. +} + \examples{ \dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # no adjustment needed @@ -115,3 +139,13 @@ dag plot(dag) \dontshow{\}) # examplesIf} } +\references{ +\itemize{ +\item Rohrer, J. M. (2018). Thinking clearly about correlations and causation: +Graphical causal models for observational data. Advances in Methods and +Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629} +\item Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and +Interpreting Confounder and Modifier Coefficients. American Journal of +Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412} +} +} diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index 16432d033..d3b44a397 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -3,20 +3,18 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Model is correctly specified. 
+ # Check for correct adjustment sets - Outcome: y - Exposure: x + Identification of direct effects + + Model is correctly specified. No adjustment needed to estimate the direct effect of `x` on `y`. - # Correct adjustments for identifying total effects + Identification of total effects Model is correctly specified. - - Outcome: y - - Exposure: x - No adjustment needed to estimate the total effect of `x` on `y`. @@ -25,22 +23,19 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Model is correctly specified. + # Check for correct adjustment sets - Outcome: y - Exposure: x - Adjustment: b + Identification of direct effects + + Model is correctly specified. All minimal sufficient adjustments to estimate the direct effect were done. - # Correct adjustments for identifying total effects + Identification of total effects Model is correctly specified. - - Outcome: y - - Exposure: x - - Adjustment: b - All minimal sufficient adjustments to estimate the total effect were done. @@ -49,21 +44,19 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Incorrectly adjusted! + # Check for correct adjustment sets - Outcome: y - Exposure: x + Identification of direct effects + + Incorrectly adjusted! To estimate the direct effect, also adjust for `b`. Currently, the model does not adjust for any variables. - # Correct adjustments for identifying total effects + Identification of total effects Incorrectly adjusted! - - Outcome: y - - Exposure: x - To estimate the total effect, also adjust for `b`. Currently, the model does not adjust for any variables. @@ -73,23 +66,43 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Incorrectly adjusted! + # Check for correct adjustment sets - Outcome: y - Exposure: x - Adjustment: c + Identification of direct effects + + Incorrectly adjusted! To estimate the direct effect, also adjust for `b` and `c`. 
Currently, the model currently only adjusts for `c`. - # Correct adjustments for identifying total effects + Identification of total effects Incorrectly adjusted! + To estimate the total effect, also adjust for `b` and `c`. + Currently, the model currently only adjusts for `c`. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets - Outcome: y - Exposure: x - Adjustment: c + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, also adjust for `b` and `c`. + Currently, the model currently only adjusts for `c`. + + Identification of total effects + + Incorrectly adjusted! To estimate the total effect, also adjust for `b` and `c`. Currently, the model currently only adjusts for `c`. @@ -99,22 +112,19 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Model is correctly specified. + # Check for correct adjustment sets - Outcome: mpg - Exposure: wt - Adjustments: cyl, disp and gear + Identification of direct effects + + Model is correctly specified. All minimal sufficient adjustments to estimate the direct effect were done. - # Correct adjustments for identifying total effects + Identification of total effects Model is correctly specified. - - Outcome: mpg - - Exposure: wt - - Adjustments: cyl, disp and gear - All minimal sufficient adjustments to estimate the total effect were done. diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index efc7c95bf..7d834b60a 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -31,6 +31,15 @@ test_that("check_dag", { adjusted = "c" ) expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b + c + d, + x ~ b, + x ~ c, + outcome = "y", + exposure = "x", + adjusted = "c" + ) + expect_snapshot(print(dag)) data(mtcars) m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) dag <- check_dag(