From aa47f8c0e7025cf4e8d9c314588df4893589b823 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 14 Jul 2024 00:32:04 +0200 Subject: [PATCH 1/5] Possible bug: error is thrown saying check_model is not implemented, if model formula contains a ratio (#751) --- DESCRIPTION | 2 +- NEWS.md | 3 +++ R/check_predictions.R | 10 +++++++++- tests/testthat/test-check_predictions.R | 15 +++++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e8b5bc7d0..2fdb67ab8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.0.10 +Version: 0.12.0.11 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index bf8f36f70..3c9ac74ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,9 @@ variable that was named like a valid R function name (e.g., `lm(log(lapply) ~ x)`, when data contained a variable named `lapply`). +* Fixed issue in `check_predictions()` for linear models when response was + transformed as ratio (e.g. `lm(succes/trials ~ x)`). + # performance 0.12.0 ## Breaking diff --git a/R/check_predictions.R b/R/check_predictions.R index e9ee08c78..fbc3b5eaf 100644 --- a/R/check_predictions.R +++ b/R/check_predictions.R @@ -303,10 +303,18 @@ pp_check.lm <- function(object, pattern <- "^(scale|exp|expm1|log|log1p|log10|log2|sqrt)" # check for transformed response, and backtransform simulations - if (!is.null(resp_string) && grepl(paste0(pattern, "\\("), resp_string)) { + if (!is.null(resp_string) && length(resp_string) == 1 && grepl(paste0(pattern, "\\("), resp_string)) { out <- .backtransform_sims(out, resp_string) } + # sanity check - do we have a ratio or similar? + if (is.data.frame(response)) { + # get response data, evaluate formula + response <- eval(str2lang(insight::find_response(object)), + envir = insight::get_response(object) + ) + } + out$y <- response attr(out, "check_range") <- check_range diff --git a/tests/testthat/test-check_predictions.R b/tests/testthat/test-check_predictions.R index 5eb44658f..04bc1d2cd 100644 --- a/tests/testthat/test-check_predictions.R +++ b/tests/testthat/test-check_predictions.R @@ -128,3 +128,18 @@ test_that("check_predictions, glm, binomial", { expect_equal(head(out4$sim_1), c(0, 0, 0, 1, 0, 1), tolerance = 1e-4) expect_true(attributes(out4)$model_info$is_bernoulli) }) + + +test_that("check_predictions, lm, ratio-response", { + skip_if_not_installed("lme4") + data(cbpp, package = "lme4") + model1 <- lm(I(incidence / size) ~ period, data = cbpp) + set.seed(123) + out <- check_predictions(model1) + expect_equal( + head(out$y), + c(0.14286, 0.25, 0.44444, 0, 0.13636, 0.05556), + ignore_attr = TRUE, + tolerance = 1e-4 + ) +}) From 6e1eb60370ac87477dc78d362b143130a5ce5747 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 14 Jul 2024 01:21:28 +0200 Subject: [PATCH 2/5] Explained Variance for random slopes model (#752) --- R/helpers.R | 2 +- R/r2_nakagawa.R | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index 09b211678..5c4d97b03 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -1,6 +1,6 @@ # small wrapper around this commonly used try-catch .safe <- function(code, on_error = NULL) { - if (isTRUE(getOption("easystats_errors", FALSE) && is.null(on_error))) { + if (isTRUE(getOption("easystats_errors", FALSE)) && is.null(on_error)) { code } else { tryCatch(code, error = function(e) on_error) diff --git a/R/r2_nakagawa.R b/R/r2_nakagawa.R index dfa1eb6dc..1246a444a 100644 --- a/R/r2_nakagawa.R +++ b/R/r2_nakagawa.R @@ -131,9 +131,16 @@ r2_nakagawa <- function(model, # null-model if (is.null(null_model)) { - null_model <- insight::null_model(model) + null_model <- insight::null_model( + model, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) } - vars_null <- insight::get_variance(null_model, tolerance = tolerance) + vars_null <- insight::get_variance( + null_model, + tolerance = tolerance, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) # names of group levels group_names <- insight::find_random(model, split_nested = TRUE, flatten = TRUE) From 96cdb6cd5ee666308702b564bd4b60228aa4d00a Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 14 Jul 2024 02:09:56 +0200 Subject: [PATCH 3/5] Marginal R2 is a misnomer in brms (#753) --- DESCRIPTION | 2 +- NEWS.md | 2 ++ R/r2_bayes.R | 56 ++++++++++++++++++++++++++-------- man/icc.Rd | 14 +++++---- man/r2_bayes.Rd | 20 ++++++++++-- man/r2_nakagawa.Rd | 14 +++++---- tests/testthat/test-r2_bayes.R | 12 ++++++++ 7 files changed, 91 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2fdb67ab8..b444674d8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.0.11 +Version: 0.12.0.12 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 3c9ac74ac..756192b52 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,8 @@ * New function `r2_ferrari()` to compute Ferrari & Cribari-Neto's R2 for generalized linear models, in particular beta-regression. +* Improved documentation of some functions. + ## Bug fixes * Fixed issue in `check_model()` when model contained a transformed response diff --git a/R/r2_bayes.R b/R/r2_bayes.R index f96e98b41..4831a4bd2 100644 --- a/R/r2_bayes.R +++ b/R/r2_bayes.R @@ -21,12 +21,20 @@ #' credible intervals for the R2 values are saved as attributes. #' #' @details -#' `r2_bayes()` returns an "unadjusted" R2 value. See [r2_loo()] to calculate a +#' `r2_bayes()` returns an "unadjusted" R2 value. See [`r2_loo()`] to calculate a #' LOO-adjusted R2, which comes conceptually closer to an adjusted R2 measure. #' #' For mixed models, the conditional and marginal R2 are returned. The marginal #' R2 considers only the variance of the fixed effects, while the conditional R2 -#' takes both the fixed and random effects into account. +#' takes both the fixed and random effects into account. Technically, since +#' `r2_bayes()` relies on [`rstantools::bayes_R2()`], the "marginal" R2 calls +#' `bayes_R2(re.form = NA)`, while the "conditional" R2 calls +#' `bayes_R2(re.form = NULL)`. The `re.form` argument is passed to +#' [`rstantools::posterior_epred()`], which is internally called in `bayes_R2()`. +#' +#' Note that for "marginal" and "conditional", we refer to the wording suggested +#' by _Nakagawa et al. 2017_. Thus, we don't use the term "marginal" in the sense +#' that the random effects are integrated out, but are "ignored". #' #' `r2_posterior()` is the actual workhorse for `r2_bayes()` and returns a #' posterior sample of Bayesian R2 values. @@ -72,9 +80,13 @@ #' r2_bayes(model) #' } #' @references -#' Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). R-squared for -#' Bayesian regression models. The American Statistician, 1–6. -#' \doi{10.1080/00031305.2018.1549100} +#' - Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). R-squared for +#' Bayesian regression models. The American Statistician, 1–6. +#' \doi{10.1080/00031305.2018.1549100} +#' - Nakagawa, S., Johnson, P. C. D., and Schielzeth, H. (2017). The +#' coefficient of determination R2 and intra-class correlation coefficient from +#' generalized linear mixed-effects models revisited and expanded. Journal of +#' The Royal Society Interface, 14(134), 20170213. #' @export r2_bayes <- function(model, robust = TRUE, ci = 0.95, verbose = TRUE, ...) { r2_bayesian <- r2_posterior(model, verbose = verbose, ...) @@ -185,20 +197,38 @@ r2_posterior.brmsfit <- function(model, verbose = TRUE, ...) { names(br2) <- res } } else if (mi$is_mixed) { - br2 <- list( - R2_Bayes = as.vector(rstantools::bayes_R2( + if (inherits(model, "stanreg")) { + pred_cond <- rstanarm::posterior_epred( model, re.form = NULL, re_formula = NULL, - summary = FALSE - )), - R2_Bayes_marginal = as.vector(rstantools::bayes_R2( + ) + pred_marginal <- rstanarm::posterior_epred( model, re.form = NA, re_formula = NA, - summary = FALSE - )) - ) + ) + y <- insight::get_response(model) + br2 <- list( + R2_Bayes = as.vector(rstantools::bayes_R2(pred_cond, y = y)), + R2_Bayes_marginal = as.vector(rstantools::bayes_R2(pred_marginal, y = y)) + ) + } else { + br2 <- list( + R2_Bayes = as.vector(rstantools::bayes_R2( + model, + re.form = NULL, + re_formula = NULL, + summary = FALSE + )), + R2_Bayes_marginal = as.vector(rstantools::bayes_R2( + model, + re.form = NA, + re_formula = NA, + summary = FALSE + )) + ) + } names(br2$R2_Bayes) <- rep("Conditional R2", length(br2$R2_Bayes)) names(br2$R2_Bayes_marginal) <- rep("Marginal R2", length(br2$R2_Bayes)) } else { diff --git a/man/icc.Rd b/man/icc.Rd index 6b6edbcb8..071e1b617 100644 --- a/man/icc.Rd +++ b/man/icc.Rd @@ -59,14 +59,16 @@ model, you can pass it here, too, to speed up the process.} \item{approximation}{Character string, indicating the approximation method for the distribution-specific (observation level, or residual) variance. Only applies to non-Gaussian models. Can be \code{"lognormal"} (default), \code{"delta"} or -\code{"trigamma"}. For binomial models, the default is the \emph{theoretical} distribution -specific variance, however, it can also be \code{"observation_level"}. See -\emph{Nakagawa et al. 2017}, in particular supplement 2, for details.} +\code{"trigamma"}. For binomial models, the default is the \emph{theoretical} +distribution specific variance, however, it can also be +\code{"observation_level"}. See \emph{Nakagawa et al. 2017}, in particular supplement +2, for details.} \item{model_component}{For models that can have a zero-inflation component, -specify for which component variances should be returned. If \code{NULL} or \code{"full"} -(the default), both the conditional and the zero-inflation component are taken -into account. If \code{"conditional"}, only the conditional component is considered.} +specify for which component variances should be returned. If \code{NULL} or +\code{"full"} (the default), both the conditional and the zero-inflation component +are taken into account. If \code{"conditional"}, only the conditional component is +considered.} \item{verbose}{Toggle warnings and messages.} diff --git a/man/r2_bayes.Rd b/man/r2_bayes.Rd index 63f9843ba..05280c992 100644 --- a/man/r2_bayes.Rd +++ b/man/r2_bayes.Rd @@ -55,13 +55,21 @@ LOO-adjusted R2, which comes conceptually closer to an adjusted R2 measure. For mixed models, the conditional and marginal R2 are returned. The marginal R2 considers only the variance of the fixed effects, while the conditional R2 -takes both the fixed and random effects into account. +takes both the fixed and random effects into account. Technically, since +\code{r2_bayes()} relies on \code{\link[rstantools:bayes_R2]{rstantools::bayes_R2()}}, the "marginal" R2 calls +\code{bayes_R2(re.form = NA)}, while the "conditional" R2 calls +\code{bayes_R2(re.form = NULL)}. The \code{re.form} argument is passed to +\code{\link[rstantools:posterior_epred]{rstantools::posterior_epred()}}, which is internally called in \code{bayes_R2()}. + +Note that for "marginal" and "conditional", we refer to the wording suggested +by \emph{Nakagawa et al. 2017}. Thus, we don't use the term "marginal" in the sense +that the random effects are integrated out, but are "ignored". \code{r2_posterior()} is the actual workhorse for \code{r2_bayes()} and returns a posterior sample of Bayesian R2 values. } \examples{ -\dontshow{if (require("rstanarm") && require("rstantools") && require("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("rstantools") && require("brms") && require("RcppEigen")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(performance) \donttest{ model <- suppressWarnings(rstanarm::stan_glm( @@ -104,7 +112,13 @@ r2_bayes(model) \dontshow{\}) # examplesIf} } \references{ -Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). R-squared for +\itemize{ +\item Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). R-squared for Bayesian regression models. The American Statistician, 1–6. \doi{10.1080/00031305.2018.1549100} +\item Nakagawa, S., Johnson, P. C. D., and Schielzeth, H. (2017). The +coefficient of determination R2 and intra-class correlation coefficient from +generalized linear mixed-effects models revisited and expanded. Journal of +The Royal Society Interface, 14(134), 20170213. +} } diff --git a/man/r2_nakagawa.Rd b/man/r2_nakagawa.Rd index b8c8601e3..169af5709 100644 --- a/man/r2_nakagawa.Rd +++ b/man/r2_nakagawa.Rd @@ -59,14 +59,16 @@ model, you can pass it here, too, to speed up the process.} \item{approximation}{Character string, indicating the approximation method for the distribution-specific (observation level, or residual) variance. Only applies to non-Gaussian models. Can be \code{"lognormal"} (default), \code{"delta"} or -\code{"trigamma"}. For binomial models, the default is the \emph{theoretical} distribution -specific variance, however, it can also be \code{"observation_level"}. See -\emph{Nakagawa et al. 2017}, in particular supplement 2, for details.} +\code{"trigamma"}. For binomial models, the default is the \emph{theoretical} +distribution specific variance, however, it can also be +\code{"observation_level"}. See \emph{Nakagawa et al. 2017}, in particular supplement +2, for details.} \item{model_component}{For models that can have a zero-inflation component, -specify for which component variances should be returned. If \code{NULL} or \code{"full"} -(the default), both the conditional and the zero-inflation component are taken -into account. If \code{"conditional"}, only the conditional component is considered.} +specify for which component variances should be returned. If \code{NULL} or +\code{"full"} (the default), both the conditional and the zero-inflation component +are taken into account. If \code{"conditional"}, only the conditional component is +considered.} \item{verbose}{Toggle warnings and messages.} diff --git a/tests/testthat/test-r2_bayes.R b/tests/testthat/test-r2_bayes.R index 00e3a54e2..97820908d 100644 --- a/tests/testthat/test-r2_bayes.R +++ b/tests/testthat/test-r2_bayes.R @@ -29,3 +29,15 @@ test_that("r2_BayesFactor", { expect_equal(r_CI$CI_low, 0.27, tolerance = 0.05) expect_equal(r_CI$CI_high, 0.54, tolerance = 0.05) }) + +test_that("r2_bayes", { + skip_on_cran() + skip_if_not_installed("rstanarm") + skip_if_not_installed("rstantools") + skip_if_not_installed("httr2") + model <- insight::download_model("stanreg_lmerMod_1") + set.seed(123) + out <- r2_bayes(model) + expect_equal(out$R2_Bayes, 0.642, tolerance = 1e-3) + expect_equal(out$R2_Bayes_marginal, 0.58534, tolerance = 1e-3) +}) From 758e3c03320f88e60d49c92d6980a22d5d8a0ee7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 14 Jul 2024 02:10:36 +0200 Subject: [PATCH 4/5] desc, news --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b444674d8..1b711cb64 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.0.12 +Version: 0.12.0.13 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 756192b52..3fca779e1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,6 +32,8 @@ * Fixed issue in `check_predictions()` for linear models when response was transformed as ratio (e.g. `lm(succes/trials ~ x)`). +* Fixed issue in `r2_bayes()` for mixed models from *rstanarm*. + # performance 0.12.0 ## Breaking From 9145e1595f476e57104e54548152cf83fa88968c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 14 Jul 2024 09:06:20 +0200 Subject: [PATCH 5/5] fix test, revise tests --- tests/testthat/test-icc.R | 10 +--------- tests/testthat/test-model_performance.bayesian.R | 11 ++++++----- tests/testthat/test-r2_bayes.R | 4 +++- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/tests/testthat/test-icc.R b/tests/testthat/test-icc.R index e537b8b1f..82da6a3eb 100644 --- a/tests/testthat/test-icc.R +++ b/tests/testthat/test-icc.R @@ -1,13 +1,12 @@ skip_on_os("mac") +skip_on_cran() test_that("icc", { - skip_on_cran() m0 <- lm(Sepal.Length ~ Petal.Length, data = iris) expect_warning(expect_null(icc(m0))) }) test_that("icc", { - skip_on_cran() skip_if_not_installed("lme4") m1 <- lme4::lmer(Sepal.Length ~ Petal.Length + (1 | Species), data = iris) expect_equal( @@ -25,7 +24,6 @@ test_that("icc", { # bootstrapped CIs ------------ test_that("icc, CI", { - skip_on_cran() skip_if_not_installed("lme4") data(sleepstudy, package = "lme4") m <- lme4::lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) @@ -37,7 +35,6 @@ test_that("icc, CI", { test_that("icc", { - skip_on_cran() skip_if_not_installed("curl") skip_if_offline() skip_if_not_installed("httr2") @@ -54,7 +51,6 @@ test_that("icc", { }) test_that("icc", { - skip_on_cran() skip_if_not_installed("curl") skip_if_offline() skip_if_not_installed("httr2") @@ -68,7 +64,6 @@ test_that("icc", { }) test_that("icc", { - skip_on_cran() skip_if_not_installed("curl") skip_if_offline() skip_if_not_installed("httr2") @@ -86,7 +81,6 @@ test_that("icc", { }) test_that("icc", { - skip_on_cran() skip_if_not_installed("lme4") data(sleepstudy, package = "lme4") set.seed(12345) @@ -117,7 +111,6 @@ test_that("icc", { test_that("icc", { - skip_on_cran() skip_if_not_installed("nlme") skip_if_not_installed("lme4") m <- nlme::lme(Sepal.Length ~ Petal.Length, random = ~ 1 | Species, data = iris) @@ -128,7 +121,6 @@ test_that("icc", { test_that("icc, glmmTMB 1.1.9+", { - skip_on_cran() skip_if_not_installed("glmmTMB", minimum_version = "1.1.9") set.seed(101) dd <- data.frame( diff --git a/tests/testthat/test-model_performance.bayesian.R b/tests/testthat/test-model_performance.bayesian.R index da4b6ff18..ff78ca3cb 100644 --- a/tests/testthat/test-model_performance.bayesian.R +++ b/tests/testthat/test-model_performance.bayesian.R @@ -1,5 +1,8 @@ +skip_on_cran() +skip_if_not_installed("rstanarm") +skip_if_not_installed("rstantools") + test_that("model_performance.stanreg", { - skip_on_cran() skip_if_not_installed("curl") skip_if_offline() skip_if_not_installed("httr2") @@ -24,14 +27,13 @@ test_that("model_performance.stanreg", { skip_if(is.null(model)) perf <- model_performance(model) - expect_equal(perf$R2, 0.6286546, tolerance = 1e-3) - expect_equal(perf$R2_adjusted, 0.6053507, tolerance = 1e-3) + expect_equal(perf$R2, 0.642, tolerance = 1e-3) + expect_equal(perf$R2_adjusted, 0.6053454, tolerance = 1e-3) expect_equal(perf$ELPD, -31.55849, tolerance = 1e-3) }) test_that("model_performance.brmsfit", { - skip_on_cran() skip_if_not_installed("curl") skip_if_offline() skip_if_not_installed("httr2") @@ -72,7 +74,6 @@ test_that("model_performance.brmsfit", { test_that("model_performance.BFBayesFactor", { - skip_on_cran() skip_if_not_installed("BayesFactor") mod <- BayesFactor::ttestBF(mtcars$wt, mu = 3) expect_warning({ diff --git a/tests/testthat/test-r2_bayes.R b/tests/testthat/test-r2_bayes.R index 97820908d..6c6a3a57d 100644 --- a/tests/testthat/test-r2_bayes.R +++ b/tests/testthat/test-r2_bayes.R @@ -32,9 +32,11 @@ test_that("r2_BayesFactor", { test_that("r2_bayes", { skip_on_cran() + skip_if_not_installed("curl") + skip_if_offline() + skip_if_not_installed("httr2") skip_if_not_installed("rstanarm") skip_if_not_installed("rstantools") - skip_if_not_installed("httr2") model <- insight::download_model("stanreg_lmerMod_1") set.seed(123) out <- r2_bayes(model)