From 16731d802b06be365bee21e16976faa4e2979512 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 5 Jul 2024 11:44:44 +0200 Subject: [PATCH 1/3] 95 CI for rmse Fix #740 --- DESCRIPTION | 2 +- NAMESPACE | 2 + NEWS.md | 4 ++ R/icc.R | 40 ++++++------ R/performance_rmse.R | 133 +++++++++++++++++++++++++++++++++++----- man/icc.Rd | 35 ++++++----- man/performance_mae.Rd | 4 +- man/performance_mse.Rd | 4 +- man/performance_rmse.Rd | 44 ++++++++++++- man/r2_nakagawa.Rd | 30 ++++----- 10 files changed, 227 insertions(+), 71 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d0c466e6f..1b7413974 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.0.4 +Version: 0.12.0.5 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NAMESPACE b/NAMESPACE index 7a20bd1dc..c46c53cc6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -148,6 +148,7 @@ S3method(display,test_performance) S3method(fitted,BFBayesFactor) S3method(format,compare_performance) S3method(format,performance_model) +S3method(format,performance_rmse) S3method(format,test_performance) S3method(logLik,cpglm) S3method(logLik,iv_robust) @@ -319,6 +320,7 @@ S3method(print,performance_hosmer) S3method(print,performance_model) S3method(print,performance_pcp) S3method(print,performance_pp_check) +S3method(print,performance_rmse) S3method(print,performance_roc) S3method(print,performance_score) S3method(print,performance_simres) diff --git a/NEWS.md b/NEWS.md index d032d4e00..775d2d7be 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,10 @@ * `icc()` and `r2_nakagawa()` get a `model_component` argument indicating the component for zero-inflation or hurdle models. +* `performance_rmse()` (resp. `rmse()`) can now compute analytical and + bootstrapped confidence intervals. The function gains following new arguments: + `ci`, `ci_method` and `iterations`. + # performance 0.12.0 ## Breaking diff --git a/R/icc.R b/R/icc.R index c89df5f42..52117bc4a 100644 --- a/R/icc.R +++ b/R/icc.R @@ -1,5 +1,7 @@ -#' Intraclass Correlation Coefficient (ICC) +#' @title Intraclass Correlation Coefficient (ICC) +#' @name icc #' +#' @description #' This function calculates the intraclass-correlation coefficient (ICC) - #' sometimes also called *variance partition coefficient* (VPC) or #' *repeatability* - for mixed effects models. The ICC can be calculated for all @@ -15,24 +17,23 @@ #' Else, for instance for nested models, name a specific group-level effect #' to calculate the variance decomposition for this group-level. See 'Details' #' and `?brms::posterior_predict`. -#' @param ci Confidence resp. credible interval level. For `icc()` and `r2()`, -#' confidence intervals are based on bootstrapped samples from the ICC resp. -#' R2 value. See `iterations`. -#' @param by_group Logical, if `TRUE`, `icc()` returns the variance -#' components for each random-effects level (if there are multiple levels). -#' See 'Details'. +#' @param ci Confidence resp. credible interval level. For `icc()`, `r2()`, and +#' `rmse()`, confidence intervals are based on bootstrapped samples from the +#' ICC, R2 or RMSE value. See `iterations`. +#' @param by_group Logical, if `TRUE`, `icc()` returns the variance components +#' for each random-effects level (if there are multiple levels). See 'Details'. #' @param iterations Number of bootstrap-replicates when computing confidence -#' intervals for the ICC or R2. +#' intervals for the ICC, R2, RMSE etc. #' @param ci_method Character string, indicating the bootstrap-method. Should -#' be `NULL` (default), in which case `lme4::bootMer()` is used for -#' bootstrapped confidence intervals. However, if bootstrapped intervals cannot -#' be calculated this was, try `ci_method = "boot"`, which falls back to -#' `boot::boot()`. This may successfully return bootstrapped confidence intervals, -#' but bootstrapped samples may not be appropriate for the multilevel structure -#' of the model. There is also an option `ci_method = "analytical"`, which tries -#' to calculate analytical confidence assuming a chi-squared distribution. -#' However, these intervals are rather inaccurate and often too narrow. It is -#' recommended to calculate bootstrapped confidence intervals for mixed models. +#' be `NULL` (default), in which case `lme4::bootMer()` is used for bootstrapped +#' confidence intervals. However, if bootstrapped intervals cannot be calculated +#' this way, try `ci_method = "boot"`, which falls back to `boot::boot()`. This +#' may successfully return bootstrapped confidence intervals, but bootstrapped +#' samples may not be appropriate for the multilevel structure of the model. +#' There is also an option `ci_method = "analytical"`, which tries to calculate +#' analytical confidence assuming a chi-squared distribution. However, these +#' intervals are rather inaccurate and often too narrow. It is recommended to +#' calculate bootstrapped confidence intervals for mixed models. #' @param verbose Toggle warnings and messages. #' @param null_model Optional, a null model to compute the random effect variances, #' which is passed to [`insight::get_variance()`]. Usually only required if @@ -40,7 +41,8 @@ #' calculating the null model takes longer and you already have fit the null #' model, you can pass it here, too, to speed up the process. #' @param ... Arguments passed down to `lme4::bootMer()` or `boot::boot()` -#' for bootstrapped ICC or R2. +#' for bootstrapped ICC, R2, RMSE etc.; for `variance_decomposition()`, +#' arguments are passed down to `brms::posterior_predict()`. #' #' @inheritParams r2_bayes #' @inheritParams insight::get_variance @@ -321,8 +323,6 @@ icc <- function(model, -#' @param ... Arguments passed down to `brms::posterior_predict()`. -#' @inheritParams icc #' @rdname icc #' @export variance_decomposition <- function(model, diff --git a/R/performance_rmse.R b/R/performance_rmse.R index 0cc5eac90..2e98d5514 100644 --- a/R/performance_rmse.R +++ b/R/performance_rmse.R @@ -6,7 +6,7 @@ #' #' @param model A model. #' @param normalized Logical, use `TRUE` if normalized rmse should be returned. -#' @inheritParams model_performance.lm +#' @inheritParams icc #' #' @details The RMSE is the square root of the variance of the residuals and indicates #' the absolute fit of the model to the data (difference between observed data @@ -30,33 +30,138 @@ #' # normalized RMSE #' performance_rmse(m, normalized = TRUE) #' @export -performance_rmse <- function(model, normalized = FALSE, verbose = TRUE) { +performance_rmse <- function(model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ...) { tryCatch( { - # compute rmse - rmse_val <- sqrt(performance_mse(model, verbose = verbose)) - - # if normalized, divide by range of response - if (normalized) { - # get response - resp <- datawizard::to_numeric(insight::get_response(model, verbose = FALSE), dummy_factors = FALSE, preserve_levels = TRUE) - # compute rmse, normalized - rmse_val <- rmse_val / (max(resp, na.rm = TRUE) - min(resp, na.rm = TRUE)) + out <- .calculate_rmse(model, normalized, verbose) + # check if CIs are requested, and compute CIs + if (!is.null(ci) && !is.na(ci)) { + # analytical CI? + if (identical(ci_method, "analytical")) { + out <- .analytical_rmse_ci(out, model, ci) + } else { + # bootstrapped CI + result <- .bootstrap_rmse(model, iterations, normalized, ci_method, ...) + # CI for RMSE + rmse_ci <- as.vector(result$t[, 1]) + rmse_ci <- rmse_ci[!is.na(rmse_ci)] + # validation check + if (length(rmse_ci) > 0) { + rmse_ci <- bayestestR::eti(rmse_ci, ci = ci) + out <- cbind(data.frame(RMSE = out), rmse_ci) + class(out) <- c("performance_rmse", "data.frame") + } else { + insight::format_warning("Could not compute confidence intervals for RMSE.") + } + } } - - rmse_val }, error = function(e) { if (inherits(e, c("simpleError", "error")) && verbose) { insight::print_color(e$message, "red") cat("\n") } - NA + out <- NA } ) + + out } #' @rdname performance_rmse #' @export rmse <- performance_rmse + + +# methods --------------------------------------------------------------------- + +#' @export +format.performance_rmse <- function(x, ...) { + insight::format_table(x, ...) +} + +#' @export +print.performance_rmse <- function(x, ...) { + cat(insight::export_table(format(x, ...), ...)) +} + + +# helper function to compute RMSE ---------------------------------------------- + +.calculate_rmse <- function(model, normalized = FALSE, verbose = FALSE, ...) { + # compute rmse + rmse_val <- sqrt(performance_mse(model, verbose = verbose)) + + # if normalized, divide by range of response + if (normalized) { + # get response + resp <- datawizard::to_numeric( + insight::get_response(model, verbose = FALSE), + dummy_factors = FALSE, + preserve_levels = TRUE + ) + # compute rmse, normalized + rmse_val <- rmse_val / (max(resp, na.rm = TRUE) - min(resp, na.rm = TRUE)) + } + + rmse_val +} + + +# analytical CIs -------------------------------------------------------------- + +.analytical_rmse_ci <- function(out, model, ci, ...) { + s <- insight::get_sigma(model, ci = ci, verbose = FALSE) + n <- insight::n_obs(model) + conf_ints <- c(attr(s, "CI_low"), attr(s, "CI_high")) * ((n - 1) / n) + out <- data.frame( + RMSE = out, + CI = ci, + CI_low = conf_ints[1], + CI_high = conf_ints[2] + ) + class(out) <- c("performance_rmse", "data.frame") + out +} + + +# bootstrapping CIs ----------------------------------------------------------- + +.boot_calculate_rmse <- function(data, indices, model, normalized, ...) { + d <- data[indices, ] # allows boot to select sample + fit <- suppressWarnings(suppressMessages(stats::update(model, data = d))) + .calculate_rmse(model = fit, normalized = normalized) +} + +.bootstrap_rmse <- function(model, iterations = 100, normalized = FALSE, ci_method = NULL, ...) { + if (inherits(model, c("merMod", "lmerMod", "glmmTMB")) && !identical(ci_method, "boot")) { + # cannot pass argument "normalized" to "lme4::bootMer()" + if (isTRUE(normalized)) { + insight::format_error("Normalized RMSE cannot be used with confidence intervals. Please use `ci_method = \"boot\"`.") # nolint + } + result <- .do_lme4_bootmer( + model, + .calculate_rmse, + iterations, + dots = list(...) + ) + } else { + insight::check_if_installed("boot") + result <- boot::boot( + data = insight::get_data(model, verbose = FALSE), + statistic = .boot_calculate_rmse, + R = iterations, + sim = "ordinary", + model = model, + normalized = normalized + ) + } + result +} diff --git a/man/icc.Rd b/man/icc.Rd index cd1347a9e..6b6edbcb8 100644 --- a/man/icc.Rd +++ b/man/icc.Rd @@ -24,32 +24,31 @@ variance_decomposition(model, re_formula = NULL, robust = TRUE, ci = 0.95, ...) \arguments{ \item{model}{A (Bayesian) mixed effects model.} -\item{by_group}{Logical, if \code{TRUE}, \code{icc()} returns the variance -components for each random-effects level (if there are multiple levels). -See 'Details'.} +\item{by_group}{Logical, if \code{TRUE}, \code{icc()} returns the variance components +for each random-effects level (if there are multiple levels). See 'Details'.} \item{tolerance}{Tolerance for singularity check of random effects, to decide whether to compute random effect variances or not. Indicates up to which value the convergence result is accepted. The larger tolerance is, the stricter the test will be. See \code{\link[performance:check_singularity]{performance::check_singularity()}}.} -\item{ci}{Confidence resp. credible interval level. For \code{icc()} and \code{r2()}, -confidence intervals are based on bootstrapped samples from the ICC resp. -R2 value. See \code{iterations}.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} \item{iterations}{Number of bootstrap-replicates when computing confidence -intervals for the ICC or R2.} +intervals for the ICC, R2, RMSE etc.} \item{ci_method}{Character string, indicating the bootstrap-method. Should -be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for -bootstrapped confidence intervals. However, if bootstrapped intervals cannot -be calculated this was, try \code{ci_method = "boot"}, which falls back to -\code{boot::boot()}. This may successfully return bootstrapped confidence intervals, -but bootstrapped samples may not be appropriate for the multilevel structure -of the model. There is also an option \code{ci_method = "analytical"}, which tries -to calculate analytical confidence assuming a chi-squared distribution. -However, these intervals are rather inaccurate and often too narrow. It is -recommended to calculate bootstrapped confidence intervals for mixed models.} +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} \item{null_model}{Optional, a null model to compute the random effect variances, which is passed to \code{\link[insight:get_variance]{insight::get_variance()}}. Usually only required if @@ -71,7 +70,9 @@ into account. If \code{"conditional"}, only the conditional component is conside \item{verbose}{Toggle warnings and messages.} -\item{...}{Arguments passed down to \code{brms::posterior_predict()}.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} \item{re_formula}{Formula containing group-level effects to be considered in the prediction. If \code{NULL} (default), include all group-level effects. diff --git a/man/performance_mae.Rd b/man/performance_mae.Rd index 2d68e1606..20874f5ba 100644 --- a/man/performance_mae.Rd +++ b/man/performance_mae.Rd @@ -12,7 +12,9 @@ mae(model, ...) \arguments{ \item{model}{A model.} -\item{...}{Arguments passed to or from other methods.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the mean absolute error of \code{model}. diff --git a/man/performance_mse.Rd b/man/performance_mse.Rd index ef4d6781a..e81327c14 100644 --- a/man/performance_mse.Rd +++ b/man/performance_mse.Rd @@ -12,7 +12,9 @@ mse(model, ...) \arguments{ \item{model}{A model.} -\item{...}{Arguments passed to or from other methods.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the mean square error of \code{model}. diff --git a/man/performance_rmse.Rd b/man/performance_rmse.Rd index bea4534b5..124570c29 100644 --- a/man/performance_rmse.Rd +++ b/man/performance_rmse.Rd @@ -5,16 +5,54 @@ \alias{rmse} \title{Root Mean Squared Error} \usage{ -performance_rmse(model, normalized = FALSE, verbose = TRUE) +performance_rmse( + model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ... +) -rmse(model, normalized = FALSE, verbose = TRUE) +rmse( + model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ... +) } \arguments{ \item{model}{A model.} \item{normalized}{Logical, use \code{TRUE} if normalized rmse should be returned.} -\item{verbose}{Toggle off warnings.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} + +\item{iterations}{Number of bootstrap-replicates when computing confidence +intervals for the ICC, R2, RMSE etc.} + +\item{ci_method}{Character string, indicating the bootstrap-method. Should +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} + +\item{verbose}{Toggle warnings and messages.} + +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the root mean squared error. diff --git a/man/r2_nakagawa.Rd b/man/r2_nakagawa.Rd index c8eb219c3..b8c8601e3 100644 --- a/man/r2_nakagawa.Rd +++ b/man/r2_nakagawa.Rd @@ -32,23 +32,23 @@ or not. Indicates up to which value the convergence result is accepted. When can't be computed (and thus, the conditional r-squared is \code{NA}), decrease the tolerance-level. See also \code{\link[=check_singularity]{check_singularity()}}.} -\item{ci}{Confidence resp. credible interval level. For \code{icc()} and \code{r2()}, -confidence intervals are based on bootstrapped samples from the ICC resp. -R2 value. See \code{iterations}.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} \item{iterations}{Number of bootstrap-replicates when computing confidence -intervals for the ICC or R2.} +intervals for the ICC, R2, RMSE etc.} \item{ci_method}{Character string, indicating the bootstrap-method. Should -be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for -bootstrapped confidence intervals. However, if bootstrapped intervals cannot -be calculated this was, try \code{ci_method = "boot"}, which falls back to -\code{boot::boot()}. This may successfully return bootstrapped confidence intervals, -but bootstrapped samples may not be appropriate for the multilevel structure -of the model. There is also an option \code{ci_method = "analytical"}, which tries -to calculate analytical confidence assuming a chi-squared distribution. -However, these intervals are rather inaccurate and often too narrow. It is -recommended to calculate bootstrapped confidence intervals for mixed models.} +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} \item{null_model}{Optional, a null model to compute the random effect variances, which is passed to \code{\link[insight:get_variance]{insight::get_variance()}}. Usually only required if @@ -70,7 +70,9 @@ into account. If \code{"conditional"}, only the conditional component is conside \item{verbose}{Toggle warnings and messages.} -\item{...}{Arguments passed down to \code{brms::posterior_predict()}.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ A list with the conditional and marginal R2 values. From 5bcb2322c7bb1f115f820b7f3de77c0ee94042b7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 5 Jul 2024 11:49:56 +0200 Subject: [PATCH 2/3] add tests --- tests/testthat/test-rmse.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/testthat/test-rmse.R b/tests/testthat/test-rmse.R index 151aabb34..8628760ad 100644 --- a/tests/testthat/test-rmse.R +++ b/tests/testthat/test-rmse.R @@ -16,3 +16,18 @@ test_that("rmse", { ) expect_equal(cp$RMSE, c(47.4489, 47.39881, 47.38701, 47.41375, 47.39979, 47.38705), tolerance = 1e-3) }) + +test_that("rmse, ci", { + data(mtcars) + model <- lm(mpg ~ hp + gear, data = mtcars) + # analytical + out <- performance_rmse(model, ci = 0.95, ci_method = "analytical") + expect_equal(out$CI_low, 2.30486, tolerance = 1e-4) + expect_equal(out$CI_high, 3.79093, tolerance = 1e-4) + + # bootstrapped + set.seed(123) + out <- performance_rmse(model, ci = 0.95, ci_method = "boot") + expect_equal(out$CI_low, 1.9494, tolerance = 1e-4) + expect_equal(out$CI_high, 3.38406, tolerance = 1e-4) +}) From d8433867fb7783c407322964a5b108785d884394 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 5 Jul 2024 11:53:43 +0200 Subject: [PATCH 3/3] add tests --- tests/testthat/test-rmse.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-rmse.R b/tests/testthat/test-rmse.R index 8628760ad..e19bd49b3 100644 --- a/tests/testthat/test-rmse.R +++ b/tests/testthat/test-rmse.R @@ -28,6 +28,16 @@ test_that("rmse, ci", { # bootstrapped set.seed(123) out <- performance_rmse(model, ci = 0.95, ci_method = "boot") - expect_equal(out$CI_low, 1.9494, tolerance = 1e-4) - expect_equal(out$CI_high, 3.38406, tolerance = 1e-4) + expect_equal(out$CI_low, 1.9494, tolerance = 1e-3) + expect_equal(out$CI_high, 3.38406, tolerance = 1e-3) + + # bootstrapped, mixed models + skip_on_cran() + skip_if_not_installed("lme4") + data(sleepstudy, package = "lme4") + m <- lme4::lmer(Reaction ~ Days + (1 | Subject), data = sleepstudy) + set.seed(123) + out <- performance_rmse(m, ci = 0.95, iterations = 100) + expect_equal(out$CI_low, 26.26066, tolerance = 1e-3) + expect_equal(out$CI_high, 32.5642, tolerance = 1e-3) })