diff --git a/DESCRIPTION b/DESCRIPTION index 5de8f8c62..cc1d14cbe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.0.4 -Authors@R: +Version: 0.12.0.9 +Authors@R: c(person(given = "Daniel", family = "Lüdecke", role = c("aut", "cre"), @@ -39,8 +39,8 @@ Authors@R: email = "remi.theriault@mail.mcgill.ca", comment = c(ORCID = "0000-0003-4315-6788", Twitter = "@rempsyc")), person(given = "Vincent", - family = "Arel-Bundock", - email = "vincent.arel-bundock@umontreal.ca", + family = "Arel-Bundock", + email = "vincent.arel-bundock@umontreal.ca", role = "ctb", comment = c(ORCID = "0000-0003-2042-7063")), person(given = "Martin", @@ -48,9 +48,9 @@ Authors@R: role = "rev"), person(given = "gjo11", role = "rev"), - person("Etienne", - "Bacher", , - "etienne.bacher@protonmail.com", + person("Etienne", + "Bacher", , + "etienne.bacher@protonmail.com", role = "ctb", comment = c(ORCID = "0000-0002-9271-5075"))) Maintainer: Daniel Lüdecke @@ -70,8 +70,8 @@ Depends: R (>= 3.6) Imports: bayestestR (>= 0.13.2), - insight (>= 0.20.1), - datawizard (>= 0.11.0), + insight (>= 0.20.2), + datawizard (>= 0.10.0), stats, utils Suggests: @@ -93,6 +93,7 @@ Suggests: DHARMa, estimatr, fixest, + flextable, forecast, gamm4, geomtextpath, @@ -100,7 +101,7 @@ Suggests: glmmTMB, graphics, Hmisc, - httr, + httr2, ICS, ICSOutlier, ISLR, @@ -130,6 +131,8 @@ Suggests: quantreg, qqplotr (>= 0.0.6), randomForest, + RcppEigen, + rempsyc, rmarkdown, rstanarm, rstantools, @@ -152,4 +155,3 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: easystats/insight diff --git a/NAMESPACE b/NAMESPACE index 7a20bd1dc..0827931d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -148,6 +148,7 @@ S3method(display,test_performance) S3method(fitted,BFBayesFactor) S3method(format,compare_performance) S3method(format,performance_model) +S3method(format,performance_rmse) S3method(format,test_performance) S3method(logLik,cpglm) S3method(logLik,iv_robust) @@ -319,6 +320,7 @@ S3method(print,performance_hosmer) S3method(print,performance_model) S3method(print,performance_pcp) S3method(print,performance_pp_check) +S3method(print,performance_rmse) S3method(print,performance_roc) S3method(print,performance_score) S3method(print,performance_simres) @@ -451,6 +453,7 @@ S3method(r2_coxsnell,survreg) S3method(r2_coxsnell,svycoxph) S3method(r2_coxsnell,truncreg) S3method(r2_efron,default) +S3method(r2_ferrari,default) S3method(r2_kullback,default) S3method(r2_kullback,glm) S3method(r2_loo_posterior,BFBayesFactor) @@ -597,6 +600,7 @@ export(r2) export(r2_bayes) export(r2_coxsnell) export(r2_efron) +export(r2_ferrari) export(r2_kullback) export(r2_loo) export(r2_loo_posterior) diff --git a/NEWS.md b/NEWS.md index d032d4e00..14f4c4e3c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,13 @@ * `icc()` and `r2_nakagawa()` get a `model_component` argument indicating the component for zero-inflation or hurdle models. +* `performance_rmse()` (resp. `rmse()`) can now compute analytical and + bootstrapped confidence intervals. The function gains following new arguments: + `ci`, `ci_method` and `iterations`. + +* New function `r2_ferrari()` to compute Ferrari & Cribari-Neto's R2 for + generalized linear models, in particular beta-regression. + # performance 0.12.0 ## Breaking diff --git a/R/check_distribution.R b/R/check_distribution.R index fe26f0f4c..d743b3ac1 100644 --- a/R/check_distribution.R +++ b/R/check_distribution.R @@ -191,25 +191,25 @@ check_distribution.numeric <- function(model) { # validation check, remove missings x <- x[!is.na(x)] - mode <- NULL + mode_value <- NULL # find mode for integer, or MAP for distributions if (all(.is_integer(x))) { - mode <- datawizard::distribution_mode(x) + mode_value <- datawizard::distribution_mode(x) } else { # this might fail, so we wrap in ".safe()" - mode <- tryCatch( + mode_value <- tryCatch( as.numeric(bayestestR::map_estimate(x, bw = "nrd0")), error = function(e) NULL ) - if (is.null(mode)) { - mode <- tryCatch( + if (is.null(mode_value)) { + mode_value <- tryCatch( as.numeric(bayestestR::map_estimate(x, bw = "kernel")), error = function(e) NULL ) } } - if (is.null(mode)) { + if (is.null(mode_value)) { mean_mode_diff <- mean(x) - datawizard::distribution_mode(x) msg <- "Could not accurately estimate the mode." if (!is.null(type)) { @@ -217,7 +217,7 @@ check_distribution.numeric <- function(model) { } insight::format_alert(msg) } else { - mean_mode_diff <- .safe(mean(x) - mode) + mean_mode_diff <- .safe(mean(x) - mode_value) } data.frame( diff --git a/R/check_homogeneity.R b/R/check_homogeneity.R index 76d73bc19..d6b486810 100644 --- a/R/check_homogeneity.R +++ b/R/check_homogeneity.R @@ -165,7 +165,7 @@ check_homogeneity.afex_aov <- function(x, method = "levene", ...) { between <- between[!is_covar] } - form <- stats::formula(paste0(dv, "~", paste0(between, collapse = "*"))) + form <- stats::formula(paste0(dv, "~", paste(between, collapse = "*"))) test <- car::leveneTest(form, ag_data, center = mean, ...) p.val <- test[1, "Pr(>F)"] diff --git a/R/helpers.R b/R/helpers.R index c231d6226..09b211678 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -1,6 +1,6 @@ # small wrapper around this commonly used try-catch .safe <- function(code, on_error = NULL) { - if (getOption("easystats_erros", FALSE) && is.null(on_error)) { + if (isTRUE(getOption("easystats_errors", FALSE) && is.null(on_error))) { code } else { tryCatch(code, error = function(e) on_error) diff --git a/R/icc.R b/R/icc.R index c89df5f42..420893f1e 100644 --- a/R/icc.R +++ b/R/icc.R @@ -1,5 +1,7 @@ -#' Intraclass Correlation Coefficient (ICC) +#' @title Intraclass Correlation Coefficient (ICC) +#' @name icc #' +#' @description #' This function calculates the intraclass-correlation coefficient (ICC) - #' sometimes also called *variance partition coefficient* (VPC) or #' *repeatability* - for mixed effects models. The ICC can be calculated for all @@ -15,24 +17,23 @@ #' Else, for instance for nested models, name a specific group-level effect #' to calculate the variance decomposition for this group-level. See 'Details' #' and `?brms::posterior_predict`. -#' @param ci Confidence resp. credible interval level. For `icc()` and `r2()`, -#' confidence intervals are based on bootstrapped samples from the ICC resp. -#' R2 value. See `iterations`. -#' @param by_group Logical, if `TRUE`, `icc()` returns the variance -#' components for each random-effects level (if there are multiple levels). -#' See 'Details'. +#' @param ci Confidence resp. credible interval level. For `icc()`, `r2()`, and +#' `rmse()`, confidence intervals are based on bootstrapped samples from the +#' ICC, R2 or RMSE value. See `iterations`. +#' @param by_group Logical, if `TRUE`, `icc()` returns the variance components +#' for each random-effects level (if there are multiple levels). See 'Details'. #' @param iterations Number of bootstrap-replicates when computing confidence -#' intervals for the ICC or R2. +#' intervals for the ICC, R2, RMSE etc. #' @param ci_method Character string, indicating the bootstrap-method. Should -#' be `NULL` (default), in which case `lme4::bootMer()` is used for -#' bootstrapped confidence intervals. However, if bootstrapped intervals cannot -#' be calculated this was, try `ci_method = "boot"`, which falls back to -#' `boot::boot()`. This may successfully return bootstrapped confidence intervals, -#' but bootstrapped samples may not be appropriate for the multilevel structure -#' of the model. There is also an option `ci_method = "analytical"`, which tries -#' to calculate analytical confidence assuming a chi-squared distribution. -#' However, these intervals are rather inaccurate and often too narrow. It is -#' recommended to calculate bootstrapped confidence intervals for mixed models. +#' be `NULL` (default), in which case `lme4::bootMer()` is used for bootstrapped +#' confidence intervals. However, if bootstrapped intervals cannot be calculated +#' this way, try `ci_method = "boot"`, which falls back to `boot::boot()`. This +#' may successfully return bootstrapped confidence intervals, but bootstrapped +#' samples may not be appropriate for the multilevel structure of the model. +#' There is also an option `ci_method = "analytical"`, which tries to calculate +#' analytical confidence assuming a chi-squared distribution. However, these +#' intervals are rather inaccurate and often too narrow. It is recommended to +#' calculate bootstrapped confidence intervals for mixed models. #' @param verbose Toggle warnings and messages. #' @param null_model Optional, a null model to compute the random effect variances, #' which is passed to [`insight::get_variance()`]. Usually only required if @@ -40,7 +41,8 @@ #' calculating the null model takes longer and you already have fit the null #' model, you can pass it here, too, to speed up the process. #' @param ... Arguments passed down to `lme4::bootMer()` or `boot::boot()` -#' for bootstrapped ICC or R2. +#' for bootstrapped ICC, R2, RMSE etc.; for `variance_decomposition()`, +#' arguments are passed down to `brms::posterior_predict()`. #' #' @inheritParams r2_bayes #' @inheritParams insight::get_variance @@ -247,7 +249,12 @@ icc <- function(model, # iccs between groups # n_grps <- length(vars$var.intercept) # level_combinations <- utils::combn(1:n_grps, m = n_grps - 1, simplify = FALSE) - # icc_grp <- sapply(level_combinations, function(v) vars$var.intercept[v[1]] / (vars$var.intercept[v[1]] + vars$var.intercept[v[2]])) + # icc_grp <- sapply( + # level_combinations, + # function(v) { + # vars$var.intercept[v[1]] / (vars$var.intercept[v[1]] + vars$var.intercept[v[2]]) + # } + # ) # # out2 <- data.frame( # Group1 = group_names[sapply(level_combinations, function(i) i[1])], @@ -273,11 +280,11 @@ icc <- function(model, # this is experimental! if (identical(ci_method, "analytical")) { result <- .safe(.analytical_icc_ci(model, ci)) - if (!is.null(result)) { + if (is.null(result)) { + icc_ci_adjusted <- icc_ci_unadjusted <- NA + } else { icc_ci_adjusted <- result$ICC_adjusted icc_ci_unadjusted <- result$ICC_unadjusted - } else { - icc_ci_adjusted <- icc_ci_unadjusted <- NA } } else { result <- .bootstrap_icc(model, iterations, tolerance, ci_method, ...) @@ -321,8 +328,6 @@ icc <- function(model, -#' @param ... Arguments passed down to `brms::posterior_predict()`. -#' @inheritParams icc #' @rdname icc #' @export variance_decomposition <- function(model, @@ -428,7 +433,7 @@ print.icc <- function(x, digits = 3, ...) { } # separate lines for multiple R2 - out <- paste0(out, collapse = "\n") + out <- paste(out, collapse = "\n") cat(out) cat("\n") @@ -591,7 +596,11 @@ print.icc_decomposed <- function(x, digits = 2, ...) { .boot_icc_fun <- function(data, indices, model, tolerance) { d <- data[indices, ] # allows boot to select sample fit <- suppressWarnings(suppressMessages(stats::update(model, data = d))) - vars <- .compute_random_vars(fit, tolerance, verbose = FALSE) + vars <- .compute_random_vars( + fit, + tolerance, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } @@ -604,7 +613,11 @@ print.icc_decomposed <- function(x, digits = 2, ...) { # bootstrapping using "lme4::bootMer" .boot_icc_fun_lme4 <- function(model) { - vars <- .compute_random_vars(model, tolerance = 1e-05, verbose = FALSE) + vars <- .compute_random_vars( + model, + tolerance = 1e-10, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } @@ -685,10 +698,10 @@ print.icc_decomposed <- function(x, digits = 2, ...) { } model_rank <- tryCatch( - if (!is.null(model$rank)) { - model$rank - df_int - } else { + if (is.null(model$rank)) { insight::n_parameters(model) - df_int + } else { + model$rank - df_int }, error = function(e) insight::n_parameters(model) - df_int ) diff --git a/R/performance_rmse.R b/R/performance_rmse.R index 0cc5eac90..2e98d5514 100644 --- a/R/performance_rmse.R +++ b/R/performance_rmse.R @@ -6,7 +6,7 @@ #' #' @param model A model. #' @param normalized Logical, use `TRUE` if normalized rmse should be returned. -#' @inheritParams model_performance.lm +#' @inheritParams icc #' #' @details The RMSE is the square root of the variance of the residuals and indicates #' the absolute fit of the model to the data (difference between observed data @@ -30,33 +30,138 @@ #' # normalized RMSE #' performance_rmse(m, normalized = TRUE) #' @export -performance_rmse <- function(model, normalized = FALSE, verbose = TRUE) { +performance_rmse <- function(model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ...) { tryCatch( { - # compute rmse - rmse_val <- sqrt(performance_mse(model, verbose = verbose)) - - # if normalized, divide by range of response - if (normalized) { - # get response - resp <- datawizard::to_numeric(insight::get_response(model, verbose = FALSE), dummy_factors = FALSE, preserve_levels = TRUE) - # compute rmse, normalized - rmse_val <- rmse_val / (max(resp, na.rm = TRUE) - min(resp, na.rm = TRUE)) + out <- .calculate_rmse(model, normalized, verbose) + # check if CIs are requested, and compute CIs + if (!is.null(ci) && !is.na(ci)) { + # analytical CI? + if (identical(ci_method, "analytical")) { + out <- .analytical_rmse_ci(out, model, ci) + } else { + # bootstrapped CI + result <- .bootstrap_rmse(model, iterations, normalized, ci_method, ...) + # CI for RMSE + rmse_ci <- as.vector(result$t[, 1]) + rmse_ci <- rmse_ci[!is.na(rmse_ci)] + # validation check + if (length(rmse_ci) > 0) { + rmse_ci <- bayestestR::eti(rmse_ci, ci = ci) + out <- cbind(data.frame(RMSE = out), rmse_ci) + class(out) <- c("performance_rmse", "data.frame") + } else { + insight::format_warning("Could not compute confidence intervals for RMSE.") + } + } } - - rmse_val }, error = function(e) { if (inherits(e, c("simpleError", "error")) && verbose) { insight::print_color(e$message, "red") cat("\n") } - NA + out <- NA } ) + + out } #' @rdname performance_rmse #' @export rmse <- performance_rmse + + +# methods --------------------------------------------------------------------- + +#' @export +format.performance_rmse <- function(x, ...) { + insight::format_table(x, ...) +} + +#' @export +print.performance_rmse <- function(x, ...) { + cat(insight::export_table(format(x, ...), ...)) +} + + +# helper function to compute RMSE ---------------------------------------------- + +.calculate_rmse <- function(model, normalized = FALSE, verbose = FALSE, ...) { + # compute rmse + rmse_val <- sqrt(performance_mse(model, verbose = verbose)) + + # if normalized, divide by range of response + if (normalized) { + # get response + resp <- datawizard::to_numeric( + insight::get_response(model, verbose = FALSE), + dummy_factors = FALSE, + preserve_levels = TRUE + ) + # compute rmse, normalized + rmse_val <- rmse_val / (max(resp, na.rm = TRUE) - min(resp, na.rm = TRUE)) + } + + rmse_val +} + + +# analytical CIs -------------------------------------------------------------- + +.analytical_rmse_ci <- function(out, model, ci, ...) { + s <- insight::get_sigma(model, ci = ci, verbose = FALSE) + n <- insight::n_obs(model) + conf_ints <- c(attr(s, "CI_low"), attr(s, "CI_high")) * ((n - 1) / n) + out <- data.frame( + RMSE = out, + CI = ci, + CI_low = conf_ints[1], + CI_high = conf_ints[2] + ) + class(out) <- c("performance_rmse", "data.frame") + out +} + + +# bootstrapping CIs ----------------------------------------------------------- + +.boot_calculate_rmse <- function(data, indices, model, normalized, ...) { + d <- data[indices, ] # allows boot to select sample + fit <- suppressWarnings(suppressMessages(stats::update(model, data = d))) + .calculate_rmse(model = fit, normalized = normalized) +} + +.bootstrap_rmse <- function(model, iterations = 100, normalized = FALSE, ci_method = NULL, ...) { + if (inherits(model, c("merMod", "lmerMod", "glmmTMB")) && !identical(ci_method, "boot")) { + # cannot pass argument "normalized" to "lme4::bootMer()" + if (isTRUE(normalized)) { + insight::format_error("Normalized RMSE cannot be used with confidence intervals. Please use `ci_method = \"boot\"`.") # nolint + } + result <- .do_lme4_bootmer( + model, + .calculate_rmse, + iterations, + dots = list(...) + ) + } else { + insight::check_if_installed("boot") + result <- boot::boot( + data = insight::get_data(model, verbose = FALSE), + statistic = .boot_calculate_rmse, + R = iterations, + sim = "ordinary", + model = model, + normalized = normalized + ) + } + result +} diff --git a/R/r2.R b/R/r2.R index 94982b401..dbfc437fc 100644 --- a/R/r2.R +++ b/R/r2.R @@ -23,14 +23,15 @@ #' - Mixed models: [Nakagawa's R2][r2_nakagawa] #' - Bayesian models: [R2 bayes][r2_bayes] #' -#' @note If there is no `r2()`-method defined for the given model class, -#' `r2()` tries to return a "generic" r-quared value, calculated as following: -#' `1-sum((y-y_hat)^2)/sum((y-y_bar)^2))` +#' @note +#' If there is no `r2()`-method defined for the given model class, `r2()` tries +#' to return a "generic" r-quared value, calculated as following: +#' `1-sum((y-y_hat)^2)/sum((y-y_bar)^2)` #' -#' @seealso [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], -#' [`r2_loo()`], [`r2_mcfadden()`], [`r2_nagelkerke()`], -#' [`r2_nakagawa()`], [`r2_tjur()`], [`r2_xu()`] and -#' [`r2_zeroinflated()`]. +#' @seealso +#' [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], [`r2_loo()`], +#' [`r2_mcfadden()`], [`r2_nagelkerke()`], [`r2_nakagawa()`], [`r2_tjur()`], +#' [`r2_xu()`] and [`r2_zeroinflated()`]. #' #' @examplesIf require("lme4") #' # Pseudo r-quared for GLM @@ -291,6 +292,12 @@ r2.glm <- function(model, ci = NULL, verbose = TRUE, ...) { insight::format_warning("Can't calculate accurate R2 for binomial models that are not Bernoulli models.") } out <- NULL + } else if (info$is_orderedbeta) { + # ordered-beta-regression + out <- r2_ferrari(model, correct_bounds = TRUE) + } else if (info$is_beta) { + # beta-regression + out <- r2_ferrari(model) } else { out <- list(R2_Nagelkerke = r2_nagelkerke(model, ...)) names(out$R2_Nagelkerke) <- "Nagelkerke's R2" @@ -527,8 +534,20 @@ r2.glmmTMB <- function(model, ci = NULL, tolerance = 1e-5, verbose = TRUE, ...) } else if (info$is_zero_inflated) { # zero-inflated models use the default method out <- r2_zeroinflated(model) + } else if (info$is_orderedbeta) { + # ordered-beta-regression + out <- r2_ferrari(model, correct_bounds = TRUE) + } else if (info$is_beta) { + # beta-regression + out <- r2_ferrari(model) } else { - insight::format_error("`r2()` does not support models of class `glmmTMB` without random effects and this link-function.") # nolint + insight::format_error(paste0( + "`r2()` does not support models of class `glmmTMB` without random effects and from ", + info$family, + "-family with ", + info$link_function, + "-link-function." + )) } } out diff --git a/R/r2_bayes.R b/R/r2_bayes.R index ec98f754b..f96e98b41 100644 --- a/R/r2_bayes.R +++ b/R/r2_bayes.R @@ -1,36 +1,37 @@ #' @title Bayesian R2 #' @name r2_bayes #' -#' @description Compute R2 for Bayesian models. For mixed models (including a -#' random part), it additionally computes the R2 related to the fixed effects -#' only (marginal R2). While `r2_bayes()` returns a single R2 value, -#' `r2_posterior()` returns a posterior sample of Bayesian R2 values. +#' @description +#' Compute R2 for Bayesian models. For mixed models (including a random part), +#' it additionally computes the R2 related to the fixed effects only (marginal +#' R2). While `r2_bayes()` returns a single R2 value, `r2_posterior()` returns a +#' posterior sample of Bayesian R2 values. #' #' @param model A Bayesian regression model (from **brms**, -#' **rstanarm**, **BayesFactor**, etc). +#' **rstanarm**, **BayesFactor**, etc). #' @param robust Logical, if `TRUE`, the median instead of mean is used to -#' calculate the central tendency of the variances. +#' calculate the central tendency of the variances. #' @param ci Value or vector of probability of the CI (between 0 and 1) to be -#' estimated. +#' estimated. #' @param ... Arguments passed to `r2_posterior()`. #' @inheritParams model_performance.lm #' #' @return A list with the Bayesian R2 value. For mixed models, a list with the -#' Bayesian R2 value and the marginal Bayesian R2 value. The standard errors -#' and credible intervals for the R2 values are saved as attributes. +#' Bayesian R2 value and the marginal Bayesian R2 value. The standard errors and +#' credible intervals for the R2 values are saved as attributes. #' -#' @details `r2_bayes()` returns an "unadjusted" R2 value. See -#' [r2_loo()] to calculate a LOO-adjusted R2, which comes -#' conceptually closer to an adjusted R2 measure. +#' @details +#' `r2_bayes()` returns an "unadjusted" R2 value. See [r2_loo()] to calculate a +#' LOO-adjusted R2, which comes conceptually closer to an adjusted R2 measure. #' -#' For mixed models, the conditional and marginal R2 are returned. The marginal -#' R2 considers only the variance of the fixed effects, while the conditional -#' R2 takes both the fixed and random effects into account. +#' For mixed models, the conditional and marginal R2 are returned. The marginal +#' R2 considers only the variance of the fixed effects, while the conditional R2 +#' takes both the fixed and random effects into account. #' -#' `r2_posterior()` is the actual workhorse for `r2_bayes()` and -#' returns a posterior sample of Bayesian R2 values. +#' `r2_posterior()` is the actual workhorse for `r2_bayes()` and returns a +#' posterior sample of Bayesian R2 values. #' -#' @examplesIf require("rstanarm") && require("rstantools") && require("brms") +#' @examplesIf require("rstanarm") && require("rstantools") && require("brms") && require("RcppEigen") #' library(performance) #' \donttest{ #' model <- suppressWarnings(rstanarm::stan_glm( @@ -71,8 +72,8 @@ #' r2_bayes(model) #' } #' @references -#' Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). -#' R-squared for Bayesian regression models. The American Statistician, 1–6. +#' Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). R-squared for +#' Bayesian regression models. The American Statistician, 1–6. #' \doi{10.1080/00031305.2018.1549100} #' @export r2_bayes <- function(model, robust = TRUE, ci = 0.95, verbose = TRUE, ...) { diff --git a/R/r2_ferarri.R b/R/r2_ferarri.R new file mode 100644 index 000000000..402c95fdf --- /dev/null +++ b/R/r2_ferarri.R @@ -0,0 +1,81 @@ +#' @title Ferrari's and Cribari-Neto's R2 +#' @name r2_ferrari +#' +#' @description Calculates Ferrari's and Cribari-Neto's pseudo R2 (for +#' beta-regression models). +#' +#' @param model Generalized linear, in particular beta-regression model. +#' @param correct_bounds Logical, whether to correct the bounds of the response +#' variable to avoid 0 and 1. If `TRUE`, the response variable is normalized +#' and "compressed", i.e. zeros and ones are excluded. +#' @param ... Currently not used. +#' +#' @return A list with the pseudo R2 value. +#' +#' @references +#' - Ferrari, S., and Cribari-Neto, F. (2004). Beta Regression for Modelling Rates +#' and Proportions. Journal of Applied Statistics, 31(7), 799–815. +#' \doi{10.1080/0266476042000214501} +#' +#' @examplesIf require("betareg") +#' data("GasolineYield", package = "betareg") +#' model <- betareg::betareg(yield ~ batch + temp, data = GasolineYield) +#' r2_ferrari(model) +#' @export +r2_ferrari <- function(model, ...) { + UseMethod("r2_ferrari") +} + +#' @rdname r2_ferrari +#' @export +r2_ferrari.default <- function(model, correct_bounds = FALSE, ...) { + # on the reponse scale, beta regression link doesn't workd + predictions <- stats::predict(model, type = "response") + eta <- insight::link_function(model)(predictions) + y <- insight::get_response(model) + + # for ordered beta, fix 0 and 1 to specific bounds + if (correct_bounds) { + y <- datawizard::normalize(y, include_bounds = FALSE) + } + + ferrari <- stats::cor(eta, insight::link_function(model)(y))^2 + out <- list(R2 = c(`Ferrari's R2` = ferrari)) + + attr(out, "model_type") <- "Generalized Linear" + structure(class = "r2_generic", out) +} + + +# helper ----------------------------- + +# .r2_ferrari <- function(model, x) { +# if (inherits(model, "glmmTMB")) { +# insight::check_if_installed("lme4") +# # coefficients, but remove phi parameter +# x <- .collapse_cond(lme4::fixef(model)) +# x <- x[names(x) != "(phi)"] +# } else { +# # coefficients, but remove phi parameter +# x <- stats::coef(model) +# x <- x[names(x) != "(phi)"] +# } + +# # model matrix, check dimensions / length +# mm <- insight::get_modelmatrix(model) + +# if (length(x) != ncol(mm)) { +# insight::format_warning("Model matrix and coefficients do not match.") +# return(NULL) +# } + +# # linear predictor for the mean +# eta <- as.vector(x %*% t(mm)) +# y <- insight::get_response(model) + +# ferrari <- stats::cor(eta, insight::link_function(model)(y))^2 +# out <- list(R2 = c(`Ferrari's R2` = ferrari)) + +# attr(out, "model_type") <- "Generalized Linear" +# structure(class = "r2_generic", out) +# } diff --git a/R/r2_mcfadden.R b/R/r2_mcfadden.R index b04977517..de61fac87 100644 --- a/R/r2_mcfadden.R +++ b/R/r2_mcfadden.R @@ -63,15 +63,16 @@ r2_mcfadden.glm <- function(model, verbose = TRUE, ...) { if (is.null(info)) { info <- suppressWarnings(insight::model_info(model, verbose = FALSE)) } + if (info$is_binomial && !info$is_bernoulli && class(model)[1] == "glm") { if (verbose) { insight::format_warning("Can't calculate accurate R2 for binomial models that are not Bernoulli models.") } return(NULL) - } else { - l_null <- insight::get_loglikelihood(stats::update(model, ~1)) - .r2_mcfadden(model, l_null) } + + l_null <- insight::get_loglikelihood(stats::update(model, ~1)) + .r2_mcfadden(model, l_null) } #' @export diff --git a/R/r2_nakagawa.R b/R/r2_nakagawa.R index 59d4d1906..dfa1eb6dc 100644 --- a/R/r2_nakagawa.R +++ b/R/r2_nakagawa.R @@ -154,7 +154,7 @@ r2_nakagawa <- function(model, if (insight::is_empty_object(vars$var.random) || is.na(vars$var.random)) { if (verbose) { # if no random effect variance, return simple R2 - insight::print_color("Random effect variances not available. Returned R2 does not account for random effects.\n", "red") + insight::print_color("Random effect variances not available. Returned R2 does not account for random effects.\n", "red") # nolint } r2_marginal <- vars$var.fixed / (vars$var.fixed + vars$var.residual) r2_conditional <- NA @@ -172,11 +172,11 @@ r2_nakagawa <- function(model, # this is experimental! if (identical(ci_method, "analytical")) { result <- .safe(.analytical_icc_ci(model, ci, fun = "r2_nakagawa")) - if (!is.null(result)) { + if (is.null(result)) { + r2_ci_marginal <- r2_ci_conditional <- NA + } else { r2_ci_marginal <- result$R2_marginal r2_ci_conditional <- result$R2_conditional - } else { - r2_ci_marginal <- r2_ci_conditional <- NA } } else { result <- .bootstrap_r2_nakagawa(model, iterations, tolerance, ci_method, ...) @@ -266,7 +266,7 @@ print.r2_nakagawa <- function(x, digits = 3, ...) { } # separate lines for multiple R2 - out <- paste0(out, collapse = "\n") + out <- paste(out, collapse = "\n") cat(out) cat("\n") @@ -281,7 +281,11 @@ print.r2_nakagawa <- function(x, digits = 3, ...) { .boot_r2_fun <- function(data, indices, model, tolerance) { d <- data[indices, ] # allows boot to select sample fit <- suppressWarnings(suppressMessages(stats::update(model, data = d))) - vars <- .compute_random_vars(fit, tolerance, verbose = FALSE) + vars <- .compute_random_vars( + fit, + tolerance, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } @@ -298,7 +302,11 @@ print.r2_nakagawa <- function(x, digits = 3, ...) { # bootstrapping using "lme4::bootMer" .boot_r2_fun_lme4 <- function(model) { - vars <- .compute_random_vars(model, tolerance = 1e-05, verbose = FALSE) + vars <- .compute_random_vars( + model, + tolerance = 1e-10, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } diff --git a/README.Rmd b/README.Rmd index 895fa02a8..e7c540577 100644 --- a/README.Rmd +++ b/README.Rmd @@ -26,7 +26,7 @@ library(performance) ``` [![DOI](https://joss.theoj.org/papers/10.21105/joss.03139/status.svg)](https://doi.org/10.21105/joss.03139) -[![downloads](http://cranlogs.r-pkg.org/badges/performance)](https://cran.r-project.org/package=performance) [![total](https://cranlogs.r-pkg.org/badges/grand-total/performance)](https://cranlogs.r-pkg.org/) [![status](https://tinyverse.netlify.com/badge/performance)](https://CRAN.R-project.org/package=performance) +[![downloads](http://cranlogs.r-pkg.org/badges/performance)](https://cran.r-project.org/package=performance) [![total](https://cranlogs.r-pkg.org/badges/grand-total/performance)](https://cranlogs.r-pkg.org/) ***Test if your model is a good model!*** @@ -145,7 +145,7 @@ icc(model) ``` ...and models of class `brmsfit`. - + ```{r, echo=FALSE, eval=curl::has_internet()} model <- insight::download_model("brms_mixed_1") ``` @@ -224,7 +224,7 @@ model <- lmer( check_singularity(model) ``` -Remedies to cure issues with singular fits can be found [here](https://easystats.github.io/performance/reference/check_singularity.html). +Remedies to cure issues with singular fits can be found [here](https://easystats.github.io/performance/reference/check_singularity.html). #### Check for heteroskedasticity diff --git a/README.md b/README.md index e4e0352dc..5153170da 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![DOI](https://joss.theoj.org/papers/10.21105/joss.03139/status.svg)](https://doi.org/10.21105/joss.03139) [![downloads](http://cranlogs.r-pkg.org/badges/performance)](https://cran.r-project.org/package=performance) [![total](https://cranlogs.r-pkg.org/badges/grand-total/performance)](https://cranlogs.r-pkg.org/) -[![status](https://tinyverse.netlify.com/badge/performance)](https://CRAN.R-project.org/package=performance) ***Test if your model is a good model!*** @@ -58,13 +57,13 @@ To cite performance in publications use: ``` r citation("performance") #> To cite package 'performance' in publications use: -#> +#> #> Lüdecke et al., (2021). performance: An R Package for Assessment, Comparison and #> Testing of Statistical Models. Journal of Open Source Software, 6(60), 3139. #> https://doi.org/10.21105/joss.03139 -#> +#> #> A BibTeX entry for LaTeX users is -#> +#> #> @Article{, #> title = {{performance}: An {R} Package for Assessment, Comparison and Testing of Statistical Models}, #> author = {Daniel Lüdecke and Mattan S. Ben-Shachar and Indrajeet Patil and Philip Waggoner and Dominique Makowski}, @@ -146,15 +145,15 @@ model <- stan_glmer( r2(model) #> # Bayesian R2 with Compatibility Interval -#> +#> #> Conditional R2: 0.953 (95% CI [0.942, 0.964]) -#> Marginal R2: 0.824 (95% CI [0.720, 0.898]) +#> Marginal R2: 0.826 (95% CI [0.724, 0.900]) library(lme4) model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) r2(model) #> # R2 for Mixed Models -#> +#> #> Conditional R2: 0.799 #> Marginal R2: 0.279 ``` @@ -173,7 +172,7 @@ library(lme4) model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) icc(model) #> # Intraclass Correlation Coefficient -#> +#> #> Adjusted ICC: 0.722 #> Unadjusted ICC: 0.521 ``` @@ -189,9 +188,9 @@ model <- brm(mpg ~ wt + (1 | cyl) + (1 + wt | gear), data = mtcars) ``` r icc(model) #> # Intraclass Correlation Coefficient -#> -#> Adjusted ICC: 0.930 -#> Unadjusted ICC: 0.771 +#> +#> Adjusted ICC: 0.941 +#> Unadjusted ICC: 0.779 ``` ### Model diagnostics @@ -210,7 +209,7 @@ data(Salamanders) model <- glm(count ~ spp + mined, family = poisson, data = Salamanders) check_overdispersion(model) #> # Overdispersion test -#> +#> #> dispersion ratio = 2.946 #> Pearson's Chi-Squared = 1873.710 #> p-value = < 0.001 @@ -235,7 +234,7 @@ fitted model. model <- glm(count ~ spp + mined, family = poisson, data = Salamanders) check_zeroinflation(model) #> # Check for zero-inflation -#> +#> #> Observed zeros: 387 #> Predicted zeros: 298 #> Ratio: 0.77 @@ -323,7 +322,7 @@ be r-squared, AIC, BIC, RMSE, ICC or LOOIC. m1 <- lm(mpg ~ wt + cyl, data = mtcars) model_performance(m1) #> # Indices of model performance -#> +#> #> AIC | AICc | BIC | R2 | R2 (adj.) | RMSE | Sigma #> --------------------------------------------------------------- #> 156.010 | 157.492 | 161.873 | 0.830 | 0.819 | 2.444 | 2.568 @@ -335,7 +334,7 @@ model_performance(m1) m2 <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") model_performance(m2) #> # Indices of model performance -#> +#> #> AIC | AICc | BIC | Tjur's R2 | RMSE | Sigma | Log_loss | Score_log | Score_spherical | PCP #> ----------------------------------------------------------------------------------------------------- #> 31.298 | 32.155 | 35.695 | 0.478 | 0.359 | 1.000 | 0.395 | -14.903 | 0.095 | 0.743 @@ -348,7 +347,7 @@ library(lme4) m3 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) model_performance(m3) #> # Indices of model performance -#> +#> #> AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma #> ---------------------------------------------------------------------------------- #> 1755.628 | 1756.114 | 1774.786 | 0.799 | 0.279 | 0.722 | 23.438 | 25.592 @@ -368,12 +367,12 @@ m4 <- glm(counts ~ outcome + treatment, family = poisson()) compare_performance(m1, m2, m3, m4, verbose = FALSE) #> # Comparison of Model Performance Indices -#> +#> #> Name | Model | AIC (weights) | AICc (weights) | BIC (weights) | RMSE | Sigma | Score_log | Score_spherical | R2 | R2 (adj.) | Tjur's R2 | Log_loss | PCP | R2 (cond.) | R2 (marg.) | ICC | Nagelkerke's R2 #> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -#> m1 | lm | 156.0 (<.001) | 157.5 (<.001) | 161.9 (<.001) | 2.444 | 2.568 | | | 0.830 | 0.819 | | | | | | | -#> m2 | glm | 31.3 (>.999) | 32.2 (>.999) | 35.7 (>.999) | 0.359 | 1.000 | -14.903 | 0.095 | | | 0.478 | 0.395 | 0.743 | | | | -#> m3 | lmerMod | 1764.0 (<.001) | 1764.5 (<.001) | 1783.1 (<.001) | 23.438 | 25.592 | | | | | | | | 0.799 | 0.279 | 0.722 | +#> m1 | lm | 156.0 (<.001) | 157.5 (<.001) | 161.9 (<.001) | 2.444 | 2.568 | | | 0.830 | 0.819 | | | | | | | +#> m2 | glm | 31.3 (>.999) | 32.2 (>.999) | 35.7 (>.999) | 0.359 | 1.000 | -14.903 | 0.095 | | | 0.478 | 0.395 | 0.743 | | | | +#> m3 | lmerMod | 1764.0 (<.001) | 1764.5 (<.001) | 1783.1 (<.001) | 23.438 | 25.592 | | | | | | | | 0.799 | 0.279 | 0.722 | #> m4 | glm | 56.8 (<.001) | 76.8 (<.001) | 57.7 (<.001) | 3.043 | 1.000 | -2.598 | 0.324 | | | | | | | | | 0.657 ``` @@ -386,7 +385,7 @@ of model performance and sort the models from the best one to the worse. ``` r compare_performance(m1, m2, m3, m4, rank = TRUE, verbose = FALSE) #> # Comparison of Model Performance Indices -#> +#> #> Name | Model | RMSE | Sigma | AIC weights | AICc weights | BIC weights | Performance-Score #> ----------------------------------------------------------------------------------------------- #> m2 | glm | 0.359 | 1.000 | 1.000 | 1.000 | 1.000 | 100.00% @@ -424,7 +423,7 @@ lm4 <- lm(Sepal.Length ~ Species * Sepal.Width + Petal.Length + Petal.Width, dat test_performance(lm1, lm2, lm3, lm4) #> Name | Model | BF | Omega2 | p (Omega2) | LR | p (LR) #> ------------------------------------------------------------ -#> lm1 | lm | | | | | +#> lm1 | lm | | | | | #> lm2 | lm | > 1000 | 0.69 | < .001 | -6.25 | < .001 #> lm3 | lm | > 1000 | 0.36 | < .001 | -3.44 | < .001 #> lm4 | lm | > 1000 | 0.73 | < .001 | -7.77 | < .001 @@ -432,12 +431,12 @@ test_performance(lm1, lm2, lm3, lm4) test_bf(lm1, lm2, lm3, lm4) #> Bayes Factors for Model Comparison -#> +#> #> Model BF #> [lm2] Species + Petal.Length 3.45e+26 #> [lm3] Species * Sepal.Width 4.69e+07 #> [lm4] Species * Sepal.Width + Petal.Length + Petal.Width 7.58e+29 -#> +#> #> * Against Denominator: [lm1] Species #> * Bayes Factor Type: BIC approximation ``` diff --git a/inst/WORDLIST b/inst/WORDLIST index f0e335ab1..e5e90d844 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -39,6 +39,7 @@ Chisq CochransQ CompQuadForm Concurvity +Cribari Cronbach's Crujeiras Csaki @@ -154,6 +155,8 @@ Nagelkerke Nagelkerke's Nakagawa Nakagawa's +Neto +Neto's Nondegenerate Nordhausen Normed diff --git a/man/figures/unnamed-chunk-14-1.png b/man/figures/unnamed-chunk-14-1.png index 67f5eb31d..c8ac9bd3a 100644 Binary files a/man/figures/unnamed-chunk-14-1.png and b/man/figures/unnamed-chunk-14-1.png differ diff --git a/man/figures/unnamed-chunk-20-1.png b/man/figures/unnamed-chunk-20-1.png index a0b924679..17d827a8a 100644 Binary files a/man/figures/unnamed-chunk-20-1.png and b/man/figures/unnamed-chunk-20-1.png differ diff --git a/man/icc.Rd b/man/icc.Rd index f77d87391..273e79cfc 100644 --- a/man/icc.Rd +++ b/man/icc.Rd @@ -24,32 +24,31 @@ variance_decomposition(model, re_formula = NULL, robust = TRUE, ci = 0.95, ...) \arguments{ \item{model}{A (Bayesian) mixed effects model.} -\item{by_group}{Logical, if \code{TRUE}, \code{icc()} returns the variance -components for each random-effects level (if there are multiple levels). -See 'Details'.} +\item{by_group}{Logical, if \code{TRUE}, \code{icc()} returns the variance components +for each random-effects level (if there are multiple levels). See 'Details'.} \item{tolerance}{Tolerance for singularity check of random effects, to decide whether to compute random effect variances or not. Indicates up to which value the convergence result is accepted. The larger tolerance is, the stricter the test will be. See \code{\link[performance:check_singularity]{performance::check_singularity()}}.} -\item{ci}{Confidence resp. credible interval level. For \code{icc()} and \code{r2()}, -confidence intervals are based on bootstrapped samples from the ICC resp. -R2 value. See \code{iterations}.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} \item{iterations}{Number of bootstrap-replicates when computing confidence -intervals for the ICC or R2.} +intervals for the ICC, R2, RMSE etc.} \item{ci_method}{Character string, indicating the bootstrap-method. Should -be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for -bootstrapped confidence intervals. However, if bootstrapped intervals cannot -be calculated this was, try \code{ci_method = "boot"}, which falls back to -\code{boot::boot()}. This may successfully return bootstrapped confidence intervals, -but bootstrapped samples may not be appropriate for the multilevel structure -of the model. There is also an option \code{ci_method = "analytical"}, which tries -to calculate analytical confidence assuming a chi-squared distribution. -However, these intervals are rather inaccurate and often too narrow. It is -recommended to calculate bootstrapped confidence intervals for mixed models.} +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} \item{null_model}{Optional, a null model to compute the random effect variances, which is passed to \code{\link[insight:get_variance]{insight::get_variance()}}. Usually only required if @@ -59,7 +58,9 @@ model, you can pass it here, too, to speed up the process.} \item{verbose}{Toggle warnings and messages.} -\item{...}{Arguments passed down to \code{brms::posterior_predict()}.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} \item{re_formula}{Formula containing group-level effects to be considered in the prediction. If \code{NULL} (default), include all group-level effects. diff --git a/man/performance_mae.Rd b/man/performance_mae.Rd index 2d68e1606..20874f5ba 100644 --- a/man/performance_mae.Rd +++ b/man/performance_mae.Rd @@ -12,7 +12,9 @@ mae(model, ...) \arguments{ \item{model}{A model.} -\item{...}{Arguments passed to or from other methods.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the mean absolute error of \code{model}. diff --git a/man/performance_mse.Rd b/man/performance_mse.Rd index ef4d6781a..e81327c14 100644 --- a/man/performance_mse.Rd +++ b/man/performance_mse.Rd @@ -12,7 +12,9 @@ mse(model, ...) \arguments{ \item{model}{A model.} -\item{...}{Arguments passed to or from other methods.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the mean square error of \code{model}. diff --git a/man/performance_rmse.Rd b/man/performance_rmse.Rd index bea4534b5..124570c29 100644 --- a/man/performance_rmse.Rd +++ b/man/performance_rmse.Rd @@ -5,16 +5,54 @@ \alias{rmse} \title{Root Mean Squared Error} \usage{ -performance_rmse(model, normalized = FALSE, verbose = TRUE) +performance_rmse( + model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ... +) -rmse(model, normalized = FALSE, verbose = TRUE) +rmse( + model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ... +) } \arguments{ \item{model}{A model.} \item{normalized}{Logical, use \code{TRUE} if normalized rmse should be returned.} -\item{verbose}{Toggle off warnings.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} + +\item{iterations}{Number of bootstrap-replicates when computing confidence +intervals for the ICC, R2, RMSE etc.} + +\item{ci_method}{Character string, indicating the bootstrap-method. Should +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} + +\item{verbose}{Toggle warnings and messages.} + +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the root mean squared error. diff --git a/man/r2.Rd b/man/r2.Rd index 9c5c648c3..bf783e8d9 100644 --- a/man/r2.Rd +++ b/man/r2.Rd @@ -49,9 +49,9 @@ determination, value for different model objects. Depending on the model, R2, pseudo-R2, or marginal / adjusted R2 values are returned. } \note{ -If there is no \code{r2()}-method defined for the given model class, -\code{r2()} tries to return a "generic" r-quared value, calculated as following: -\verb{1-sum((y-y_hat)^2)/sum((y-y_bar)^2))} +If there is no \code{r2()}-method defined for the given model class, \code{r2()} tries +to return a "generic" r-quared value, calculated as following: +\code{1-sum((y-y_hat)^2)/sum((y-y_bar)^2)} } \examples{ \dontshow{if (require("lme4")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} @@ -68,8 +68,7 @@ r2(model) \dontshow{\}) # examplesIf} } \seealso{ -\code{\link[=r2_bayes]{r2_bayes()}}, \code{\link[=r2_coxsnell]{r2_coxsnell()}}, \code{\link[=r2_kullback]{r2_kullback()}}, -\code{\link[=r2_loo]{r2_loo()}}, \code{\link[=r2_mcfadden]{r2_mcfadden()}}, \code{\link[=r2_nagelkerke]{r2_nagelkerke()}}, -\code{\link[=r2_nakagawa]{r2_nakagawa()}}, \code{\link[=r2_tjur]{r2_tjur()}}, \code{\link[=r2_xu]{r2_xu()}} and -\code{\link[=r2_zeroinflated]{r2_zeroinflated()}}. +\code{\link[=r2_bayes]{r2_bayes()}}, \code{\link[=r2_coxsnell]{r2_coxsnell()}}, \code{\link[=r2_kullback]{r2_kullback()}}, \code{\link[=r2_loo]{r2_loo()}}, +\code{\link[=r2_mcfadden]{r2_mcfadden()}}, \code{\link[=r2_nagelkerke]{r2_nagelkerke()}}, \code{\link[=r2_nakagawa]{r2_nakagawa()}}, \code{\link[=r2_tjur]{r2_tjur()}}, +\code{\link[=r2_xu]{r2_xu()}} and \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}. } diff --git a/man/r2_bayes.Rd b/man/r2_bayes.Rd index 67200fb7d..63f9843ba 100644 --- a/man/r2_bayes.Rd +++ b/man/r2_bayes.Rd @@ -40,26 +40,25 @@ the first model (or the denominator, for \code{BFBayesFactor} objects). For } \value{ A list with the Bayesian R2 value. For mixed models, a list with the -Bayesian R2 value and the marginal Bayesian R2 value. The standard errors -and credible intervals for the R2 values are saved as attributes. +Bayesian R2 value and the marginal Bayesian R2 value. The standard errors and +credible intervals for the R2 values are saved as attributes. } \description{ -Compute R2 for Bayesian models. For mixed models (including a -random part), it additionally computes the R2 related to the fixed effects -only (marginal R2). While \code{r2_bayes()} returns a single R2 value, -\code{r2_posterior()} returns a posterior sample of Bayesian R2 values. +Compute R2 for Bayesian models. For mixed models (including a random part), +it additionally computes the R2 related to the fixed effects only (marginal +R2). While \code{r2_bayes()} returns a single R2 value, \code{r2_posterior()} returns a +posterior sample of Bayesian R2 values. } \details{ -\code{r2_bayes()} returns an "unadjusted" R2 value. See -\code{\link[=r2_loo]{r2_loo()}} to calculate a LOO-adjusted R2, which comes -conceptually closer to an adjusted R2 measure. +\code{r2_bayes()} returns an "unadjusted" R2 value. See \code{\link[=r2_loo]{r2_loo()}} to calculate a +LOO-adjusted R2, which comes conceptually closer to an adjusted R2 measure. For mixed models, the conditional and marginal R2 are returned. The marginal -R2 considers only the variance of the fixed effects, while the conditional -R2 takes both the fixed and random effects into account. +R2 considers only the variance of the fixed effects, while the conditional R2 +takes both the fixed and random effects into account. -\code{r2_posterior()} is the actual workhorse for \code{r2_bayes()} and -returns a posterior sample of Bayesian R2 values. +\code{r2_posterior()} is the actual workhorse for \code{r2_bayes()} and returns a +posterior sample of Bayesian R2 values. } \examples{ \dontshow{if (require("rstanarm") && require("rstantools") && require("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} @@ -105,7 +104,7 @@ r2_bayes(model) \dontshow{\}) # examplesIf} } \references{ -Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). -R-squared for Bayesian regression models. The American Statistician, 1–6. +Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). R-squared for +Bayesian regression models. The American Statistician, 1–6. \doi{10.1080/00031305.2018.1549100} } diff --git a/man/r2_ferrari.Rd b/man/r2_ferrari.Rd new file mode 100644 index 000000000..78635539b --- /dev/null +++ b/man/r2_ferrari.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/r2_ferarri.R +\name{r2_ferrari} +\alias{r2_ferrari} +\alias{r2_ferrari.default} +\title{Ferrari's and Cribari-Neto's R2} +\usage{ +r2_ferrari(model, ...) + +\method{r2_ferrari}{default}(model, correct_bounds = FALSE, ...) +} +\arguments{ +\item{model}{Generalized linear, in particular beta-regression model.} + +\item{...}{Currently not used.} + +\item{correct_bounds}{Logical, whether to correct the bounds of the response +variable to avoid 0 and 1. If \code{TRUE}, the response variable is normalized +and "compressed", i.e. zeros and ones are excluded.} +} +\value{ +A list with the pseudo R2 value. +} +\description{ +Calculates Ferrari's and Cribari-Neto's pseudo R2 (for +beta-regression models). +} +\examples{ +\dontshow{if (require("betareg")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +data("GasolineYield", package = "betareg") +model <- betareg::betareg(yield ~ batch + temp, data = GasolineYield) +r2_ferrari(model) +\dontshow{\}) # examplesIf} +} +\references{ +\itemize{ +\item Ferrari, S., and Cribari-Neto, F. (2004). Beta Regression for Modelling Rates +and Proportions. Journal of Applied Statistics, 31(7), 799–815. +\doi{10.1080/0266476042000214501} +} +} diff --git a/man/r2_nakagawa.Rd b/man/r2_nakagawa.Rd index 7360920aa..84374120b 100644 --- a/man/r2_nakagawa.Rd +++ b/man/r2_nakagawa.Rd @@ -32,23 +32,23 @@ or not. Indicates up to which value the convergence result is accepted. When can't be computed (and thus, the conditional r-squared is \code{NA}), decrease the tolerance-level. See also \code{\link[=check_singularity]{check_singularity()}}.} -\item{ci}{Confidence resp. credible interval level. For \code{icc()} and \code{r2()}, -confidence intervals are based on bootstrapped samples from the ICC resp. -R2 value. See \code{iterations}.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} \item{iterations}{Number of bootstrap-replicates when computing confidence -intervals for the ICC or R2.} +intervals for the ICC, R2, RMSE etc.} \item{ci_method}{Character string, indicating the bootstrap-method. Should -be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for -bootstrapped confidence intervals. However, if bootstrapped intervals cannot -be calculated this was, try \code{ci_method = "boot"}, which falls back to -\code{boot::boot()}. This may successfully return bootstrapped confidence intervals, -but bootstrapped samples may not be appropriate for the multilevel structure -of the model. There is also an option \code{ci_method = "analytical"}, which tries -to calculate analytical confidence assuming a chi-squared distribution. -However, these intervals are rather inaccurate and often too narrow. It is -recommended to calculate bootstrapped confidence intervals for mixed models.} +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} \item{null_model}{Optional, a null model to compute the random effect variances, which is passed to \code{\link[insight:get_variance]{insight::get_variance()}}. Usually only required if @@ -58,7 +58,9 @@ model, you can pass it here, too, to speed up the process.} \item{verbose}{Toggle warnings and messages.} -\item{...}{Arguments passed down to \code{brms::posterior_predict()}.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ A list with the conditional and marginal R2 values. diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index d1d6a5545..91b09db34 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -1,7 +1,7 @@ skip_on_cran() skip_if_not_installed("withr") withr::with_options( - list(easystats_erros = TRUE), + list(easystats_errors = TRUE), test_that(".safe works with options", { expect_error(performance:::.safe(mean(fd)), regex = "object 'fd' not found") expect_identical(performance:::.safe(mean(fd), 1L), 1L) diff --git a/tests/testthat/test-icc.R b/tests/testthat/test-icc.R index efbb30eb6..e537b8b1f 100644 --- a/tests/testthat/test-icc.R +++ b/tests/testthat/test-icc.R @@ -40,7 +40,7 @@ test_that("icc", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") m2 <- insight::download_model("stanreg_lmerMod_1") expect_equal( icc(m2), @@ -57,7 +57,7 @@ test_that("icc", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") m3 <- insight::download_model("brms_mixed_1") set.seed(123) expect_equal( @@ -71,7 +71,7 @@ test_that("icc", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") m3 <- insight::download_model("brms_mixed_1") set.seed(123) expect_equal( diff --git a/tests/testthat/test-model_performance.bayesian.R b/tests/testthat/test-model_performance.bayesian.R index a15e41a10..da4b6ff18 100644 --- a/tests/testthat/test-model_performance.bayesian.R +++ b/tests/testthat/test-model_performance.bayesian.R @@ -2,9 +2,9 @@ test_that("model_performance.stanreg", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") set.seed(333) - model <- tryCatch(insight::download_model("stanreg_lm_1"), error = function(e) NULL) + model <- insight::download_model("stanreg_lm_1") skip_if(is.null(model)) perf <- model_performance(model) @@ -12,7 +12,7 @@ test_that("model_performance.stanreg", { expect_equal(perf$R2_adjusted, 0.7162912, tolerance = 1e-3) expect_equal(perf$ELPD, -83.49838, tolerance = 1e-3) - model <- tryCatch(insight::download_model("stanreg_lm_2"), error = function(e) NULL) + model <- insight::download_model("stanreg_lm_2") skip_if(is.null(model)) perf <- model_performance(model) @@ -20,7 +20,7 @@ test_that("model_performance.stanreg", { expect_equal(perf$R2_adjusted, 0.7979026, tolerance = 1e-3) expect_equal(perf$ELPD, -78.38735, tolerance = 1e-3) - model <- tryCatch(insight::download_model("stanreg_lmerMod_1"), error = function(e) NULL) + model <- insight::download_model("stanreg_lmerMod_1") skip_if(is.null(model)) perf <- model_performance(model) @@ -34,10 +34,10 @@ test_that("model_performance.brmsfit", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") set.seed(333) - model <- tryCatch(insight::download_model("brms_1"), error = function(e) NULL) + model <- insight::download_model("brms_1") skip_if(is.null(model)) expect_message({ perf <- model_performance(model) @@ -50,7 +50,7 @@ test_that("model_performance.brmsfit", { "RMSE", "Sigma" )) - model <- tryCatch(insight::download_model("brms_mixed_4"), error = function(e) NULL) + model <- insight::download_model("brms_mixed_4") skip_if(is.null(model)) expect_message({ perf <- model_performance(model) @@ -63,7 +63,7 @@ test_that("model_performance.brmsfit", { "R2_adjusted", "R2_adjusted_marginal", "ICC", "RMSE", "Sigma" )) - model <- tryCatch(insight::download_model("brms_ordinal_1"), error = function(e) NULL) + model <- insight::download_model("brms_ordinal_1") skip_if(is.null(model)) perf <- suppressWarnings(model_performance(model)) expect_equal(perf$R2, 0.8760015, tolerance = 1e-3) diff --git a/tests/testthat/test-model_performance.merMod.R b/tests/testthat/test-model_performance.merMod.R index 0c70da3c3..82360de46 100644 --- a/tests/testthat/test-model_performance.merMod.R +++ b/tests/testthat/test-model_performance.merMod.R @@ -2,7 +2,7 @@ test_that("model_performance.merMod", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") model <- insight::download_model("lmerMod_1") expect_equal(model_performance(model, estimator = "ML")$AIC, AIC(logLik(model, REML = FALSE)), tolerance = 0.01) diff --git a/tests/testthat/test-r2_ferrari.R b/tests/testthat/test-r2_ferrari.R new file mode 100644 index 000000000..35aab72c1 --- /dev/null +++ b/tests/testthat/test-r2_ferrari.R @@ -0,0 +1,37 @@ +test_that("r2_ferarri", { + skip_if_not_installed("betareg") + data("GasolineYield", package = "betareg") + model <- betareg::betareg(yield ~ batch + temp, data = GasolineYield) + out <- r2_ferrari(model) + expect_equal(out$R2, summary(model)$pseudo.r.squared, tolerance = 1e-3, ignore_attr = TRUE) +}) + + +test_that("r2_ferarri", { + skip_if_not_installed("betareg") + skip_if_not_installed("glmmTMB") + data("GasolineYield", package = "betareg") + model <- glmmTMB::glmmTMB( + yield ~ batch + temp, + data = GasolineYield, + family = glmmTMB::beta_family() + ) + out <- r2_ferrari(model) + expect_equal(out$R2, c(`Ferrari's R2` = 0.96173), tolerance = 1e-3, ignore_attr = TRUE) +}) + + +test_that("r2_ferarri", { + skip_if_not_installed("betareg") + skip_if_not_installed("glmmTMB") + skip_if_not_installed("lme4") + data(sleepstudy, package = "lme4") + sleepstudy$y <- datawizard::normalize(sleepstudy$Reaction) + m <- glmmTMB::glmmTMB( + y ~ Days, + data = sleepstudy, + family = glmmTMB::ordbeta() + ) + out <- r2(m) + expect_equal(out$R2, c(`Ferrari's R2` = 0.2354701), tolerance = 1e-3, ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-r2_nakagawa.R b/tests/testthat/test-r2_nakagawa.R index 541109944..12ea23f00 100644 --- a/tests/testthat/test-r2_nakagawa.R +++ b/tests/testthat/test-r2_nakagawa.R @@ -1,3 +1,4 @@ +skip_on_os("mac") skip_if_not_installed("lme4") model <- lme4::lmer(Sepal.Length ~ Petal.Length + (1 | Species), data = iris) diff --git a/tests/testthat/test-rmse.R b/tests/testthat/test-rmse.R index 151aabb34..e19bd49b3 100644 --- a/tests/testthat/test-rmse.R +++ b/tests/testthat/test-rmse.R @@ -16,3 +16,28 @@ test_that("rmse", { ) expect_equal(cp$RMSE, c(47.4489, 47.39881, 47.38701, 47.41375, 47.39979, 47.38705), tolerance = 1e-3) }) + +test_that("rmse, ci", { + data(mtcars) + model <- lm(mpg ~ hp + gear, data = mtcars) + # analytical + out <- performance_rmse(model, ci = 0.95, ci_method = "analytical") + expect_equal(out$CI_low, 2.30486, tolerance = 1e-4) + expect_equal(out$CI_high, 3.79093, tolerance = 1e-4) + + # bootstrapped + set.seed(123) + out <- performance_rmse(model, ci = 0.95, ci_method = "boot") + expect_equal(out$CI_low, 1.9494, tolerance = 1e-3) + expect_equal(out$CI_high, 3.38406, tolerance = 1e-3) + + # bootstrapped, mixed models + skip_on_cran() + skip_if_not_installed("lme4") + data(sleepstudy, package = "lme4") + m <- lme4::lmer(Reaction ~ Days + (1 | Subject), data = sleepstudy) + set.seed(123) + out <- performance_rmse(m, ci = 0.95, iterations = 100) + expect_equal(out$CI_low, 26.26066, tolerance = 1e-3) + expect_equal(out$CI_high, 32.5642, tolerance = 1e-3) +})