diff --git a/DESCRIPTION b/DESCRIPTION index 89c9a9d57..b1ce379bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.4.6 +Version: 0.12.4.7 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 9eb5ded26..1bddce2ce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,11 @@ * Deprecated arguments and alias-function-names have been removed. +* Argument names in `check_model()` that refer to plot-aesthetics (like + `dot_size`) are now harmonized across *easystats* packages, meaning that + these have been renamed. They now follow the pattern `aesthetic_type`, e.g. + `size_dot` (instead of `dot_size`). + ## Changes * Increased accuracy for `check_convergence()` for *glmmTMB* models. diff --git a/R/check_model.R b/R/check_model.R index baa224feb..11562a351 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -10,8 +10,8 @@ #' get hints about possible problems. #' #' @param x A model object. -#' @param dot_size,line_size Size of line and dot-geoms. -#' @param base_size,title_size,axis_title_size Base font size for axis and plot titles. +#' @param size_dot,size_line Size of line and dot-geoms. +#' @param base_size,size_title,size_axis_title Base font size for axis and plot titles. #' @param panel Logical, if `TRUE`, plots are arranged as panels; else, #' single plots for each diagnostic are returned. #' @param check Character vector, indicating which checks for should be performed @@ -23,7 +23,7 @@ #' `"linearity"`, and checks for non-constant variance, i.e. for #' heteroscedasticity, as well as the linear relationship. By default, all #' possible checks are performed and plotted. -#' @param alpha,dot_alpha The alpha level of the confidence bands and dot-geoms. +#' @param alpha,alpha_dot The alpha level of the confidence bands and dot-geoms. #' Scalar from 0 to 1. #' @param colors Character vector with color codes (hex-format). Must be of #' length 3. First color is usually used for reference lines, second color @@ -196,13 +196,13 @@ check_model.default <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, @@ -277,14 +277,14 @@ check_model.default <- function(x, } attr(assumptions_data, "panel") <- panel - attr(assumptions_data, "dot_size") <- dot_size - attr(assumptions_data, "line_size") <- line_size + attr(assumptions_data, "dot_size") <- size_dot + attr(assumptions_data, "line_size") <- size_line attr(assumptions_data, "base_size") <- base_size - attr(assumptions_data, "axis_title_size") <- axis_title_size - attr(assumptions_data, "title_size") <- title_size + attr(assumptions_data, "axis_title_size") <- size_axis_title + attr(assumptions_data, "title_size") <- size_title attr(assumptions_data, "check") <- check attr(assumptions_data, "alpha") <- alpha - attr(assumptions_data, "dot_alpha") <- dot_alpha + attr(assumptions_data, "dot_alpha") <- alpha_dot attr(assumptions_data, "show_dots") <- isTRUE(show_dots) attr(assumptions_data, "detrend") <- detrend attr(assumptions_data, "colors") <- colors @@ -328,28 +328,28 @@ check_model.stanreg <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, ...) { check_model(bayestestR::bayesian_as_frequentist(x), - dot_size = dot_size, - line_size = line_size, + size_dot = size_dot, + size_line = size_line, panel = panel, check = check, alpha = alpha, - dot_alpha = dot_alpha, + alpha_dot = alpha_dot, colors = colors, theme = theme, base_size = base_size, - axis_title_size = axis_title_size, + size_axis_title = size_axis_title, detrend = detrend, show_dots = show_dots, bandwidth = bandwidth, @@ -374,26 +374,26 @@ check_model.model_fit <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, ...) { check_model( x$fit, - dot_size = dot_size, - line_size = line_size, + size_dot = size_dot, + size_line = size_line, panel = panel, check = check, alpha = alpha, - axis_title_size = axis_title_size, - dot_alpha = dot_alpha, + size_axis_title = size_axis_title, + alpha_dot = alpha_dot, colors = colors, theme = theme, base_size = base_size, @@ -417,26 +417,26 @@ check_model.performance_simres <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, ...) { check_model( x$fittedModel, - dot_size = dot_size, - line_size = line_size, + size_dot = size_dot, + size_line = size_line, panel = panel, check = check, alpha = alpha, - dot_alpha = dot_alpha, - axis_title_size = axis_title_size, + alpha_dot = alpha_dot, + size_axis_title = size_axis_title, colors = colors, theme = theme, base_size = base_size, @@ -462,35 +462,35 @@ check_model.DHARMa <- check_model.performance_simres # multicollinearity -------------- if (any(c("all", "vif") %in% check)) { - dat$VIF <- .diag_vif(model, verbose = verbose) + dat$VIF <- .model_diagnostic_vif(model, verbose = verbose) } # Q-Q plot (normality/uniformity of residuals) -------------- if (any(c("all", "qq") %in% check)) { dat$QQ <- switch(residual_type, simulated = .safe(simulate_residuals(model, ...)), - .diag_qq(model, model_info = model_info, verbose = verbose) + .model_diagnostic_qq(model, model_info = model_info, verbose = verbose) ) } # Random Effects Q-Q plot (normality of BLUPs) -------------- if (any(c("all", "reqq") %in% check)) { - dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) + dat$REQQ <- .model_diagnostic_ranef_qq(model, level = 0.95, model_info = model_info, verbose = verbose) } # normal-curve plot (normality of residuals) -------------- if (any(c("all", "normality") %in% check)) { - dat$NORM <- .diag_norm(model, verbose = verbose) + dat$NORM <- .model_diagnostic_normality(model, verbose = verbose) } # non-constant variance (heteroskedasticity, liniearity) -------------- if (any(c("all", "ncv", "linearity") %in% check)) { - dat$NCV <- .diag_ncv(model, verbose = verbose) + dat$NCV <- .model_diagnostic_ncv(model, verbose = verbose) } # homogeneity of variance -------------- if (any(c("all", "homogeneity") %in% check)) { - dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) + dat$HOMOGENEITY <- .model_diagnostic_homogeneity(model, verbose = verbose) } # outliers -------------- @@ -501,7 +501,7 @@ check_model.DHARMa <- check_model.performance_simres } else { threshold <- attributes(dat$OUTLIERS)$threshold$cook } - dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) + dat$INFLUENTIAL <- .safe(.model_diagnostic_outlier(model, threshold = threshold)) } # posterior predictive checks -------------- @@ -523,25 +523,25 @@ check_model.DHARMa <- check_model.performance_simres # multicollinearity -------------- if (any(c("all", "vif") %in% check)) { - dat$VIF <- .diag_vif(model, verbose = verbose) + dat$VIF <- .model_diagnostic_vif(model, verbose = verbose) } # Q-Q plot (normality/uniformity of residuals) -------------- if (any(c("all", "qq") %in% check)) { dat$QQ <- switch(residual_type, simulated = .safe(simulate_residuals(model, ...)), - .diag_qq(model, model_info = model_info, verbose = verbose) + .model_diagnostic_qq(model, model_info = model_info, verbose = verbose) ) } # homogeneity of variance -------------- if (any(c("all", "homogeneity") %in% check)) { - dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) + dat$HOMOGENEITY <- .model_diagnostic_homogeneity(model, verbose = verbose) } # Random Effects Q-Q plot (normality of BLUPs) -------------- if (any(c("all", "reqq") %in% check)) { - dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) + dat$REQQ <- .model_diagnostic_ranef_qq(model, level = 0.95, model_info = model_info, verbose = verbose) } # outliers -------------- @@ -552,7 +552,7 @@ check_model.DHARMa <- check_model.performance_simres } else { threshold <- attributes(dat$OUTLIERS)$threshold$cook } - dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) + dat$INFLUENTIAL <- .safe(.model_diagnostic_outlier(model, threshold = threshold)) } # posterior predictive checks -------------- @@ -567,7 +567,7 @@ check_model.DHARMa <- check_model.performance_simres # misspecified dispersion and zero-inflation -------------- if (isTRUE(model_info$is_count) && any(c("all", "overdispersion") %in% check)) { - dat$OVERDISPERSION <- .diag_overdispersion(model) + dat$OVERDISPERSION <- .model_diagnostic_overdispersion(model) } dat <- insight::compact_list(dat) diff --git a/R/check_model_diagnostics.R b/R/check_model_diagnostics.R index 8897c685e..9595ed968 100644 --- a/R/check_model_diagnostics.R +++ b/R/check_model_diagnostics.R @@ -1,6 +1,6 @@ # prepare data for VIF plot ---------------------------------- -.diag_vif <- function(model, verbose = TRUE) { +.model_diagnostic_vif <- function(model, verbose = TRUE) { out <- check_collinearity(model, verbose = verbose) dat <- insight::compact_list(out) if (is.null(dat)) { @@ -35,7 +35,7 @@ # prepare data for QQ plot ---------------------------------- -.diag_qq <- function(model, model_info = NULL, verbose = TRUE) { +.model_diagnostic_qq <- function(model, model_info = NULL, verbose = TRUE) { if (inherits(model, c("lme", "lmerMod", "merMod", "gam"))) { res_ <- stats::residuals(model) } else if (inherits(model, "geeglm")) { @@ -98,7 +98,7 @@ # prepare data for random effects QQ plot ---------------------------------- -.diag_reqq <- function(model, level = 0.95, model_info = NULL, verbose = TRUE) { +.model_diagnostic_ranef_qq <- function(model, level = 0.95, model_info = NULL, verbose = TRUE) { # check if we have mixed model if (is.null(model_info) || !model_info$is_mixed) { return(NULL) @@ -161,7 +161,7 @@ # prepare data for normality of residuals plot ---------------------------------- -.diag_norm <- function(model, verbose = TRUE) { +.model_diagnostic_normality <- function(model, verbose = TRUE) { r <- try(as.numeric(stats::residuals(model)), silent = TRUE) if (inherits(r, "try-error")) { @@ -181,7 +181,7 @@ # prepare data for influential obs plot ---------------------------------- -.diag_influential_obs <- function(model, threshold = NULL) { +.model_diagnostic_outlier <- function(model, threshold = NULL) { s <- summary(model) if (inherits(model, "lm", which = TRUE) == 1) { @@ -220,7 +220,7 @@ # prepare data for non-constant variance plot ---------------------------------- -.diag_ncv <- function(model, verbose = TRUE) { +.model_diagnostic_ncv <- function(model, verbose = TRUE) { ncv <- tryCatch( data.frame( x = as.numeric(stats::fitted(model)), @@ -248,7 +248,7 @@ # prepare data for homogeneity of variance plot ---------------------------------- -.diag_homogeneity <- function(model, verbose = TRUE) { +.model_diagnostic_homogeneity <- function(model, verbose = TRUE) { faminfo <- insight::model_info(model) r <- tryCatch( if (inherits(model, "merMod")) { @@ -367,7 +367,7 @@ -.diag_overdispersion <- function(model, ...) { +.model_diagnostic_overdispersion <- function(model, ...) { faminfo <- insight::model_info(model) # data for poisson models diff --git a/R/check_normality.R b/R/check_normality.R index 7ea6970ec..ffd4ba824 100644 --- a/R/check_normality.R +++ b/R/check_normality.R @@ -231,7 +231,7 @@ check_normality.merMod <- function(x, effects = c("fixed", "random"), ...) { p.val <- c(p.val, .check_normality(re[[i]][[j]], x, "random effects")) } } - attr(p.val, "re_qq") <- .diag_reqq(x, level = 0.95, model_info = info) + attr(p.val, "re_qq") <- .model_diagnostic_ranef_qq(x, level = 0.95, model_info = info) attr(p.val, "type") <- "random effects" attr(p.val, "re_groups") <- re_groups } diff --git a/R/check_outliers.R b/R/check_outliers.R index 071bd39a3..7729f9931 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -588,7 +588,7 @@ check_outliers.default <- function(x, attr(outlier, "threshold") <- thresholds attr(outlier, "method") <- method attr(outlier, "text_size") <- 3 - attr(outlier, "influential_obs") <- .influential_obs(x, threshold = unlist(thresholds)) + attr(outlier, "influential_obs") <- .safe(.model_diagnostic_outlier(x, threshold = unlist(thresholds))) # nolint attr(outlier, "variables") <- "(Whole model)" attr(outlier, "raw_data") <- my_data attr(outlier, "outlier_var") <- outlier_var @@ -2036,14 +2036,6 @@ check_outliers.DHARMa <- check_outliers.performance_simres -# influential observations data -------- - -.influential_obs <- function(x, threshold = NULL) { - .safe(.diag_influential_obs(x, threshold = threshold)) -} - - - # Non-supported model classes --------------------------------------- #' @export diff --git a/R/check_overdispersion.R b/R/check_overdispersion.R index 0d877b0ad..fc8e25e32 100644 --- a/R/check_overdispersion.R +++ b/R/check_overdispersion.R @@ -101,7 +101,7 @@ plot.check_overdisp <- function(x, ...) { } } if (!is.null(model)) { - x <- .diag_overdispersion(model) + x <- .model_diagnostic_overdispersion(model) class(x) <- c("see_check_overdisp", "data.frame") attr(x, "colors") <- list(...)$colors attr(x, "line_size") <- list(...)$size_line diff --git a/man/check_model.Rd b/man/check_model.Rd index c07add5f2..5d06a7192 100644 --- a/man/check_model.Rd +++ b/man/check_model.Rd @@ -16,13 +16,13 @@ check_model(x, ...) type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, @@ -79,11 +79,11 @@ time-consuming. By default, \code{show_dots = NULL}. In this case \code{check_mo tries to guess whether performance will be poor due to a very large model and thus automatically shows or hides dots.} -\item{dot_size, line_size}{Size of line and dot-geoms.} +\item{size_dot, size_line}{Size of line and dot-geoms.} -\item{base_size, title_size, axis_title_size}{Base font size for axis and plot titles.} +\item{base_size, size_title, size_axis_title}{Base font size for axis and plot titles.} -\item{alpha, dot_alpha}{The alpha level of the confidence bands and dot-geoms. +\item{alpha, alpha_dot}{The alpha level of the confidence bands and dot-geoms. Scalar from 0 to 1.} \item{colors}{Character vector with color codes (hex-format). Must be of diff --git a/vignettes/check_model_practical.Rmd b/vignettes/check_model_practical.Rmd index 53ca7ab58..5b6673910 100644 --- a/vignettes/check_model_practical.Rmd +++ b/vignettes/check_model_practical.Rmd @@ -1,6 +1,6 @@ --- title: "How to arrive at the best model fit" -output: +output: rmarkdown::html_vignette: toc: true tags: [r, performance] @@ -8,7 +8,7 @@ vignette: > \usepackage[utf8]{inputenc} %\VignetteIndexEntry{How to arrive at the best model fit} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console --- @@ -82,7 +82,7 @@ In *performance*, we can conduct a comprehensive visual inspection of our model For now, we want to focus on the _posterior predictive checks_, _dispersion and zero-inflation_ as well as the Q-Q plot (_uniformity of residuals_). ```{r fig.height=12, fig.width=10} -check_model(model1, dot_size = 1.2) +check_model(model1, size_dot = 1.2) ``` Note that unlike `plot()`, which is a base R function to create diagnostic plots, `check_model()` relies on *simulated residuals* for the Q-Q plot, which is more accurate for non-Gaussian models. See [this vignette](https://easystats.github.io/performance/articles/simulate_residuals.html) and the documentation of `simulate_residuals()` for further details. @@ -108,7 +108,7 @@ model2 <- glmmTMB::glmmTMB( family = poisson, data = glmmTMB::Salamanders ) -check_model(model2, dot_size = 1.2) +check_model(model2, size_dot = 1.2) ``` Looking at the above plots, the zero-inflation seems to be addressed properly (see especially _posterior predictive checks_ and _uniformity of residuals_, the Q-Q plot). However, the overdispersion still could be present. We can check for these problems using `check_overdispersion()` and `check_zeroinflation()` again. @@ -132,7 +132,7 @@ model3 <- glmmTMB::glmmTMB( family = glmmTMB::nbinom1, data = glmmTMB::Salamanders ) -check_model(model3, dot_size = 1.2) +check_model(model3, size_dot = 1.2) ``` Now we see that the plot showing _misspecified dispersion and zero-inflation_ suggests that the overdispersion is better addressed than before. Let us check again: @@ -195,7 +195,7 @@ model4 <- glmmTMB::glmmTMB( family = glmmTMB::nbinom2, data = glmmTMB::Salamanders ) -check_model(model4, dot_size = 1.2) +check_model(model4, size_dot = 1.2) check_overdispersion(model4) diff --git a/vignettes/simulate_residuals.Rmd b/vignettes/simulate_residuals.Rmd index 89f826ba6..8842205a8 100644 --- a/vignettes/simulate_residuals.Rmd +++ b/vignettes/simulate_residuals.Rmd @@ -1,6 +1,6 @@ --- title: "Checking simulated residuals" -output: +output: rmarkdown::html_vignette: toc: true tags: [r, performance] @@ -8,7 +8,7 @@ vignette: > \usepackage[utf8]{inputenc} %\VignetteIndexEntry{Checking simulated residuals} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console --- @@ -93,10 +93,10 @@ check_outliers(simulated_residuals) The above three functions internally call `simulate_residuals()` for more complex models automatically, so you don't need to call `simulate_residuals()` yourself. Simulated residuals are usually more reliable than the standard residuals, especially for complex models. -Finally, you can even perform a visual check for the entire model, either by passing the model object directly, or the object returned from `simulate_residuals()`. +Finally, you can even perform a visual check for the entire model, either by passing the model object directly, or the object returned from `simulate_residuals()`. ```{r fig.height=12, fig.width=10} -check_model(simulated_residuals, dot_size = 1.5) +check_model(simulated_residuals, size_dot = 1.5) ``` The `check_model()` function is the main reason we don't want to prematurely extract the residuals in `simulate_residuals()`, because if we do then the simulated residual won't contain the model fit (`fittedModel` in the output below), so we won't be able to do all of the checks we would want to do using the model (e.g., posterior predictive checks).