diff --git a/DESCRIPTION b/DESCRIPTION index d72f3909f..cf4108aa7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.10.6.1 +Version: 0.10.6.2 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index bc7687113..e3eedf313 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# performance 0.10.7 + +## Changes to functions + +* `binned_residuals()` - like `check_model()` - gains a `show_dots` argument to + show or hide data points that lie inside error bounds. This is particularly + useful for models with many observations, where generating the plot would be + very slow. + # performance 0.10.6 ## General diff --git a/R/binned_residuals.R b/R/binned_residuals.R index 9df75e74e..eb45aee82 100644 --- a/R/binned_residuals.R +++ b/R/binned_residuals.R @@ -11,6 +11,11 @@ #' @param n_bins Numeric, the number of bins to divide the data. If #' `n_bins = NULL`, the square root of the number of observations is #' taken. +#' @param show_dots Logical, if `TRUE`, will show data points in the plot. Set +#' to `FALSE` for models with many observations, if generating the plot is too +#' time-consuming. By default, `show_dots = NULL`. In this case `binned_residuals()` +#' tries to guess whether performance will be poor due to a very large model +#' and thus automatically shows or hides dots. #' @param ... Currently not used. #' #' @return A data frame representing the data that is mapped in the accompanying @@ -57,7 +62,7 @@ #' } #' #' @export -binned_residuals <- function(model, term = NULL, n_bins = NULL, ...) { +binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL, ...) { fv <- stats::fitted(model) mf <- insight::get_data(model, verbose = FALSE) @@ -67,8 +72,13 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, ...) 
{ pred <- mf[[term]] } - y0 <- .recode_to_zero(insight::get_response(model, verbose = FALSE)) - y <- y0 - fv + # set default for show_dots, based on "model size" + if (is.null(show_dots)) { + n <- .safe(insight::n_obs(model)) + show_dots <- is.null(n) || n <= 1e5 + } + + y <- .recode_to_zero(insight::get_response(model, verbose = FALSE)) - fv if (is.null(n_bins)) n_bins <- round(sqrt(length(pred))) @@ -114,6 +124,7 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, ...) { attr(d, "resid_ok") <- resid_ok attr(d, "resp_var") <- insight::find_response(model) attr(d, "term") <- term + attr(d, "show_dots") <- show_dots d } diff --git a/R/check_outliers.R b/R/check_outliers.R index 9f147cb83..f7b304c5e 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -261,6 +261,10 @@ #' statistical models. Journal of Open Source Software, 6(60), 3139. #' \doi{10.21105/joss.03139} #' +#' - Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M., +#' and Makowski, D. (2023). Check your outliers! An introduction to identifying +#' statistical outliers in R with easystats. https://doi.org/10.31234/osf.io/bu6nt +#' #' - Rousseeuw, P. J., and Van Zomeren, B. C. (1990). Unmasking multivariate #' outliers and leverage points. Journal of the American Statistical #' association, 85(411), 633-639. diff --git a/man/binned_residuals.Rd b/man/binned_residuals.Rd index ff6fb5784..b8aee666e 100644 --- a/man/binned_residuals.Rd +++ b/man/binned_residuals.Rd @@ -4,7 +4,7 @@ \alias{binned_residuals} \title{Binned residuals for binomial logistic regression} \usage{ -binned_residuals(model, term = NULL, n_bins = NULL, ...) +binned_residuals(model, term = NULL, n_bins = NULL, show_dots = NULL, ...) 
} \arguments{ \item{model}{A \code{glm}-object with \emph{binomial}-family.} @@ -18,6 +18,12 @@ plotted.} \code{n_bins = NULL}, the square root of the number of observations is taken.} +\item{show_dots}{Logical, if \code{TRUE}, will show data points in the plot. Set +to \code{FALSE} for models with many observations, if generating the plot is too +time-consuming. By default, \code{show_dots = NULL}. In this case \code{binned_residuals()} +tries to guess whether performance will be poor due to a very large model +and thus automatically shows or hides dots.} + \item{...}{Currently not used.} } \value{ diff --git a/man/check_outliers.Rd b/man/check_outliers.Rd index c19d4ecb0..f22a51f6a 100644 --- a/man/check_outliers.Rd +++ b/man/check_outliers.Rd @@ -343,6 +343,9 @@ IEEE. (2021). performance: An R package for assessment, comparison and testing of statistical models. Journal of Open Source Software, 6(60), 3139. \doi{10.21105/joss.03139} +\item Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M., +and Makowski, D. (2023). Check your outliers! An introduction to identifying +statistical outliers in R with easystats. https://doi.org/10.31234/osf.io/bu6nt \item Rousseeuw, P. J., and Van Zomeren, B. C. (1990). Unmasking multivariate outliers and leverage points. Journal of the American Statistical association, 85(411), 633-639.