diff --git a/DESCRIPTION b/DESCRIPTION
index 30285f897..9391ecf34 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.10.8.10
+Version: 0.10.8.11
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",
@@ -144,7 +144,7 @@ Suggests:
     withr (>= 3.0.0)
 Encoding: UTF-8
 Language: en-US
-RoxygenNote: 7.2.3.9000
+RoxygenNote: 7.3.1
 Roxygen: list(markdown = TRUE)
 Config/testthat/edition: 3
 Config/testthat/parallel: true
diff --git a/NEWS.md b/NEWS.md
index 5475776cc..0cc49b785 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -23,6 +23,9 @@
 * `performance_score()` should no longer fail for models where scoring rules
   can't be calculated. Instead, an informative message is returned.
 
+* `check_outliers()` now properly accepts the `percentage_central` argument when
+  using the `"mcd"` method.
+
 # performance 0.10.8
 
 ## Changes
diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R
index b87aa9962..d9bb337f9 100644
--- a/R/check_heterogeneity_bias.R
+++ b/R/check_heterogeneity_bias.R
@@ -31,9 +31,9 @@ check_heterogeneity_bias <- function(x, select = NULL, group = NULL) {
   if (insight::is_model(x)) {
     group <- insight::find_random(x, split_nested = TRUE, flatten = TRUE)
     if (is.null(group)) {
-      insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `group`.")
+      insight::format_error("Model is not a mixed model. Please provide a mixed model, or a data frame and arguments `select` and `group`.") # nolint
     }
-    data <- insight::get_data(x, source = "mf", verbose = FALSE)
+    my_data <- insight::get_data(x, source = "mf", verbose = FALSE)
     select <- insight::find_predictors(x, effects = "fixed", component = "conditional", flatten = TRUE)
   } else {
     if (inherits(select, "formula")) {
@@ -42,15 +42,15 @@ check_heterogeneity_bias <- function(x, select = NULL, group = NULL) {
     if (inherits(group, "formula")) {
       group <- all.vars(group)
     }
-    data <- x
+    my_data <- x
   }
 
-  unique_groups <- .n_unique(data[[group]])
+  unique_groups <- .n_unique(my_data[[group]])
   combinations <- expand.grid(select, group)
 
   result <- Map(function(predictor, id) {
     # demean predictor
-    d <- datawizard::demean(data, select = predictor, group = id, verbose = FALSE)
+    d <- datawizard::demean(my_data, select = predictor, group = id, verbose = FALSE)
 
     # get new names
     within_name <- paste0(predictor, "_within")
diff --git a/R/check_model.R b/R/check_model.R
index e261704a5..2a8672823 100644
--- a/R/check_model.R
+++ b/R/check_model.R
@@ -184,14 +184,12 @@ check_model.default <- function(x,
   minfo <- insight::model_info(x, verbose = FALSE)
 
   ca <- tryCatch(
-    {
-      if (minfo$is_bayesian) {
-        suppressWarnings(.check_assumptions_stan(x, ...))
-      } else if (minfo$is_linear) {
-        suppressWarnings(.check_assumptions_linear(x, minfo, verbose, ...))
-      } else {
-        suppressWarnings(.check_assumptions_glm(x, minfo, verbose, ...))
-      }
+    if (minfo$is_bayesian) {
+      suppressWarnings(.check_assumptions_stan(x, ...))
+    } else if (minfo$is_linear) {
+      suppressWarnings(.check_assumptions_linear(x, minfo, verbose, ...))
+    } else {
+      suppressWarnings(.check_assumptions_glm(x, minfo, verbose, ...))
     },
     error = function(e) {
       NULL
diff --git a/R/check_model_diagnostics.R b/R/check_model_diagnostics.R
index 6e398efa8..754616ee6 100644
--- a/R/check_model_diagnostics.R
+++ b/R/check_model_diagnostics.R
@@ -94,29 +94,25 @@
   insight::check_if_installed("lme4")
 
   tryCatch(
-    {
-      if (inherits(model, "glmmTMB")) {
-        var_attr <- 
"condVar" - re <- .collapse_cond(lme4::ranef(model, condVar = TRUE)) - } else { - var_attr <- "postVar" - re <- lme4::ranef(model, condVar = TRUE) - } + if (inherits(model, "glmmTMB")) { + var_attr <- "condVar" + re <- .collapse_cond(lme4::ranef(model, condVar = TRUE)) + } else { + var_attr <- "postVar" + re <- lme4::ranef(model, condVar = TRUE) }, error = function(e) { - return(NULL) + NULL } ) se <- tryCatch( - { - suppressWarnings(lapply(re, function(.x) { - pv <- attr(.x, var_attr, exact = TRUE) - cols <- seq_len(dim(pv)[1]) - unlist(lapply(cols, function(.y) sqrt(pv[.y, .y, ]))) - })) - }, + suppressWarnings(lapply(re, function(.x) { + pv <- attr(.x, var_attr, exact = TRUE) + cols <- seq_len(dim(pv)[1]) + unlist(lapply(cols, function(.y) sqrt(pv[.y, .y, ]))) + })), error = function(e) { NULL } @@ -186,15 +182,15 @@ n_params <- tryCatch(model$rank, error = function(e) insight::n_parameters(model)) infl <- stats::influence(model, do.coef = FALSE) - resid <- as.numeric(insight::get_residuals(model)) + model_resid <- as.numeric(insight::get_residuals(model)) - std_resid <- tryCatch(stats::rstandard(model, infl), error = function(e) resid) + std_resid <- tryCatch(stats::rstandard(model, infl), error = function(e) model_resid) plot_data <- data.frame( Hat = infl$hat, Cooks_Distance = stats::cooks.distance(model, infl), Fitted = insight::get_predicted(model, ci = NULL), - Residuals = resid, + Residuals = model_resid, Std_Residuals = std_resid, stringsAsFactors = FALSE ) @@ -213,12 +209,10 @@ .diag_ncv <- function(model, verbose = TRUE) { ncv <- tryCatch( - { - data.frame( - x = as.numeric(stats::fitted(model)), - y = as.numeric(stats::residuals(model)) - ) - }, + data.frame( + x = as.numeric(stats::fitted(model)), + y = as.numeric(stats::residuals(model)) + ), error = function(e) { NULL } @@ -244,24 +238,22 @@ .diag_homogeneity <- function(model, verbose = TRUE) { faminfo <- insight::model_info(model) r <- tryCatch( - { - if (inherits(model, "merMod")) { - stats::residuals(model, scaled = TRUE) - } else if (inherits(model, "gam")) { - stats::residuals(model, type = "scaled.pearson") - } else if (inherits(model, c("glmmTMB", "MixMod"))) { - sigma <- if (faminfo$is_mixed) { - sqrt(insight::get_variance_residual(model)) - } else { - .sigma_glmmTMB_nonmixed(model, faminfo) - } - stats::residuals(model) / sigma - } else if (inherits(model, "glm")) { - ## TODO: check if we can / should use deviance residuals (as for QQ plots) here as well? - stats::rstandard(model, type = "pearson") + if (inherits(model, "merMod")) { + stats::residuals(model, scaled = TRUE) + } else if (inherits(model, "gam")) { + stats::residuals(model, type = "scaled.pearson") + } else if (inherits(model, c("glmmTMB", "MixMod"))) { + residual_sigma <- if (faminfo$is_mixed) { + sqrt(insight::get_variance_residual(model)) } else { - stats::rstandard(model) + .sigma_glmmTMB_nonmixed(model, faminfo) } + stats::residuals(model) / residual_sigma + } else if (inherits(model, "glm")) { + ## TODO: check if we can / should use deviance residuals (as for QQ plots) here as well? + stats::rstandard(model, type = "pearson") + } else { + stats::rstandard(model) }, error = function(e) { NULL diff --git a/R/check_outliers.R b/R/check_outliers.R index 2a1d185d3..f3bf7f5a9 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -25,7 +25,8 @@ #' @param ID Optional, to report an ID column along with the row number. #' @param ... When `method = "ics"`, further arguments in `...` are passed #' down to [ICSOutlier::ics.outlier()]. 
When `method = "mahalanobis"`, -#' they are passed down to [stats::mahalanobis()]. +#' they are passed down to [stats::mahalanobis()]. `percentage_central` can +#' be specified when `method = "mcd"`. #' #' @return A logical vector of the detected outliers with a nice printing #' method: a check (message) on whether outliers were detected or not. The @@ -160,6 +161,9 @@ #' the data (by default, 66\%), before computing the Mahalanobis Distance. This #' is deemed to be a more robust method of identifying and removing outliers #' than regular Mahalanobis distance. +#' This method has a `percentage_central` argument that allows specifying +#' the breakdown point (0.75, the default, is recommended by Leys et al. 2018, +#' but a commonly used alternative is 0.50). #' #' - **Invariant Coordinate Selection (ICS)**: #' The outlier are detected using ICS, which by default uses an alpha threshold @@ -308,7 +312,7 @@ #' @examplesIf require("see") && require("bigutilsr") && require("loo") && require("MASS") && require("ICSOutlier") && require("ICS") && require("dbscan") #' \donttest{ #' # You can also run all the methods -#' check_outliers(data, method = "all") +#' check_outliers(data, method = "all", verbose = FALSE) #' #' # For statistical models --------------------------------------------- #' # select only mpg and disp (continuous) @@ -385,16 +389,16 @@ check_outliers.default <- function(x, ) # Get data - data <- insight::get_data(x, verbose = FALSE) + my_data <- insight::get_data(x, verbose = FALSE) # Remove non-numerics - data <- datawizard::data_select(data, select = is.numeric) + my_data <- datawizard::data_select(my_data, select = is.numeric) # Thresholds if (is.null(threshold)) { - thresholds <- .check_outliers_thresholds(data) + thresholds <- .check_outliers_thresholds(my_data) } else if (is.list(threshold)) { - thresholds <- .check_outliers_thresholds(data) + thresholds <- .check_outliers_thresholds(my_data) thresholds[names(threshold)] <- threshold[names(threshold)] } else { insight::format_error( @@ -411,15 +415,15 @@ check_outliers.default <- function(x, # Others if (all(method %in% c("cook", "pareto"))) { - df <- data.frame(Row = seq_len(nrow(as.data.frame(data)))) + my_df <- data.frame(Row = seq_len(nrow(as.data.frame(my_data)))) outlier_count <- list() outlier_var <- list() } else { - out <- check_outliers(data, method, threshold) + out <- check_outliers(my_data, method, threshold) outlier_var <- attributes(out)$outlier_var outlier_count <- attributes(out)$outlier_count - df <- attributes(out)$data - df <- df[!names(df) == "Outlier"] + my_df <- attributes(out)$data + my_df <- my_df[names(my_df) != "Outlier"] } # Cook @@ -429,7 +433,7 @@ check_outliers.default <- function(x, threshold = thresholds$cook )$data_cook - df <- datawizard::data_merge(list(df, data_cook), + my_df <- datawizard::data_merge(list(my_df, data_cook), join = "full", by = "Row" ) @@ -459,7 +463,7 @@ check_outliers.default <- function(x, ) } } else { - method <- method[!(method == "cook")] + method <- method[method != "cook"] } # Pareto @@ -469,7 +473,7 @@ check_outliers.default <- function(x, threshold = thresholds$pareto )$data_pareto - df <- datawizard::data_merge(list(df, data_pareto), + my_df <- datawizard::data_merge(list(my_df, data_pareto), join = "full", by = "Row" ) @@ -499,7 +503,7 @@ check_outliers.default <- function(x, ) } } else { - method <- method[!(method == "pareto")] + method <- method[method != "pareto"] } outlier_count$all <- datawizard::convert_na_to(outlier_count$all, @@ -531,21 +535,21 @@ 
check_outliers.default <- function(x, thresholds <- thresholds[names(thresholds) %in% method] # Composite outlier score - df$Outlier <- rowMeans(df[grepl("Outlier_", names(df), fixed = TRUE)]) - df <- df[c(names(df)[names(df) != "Outlier"], "Outlier")] + my_df$Outlier <- rowMeans(my_df[grepl("Outlier_", names(my_df), fixed = TRUE)]) + my_df <- my_df[c(names(my_df)[names(my_df) != "Outlier"], "Outlier")] # Out - outlier <- df$Outlier > 0.5 + outlier <- my_df$Outlier > 0.5 # Attributes class(outlier) <- c("check_outliers", "see_check_outliers", class(outlier)) - attr(outlier, "data") <- df + attr(outlier, "data") <- my_df attr(outlier, "threshold") <- thresholds attr(outlier, "method") <- method attr(outlier, "text_size") <- 3 attr(outlier, "influential_obs") <- .influential_obs(x) attr(outlier, "variables") <- "(Whole model)" - attr(outlier, "raw_data") <- data + attr(outlier, "raw_data") <- my_data attr(outlier, "outlier_var") <- outlier_var attr(outlier, "outlier_count") <- outlier_count @@ -798,7 +802,7 @@ check_outliers.data.frame <- function(x, ) # Remove non-numerics - data <- x + my_data <- x x <- x[, vapply(x, is.numeric, logical(1)), drop = FALSE] # Check args @@ -841,20 +845,20 @@ check_outliers.data.frame <- function(x, outlier_var <- out.meta$outlier_var # Combine outlier data - df <- out[vapply(out, is.data.frame, logical(1))] - if (length(df) > 1 && !is.null(ID)) { - df <- datawizard::data_merge(df, by = c("Row", ID)) - } else if (length(df) > 1) { - df <- datawizard::data_merge(df, by = "Row") + my_df <- out[vapply(out, is.data.frame, logical(1))] + if (length(my_df) > 1 && !is.null(ID)) { + my_df <- datawizard::data_merge(my_df, by = c("Row", ID)) + } else if (length(my_df) > 1) { + my_df <- datawizard::data_merge(my_df, by = "Row") } else { - df <- df[[1]] + my_df <- my_df[[1]] } # Composite outlier score - df$Outlier <- rowMeans(df[grepl("Outlier_", names(df), fixed = TRUE)]) + my_df$Outlier <- rowMeans(my_df[grepl("Outlier_", names(my_df), fixed = TRUE)]) # Out - outlier <- df$Outlier > 0.5 + outlier <- my_df$Outlier > 0.5 # Combine outlier frequency table if (length(outlier_count) > 1 && !is.null(ID)) { @@ -896,12 +900,12 @@ check_outliers.data.frame <- function(x, # Attributes class(outlier) <- c("check_outliers", "see_check_outliers", class(outlier)) - attr(outlier, "data") <- df + attr(outlier, "data") <- my_df attr(outlier, "threshold") <- thresholds attr(outlier, "method") <- method attr(outlier, "text_size") <- 3 attr(outlier, "variables") <- names(x) - attr(outlier, "raw_data") <- data + attr(outlier, "raw_data") <- my_data attr(outlier, "outlier_var") <- outlier_var attr(outlier, "outlier_count") <- outlier_count outlier @@ -914,7 +918,7 @@ check_outliers.data.frame <- function(x, outlier.list <- lapply(outlier.list, function(x) { x[x[[Outlier_method]] >= 0.5, ] }) - outlier.list <- outlier.list[lapply(outlier.list, nrow) > 0] + outlier.list <- outlier.list[vapply(outlier.list, nrow, numeric(1)) > 0] outlier.list <- lapply(outlier.list, datawizard::data_remove, Outlier_method, as_data_frame = TRUE @@ -1098,8 +1102,8 @@ check_outliers.data.frame <- function(x, out <- c(out, .check_outliers_mcd( x, threshold = thresholds$mcd, - percentage_central = 0.66, - ID.names = ID.names + ID.names = ID.names, + ... 
    ))
 
  count.table <- datawizard::data_filter(
@@ -1217,7 +1221,7 @@ check_outliers.grouped_df <- function(x,
   }
 
   # Initialize elements
-  data <- data.frame()
+  my_data <- data.frame()
   out <- NULL
   thresholds <- list()
   outlier_var <- list()
@@ -1226,24 +1230,24 @@ check_outliers.grouped_df <- function(x,
   # Loop through groups
   for (i in seq_along(grps)) {
     rows <- grps[[i]]
-    subset <- check_outliers(
+    outliers_subset <- check_outliers(
       as.data.frame(x[rows, ]),
       method = method,
       threshold = threshold,
       ID = ID,
       ...
     )
-    data <- rbind(data, as.data.frame(subset))
-    out <- c(out, subset)
-    thresholds[[paste0("group_", i)]] <- attributes(subset)$threshold
+    my_data <- rbind(my_data, as.data.frame(outliers_subset))
+    out <- c(out, outliers_subset)
+    thresholds[[paste0("group_", i)]] <- attributes(outliers_subset)$threshold
     outlier_var[[i]] <- lapply(
-      attributes(subset)$outlier_var, lapply, function(y) {
+      attributes(outliers_subset)$outlier_var, lapply, function(y) {
         y$Row <- rows[which(seq_along(rows) %in% y$Row)]
         y
       }
     )
     outlier_count[[i]] <- lapply(
-      attributes(subset)$outlier_count, function(y) {
+      attributes(outliers_subset)$outlier_count, function(y) {
         y$Row <- rows[which(seq_along(rows) %in% y$Row)]
         y
       }
@@ -1264,16 +1268,16 @@ check_outliers.grouped_df <- function(x,
     info$groups$.rows[[x]] <- as.data.frame(info$groups$.rows[[x]])
   })
 
-  data[names(info$groups)[1]] <- do.call(rbind, groups)
-  data <- datawizard::data_relocate(
-    data,
+  my_data[names(info$groups)[1]] <- do.call(rbind, groups)
+  my_data <- datawizard::data_relocate(
+    my_data,
     select = names(info$groups)[1],
     after = "Row"
   )
-  data$Row <- seq_len(nrow(data))
+  my_data$Row <- seq_len(nrow(my_data))
 
   class(out) <- c("check_outliers", "see_check_outliers", class(out))
-  attr(out, "data") <- data
+  attr(out, "data") <- my_data
   attr(out, "method") <- method
   attr(out, "threshold") <- thresholds[[1]]
   attr(out, "text_size") <- 3
@@ -1434,7 +1438,7 @@ check_outliers.metabin <- check_outliers.metagen
   bci <- 1 - 0.001
   cook <- stats::qf(0.5, ncol(x), nrow(x) - ncol(x))
   pareto <- 0.7
-  mahalanobis <- stats::qchisq(p = 1 - 0.001, df = ncol(x))
+  mahalanobis_value <- stats::qchisq(p = 1 - 0.001, df = ncol(x))
   mahalanobis_robust <- stats::qchisq(p = 1 - 0.001, df = ncol(x))
   mcd <- stats::qchisq(p = 1 - 0.001, df = ncol(x))
   ics <- 0.001
@@ -1451,7 +1455,7 @@ check_outliers.metabin <- check_outliers.metagen
     bci = bci,
     cook = cook,
     pareto = pareto,
-    mahalanobis = mahalanobis,
+    mahalanobis = mahalanobis_value,
     mahalanobis_robust = mahalanobis_robust,
     mcd = mcd,
     ics = ics,
@@ -1726,14 +1730,31 @@ check_outliers.metabin <- check_outliers.metagen
 
 .check_outliers_mcd <- function(x,
                                 threshold = stats::qchisq(p = 1 - 0.001, df = ncol(x)),
-                                percentage_central = 0.50,
-                                ID.names = NULL) {
+                                percentage_central = 0.75,
+                                ID.names = NULL,
+                                verbose = TRUE,
+                                ...) {
   out <- data.frame(Row = seq_len(nrow(x)))
 
   if (!is.null(ID.names)) {
     out <- cbind(out, ID.names)
   }
 
+  # check whether the N to p ratio is not too small, else MCD flags too many outliers
+  # See #672: This does seem to be a function of the N/p (N = sample size; p =
+  # number of parameters) ratio. When it is larger than 10, the % of outliers
+  # flagged is okay (in well-behaved data). This makes sense: the MCD looks at
+  # the cov matrix of subsamples of the data - with high-dimensional data, small
+  # sample sizes will give highly variable cov matrices, and so the "smallest"
+  # one will probably misrepresent the data.
+ + if ((nrow(x) / ncol(x)) <= 10 && isTRUE(verbose)) { + insight::format_warning( + "The sample size is too small in your data, relative to the number of variables, for MCD to be reliable.", + "You may try to increase the `percentage_central` argument (must be between 0 and 1), or choose another method." + ) + } + insight::check_if_installed("MASS") # Compute diff --git a/R/icc.R b/R/icc.R index 46c1a331a..47fbb9a45 100644 --- a/R/icc.R +++ b/R/icc.R @@ -532,14 +532,12 @@ print.icc_decomposed <- function(x, digits = 2, ...) { name_full = "ICC", verbose = TRUE) { vars <- tryCatch( - { - insight::get_variance(model, - name_fun = name_fun, - name_full = name_full, - tolerance = tolerance, - verbose = verbose - ) - }, + insight::get_variance(model, + name_fun = name_fun, + name_full = name_full, + tolerance = tolerance, + verbose = verbose + ), error = function(e) { if (inherits(e, c("simpleError", "error")) && verbose) { insight::print_color(e$message, "red") @@ -597,7 +595,7 @@ print.icc_decomposed <- function(x, digits = 2, ...) { # prepare arguments for "lme4::bootMer" .do_lme4_bootmer <- function(model, .boot_fun, iterations, dots) { insight::check_if_installed(c("lme4", "boot")) - args <- list( + my_args <- list( model, .boot_fun, nsim = iterations, @@ -608,25 +606,25 @@ print.icc_decomposed <- function(x, digits = 2, ...) { ) # add/overwrite dot-args if (!is.null(dots[["use.u"]])) { - args$use.u <- dots[["use.u"]] + my_args$use.u <- dots[["use.u"]] } if (!is.null(dots[["re.form"]])) { - args$re.form <- dots[["re.form"]] + my_args$re.form <- dots[["re.form"]] } if (!is.null(dots[["type"]])) { - args$type <- dots[["type"]] - if (args$type == "semiparametric") { - args$use.u <- TRUE + my_args$type <- dots[["type"]] + if (my_args$type == "semiparametric") { + my_args$use.u <- TRUE } } if (!is.null(dots[["parallel"]])) { - args$parallel <- dots[["parallel"]] + my_args$parallel <- dots[["parallel"]] } if (!is.null(dots[["ncpus"]])) { - args$ncpus <- dots[["ncpus"]] + my_args$ncpus <- dots[["ncpus"]] } # bootsrap - do.call(lme4::bootMer, args) + do.call(lme4::bootMer, args = my_args) } @@ -664,12 +662,10 @@ print.icc_decomposed <- function(x, digits = 2, ...) { } model_rank <- tryCatch( - { - if (!is.null(model$rank)) { - model$rank - df_int - } else { - insight::n_parameters(model) - df_int - } + if (!is.null(model$rank)) { + model$rank - df_int + } else { + insight::n_parameters(model) - df_int }, error = function(e) insight::n_parameters(model) - df_int ) diff --git a/R/r2.R b/R/r2.R index dacdef614..2789b5efe 100644 --- a/R/r2.R +++ b/R/r2.R @@ -896,12 +896,10 @@ r2.DirichletRegModel <- function(model, ...) { m <- sum(w * f / sum(w)) mss <- sum(w * (f - m)^2) } + } else if (is.null(w)) { + mss <- sum(f^2) } else { - if (is.null(w)) { - mss <- sum(f^2) - } else { - mss <- sum(w * f^2) - } + mss <- sum(w * f^2) } if (is.null(w)) { rss <- sum(r^2) diff --git a/R/r2_bayes.R b/R/r2_bayes.R index 24e8269ba..71a44ef43 100644 --- a/R/r2_bayes.R +++ b/R/r2_bayes.R @@ -201,28 +201,26 @@ r2_posterior.brmsfit <- function(model, verbose = TRUE, ...) 
{ }) names(br2) <- res } + } else if (mi$is_mixed) { + br2 <- list( + R2_Bayes = as.vector(rstantools::bayes_R2( + model, + re.form = NULL, + re_formula = NULL, + summary = FALSE + )), + R2_Bayes_marginal = as.vector(rstantools::bayes_R2( + model, + re.form = NA, + re_formula = NA, + summary = FALSE + )) + ) + names(br2$R2_Bayes) <- rep("Conditional R2", length(br2$R2_Bayes)) + names(br2$R2_Bayes_marginal) <- rep("Marginal R2", length(br2$R2_Bayes)) } else { - if (mi$is_mixed) { - br2 <- list( - R2_Bayes = as.vector(rstantools::bayes_R2( - model, - re.form = NULL, - re_formula = NULL, - summary = FALSE - )), - R2_Bayes_marginal = as.vector(rstantools::bayes_R2( - model, - re.form = NA, - re_formula = NA, - summary = FALSE - )) - ) - names(br2$R2_Bayes) <- rep("Conditional R2", length(br2$R2_Bayes)) - names(br2$R2_Bayes_marginal) <- rep("Marginal R2", length(br2$R2_Bayes)) - } else { - br2 <- list(R2_Bayes = as.vector(rstantools::bayes_R2(model, summary = FALSE))) - names(br2$R2_Bayes) <- rep("R2", length(br2$R2_Bayes)) - } + br2 <- list(R2_Bayes = as.vector(rstantools::bayes_R2(model, summary = FALSE))) + names(br2$R2_Bayes) <- rep("R2", length(br2$R2_Bayes)) } br2 @@ -400,7 +398,7 @@ as.data.frame.r2_bayes <- function(x, ...) { # remove sig and g cols params_theta <- params[, !grepl(pattern = "^sig2$|^g_|^g$", colnames(params))] - params_sigma <- sqrt(params[, grepl(pattern = "^sig2$", colnames(params))]) + params_sigma <- sqrt(params[, colnames(params) == "sig2"]) # Model Matrix mm <- insight::get_modelmatrix(model[1]) diff --git a/R/r2_coxsnell.R b/R/r2_coxsnell.R index d56803f5c..042e5461a 100644 --- a/R/r2_coxsnell.R +++ b/R/r2_coxsnell.R @@ -69,20 +69,20 @@ r2_coxsnell.glm <- function(model, verbose = TRUE, ...) { if (is.null(info)) { info <- suppressWarnings(insight::model_info(model, verbose = FALSE)) } + # Cox & Snell's R2 is not defined for binomial models that are not Bernoulli models if (info$is_binomial && !info$is_bernoulli && class(model)[1] == "glm") { if (verbose) { insight::format_alert("Can't calculate accurate R2 for binomial models that are not Bernoulli models.") } return(NULL) - } else { - # if no deviance, return NA - if (is.null(model$deviance)) { - return(NULL) - } - r2_coxsnell <- (1 - exp((model$deviance - model$null.deviance) / insight::n_obs(model, disaggregate = TRUE))) - names(r2_coxsnell) <- "Cox & Snell's R2" - r2_coxsnell } + # if no deviance, return NULL + if (is.null(model$deviance)) { + return(NULL) + } + r2_coxsnell <- (1 - exp((model$deviance - model$null.deviance) / insight::n_obs(model, disaggregate = TRUE))) + names(r2_coxsnell) <- "Cox & Snell's R2" + r2_coxsnell } #' @export @@ -95,22 +95,22 @@ r2_coxsnell.glmmTMB <- function(model, verbose = TRUE, ...) 
{ if (is.null(info)) { info <- suppressWarnings(insight::model_info(model, verbose = FALSE)) } + # Cox & Snell's R2 is not defined for binomial models that are not Bernoulli models if (info$is_binomial && !info$is_bernoulli) { if (verbose) { insight::format_alert("Can't calculate accurate R2 for binomial models that are not Bernoulli models.") } return(NULL) - } else { - dev <- stats::deviance(model) - # if no deviance, return NA - if (is.null(dev)) { - return(NULL) - } - null_dev <- stats::deviance(insight::null_model(model)) - r2_coxsnell <- (1 - exp((dev - null_dev) / insight::n_obs(model, disaggregate = TRUE))) - names(r2_coxsnell) <- "Cox & Snell's R2" - r2_coxsnell } + dev <- stats::deviance(model) + # if no deviance, return NULL + if (is.null(dev)) { + return(NULL) + } + null_dev <- stats::deviance(insight::null_model(model)) + r2_coxsnell <- (1 - exp((dev - null_dev) / insight::n_obs(model, disaggregate = TRUE))) + names(r2_coxsnell) <- "Cox & Snell's R2" + r2_coxsnell } diff --git a/R/r2_loo.R b/R/r2_loo.R index 040d9b572..dbcaf38b3 100644 --- a/R/r2_loo.R +++ b/R/r2_loo.R @@ -50,10 +50,10 @@ r2_loo <- function(model, robust = TRUE, ci = 0.95, verbose = TRUE, ...) { loo_r2 <- structure( class = "r2_loo", lapply(loo_r2, ifelse(robust, stats::median, mean)), - "SE" = lapply(loo_r2, ifelse(robust, stats::mad, stats::sd)), + SE = lapply(loo_r2, ifelse(robust, stats::mad, stats::sd)), # "Estimates" = lapply(r2_bayesian, bayestestR::point_estimate, centrality = "all", dispersion = TRUE), - "CI" = lapply(loo_r2, bayestestR::hdi, ci = ci), - "robust" = robust + CI = lapply(loo_r2, bayestestR::hdi, ci = ci), + robust = robust ) return(loo_r2) } @@ -84,13 +84,13 @@ r2_loo_posterior.brmsfit <- function(model, verbose = TRUE, ...) { res <- insight::find_response(model) if (mi[[1]]$is_mixed) { br2_mv <- list( - "R2_loo" = rstantools::loo_R2( + R2_loo = rstantools::loo_R2( model, re.form = NULL, re_formula = NULL, summary = FALSE ), - "R2_loo_marginal" = rstantools::loo_R2( + R2_loo_marginal = rstantools::loo_R2( model, re.form = NA, re_formula = NA, @@ -99,40 +99,38 @@ r2_loo_posterior.brmsfit <- function(model, verbose = TRUE, ...) 
{ ) br2 <- lapply(seq_along(res), function(x) { list( - "R2_loo" = unname(as.vector(br2_mv$R2_loo[, x])), - "R2_loo_marginal" = unname(as.vector(br2_mv$R2_loo_marginal[, x])) + R2_loo = unname(as.vector(br2_mv$R2_loo[, x])), + R2_loo_marginal = unname(as.vector(br2_mv$R2_loo_marginal[, x])) ) }) names(br2) <- res } else { - br2_mv <- list("R2_loo" = rstantools::loo_R2(model, summary = FALSE)) + br2_mv <- list(R2_loo = rstantools::loo_R2(model, summary = FALSE)) br2 <- lapply(seq_along(res), function(x) { - list("R2_loo" = unname(as.vector(br2_mv$R2_loo[, x]))) + list(R2_loo = unname(as.vector(br2_mv$R2_loo[, x]))) }) names(br2) <- res } + } else if (mi$is_mixed) { + br2 <- list( + R2_loo = as.vector(rstantools::loo_R2( + model, + re.form = NULL, + re_formula = NULL, + summary = FALSE + )), + R2_loo_marginal = as.vector(rstantools::loo_R2( + model, + re.form = NA, + re_formula = NA, + summary = FALSE + )) + ) + names(br2$R2_loo) <- rep("Conditional R2_adjusted", length(br2$R2_loo)) + names(br2$R2_loo_marginal) <- rep("Marginal R2_adjusted", length(br2$R2_loo)) } else { - if (mi$is_mixed) { - br2 <- list( - "R2_loo" = as.vector(rstantools::loo_R2( - model, - re.form = NULL, - re_formula = NULL, - summary = FALSE - )), - "R2_loo_marginal" = as.vector(rstantools::loo_R2( - model, - re.form = NA, - re_formula = NA, - summary = FALSE - )) - ) - names(br2$R2_loo) <- rep("Conditional R2_adjusted", length(br2$R2_loo)) - names(br2$R2_loo_marginal) <- rep("Marginal R2_adjusted", length(br2$R2_loo)) - } else { - br2 <- list("R2_loo" = as.vector(rstantools::loo_R2(model, summary = FALSE))) - names(br2$R2_loo) <- rep("R2_adjusted", length(br2$R2_loo)) - } + br2 <- list(R2_loo = as.vector(rstantools::loo_R2(model, summary = FALSE))) + names(br2$R2_loo) <- rep("R2_adjusted", length(br2$R2_loo)) } br2 diff --git a/R/test_bf.R b/R/test_bf.R index 60cb3a129..38b0e421d 100644 --- a/R/test_bf.R +++ b/R/test_bf.R @@ -9,21 +9,21 @@ test_bf <- function(...) { #' @export test_bf.default <- function(..., reference = 1, text_length = NULL) { # Attribute class to list and get names from the global environment - objects <- insight::ellipsis_info(..., only_models = TRUE) - names(objects) <- match.call(expand.dots = FALSE)$`...` + my_objects <- insight::ellipsis_info(..., only_models = TRUE) + names(my_objects) <- match.call(expand.dots = FALSE)[["..."]] # validation checks (will throw error if non-valid objects) - .test_performance_checks(objects, multiple = FALSE) + .test_performance_checks(objects = my_objects, multiple = FALSE) - if (length(objects) == 1 && isTRUE(insight::is_model(objects))) { + if (length(my_objects) == 1 && isTRUE(insight::is_model(my_objects))) { insight::format_error( - "`test_bf()` is designed to compare multiple models together. For a single model, you might want to run `bayestestR::bf_parameters()` instead." + "`test_bf()` is designed to compare multiple models together. For a single model, you might want to run `bayestestR::bf_parameters()` instead." 
# nolint ) } # If a suitable class is found, run the more specific method on it - if (inherits(objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { - test_bf(objects, reference = reference, text_length = text_length) + if (inherits(my_objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { + test_bf(my_objects, reference = reference, text_length = text_length) } else { insight::format_error("The models cannot be compared for some reason :/") } @@ -87,9 +87,9 @@ test_bf.ListModels <- function(objects, reference = 1, text_length = NULL, ...) if (all(bayesian_models)) { "yes" - } else if (!all(bayesian_models)) { - "no" - } else { + } else if (any(bayesian_models)) { "mixed" + } else { + "no" } } diff --git a/R/test_likelihoodratio.R b/R/test_likelihoodratio.R index 9784b13a9..529516487 100644 --- a/R/test_likelihoodratio.R +++ b/R/test_likelihoodratio.R @@ -22,29 +22,29 @@ test_lrt <- test_likelihoodratio #' @export test_likelihoodratio.default <- function(..., estimator = "OLS", verbose = TRUE) { # Attribute class to list - objects <- insight::ellipsis_info(..., only_models = TRUE) + my_objects <- insight::ellipsis_info(..., only_models = TRUE) # validation checks (will throw error if non-valid objects) - objects <- .test_performance_checks(objects, verbose = verbose) + my_objects <- .test_performance_checks(my_objects, verbose = verbose) # different default when mixed model or glm is included if (missing(estimator)) { - mixed_models <- sapply(objects, insight::is_mixed_model) - if (all(mixed_models) && all(sapply(objects, .is_lmer_reml)) && isTRUE(attributes(objects)$same_fixef)) { + mixed_models <- sapply(my_objects, insight::is_mixed_model) + if (all(mixed_models) && all(sapply(my_objects, .is_lmer_reml)) && isTRUE(attributes(my_objects)$same_fixef)) { estimator <- "REML" - } else if (any(mixed_models) || !all(attributes(objects)$is_linear)) { + } else if (any(mixed_models) || !all(attributes(my_objects)$is_linear)) { estimator <- "ML" } } # ensure proper object names - objects <- .check_objectnames(objects, sapply(match.call(expand.dots = FALSE)$`...`, as.character)) + my_objects <- .check_objectnames(my_objects, sapply(match.call(expand.dots = FALSE)[["..."]], as.character)) # If a suitable class is found, run the more specific method on it - if (inherits(objects, "ListNestedRegressions")) { - test_likelihoodratio(objects, estimator = estimator) - } else if (inherits(objects, "ListLavaan")) { - test_likelihoodratio_ListLavaan(..., objects = objects) # Because lavaanLRT requires the ellipsis + if (inherits(my_objects, "ListNestedRegressions")) { + test_likelihoodratio(my_objects, estimator = estimator) + } else if (inherits(my_objects, "ListLavaan")) { + test_likelihoodratio_ListLavaan(..., objects = my_objects) # Because lavaanLRT requires the ellipsis } else { insight::format_error( "The models are not nested, which is a prerequisite for `test_likelihoodratio()`.", @@ -106,7 +106,7 @@ test_likelihoodratio.ListNestedRegressions <- function(objects, estimator = "ML" same_fixef <- attributes(objects)$same_fixef # sort by df - if (!all(sort(dfs) == dfs) && !all(sort(dfs) == rev(dfs))) { + if (is.unsorted(dfs) && is.unsorted(rev(dfs))) { objects <- objects[order(dfs)] dfs <- sort(dfs, na.last = TRUE) } diff --git a/R/test_performance.R b/R/test_performance.R index b643143f7..f818b02dd 100644 --- a/R/test_performance.R +++ b/R/test_performance.R @@ -236,17 +236,17 @@ test_performance <- function(..., reference = 1, verbose = TRUE) { #' 
@export test_performance.default <- function(..., reference = 1, include_formula = FALSE, verbose = TRUE) { # Attribute class to list and get names from the global environment - objects <- insight::ellipsis_info(..., only_models = TRUE) + my_objects <- insight::ellipsis_info(..., only_models = TRUE) # validation checks (will throw error if non-valid objects) - objects <- .test_performance_checks(objects, verbose = verbose) + my_objects <- .test_performance_checks(my_objects, verbose = verbose) # ensure proper object names - objects <- .check_objectnames(objects, sapply(match.call(expand.dots = FALSE)$`...`, as.character)) + my_objects <- .check_objectnames(my_objects, sapply(match.call(expand.dots = FALSE)[["..."]], as.character)) # If a suitable class is found, run the more specific method on it - if (inherits(objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { - test_performance(objects, reference = reference, include_formula = include_formula) + if (inherits(my_objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { + test_performance(my_objects, reference = reference, include_formula = include_formula) } else { insight::format_error("The models cannot be compared for some reason :/") } @@ -421,10 +421,10 @@ test_performance.ListNonNestedRegressions <- function(objects, .test_performance_init <- function(objects, include_formula = FALSE) { - names <- insight::model_name(objects, include_formula = include_formula) + model_names <- insight::model_name(objects, include_formula = include_formula) out <- data.frame( Name = names(objects), - Model = names, + Model = model_names, stringsAsFactors = FALSE ) row.names(out) <- NULL @@ -453,7 +453,7 @@ test_performance.ListNonNestedRegressions <- function(objects, if (same_response && !inherits(objects, "ListLavaan") && isFALSE(attributes(objects)$same_response)) { insight::format_error( - "The models' dependent variables don't have the same data, which is a prerequisite to compare them. Probably the proportion of missing data differs between models." + "The models' dependent variables don't have the same data, which is a prerequisite to compare them. Probably the proportion of missing data differs between models." 
# nolint ) } diff --git a/R/test_vuong.R b/R/test_vuong.R index 449f55ddb..f0cce6d16 100644 --- a/R/test_vuong.R +++ b/R/test_vuong.R @@ -8,17 +8,17 @@ test_vuong <- function(..., verbose = TRUE) { #' @export test_vuong.default <- function(..., reference = 1, verbose = TRUE) { # Attribute class to list and get names from the global environment - objects <- insight::ellipsis_info(..., only_models = TRUE) + my_objects <- insight::ellipsis_info(..., only_models = TRUE) # validation checks (will throw error if non-valid objects) - objects <- .test_performance_checks(objects, verbose = verbose) + my_objects <- .test_performance_checks(my_objects, verbose = verbose) # ensure proper object names - objects <- .check_objectnames(objects, sapply(match.call(expand.dots = FALSE)$`...`, as.character)) + my_objects <- .check_objectnames(my_objects, sapply(match.call(expand.dots = FALSE)[["..."]], as.character)) # If a suitable class is found, run the more specific method on it - if (inherits(objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { - test_vuong(objects, reference = reference) + if (inherits(my_objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { + test_vuong(my_objects, reference = reference) } else { insight::format_error("The models cannot be compared for some reason :/") } diff --git a/R/test_wald.R b/R/test_wald.R index 0e043502a..ddfe21829 100644 --- a/R/test_wald.R +++ b/R/test_wald.R @@ -8,17 +8,17 @@ test_wald <- function(..., verbose = TRUE) { #' @export test_wald.default <- function(..., verbose = TRUE) { # Attribute class to list and get names from the global environment - objects <- insight::ellipsis_info(..., only_models = TRUE) + my_objects <- insight::ellipsis_info(..., only_models = TRUE) # validation checks (will throw error if non-valid objects) - objects <- .test_performance_checks(objects, verbose = verbose) + my_objects <- .test_performance_checks(my_objects, verbose = verbose) # ensure proper object names - objects <- .check_objectnames(objects, sapply(match.call(expand.dots = FALSE)$`...`, as.character)) + my_objects <- .check_objectnames(my_objects, sapply(match.call(expand.dots = FALSE)[["..."]], as.character)) # If a suitable class is found, run the more specific method on it - if (inherits(objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { - test_wald(objects) + if (inherits(my_objects, c("ListNestedRegressions", "ListNonNestedRegressions", "ListLavaan"))) { + test_wald(my_objects) } else { insight::format_error("The models cannot be compared for some reason :/") } @@ -37,10 +37,10 @@ test_wald.ListNestedRegressions <- function(objects, verbose = TRUE, ...) { ) } return(test_likelihoodratio(objects)) - } else { - out <- .test_wald(objects, test = "F") } + out <- .test_wald(objects, test = "F") + attr(out, "is_nested") <- TRUE class(out) <- c("test_performance", class(out)) out @@ -60,7 +60,7 @@ test_wald.ListNonNestedRegressions <- function(objects, verbose = TRUE, ...) { dfs <- sapply(objects, insight::get_df, type = "residual") # sort by df - if (!all(sort(dfs) == dfs) && !all(sort(dfs) == rev(dfs))) { + if (is.unsorted(dfs) && is.unsorted(rev(dfs))) { objects <- objects[order(dfs)] dfs <- sort(dfs, na.last = TRUE) } @@ -78,18 +78,18 @@ test_wald.ListNonNestedRegressions <- function(objects, verbose = TRUE, ...) 
{ # Find reference-model related stuff refmodel <- order(dfs)[1] - scale <- dev[refmodel] / dfs[refmodel] + my_scale <- dev[refmodel] / dfs[refmodel] # test = "F" if (test == "F") { - f_value <- (dev_diff / dfs_diff) / scale + f_value <- (dev_diff / dfs_diff) / my_scale f_value[!is.na(f_value) & f_value < 0] <- NA # rather than p = 0 - out$`F` <- f_value + out[["F"]] <- f_value p <- stats::pf(f_value, abs(dfs_diff), dfs[refmodel], lower.tail = FALSE) # test = "LRT" } else { - chi2 <- dev_diff / scale * sign(dfs_diff) + chi2 <- dev_diff / my_scale * sign(dfs_diff) chi2[!is.na(chi2) & chi2 < 0] <- NA # rather than p = 0 out$Chi2 <- chi2 p <- stats::pchisq(chi2, abs(dfs_diff), lower.tail = FALSE) diff --git a/man/check_outliers.Rd b/man/check_outliers.Rd index 74c992b6f..8dc0326bb 100644 --- a/man/check_outliers.Rd +++ b/man/check_outliers.Rd @@ -26,7 +26,8 @@ check_outliers(x, ...) \item{...}{When \code{method = "ics"}, further arguments in \code{...} are passed down to \code{\link[ICSOutlier:ics.outlier]{ICSOutlier::ics.outlier()}}. When \code{method = "mahalanobis"}, -they are passed down to \code{\link[stats:mahalanobis]{stats::mahalanobis()}}.} +they are passed down to \code{\link[stats:mahalanobis]{stats::mahalanobis()}}. \code{percentage_central} can +be specified when \code{method = "mcd"}.} \item{method}{The outlier detection method(s). Can be \code{"all"} or some of \code{"cook"}, \code{"pareto"}, \code{"zscore"}, \code{"zscore_robust"}, \code{"iqr"}, \code{"ci"}, \code{"eti"}, @@ -189,6 +190,9 @@ calculates the mean and covariance matrix based on the most central subset of the data (by default, 66\\%), before computing the Mahalanobis Distance. This is deemed to be a more robust method of identifying and removing outliers than regular Mahalanobis distance. +This method has a \code{percentage_central} argument that allows specifying +the breakdown point (0.75, the default, is recommended by Leys et al. 2018, +but a commonly used alternative is 0.50). 
\item \strong{Invariant Coordinate Selection (ICS)}: The outlier are detected using ICS, which by default uses an alpha threshold of 0.025 (corresponding to the 2.5\\% most extreme observations) as a cut-off @@ -295,7 +299,7 @@ check_outliers(group_iris) \dontshow{if (require("see") && require("bigutilsr") && require("loo") && require("MASS") && require("ICSOutlier") && require("ICS") && require("dbscan")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # You can also run all the methods -check_outliers(data, method = "all") +check_outliers(data, method = "all", verbose = FALSE) # For statistical models --------------------------------------------- # select only mpg and disp (continuous) diff --git a/man/performance-package.Rd b/man/performance-package.Rd index abbdcf21f..b3f1d5a3f 100644 --- a/man/performance-package.Rd +++ b/man/performance-package.Rd @@ -3,7 +3,6 @@ \docType{package} \name{performance-package} \alias{performance-package} -\alias{_PACKAGE} \title{performance: An R Package for Assessment, Comparison and Testing of Statistical Models} \description{ diff --git a/tests/testthat/test-check_outliers.R b/tests/testthat/test-check_outliers.R index e464028d0..259f3eac9 100644 --- a/tests/testthat/test-check_outliers.R +++ b/tests/testthat/test-check_outliers.R @@ -87,9 +87,18 @@ test_that("mcd which", { # (not clear why method mcd needs a seed) set.seed(42) expect_identical( - tail(which(check_outliers(mtcars[1:4], method = "mcd", threshold = 45))), + tail(which(check_outliers(mtcars[1:4], method = "mcd", threshold = 45, verbose = FALSE))), 31L ) + expect_warning( + { + out <- check_outliers(mtcars, method = "mcd") + }, + regex = "The sample size is too small" + ) + expect_identical(sum(out), 8L) + out <- check_outliers(mtcars, method = "mcd", percentage_central = 0.5, verbose = FALSE) + expect_identical(sum(out), 15L) }) ## FIXME: Fails on CRAN/windows @@ -197,11 +206,12 @@ test_that("all methods which", { "mahalanobis", "mahalanobis_robust", "mcd", "optics", "lof" ), threshold = list( - "zscore" = 2.2, "zscore_robust" = 2.2, "iqr" = 1.2, - "ci" = 0.95, "eti" = 0.95, "hdi" = 0.90, "bci" = 0.95, - "mahalanobis" = 20, "mahalanobis_robust" = 25, "mcd" = 25, - "optics" = 14, "lof" = 0.005 - ) + zscore = 2.2, zscore_robust = 2.2, iqr = 1.2, + ci = 0.95, eti = 0.95, hdi = 0.90, bci = 0.95, + mahalanobis = 20, mahalanobis_robust = 25, mcd = 25, + optics = 14, lof = 0.005 + ), + verbose = FALSE )), as.integer(c(9, 15, 16, 19, 20, 28, 29, 31)) ) @@ -219,12 +229,13 @@ test_that("multiple methods with ID", { "mahalanobis", "mahalanobis_robust", "mcd", "optics", "lof" ), threshold = list( - "zscore" = 2.2, "zscore_robust" = 2.2, "iqr" = 1.2, - "ci" = 0.95, "eti" = 0.95, "hdi" = 0.90, "bci" = 0.95, - "mahalanobis" = 20, "mahalanobis_robust" = 25, "mcd" = 25, - "optics" = 14, "lof" = 0.005 + zscore = 2.2, zscore_robust = 2.2, iqr = 1.2, + ci = 0.95, eti = 0.95, hdi = 0.90, bci = 0.95, + mahalanobis = 20, mahalanobis_robust = 25, mcd = 25, + optics = 14, lof = 0.005 ), - ID = "car" + ID = "car", + verbose = FALSE )) expect_identical( x$outlier_var$zscore$mpg$car, diff --git a/vignettes/check_outliers.Rmd b/vignettes/check_outliers.Rmd index d9f906d6c..0c567489a 100644 --- a/vignettes/check_outliers.Rmd +++ b/vignettes/check_outliers.Rmd @@ -132,7 +132,7 @@ One common approach for this is to compute multivariate distance metrics such as In *{performance}*'s `check_outliers()`, one can use this approach with `method = "mcd"`.^[Our default threshold for the MCD 
method is defined by `stats::qchisq(p = 1 - 0.001, df = ncol(x))`, which again is an approximation of the critical value for _p_ < .001 consistent with the thresholds of our other methods.] ```{r multivariate} -outliers <- check_outliers(data, method = "mcd") +outliers <- check_outliers(data, method = "mcd", verbose = FALSE) outliers ``` @@ -227,7 +227,7 @@ rempsyc::nice_scatter(data, "height", "weight") Using either the *z*-score or MCD methods, our model-consistent observation will be incorrectly flagged as an outlier or influential observation. ```{r} -outliers <- check_outliers(model, method = c("zscore_robust", "mcd")) +outliers <- check_outliers(model, method = c("zscore_robust", "mcd"), verbose = FALSE) which(outliers) ``` @@ -248,7 +248,7 @@ The *{performance}* package also offers an alternative, consensus-based approach In practice, this approach computes a composite outlier score, formed of the average of the binary (0 or 1) classification results of each method. It represents the probability that each observation is classified as an outlier by at least one method. The default decision rule classifies rows with composite outlier scores superior or equal to 0.5 as outlier observations (i.e., that were classified as outliers by at least half of the methods). In *{performance}*'s `check_outliers()`, one can use this approach by including all desired methods in the corresponding argument. ```{r multimethod, fig.cap = "Visual depiction of outliers using several different statistical outlier detection methods."} -outliers <- check_outliers(model, method = c("zscore_robust", "mcd", "cook")) +outliers <- check_outliers(model, method = c("zscore_robust", "mcd", "cook"), verbose = FALSE) which(outliers) ```
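
A minimal usage sketch of the user-facing change in this patch: `check_outliers()` now forwards `percentage_central` through `...` to the `"mcd"` method, and warns when the ratio of observations to variables is 10 or less. The calls and expected counts below mirror the updated tests above (the exact flagged rows depend on the MCD estimate from the MASS package and the random seed):

```r
library(performance)

# mtcars has 32 rows and 11 columns, so N/p <= 10: the "mcd" method now
# warns that results may be unreliable (silence it with `verbose = FALSE`)
outliers_default <- check_outliers(mtcars, method = "mcd")
sum(outliers_default) # 8 in the updated tests, with the new default of 0.75

# the breakdown point is now tunable; 0.50 (the old default) uses a
# smaller central subset and flags more observations here
outliers_50 <- check_outliers(mtcars,
  method = "mcd",
  percentage_central = 0.5,
  verbose = FALSE
)
sum(outliers_50) # 15 in the updated tests
```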
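The remaining hunks are lint-driven cleanups rather than behavior changes: variables such as `data`, `subset`, `scale`, and `args` are renamed (`my_data`, `outliers_subset`, `my_scale`, `my_args`) so they no longer mask base R functions, and redundant `{ ... }` wrappers are dropped from `tryCatch()` calls. A small sketch of why the latter is safe (`f()` is a placeholder):

```r
# tryCatch() evaluates its first argument lazily, so wrapping a single
# expression in braces adds nothing
f <- function() stop("boom")

# before: redundant braces around the expression
res1 <- tryCatch(
  {
    f()
  },
  error = function(e) NULL
)

# after: identical behavior, one level of nesting fewer
res2 <- tryCatch(
  f(),
  error = function(e) NULL
)

identical(res1, res2) # TRUE, both are NULL
```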