Skip to content

Commit

Permalink
Merge branch 'main' into strengejacke/issue697
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke authored Jun 5, 2024
2 parents 5fa72a2 + 7701624 commit dda4683
Show file tree
Hide file tree
Showing 68 changed files with 160 additions and 132 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: performance
Title: Assessment of Regression Models Performance
Version: 0.11.0.6
Version: 0.11.0.9
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down Expand Up @@ -124,7 +124,7 @@ Suggests:
nonnest2,
ordinal,
parallel,
parameters (>= 0.21.4),
parameters (>= 0.21.6),
patchwork,
pscl,
psych,
Expand Down Expand Up @@ -154,3 +154,4 @@ Config/Needs/website:
r-lib/pkgdown,
easystats/easystatstemplate
Config/rcmdcheck/ignore-inconsequential-notes: true
Remotes: easystats/insight, easystats/datawizard, easystats/parameters, easystats/bayestestR
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
* Aliases `posterior_predictive_check()` and `check_posterior_predictions()` for
`check_predictions()` are deprecated.

* Arguments named `group` or `group_by` will be deprecated in a future release.
Please use `by` instead. This affects `check_heterogeneity_bias()` in
*performance*.

## General

* Improved documentation and new vignettes added.
Expand All @@ -15,6 +19,9 @@
the usual style as for other models and no longer returns plots from
`bayesplot::pp_check()`.

* Updated the trained model that is used to prediction distributions in
`check_distribution()`.

## Bug fixes

* `check_model()` now falls back on normal Q-Q plots when a model is not supported
Expand Down
1 change: 0 additions & 1 deletion R/binned_residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ binned_residuals <- function(model,
iterations = 1000,
verbose = TRUE,
...) {
# match arguments
ci_type <- match.arg(ci_type)
residuals <- match.arg(residuals)

Expand Down
1 change: 0 additions & 1 deletion R/check_autocorrelation.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ check_autocorrelation <- function(x, ...) {
#' @rdname check_autocorrelation
#' @export
check_autocorrelation.default <- function(x, nsim = 1000, ...) {
# check for valid input
.is_model_valid(x)

.residuals <- stats::residuals(x)
Expand Down
2 changes: 1 addition & 1 deletion R/check_clusterstructure.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' number of square shaped blocks along the diagonal.
#'
#' @param x A data frame.
#' @param standardize Standardize the dataframe before clustering (default).
#' @param standardize Standardize the data frame before clustering (default).
#' @param distance Distance method used. Other methods than "euclidean"
#' (default) are exploratory in the context of clustering tendency. See
#' [stats::dist()] for list of available methods.
Expand Down
1 change: 0 additions & 1 deletion R/check_collinearity.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ multicollinearity <- check_collinearity
#' @rdname check_collinearity
#' @export
check_collinearity.default <- function(x, ci = 0.95, verbose = TRUE, ...) {
# check for valid input
.is_model_valid(x)
.check_collinearity(x, component = "conditional", ci = ci, verbose = verbose)
}
Expand Down
1 change: 0 additions & 1 deletion R/check_convergence.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ check_convergence <- function(x, tolerance = 0.001, ...) {

#' @export
check_convergence.default <- function(x, tolerance = 0.001, ...) {
# check for valid input
.is_model_valid(x)
message(sprintf("`check_convergence()` does not work for models of class '%s'.", class(x)[1]))
}
Expand Down
40 changes: 28 additions & 12 deletions R/check_distribution.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,11 @@ NULL
#' This function uses an internal random forest model to classify the
#' distribution from a model-family. Currently, following distributions are
#' trained (i.e. results of `check_distribution()` may be one of the
#' following): `"bernoulli"`, `"beta"`, `"beta-binomial"`,
#' `"binomial"`, `"chi"`, `"exponential"`, `"F"`,
#' `"gamma"`, `"lognormal"`, `"normal"`, `"negative
#' binomial"`, `"negative binomial (zero-inflated)"`, `"pareto"`,
#' `"poisson"`, `"poisson (zero-inflated)"`, `"uniform"` and
#' `"weibull"`.
#' following): `"bernoulli"`, `"beta"`, `"beta-binomial"`, `"binomial"`,
#' `"cauchy"`, `"chi"`, `"exponential"`, `"F"`, `"gamma"`, `"half-cauchy"`,
#' `"inverse-gamma"`, `"lognormal"`, `"normal"`, `"negative binomial"`,
#' `"negative binomial (zero-inflated)"`, `"pareto"`, `"poisson"`,
#' `"poisson (zero-inflated)"`, `"tweedie"`, `"uniform"` and `"weibull"`.
#' \cr \cr
#' Note the similarity between certain distributions according to shape, skewness,
#' etc. Thus, the predicted distribution may not be perfectly representing the
Expand Down Expand Up @@ -67,7 +66,6 @@ check_distribution <- function(model) {

#' @export
check_distribution.default <- function(model) {
# check for valid input
.is_model_valid(model)

insight::check_if_installed("randomForest")
Expand Down Expand Up @@ -193,23 +191,40 @@ check_distribution.numeric <- function(model) {
# validation check, remove missings
x <- x[!is.na(x)]

# this might fail, so we wrap in ".safe()"
map_est <- .safe(mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0")))
mode <- NULL

Check warning on line 194 in R/check_distribution.R

View workflow job for this annotation

GitHub Actions / lint / lint

file=R/check_distribution.R,line=194,col=3,[object_overwrite_linter] 'mode' is an exported object from package 'base'. Avoid re-using such symbols.
# find mode for integer, or MAP for distributions
if (all(.is_integer(x))) {
mode <- datawizard::distribution_mode(x)

Check warning on line 197 in R/check_distribution.R

View workflow job for this annotation

GitHub Actions / lint / lint

file=R/check_distribution.R,line=197,col=5,[object_overwrite_linter] 'mode' is an exported object from package 'base'. Avoid re-using such symbols.
} else {
# this might fail, so we wrap in ".safe()"
mode <- tryCatch(

Check warning on line 200 in R/check_distribution.R

View workflow job for this annotation

GitHub Actions / lint / lint

file=R/check_distribution.R,line=200,col=5,[object_overwrite_linter] 'mode' is an exported object from package 'base'. Avoid re-using such symbols.
as.numeric(bayestestR::map_estimate(x, bw = "nrd0")),
error = function(e) NULL
)
if (is.null(mode)) {
mode <- tryCatch(

Check warning on line 205 in R/check_distribution.R

View workflow job for this annotation

GitHub Actions / lint / lint

file=R/check_distribution.R,line=205,col=7,[object_overwrite_linter] 'mode' is an exported object from package 'base'. Avoid re-using such symbols.
as.numeric(bayestestR::map_estimate(x, bw = "kernel")),
error = function(e) NULL
)
}
}

if (is.null(map_est)) {
map_est <- mean(x) - datawizard::distribution_mode(x)
if (is.null(mode)) {
mean_mode_diff <- mean(x) - datawizard::distribution_mode(x)
msg <- "Could not accurately estimate the mode."
if (!is.null(type)) {
msg <- paste(msg, "Predicted distribution of the", type, "may be less accurate.")
}
insight::format_alert(msg)
} else {
mean_mode_diff <- .safe(mean(x) - mode)
}

data.frame(
SD = stats::sd(x),
MAD = stats::mad(x, constant = 1),
Mean_Median_Distance = mean(x) - stats::median(x),
Mean_Mode_Distance = map_est,
Mean_Mode_Distance = mean_mode_diff,
SD_MAD_Distance = stats::sd(x) - stats::mad(x, constant = 1),
Var_Mean_Distance = stats::var(x) - mean(x),
Range_SD = diff(range(x)) / stats::sd(x),
Expand All @@ -222,6 +237,7 @@ check_distribution.numeric <- function(model) {
Min = min(x),
Max = max(x),
Proportion_Positive = sum(x >= 0) / length(x),
Proportion_Zero = sum(x == 0) / length(x),
Integer = all(.is_integer(x))
)
}
Expand Down
2 changes: 1 addition & 1 deletion R/check_factorstructure.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
#' exclusion them from the analysis (note that you would need to re-compute the
#' KMO indices as they are dependent on the whole dataset).
#'
#' @param x A dataframe or a correlation matrix. If the latter is passed, `n`
#' @param x A data frame or a correlation matrix. If the latter is passed, `n`
#' must be provided.
#' @param n If a correlation matrix was passed, the number of observations must
#' be specified.
Expand Down
29 changes: 18 additions & 11 deletions R/check_heterogeneity_bias.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#' @param select Character vector (or formula) with names of variables to select
#' that should be checked. If `x` is a mixed model object, this argument
#' will be ignored.
#' @param group Character vector (or formula) with the name of the variable that
#' @param by Character vector (or formula) with the name of the variable that
#' indicates the group- or cluster-ID. If `x` is a model object, this
#' argument will be ignored.
#' @param group Deprecated. Use `by` instead.
#'
#' @seealso
#' For further details, read the vignette
Expand All @@ -25,32 +26,38 @@
#' @examples
#' data(iris)
#' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID
#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID")
#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
#' @export
check_heterogeneity_bias <- function(x, select = NULL, group = NULL) {
check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) {
## TODO: deprecate later
if (!is.null(group)) {
insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint
by <- group
}
if (insight::is_model(x)) {
group <- insight::find_random(x, split_nested = TRUE, flatten = TRUE)
if (is.null(group)) {
insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `group`.") # nolint
by <- insight::find_random(x, split_nested = TRUE, flatten = TRUE)
if (is.null(by)) {
insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `by`.") # nolint
}
my_data <- insight::get_data(x, source = "mf", verbose = FALSE)
select <- insight::find_predictors(x, effects = "fixed", component = "conditional", flatten = TRUE)
} else {
if (inherits(select, "formula")) {
select <- all.vars(select)
}
if (inherits(group, "formula")) {
group <- all.vars(group)
if (inherits(by, "formula")) {
by <- all.vars(by)
}
my_data <- x
}

unique_groups <- .n_unique(my_data[[group]])
combinations <- expand.grid(select, group)
unique_groups <- .n_unique(my_data[[by]])
combinations <- expand.grid(select, by)

result <- Map(function(predictor, id) {
# demean predictor
d <- datawizard::demean(my_data, select = predictor, group = id, verbose = FALSE)

d <- datawizard::demean(my_data, select = predictor, by = id, verbose = FALSE)

# get new names
within_name <- paste0(predictor, "_within")
Expand Down
1 change: 0 additions & 1 deletion R/check_heteroscedasticity.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ check_heteroskedasticity <- check_heteroscedasticity

#' @export
check_heteroscedasticity.default <- function(x, ...) {
# check for valid input
.is_model_valid(x)

# only for linear models
Expand Down
1 change: 0 additions & 1 deletion R/check_itemscale.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
#' )
#' @export
check_itemscale <- function(x, factor_index = NULL) {
# check for valid input
if (!inherits(x, c("parameters_pca", "data.frame"))) {
insight::format_error(
"`x` must be an object of class `parameters_pca`, as returned by `parameters::principal_components()`, or a data frame." # nolint
Expand Down
1 change: 0 additions & 1 deletion R/check_normality.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ check_normality <- function(x, ...) {

#' @export
check_normality.default <- function(x, ...) {
# check for valid input
.is_model_valid(x)

if (!insight::model_info(x)$is_linear) {
Expand Down
8 changes: 4 additions & 4 deletions R/check_outliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
#' default threshold to classify outliers is 1.959 (`threshold = list("zscore" = 1.959)`),
#' corresponding to the 2.5% (`qnorm(0.975)`) most extreme observations
#' (assuming the data is normally distributed). Importantly, the Z-score
#' method is univariate: it is computed column by column. If a dataframe is
#' method is univariate: it is computed column by column. If a data frame is
#' passed, the Z-score is calculated for each variable separately, and the
#' maximum (absolute) Z-score is kept for each observations. Thus, all
#' observations that are extreme on at least one variable might be detected
Expand Down Expand Up @@ -304,14 +304,14 @@
#' outliers_list <- check_outliers(data$mpg) # Find outliers
#' outliers_list # Show the row index of the outliers
#' as.numeric(outliers_list) # The object is a binary vector...
#' filtered_data <- data[!outliers_list, ] # And can be used to filter a dataframe
#' filtered_data <- data[!outliers_list, ] # And can be used to filter a data frame
#' nrow(filtered_data) # New size, 28 (4 outliers removed)
#'
#' # Find all observations beyond +/- 2 SD
#' check_outliers(data$mpg, method = "zscore", threshold = 2)
#'
#' # For dataframes ------------------------------------------------------
#' check_outliers(data) # It works the same way on dataframes
#' check_outliers(data) # It works the same way on data frames
#'
#' # You can also use multiple methods at once
#' outliers_list <- check_outliers(data, method = c(
Expand Down Expand Up @@ -948,7 +948,7 @@ check_outliers.data.frame <- function(x,
outlier_count <- lapply(outlier_count, function(x) {
num.df <- x[!names(x) %in% c("Row", ID)]
if (isTRUE(nrow(num.df) >= 1)) {
num.df <- datawizard::change_code(
num.df <- datawizard::recode_values(
num.df,
recode = list(`2` = "(Multivariate)")
)
Expand Down
2 changes: 0 additions & 2 deletions R/check_overdispersion.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ check_overdispersion <- function(x, ...) {

#' @export
check_overdispersion.default <- function(x, ...) {
# check for valid input
.is_model_valid(x)
insight::format_error(
paste0("`check_overdisperion()` not yet implemented for models of class `", class(x)[1], "`.")
Expand Down Expand Up @@ -289,7 +288,6 @@ check_overdispersion.glmmTMB <- check_overdispersion.merMod
#' @rdname check_overdispersion
#' @export
check_overdispersion.performance_simres <- function(x, alternative = c("two.sided", "less", "greater"), ...) {
# match arguments
alternative <- match.arg(alternative)

# check for special arguments - we may pass "object_name" from other methods
Expand Down
1 change: 0 additions & 1 deletion R/check_predictions.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ check_predictions.default <- function(object,
type = "density",
verbose = TRUE,
...) {
# check for valid input
.is_model_valid(object)

# retrieve model information
Expand Down
7 changes: 5 additions & 2 deletions R/check_singularity.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,11 @@
#'
#' # Fixing singularity issues using priors in glmmTMB
#' # Example taken from `vignette("priors", package = "glmmTMB")`
#' dat <- readRDS(system.file("vignette_data", "gophertortoise.rds",
#' package = "glmmTMB"))
#' dat <- readRDS(system.file(
#' "vignette_data",
#' "gophertortoise.rds",
#' package = "glmmTMB"
#' ))
#' model <- glmmTMB::glmmTMB(
#' shells ~ prev + offset(log(Area)) + factor(year) + (1 | Site),
#' family = poisson,
Expand Down
1 change: 0 additions & 1 deletion R/check_zeroinflation.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ check_zeroinflation.performance_simres <- function(x,
tolerance = 0.1,
alternative = c("two.sided", "less", "greater"),
...) {
# match arguments
alternative <- match.arg(alternative)

# compute test results
Expand Down
9 changes: 0 additions & 9 deletions R/display.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,3 @@ display.compare_performance <- display.performance_model

#' @export
display.check_itemscale <- display.performance_model




# Reexports models ------------------------

#' @importFrom insight display
#' @export
insight::display
4 changes: 2 additions & 2 deletions R/item_discrimination.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#' This function calculates the item discriminations (corrected item-total
#' correlations for each item of `x` with the remaining items) for each item
#' of a scale. The absolute value of the item discrimination indices should be
#' above 0.2. An index between 0.2 and 0.4 is considered as "fair", while a
#' satisfactory index ranges from 0.4 to 0.7. Items with low discrimination
#' above `0.2`. An index between `0.2` and `0.4` is considered as "fair", while a
#' satisfactory index ranges from `0.4` to `0.7`. Items with low discrimination
#' indices are often ambiguously worded and should be examined. Items with
#' negative indices should be examined to determine why a negative value was
#' obtained (e.g. reversed answer categories regarding positive and negative
Expand Down
4 changes: 2 additions & 2 deletions R/model_performance.bayesian.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ model_performance.stanreg <- function(model, metrics = "all", verbose = TRUE, ..
metrics <- c("LOOIC", "WAIC", "R2", "RMSE")
}

# check for valid input

metrics <- toupper(.check_bad_metrics(metrics, all_metrics, verbose))

algorithm <- insight::find_algorithm(model)
Expand Down Expand Up @@ -232,7 +232,7 @@ model_performance.BFBayesFactor <- function(model,
metrics <- all_metrics
}

# check for valid input

metrics <- toupper(.check_bad_metrics(metrics, all_metrics, verbose))

# check for valid BFBayesFactor object
Expand Down
2 changes: 1 addition & 1 deletion R/model_performance.bife.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ model_performance.bife <- function(model, metrics = "all", verbose = TRUE, ...)
metrics <- c("AIC", "R2")
}

# check for valid input

metrics <- .check_bad_metrics(metrics, all_metrics, verbose)

info <- insight::model_info(model)
Expand Down
2 changes: 1 addition & 1 deletion R/model_performance.ivreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ model_performance.ivreg <- function(model, metrics = "all", verbose = TRUE, ...)
metrics <- c("AIC", "BIC", "R2", "R2_adj", "RMSE")
}

# check for valid input

metrics <- .check_bad_metrics(metrics, all_metrics, verbose)

# the lm-method does not accept ivreg-specific metrics
Expand Down
Loading

0 comments on commit dda4683

Please sign in to comment.