Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename group/group_by arguments into by #721

Merged
merged 3 commits into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: performance
Title: Assessment of Regression Models Performance
Version: 0.11.0.8
Version: 0.11.0.9
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down Expand Up @@ -154,3 +154,4 @@ Config/Needs/website:
r-lib/pkgdown,
easystats/easystatstemplate
Config/rcmdcheck/ignore-inconsequential-notes: true
Remotes: easystats/insight
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
* Aliases `posterior_predictive_check()` and `check_posterior_predictions()` for
`check_predictions()` are deprecated.

* Arguments named `group` or `group_by` are deprecated and will be removed in a
future release. Please use `by` instead. This affects
`check_heterogeneity_bias()` in *performance*.

## General

* Improved documentation and new vignettes added.
Expand Down
28 changes: 18 additions & 10 deletions R/check_heterogeneity_bias.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#' @param select Character vector (or formula) with names of variables to select
#' that should be checked. If `x` is a mixed model object, this argument
#' will be ignored.
#' @param group Character vector (or formula) with the name of the variable that
#' @param by Character vector (or formula) with the name of the variable that
#' indicates the group- or cluster-ID. If `x` is a model object, this
#' argument will be ignored.
#' @param group Deprecated. Use `by` instead.
#'
#' @seealso
#' For further details, read the vignette
Expand All @@ -25,31 +26,38 @@
#' @examples
#' data(iris)
#' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID
#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID")
#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
#' @export
check_heterogeneity_bias <- function(x, select = NULL, group = NULL) {
check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) {
## TODO: deprecate later
if (!is.null(group)) {
insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint
by <- group
}
if (insight::is_model(x)) {
group <- insight::find_random(x, split_nested = TRUE, flatten = TRUE)
if (is.null(group)) {
insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `group`.") # nolint
by <- insight::find_random(x, split_nested = TRUE, flatten = TRUE)
if (is.null(by)) {
insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `by`.") # nolint
}
my_data <- insight::get_data(x, source = "mf", verbose = FALSE)
select <- insight::find_predictors(x, effects = "fixed", component = "conditional", flatten = TRUE)
} else {
if (inherits(select, "formula")) {
select <- all.vars(select)
}
if (inherits(group, "formula")) {
group <- all.vars(group)
if (inherits(by, "formula")) {
by <- all.vars(by)
}
my_data <- x
}

unique_groups <- .n_unique(my_data[[group]])
combinations <- expand.grid(select, group)
unique_groups <- .n_unique(my_data[[by]])
combinations <- expand.grid(select, by)

result <- Map(function(predictor, id) {
# demean predictor

## FIXME: update the `group` argument name in the demean() call below later!
d <- datawizard::demean(my_data, select = predictor, group = id, verbose = FALSE)

# get new names
Expand Down
2 changes: 1 addition & 1 deletion R/performance_cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
test_pred <- insight::get_predicted(model, ci = NULL, data = data)
test_resd <- test_resp - test_pred
} else if (method == "holdout") {
train_i <- sample(seq_len(nrow(model_data)), size = round((1 - prop) * nrow(model_data)), replace = FALSE)
train_i <- sample.int(nrow(model_data), size = round((1 - prop) * nrow(model_data)), replace = FALSE)
model_upd <- stats::update(model, data = model_data[train_i, ])
test_resp <- model_data[-train_i, resp.name]
test_pred <- insight::get_predicted(model_upd, ci = NULL, data = model_data[-train_i, ])
Expand Down Expand Up @@ -105,7 +105,7 @@
models_upd <- lapply(cv_folds, function(.x) {
stats::update(model, data = model_data[.x$train, ])
})
test_pred <- mapply(function(.x, .y) {

Check warning on line 108 in R/performance_cv.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/performance_cv.R,line=108,col=20,[undesirable_function_linter] Avoid undesirable function "mapply".
insight::get_predicted(.y, ci = NULL, data = model_data[.x$test, ])
}, cv_folds, models_upd, SIMPLIFY = FALSE)
test_resp <- lapply(cv_folds, function(.x) {
Expand All @@ -124,7 +124,7 @@
R2 <- 1 - MSE / mean((test_resp - mean(test_resp, na.rm = TRUE))^2, na.rm = TRUE)
out <- data.frame(MSE = MSE, RMSE = RMSE, R2 = R2)
} else {
test_resd <- mapply(function(.x, .y) {

Check warning on line 127 in R/performance_cv.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/performance_cv.R,line=127,col=18,[undesirable_function_linter] Avoid undesirable function "mapply".
.x - .y
}, test_resp, test_pred, SIMPLIFY = FALSE)
MSEs <- sapply(test_resd, function(x) mean(x^2, na.rm = TRUE))
Expand All @@ -148,7 +148,7 @@
paste0(
"Metric",
ifelse(length(missing_metrics) > 1, "s '", " '"),
paste0(missing_metrics, collapse = "', '"),

Check warning on line 151 in R/performance_cv.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/performance_cv.R,line=151,col=9,[paste_linter] Use paste(), not paste0(), to collapse a character vector when sep= is not used.
"' not yet supported."
)
), colour = "red"))
Expand All @@ -158,7 +158,7 @@
}

# TODO: implement performance::log_lik() function for deviance/elpd metrics
# - When given a model, it should pass it to insight::get_loglikelihood, stats4::logLik, stats::logLik, or rstantools::log_lik

Check warning on line 161 in R/performance_cv.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/performance_cv.R,line=161,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 128 characters.
# - When given a model and new data, it should pass to rstantools::log_lik if stan
# or compute a df like this:
# df <- list(residuals = cv_residuals); class(df) <- class(model)
Expand Down
8 changes: 5 additions & 3 deletions man/check_heterogeneity_bias.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions tests/testthat/test-check_heterogeneity_bias.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,29 @@ test_that("check_heterogeneity_bias", {
data(iris)
set.seed(123)
iris$ID <- sample.int(4, nrow(iris), replace = TRUE) # fake-ID
out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID")
out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID")
expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE)
expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length")

out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, group = ~ID)
out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, by = ~ID)
expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE)
expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length")

m <- lm(Sepal.Length ~ Petal.Length + Petal.Width + Species + ID, data = iris)
expect_error(
check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID"),
check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID"),
regex = "no mixed model"
)

skip_if_not_installed("lme4")
m <- lme4::lmer(Sepal.Length ~ Petal.Length + Petal.Width + Species + (1 | ID), data = iris)
out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID")
out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID")
expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE)
expect_output(
print(out),
"Possible heterogeneity bias due to following predictors: Petal\\.Length, Petal\\.Width, Species"
)
out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, group = ~ID)
out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, by = ~ID)
expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE)
expect_output(
print(out),
Expand Down
Loading