diff --git a/DESCRIPTION b/DESCRIPTION index 1bfba562d..02e58aba7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: parameters Title: Processing of Model Parameters -Version: 0.21.5.7 +Version: 0.21.5.8 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 3e9bfe8f9..1d9f08f9a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,9 @@ of class `clmm` (package *ordinal*), when model had no `component` column (e.g., no scale or location parameters were returned). +* `include_reference` now also works when factor were created "on-the-fly" inside + the model formula (i.e. `y ~ as.factor(x)`). + # parameters 0.21.5 ## Bug fixes diff --git a/R/display.R b/R/display.R index ba45e6a66..334854250 100644 --- a/R/display.R +++ b/R/display.R @@ -115,7 +115,8 @@ display.parameters_model <- function(object, footer_digits = footer_digits, align = align, ci_brackets = ci_brackets, show_sigma = show_sigma, show_formula = show_formula, zap_small = zap_small, font_size = font_size, line_padding = line_padding, - column_labels = column_labels, verbose = verbose, ... + column_labels = column_labels, include_reference = include_reference, + verbose = verbose, ... ) } else { print_md( @@ -124,7 +125,7 @@ display.parameters_model <- function(object, footer = footer, ci_digits = ci_digits, p_digits = p_digits, footer_digits = footer_digits, ci_brackets = ci_brackets, show_sigma = show_sigma, show_formula = show_formula, zap_small = zap_small, - verbose = verbose, ... + include_reference = include_reference, verbose = verbose, ... ) } } diff --git a/R/print_md.R b/R/print_md.R index 98a7adbec..5a4990f54 100644 --- a/R/print_md.R +++ b/R/print_md.R @@ -443,8 +443,10 @@ print_md.parameters_distribution <- function(x, digits = 2, ci_brackets = c("(", formatted_table[is.na(formatted_table)] <- "" # create base table out <- tinytable::tt(formatted_table, notes = footer, caption = caption) - # insert sub header rows and column spans - out <- tinytable::group_tt(out, i = row_groups, j = col_groups) + # insert sub header rows and column spans, if we have them + if (!(is.null(row_groups) && is.null(col_groups))) { + out <- tinytable::group_tt(out, i = row_groups, j = col_groups) + } out@output <- outformat out } diff --git a/R/utils.R b/R/utils.R index 149856496..12a14fa3b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -140,16 +140,39 @@ } -.find_factor_levels <- function(data) { - out <- lapply(colnames(data), function(i) { - v <- data[[i]] +.find_factor_levels <- function(model_data, model = NULL, model_call = NULL) { + # check whether we have on-the-fly conversion of factors + if (!is.null(model)) { + model_terms <- insight::find_terms(model) + } else if (!is.null(model_call)) { # nolint + model_terms <- insight::find_terms(model_call) + } else { + model_terms <- NULL + } + # extract all model terms, we now have "as.factor(term)" etc., if any + if (!is.null(model_terms$conditional)) { + # extract variable names from "as.factor(term)" etc. + factor_terms <- grep("(as\\.factor|factor|as\\.character)", model_terms$conditional, value = TRUE) + cleaned <- gsub("(as\\.factor|factor|as\\.character)\\((.*)\\)", "\\2", factor_terms) + # convert on-the-fly factors into real factors + if (length(cleaned)) { + for (i in seq_along(cleaned)) { + model_data[[factor_terms[i]]] <- as.factor(model_data[[cleaned[i]]]) + } + } + } + # extract levels from factors, so we know the reference level + out <- lapply(colnames(model_data), function(i) { + v <- model_data[[i]] if (is.factor(v)) { paste0(i, levels(v)) + } else if (is.character(v)) { + paste0(i, unique(v)) } else { NULL } }) - names(out) <- names(data) + names(out) <- names(model_data) insight::compact_list(out) } diff --git a/R/utils_format.R b/R/utils_format.R index aba6a8381..00dc350d5 100644 --- a/R/utils_format.R +++ b/R/utils_format.R @@ -53,6 +53,7 @@ # here we either have "
" or " " as line breaks, followed by empty "()" i <- gsub("
()", "", i, fixed = TRUE) i <- gsub(" ()", "", i, fixed = TRUE) + i <- gsub("
(, )", "", i, fixed = TRUE) i <- gsub(" (, )", "", i, fixed = TRUE) i[i == "()"] <- "" i[i == "(, )"] <- "" @@ -374,7 +375,7 @@ } # find factors and factor levels and check if we have any factors in the data - factors <- .find_factor_levels(model_data) + factors <- .find_factor_levels(model_data, model, model_call = attributes(params)$model_call) if (!length(factors)) { params } @@ -419,8 +420,16 @@ if (length(found)) { # the reference level is *not* in the pretty names yet reference_level <- f[!f %in% names(pretty_names)] + + # for on-the-fly conversion of factors, the names of the factors can + # can also contain "factor()" or "as.factor()" - we need to remove these + if (any(grepl("(as\\.factor|factor|as\\.character)", fn))) { + fn_clean <- gsub("(as\\.factor|factor|as\\.character)\\((.*)\\)", "\\2", fn) + } else { + fn_clean <- fn + } # create a pretty level for the reference category - pretty_level <- paste0(fn, " [", sub(fn, "", reference_level, fixed = TRUE), "]") + pretty_level <- paste0(fn_clean, " [", sub(fn, "", reference_level, fixed = TRUE), "]") # insert new pretty level at the correct position in "pretty_names" pretty_names <- .insert_element_at( pretty_names, @@ -520,7 +529,7 @@ is_zero_inflated, is_ordinal_model, is_multivariate = FALSE, - ran_pars, + ran_pars, # nolint formatted_table = NULL) { # prepare component names .conditional_fixed_text <- if (is_zero_inflated) { @@ -944,7 +953,7 @@ # fix column output if (inherits(attributes(x)$model, c("lavaan", "blavaan")) && "Label" %in% colnames(x)) { - x$From <- ifelse(!nzchar(as.character(x$Label), keepNA = TRUE) | x$Label == x$To, x$From, paste0(x$From, " (", x$Label, ")")) + x$From <- ifelse(!nzchar(as.character(x$Label), keepNA = TRUE) | x$Label == x$To, x$From, paste0(x$From, " (", x$Label, ")")) # nolint x$Label <- NULL } @@ -1062,7 +1071,7 @@ # rename columns for zero-inflation part if (startsWith(type, "zero") && !is.null(zi_coef_name) && !is.null(coef_column)) { colnames(tables[[type]])[which(colnames(tables[[type]]) == coef_column)] <- zi_coef_name - colnames(tables[[type]])[which(colnames(tables[[type]]) == paste0("Std_", coef_column))] <- paste0("Std_", zi_coef_name) + colnames(tables[[type]])[which(colnames(tables[[type]]) == paste0("Std_", coef_column))] <- paste0("Std_", zi_coef_name) # nolint } # rename columns for correlation, location or scale part @@ -1142,8 +1151,8 @@ } # replace brackets by parenthesis if (!is.null(parameter_column) && parameter_column %in% colnames(formatted_table)) { - formatted_table[[parameter_column]] <- gsub("[", ci_brackets[1], formatted_table[[parameter_column]], fixed = TRUE) - formatted_table[[parameter_column]] <- gsub("]", ci_brackets[2], formatted_table[[parameter_column]], fixed = TRUE) + formatted_table[[parameter_column]] <- gsub("[", ci_brackets[1], formatted_table[[parameter_column]], fixed = TRUE) # nolint + formatted_table[[parameter_column]] <- gsub("]", ci_brackets[2], formatted_table[[parameter_column]], fixed = TRUE) # nolint } } diff --git a/tests/testthat/_snaps/include_reference.md b/tests/testthat/_snaps/include_reference.md new file mode 100644 index 000000000..535f59af7 --- /dev/null +++ b/tests/testthat/_snaps/include_reference.md @@ -0,0 +1,66 @@ +# include_reference, on-the-fly factors + + Code + print(out1) + Output + Parameter | Coefficient | SE | 95% CI | t(27) | p + ------------------------------------------------------------------ + (Intercept) | 27.48 | 1.97 | [23.43, 31.53] | 13.92 | < .001 + gear [3] | 0.00 | | | | + gear [4] | 0.08 | 1.83 | [-3.68, 3.83] | 0.04 | 0.967 + gear [5] | 2.39 | 2.38 | [-2.50, 7.29] | 1.00 | 0.324 + am [0] | 0.00 | | | | + am [1] | 4.14 | 1.81 | [ 0.42, 7.85] | 2.29 | 0.030 + hp | -0.06 | 0.01 | [-0.09, -0.04] | -6.24 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald t-distribution approximation. + +--- + + Code + print(out2) + Output + Parameter | Coefficient | SE | 95% CI | t(27) | p + ------------------------------------------------------------------ + (Intercept) | 27.48 | 1.97 | [23.43, 31.53] | 13.92 | < .001 + gear [3] | 0.00 | | | | + gear [4] | 0.08 | 1.83 | [-3.68, 3.83] | 0.04 | 0.967 + gear [5] | 2.39 | 2.38 | [-2.50, 7.29] | 1.00 | 0.324 + am [0] | 0.00 | | | | + am [1] | 4.14 | 1.81 | [ 0.42, 7.85] | 2.29 | 0.030 + hp | -0.06 | 0.01 | [-0.09, -0.04] | -6.24 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald t-distribution approximation. + +--- + + Code + print_md(out, engine = "tt") + Output + + +--------------+----------------------+----------------------+ + | Parameter | m1 | m2 | + +==============+======================+======================+ + | (Intercept) | 27.48 (23.43, 31.53) | 27.48 (23.43, 31.53) | + +--------------+----------------------+----------------------+ + | gear (3) | 0.00 | 0.00 | + +--------------+----------------------+----------------------+ + | gear (4) | 0.08 (-3.68, 3.83) | 0.08 (-3.68, 3.83) | + +--------------+----------------------+----------------------+ + | gear (5) | 2.39 (-2.50, 7.29) | 2.39 (-2.50, 7.29) | + +--------------+----------------------+----------------------+ + | am (0) | 0.00 | 0.00 | + +--------------+----------------------+----------------------+ + | am (1) | 4.14 (0.42, 7.85) | 4.14 (0.42, 7.85) | + +--------------+----------------------+----------------------+ + | hp | -0.06 (-0.09, -0.04) | -0.06 (-0.09, -0.04) | + +--------------+----------------------+----------------------+ + | | | | + +--------------+----------------------+----------------------+ + | Observations | 32 | 32 | + +--------------+----------------------+----------------------+ + diff --git a/tests/testthat/test-include_reference.R b/tests/testthat/test-include_reference.R new file mode 100644 index 000000000..8e63890e8 --- /dev/null +++ b/tests/testthat/test-include_reference.R @@ -0,0 +1,44 @@ +skip_if_not_installed("tinytable") + +test_that("include_reference, on-the-fly factors", { + data(mtcars) + d <- as.data.frame(mtcars) + d$gear <- as.factor(d$gear) + d$am <- as.factor(d$am) + + m1 <- lm(mpg ~ as.factor(gear) + factor(am) + hp, data = mtcars) + m2 <- lm(mpg ~ gear + am + hp, data = d) + + out1 <- model_parameters(m1, include_reference = TRUE) + out2 <- model_parameters(m2, include_reference = TRUE) + + expect_snapshot(print(out1)) + expect_snapshot(print(out2)) + + expect_equal(attributes(out1)$pretty_names, attributes(out2)$pretty_names, ignore_attr = TRUE) + expect_equal(out1$Coefficient, out2$Coefficient, tolerance = 1e-4) + + out <- compare_parameters(m1, m2, include_reference = TRUE) + expect_snapshot(print_md(out, engine = "tt")) +}) + +skip_if(getRversion() < "4.3.3") +skip_if_not_installed("datawizard") + +test_that("include_reference, on-the-fly factors", { + data(mtcars) + d <- as.data.frame(mtcars) + d$gear <- as.factor(d$gear) + d$am <- as.factor(d$am) + + m1 <- lm(mpg ~ as.factor(gear) + factor(am) + hp, data = mtcars) + m2 <- lm(mpg ~ gear + am + hp, data = d) + + out1 <- model_parameters(m1, include_reference = TRUE) + out3 <- mtcars |> + datawizard::data_modify(gear = factor(gear), am = as.factor(am)) |> + lm(mpg ~ gear + am + hp, data = _) |> + model_parameters(include_reference = TRUE) + + expect_equal(attributes(out1)$pretty_names, attributes(out3)$pretty_names, ignore_attr = TRUE) +})