diff --git a/DESCRIPTION b/DESCRIPTION
index 1bfba562d..02e58aba7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: parameters
Title: Processing of Model Parameters
-Version: 0.21.5.7
+Version: 0.21.5.8
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
diff --git a/NEWS.md b/NEWS.md
index 3e9bfe8f9..1d9f08f9a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -25,6 +25,9 @@
of class `clmm` (package *ordinal*), when model had no `component` column
(e.g., no scale or location parameters were returned).
+* `include_reference` now also works when factor were created "on-the-fly" inside
+ the model formula (i.e. `y ~ as.factor(x)`).
+
# parameters 0.21.5
## Bug fixes
diff --git a/R/display.R b/R/display.R
index ba45e6a66..334854250 100644
--- a/R/display.R
+++ b/R/display.R
@@ -115,7 +115,8 @@ display.parameters_model <- function(object,
footer_digits = footer_digits, align = align, ci_brackets = ci_brackets,
show_sigma = show_sigma, show_formula = show_formula, zap_small = zap_small,
font_size = font_size, line_padding = line_padding,
- column_labels = column_labels, verbose = verbose, ...
+ column_labels = column_labels, include_reference = include_reference,
+ verbose = verbose, ...
)
} else {
print_md(
@@ -124,7 +125,7 @@ display.parameters_model <- function(object,
footer = footer, ci_digits = ci_digits, p_digits = p_digits,
footer_digits = footer_digits, ci_brackets = ci_brackets,
show_sigma = show_sigma, show_formula = show_formula, zap_small = zap_small,
- verbose = verbose, ...
+ include_reference = include_reference, verbose = verbose, ...
)
}
}
diff --git a/R/print_md.R b/R/print_md.R
index 98a7adbec..5a4990f54 100644
--- a/R/print_md.R
+++ b/R/print_md.R
@@ -443,8 +443,10 @@ print_md.parameters_distribution <- function(x, digits = 2, ci_brackets = c("(",
formatted_table[is.na(formatted_table)] <- ""
# create base table
out <- tinytable::tt(formatted_table, notes = footer, caption = caption)
- # insert sub header rows and column spans
- out <- tinytable::group_tt(out, i = row_groups, j = col_groups)
+ # insert sub header rows and column spans, if we have them
+ if (!(is.null(row_groups) && is.null(col_groups))) {
+ out <- tinytable::group_tt(out, i = row_groups, j = col_groups)
+ }
out@output <- outformat
out
}
diff --git a/R/utils.R b/R/utils.R
index 149856496..12a14fa3b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -140,16 +140,39 @@
}
-.find_factor_levels <- function(data) {
- out <- lapply(colnames(data), function(i) {
- v <- data[[i]]
+.find_factor_levels <- function(model_data, model = NULL, model_call = NULL) {
+ # check whether we have on-the-fly conversion of factors
+ if (!is.null(model)) {
+ model_terms <- insight::find_terms(model)
+ } else if (!is.null(model_call)) { # nolint
+ model_terms <- insight::find_terms(model_call)
+ } else {
+ model_terms <- NULL
+ }
+ # extract all model terms, we now have "as.factor(term)" etc., if any
+ if (!is.null(model_terms$conditional)) {
+ # extract variable names from "as.factor(term)" etc.
+ factor_terms <- grep("(as\\.factor|factor|as\\.character)", model_terms$conditional, value = TRUE)
+ cleaned <- gsub("(as\\.factor|factor|as\\.character)\\((.*)\\)", "\\2", factor_terms)
+ # convert on-the-fly factors into real factors
+ if (length(cleaned)) {
+ for (i in seq_along(cleaned)) {
+ model_data[[factor_terms[i]]] <- as.factor(model_data[[cleaned[i]]])
+ }
+ }
+ }
+ # extract levels from factors, so we know the reference level
+ out <- lapply(colnames(model_data), function(i) {
+ v <- model_data[[i]]
if (is.factor(v)) {
paste0(i, levels(v))
+ } else if (is.character(v)) {
+ paste0(i, unique(v))
} else {
NULL
}
})
- names(out) <- names(data)
+ names(out) <- names(model_data)
insight::compact_list(out)
}
diff --git a/R/utils_format.R b/R/utils_format.R
index aba6a8381..00dc350d5 100644
--- a/R/utils_format.R
+++ b/R/utils_format.R
@@ -53,6 +53,7 @@
# here we either have "
" or " " as line breaks, followed by empty "()"
i <- gsub("
()", "", i, fixed = TRUE)
i <- gsub(" ()", "", i, fixed = TRUE)
+ i <- gsub("
(, )", "", i, fixed = TRUE)
i <- gsub(" (, )", "", i, fixed = TRUE)
i[i == "()"] <- ""
i[i == "(, )"] <- ""
@@ -374,7 +375,7 @@
}
# find factors and factor levels and check if we have any factors in the data
- factors <- .find_factor_levels(model_data)
+ factors <- .find_factor_levels(model_data, model, model_call = attributes(params)$model_call)
if (!length(factors)) {
params
}
@@ -419,8 +420,16 @@
if (length(found)) {
# the reference level is *not* in the pretty names yet
reference_level <- f[!f %in% names(pretty_names)]
+
+ # for on-the-fly conversion of factors, the names of the factors can
+ # can also contain "factor()" or "as.factor()" - we need to remove these
+ if (any(grepl("(as\\.factor|factor|as\\.character)", fn))) {
+ fn_clean <- gsub("(as\\.factor|factor|as\\.character)\\((.*)\\)", "\\2", fn)
+ } else {
+ fn_clean <- fn
+ }
# create a pretty level for the reference category
- pretty_level <- paste0(fn, " [", sub(fn, "", reference_level, fixed = TRUE), "]")
+ pretty_level <- paste0(fn_clean, " [", sub(fn, "", reference_level, fixed = TRUE), "]")
# insert new pretty level at the correct position in "pretty_names"
pretty_names <- .insert_element_at(
pretty_names,
@@ -520,7 +529,7 @@
is_zero_inflated,
is_ordinal_model,
is_multivariate = FALSE,
- ran_pars,
+ ran_pars, # nolint
formatted_table = NULL) {
# prepare component names
.conditional_fixed_text <- if (is_zero_inflated) {
@@ -944,7 +953,7 @@
# fix column output
if (inherits(attributes(x)$model, c("lavaan", "blavaan")) && "Label" %in% colnames(x)) {
- x$From <- ifelse(!nzchar(as.character(x$Label), keepNA = TRUE) | x$Label == x$To, x$From, paste0(x$From, " (", x$Label, ")"))
+ x$From <- ifelse(!nzchar(as.character(x$Label), keepNA = TRUE) | x$Label == x$To, x$From, paste0(x$From, " (", x$Label, ")")) # nolint
x$Label <- NULL
}
@@ -1062,7 +1071,7 @@
# rename columns for zero-inflation part
if (startsWith(type, "zero") && !is.null(zi_coef_name) && !is.null(coef_column)) {
colnames(tables[[type]])[which(colnames(tables[[type]]) == coef_column)] <- zi_coef_name
- colnames(tables[[type]])[which(colnames(tables[[type]]) == paste0("Std_", coef_column))] <- paste0("Std_", zi_coef_name)
+ colnames(tables[[type]])[which(colnames(tables[[type]]) == paste0("Std_", coef_column))] <- paste0("Std_", zi_coef_name) # nolint
}
# rename columns for correlation, location or scale part
@@ -1142,8 +1151,8 @@
}
# replace brackets by parenthesis
if (!is.null(parameter_column) && parameter_column %in% colnames(formatted_table)) {
- formatted_table[[parameter_column]] <- gsub("[", ci_brackets[1], formatted_table[[parameter_column]], fixed = TRUE)
- formatted_table[[parameter_column]] <- gsub("]", ci_brackets[2], formatted_table[[parameter_column]], fixed = TRUE)
+ formatted_table[[parameter_column]] <- gsub("[", ci_brackets[1], formatted_table[[parameter_column]], fixed = TRUE) # nolint
+ formatted_table[[parameter_column]] <- gsub("]", ci_brackets[2], formatted_table[[parameter_column]], fixed = TRUE) # nolint
}
}
diff --git a/tests/testthat/_snaps/include_reference.md b/tests/testthat/_snaps/include_reference.md
new file mode 100644
index 000000000..535f59af7
--- /dev/null
+++ b/tests/testthat/_snaps/include_reference.md
@@ -0,0 +1,66 @@
+# include_reference, on-the-fly factors
+
+ Code
+ print(out1)
+ Output
+ Parameter | Coefficient | SE | 95% CI | t(27) | p
+ ------------------------------------------------------------------
+ (Intercept) | 27.48 | 1.97 | [23.43, 31.53] | 13.92 | < .001
+ gear [3] | 0.00 | | | |
+ gear [4] | 0.08 | 1.83 | [-3.68, 3.83] | 0.04 | 0.967
+ gear [5] | 2.39 | 2.38 | [-2.50, 7.29] | 1.00 | 0.324
+ am [0] | 0.00 | | | |
+ am [1] | 4.14 | 1.81 | [ 0.42, 7.85] | 2.29 | 0.030
+ hp | -0.06 | 0.01 | [-0.09, -0.04] | -6.24 | < .001
+ Message
+
+ Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+ using a Wald t-distribution approximation.
+
+---
+
+ Code
+ print(out2)
+ Output
+ Parameter | Coefficient | SE | 95% CI | t(27) | p
+ ------------------------------------------------------------------
+ (Intercept) | 27.48 | 1.97 | [23.43, 31.53] | 13.92 | < .001
+ gear [3] | 0.00 | | | |
+ gear [4] | 0.08 | 1.83 | [-3.68, 3.83] | 0.04 | 0.967
+ gear [5] | 2.39 | 2.38 | [-2.50, 7.29] | 1.00 | 0.324
+ am [0] | 0.00 | | | |
+ am [1] | 4.14 | 1.81 | [ 0.42, 7.85] | 2.29 | 0.030
+ hp | -0.06 | 0.01 | [-0.09, -0.04] | -6.24 | < .001
+ Message
+
+ Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+ using a Wald t-distribution approximation.
+
+---
+
+ Code
+ print_md(out, engine = "tt")
+ Output
+
+ +--------------+----------------------+----------------------+
+ | Parameter | m1 | m2 |
+ +==============+======================+======================+
+ | (Intercept) | 27.48 (23.43, 31.53) | 27.48 (23.43, 31.53) |
+ +--------------+----------------------+----------------------+
+ | gear (3) | 0.00 | 0.00 |
+ +--------------+----------------------+----------------------+
+ | gear (4) | 0.08 (-3.68, 3.83) | 0.08 (-3.68, 3.83) |
+ +--------------+----------------------+----------------------+
+ | gear (5) | 2.39 (-2.50, 7.29) | 2.39 (-2.50, 7.29) |
+ +--------------+----------------------+----------------------+
+ | am (0) | 0.00 | 0.00 |
+ +--------------+----------------------+----------------------+
+ | am (1) | 4.14 (0.42, 7.85) | 4.14 (0.42, 7.85) |
+ +--------------+----------------------+----------------------+
+ | hp | -0.06 (-0.09, -0.04) | -0.06 (-0.09, -0.04) |
+ +--------------+----------------------+----------------------+
+ | | | |
+ +--------------+----------------------+----------------------+
+ | Observations | 32 | 32 |
+ +--------------+----------------------+----------------------+
+
diff --git a/tests/testthat/test-include_reference.R b/tests/testthat/test-include_reference.R
new file mode 100644
index 000000000..8e63890e8
--- /dev/null
+++ b/tests/testthat/test-include_reference.R
@@ -0,0 +1,44 @@
+skip_if_not_installed("tinytable")
+
+test_that("include_reference, on-the-fly factors", {
+ data(mtcars)
+ d <- as.data.frame(mtcars)
+ d$gear <- as.factor(d$gear)
+ d$am <- as.factor(d$am)
+
+ m1 <- lm(mpg ~ as.factor(gear) + factor(am) + hp, data = mtcars)
+ m2 <- lm(mpg ~ gear + am + hp, data = d)
+
+ out1 <- model_parameters(m1, include_reference = TRUE)
+ out2 <- model_parameters(m2, include_reference = TRUE)
+
+ expect_snapshot(print(out1))
+ expect_snapshot(print(out2))
+
+ expect_equal(attributes(out1)$pretty_names, attributes(out2)$pretty_names, ignore_attr = TRUE)
+ expect_equal(out1$Coefficient, out2$Coefficient, tolerance = 1e-4)
+
+ out <- compare_parameters(m1, m2, include_reference = TRUE)
+ expect_snapshot(print_md(out, engine = "tt"))
+})
+
+skip_if(getRversion() < "4.3.3")
+skip_if_not_installed("datawizard")
+
+test_that("include_reference, on-the-fly factors", {
+ data(mtcars)
+ d <- as.data.frame(mtcars)
+ d$gear <- as.factor(d$gear)
+ d$am <- as.factor(d$am)
+
+ m1 <- lm(mpg ~ as.factor(gear) + factor(am) + hp, data = mtcars)
+ m2 <- lm(mpg ~ gear + am + hp, data = d)
+
+ out1 <- model_parameters(m1, include_reference = TRUE)
+ out3 <- mtcars |>
+ datawizard::data_modify(gear = factor(gear), am = as.factor(am)) |>
+ lm(mpg ~ gear + am + hp, data = _) |>
+ model_parameters(include_reference = TRUE)
+
+ expect_equal(attributes(out1)$pretty_names, attributes(out3)$pretty_names, ignore_attr = TRUE)
+})