Skip to content

Commit

Permalink
include_reference = TRUE doesn't work in combination with `as.facto…
Browse files Browse the repository at this point in the history
…r()` (#957)

* `include_reference = TRUE` doesn't work in combination with `as.factor()`
Fixes #956

* desc, news

* add tests

* lintr

* update test

* comment non working test

* update snapshots

* update snapshot

* skip test

* like that?
  • Loading branch information
strengejacke authored Mar 15, 2024
1 parent 043b3ff commit 00b5ec8
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 16 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: parameters
Title: Processing of Model Parameters
Version: 0.21.5.7
Version: 0.21.5.8
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
of class `clmm` (package *ordinal*), when model had no `component` column
(e.g., no scale or location parameters were returned).

* `include_reference` now also works when factor were created "on-the-fly" inside
the model formula (i.e. `y ~ as.factor(x)`).

# parameters 0.21.5

## Bug fixes
Expand Down
5 changes: 3 additions & 2 deletions R/display.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ display.parameters_model <- function(object,
footer_digits = footer_digits, align = align, ci_brackets = ci_brackets,
show_sigma = show_sigma, show_formula = show_formula, zap_small = zap_small,
font_size = font_size, line_padding = line_padding,
column_labels = column_labels, verbose = verbose, ...
column_labels = column_labels, include_reference = include_reference,
verbose = verbose, ...
)
} else {
print_md(
Expand All @@ -124,7 +125,7 @@ display.parameters_model <- function(object,
footer = footer, ci_digits = ci_digits, p_digits = p_digits,
footer_digits = footer_digits, ci_brackets = ci_brackets,
show_sigma = show_sigma, show_formula = show_formula, zap_small = zap_small,
verbose = verbose, ...
include_reference = include_reference, verbose = verbose, ...
)
}
}
Expand Down
6 changes: 4 additions & 2 deletions R/print_md.R
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,10 @@ print_md.parameters_distribution <- function(x, digits = 2, ci_brackets = c("(",
formatted_table[is.na(formatted_table)] <- ""
# create base table
out <- tinytable::tt(formatted_table, notes = footer, caption = caption)
# insert sub header rows and column spans
out <- tinytable::group_tt(out, i = row_groups, j = col_groups)
# insert sub header rows and column spans, if we have them
if (!(is.null(row_groups) && is.null(col_groups))) {
out <- tinytable::group_tt(out, i = row_groups, j = col_groups)
}
out@output <- outformat
out
}
31 changes: 27 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,16 +140,39 @@
}


.find_factor_levels <- function(data) {
out <- lapply(colnames(data), function(i) {
v <- data[[i]]
.find_factor_levels <- function(model_data, model = NULL, model_call = NULL) {
# check whether we have on-the-fly conversion of factors
if (!is.null(model)) {
model_terms <- insight::find_terms(model)
} else if (!is.null(model_call)) { # nolint
model_terms <- insight::find_terms(model_call)
} else {
model_terms <- NULL
}
# extract all model terms, we now have "as.factor(term)" etc., if any
if (!is.null(model_terms$conditional)) {
# extract variable names from "as.factor(term)" etc.
factor_terms <- grep("(as\\.factor|factor|as\\.character)", model_terms$conditional, value = TRUE)
cleaned <- gsub("(as\\.factor|factor|as\\.character)\\((.*)\\)", "\\2", factor_terms)
# convert on-the-fly factors into real factors
if (length(cleaned)) {
for (i in seq_along(cleaned)) {
model_data[[factor_terms[i]]] <- as.factor(model_data[[cleaned[i]]])
}
}
}
# extract levels from factors, so we know the reference level
out <- lapply(colnames(model_data), function(i) {
v <- model_data[[i]]
if (is.factor(v)) {
paste0(i, levels(v))
} else if (is.character(v)) {
paste0(i, unique(v))
} else {
NULL
}
})
names(out) <- names(data)
names(out) <- names(model_data)
insight::compact_list(out)
}

Expand Down
23 changes: 16 additions & 7 deletions R/utils_format.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
# here we either have "<br>" or " " as line breaks, followed by empty "()"
i <- gsub("<br>()", "", i, fixed = TRUE)
i <- gsub(" ()", "", i, fixed = TRUE)
i <- gsub("<br>(, )", "", i, fixed = TRUE)
i <- gsub(" (, )", "", i, fixed = TRUE)
i[i == "()"] <- ""
i[i == "(, )"] <- ""
Expand Down Expand Up @@ -374,7 +375,7 @@
}

# find factors and factor levels and check if we have any factors in the data
factors <- .find_factor_levels(model_data)
factors <- .find_factor_levels(model_data, model, model_call = attributes(params)$model_call)
if (!length(factors)) {
params
}
Expand Down Expand Up @@ -419,8 +420,16 @@
if (length(found)) {
# the reference level is *not* in the pretty names yet
reference_level <- f[!f %in% names(pretty_names)]

# for on-the-fly conversion of factors, the names of the factors can
# can also contain "factor()" or "as.factor()" - we need to remove these
if (any(grepl("(as\\.factor|factor|as\\.character)", fn))) {
fn_clean <- gsub("(as\\.factor|factor|as\\.character)\\((.*)\\)", "\\2", fn)
} else {
fn_clean <- fn
}
# create a pretty level for the reference category
pretty_level <- paste0(fn, " [", sub(fn, "", reference_level, fixed = TRUE), "]")
pretty_level <- paste0(fn_clean, " [", sub(fn, "", reference_level, fixed = TRUE), "]")
# insert new pretty level at the correct position in "pretty_names"
pretty_names <- .insert_element_at(
pretty_names,
Expand Down Expand Up @@ -520,7 +529,7 @@
is_zero_inflated,
is_ordinal_model,
is_multivariate = FALSE,
ran_pars,
ran_pars, # nolint
formatted_table = NULL) {
# prepare component names
.conditional_fixed_text <- if (is_zero_inflated) {
Expand Down Expand Up @@ -944,7 +953,7 @@

# fix column output
if (inherits(attributes(x)$model, c("lavaan", "blavaan")) && "Label" %in% colnames(x)) {
x$From <- ifelse(!nzchar(as.character(x$Label), keepNA = TRUE) | x$Label == x$To, x$From, paste0(x$From, " (", x$Label, ")"))
x$From <- ifelse(!nzchar(as.character(x$Label), keepNA = TRUE) | x$Label == x$To, x$From, paste0(x$From, " (", x$Label, ")")) # nolint
x$Label <- NULL
}

Expand Down Expand Up @@ -1062,7 +1071,7 @@
# rename columns for zero-inflation part
if (startsWith(type, "zero") && !is.null(zi_coef_name) && !is.null(coef_column)) {
colnames(tables[[type]])[which(colnames(tables[[type]]) == coef_column)] <- zi_coef_name
colnames(tables[[type]])[which(colnames(tables[[type]]) == paste0("Std_", coef_column))] <- paste0("Std_", zi_coef_name)
colnames(tables[[type]])[which(colnames(tables[[type]]) == paste0("Std_", coef_column))] <- paste0("Std_", zi_coef_name) # nolint
}

# rename columns for correlation, location or scale part
Expand Down Expand Up @@ -1142,8 +1151,8 @@
}
# replace brackets by parenthesis
if (!is.null(parameter_column) && parameter_column %in% colnames(formatted_table)) {
formatted_table[[parameter_column]] <- gsub("[", ci_brackets[1], formatted_table[[parameter_column]], fixed = TRUE)
formatted_table[[parameter_column]] <- gsub("]", ci_brackets[2], formatted_table[[parameter_column]], fixed = TRUE)
formatted_table[[parameter_column]] <- gsub("[", ci_brackets[1], formatted_table[[parameter_column]], fixed = TRUE) # nolint
formatted_table[[parameter_column]] <- gsub("]", ci_brackets[2], formatted_table[[parameter_column]], fixed = TRUE) # nolint
}
}

Expand Down
66 changes: 66 additions & 0 deletions tests/testthat/_snaps/include_reference.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# include_reference, on-the-fly factors

Code
print(out1)
Output
Parameter | Coefficient | SE | 95% CI | t(27) | p
------------------------------------------------------------------
(Intercept) | 27.48 | 1.97 | [23.43, 31.53] | 13.92 | < .001
gear [3] | 0.00 | | | |
gear [4] | 0.08 | 1.83 | [-3.68, 3.83] | 0.04 | 0.967
gear [5] | 2.39 | 2.38 | [-2.50, 7.29] | 1.00 | 0.324
am [0] | 0.00 | | | |
am [1] | 4.14 | 1.81 | [ 0.42, 7.85] | 2.29 | 0.030
hp | -0.06 | 0.01 | [-0.09, -0.04] | -6.24 | < .001
Message
Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
using a Wald t-distribution approximation.

---

Code
print(out2)
Output
Parameter | Coefficient | SE | 95% CI | t(27) | p
------------------------------------------------------------------
(Intercept) | 27.48 | 1.97 | [23.43, 31.53] | 13.92 | < .001
gear [3] | 0.00 | | | |
gear [4] | 0.08 | 1.83 | [-3.68, 3.83] | 0.04 | 0.967
gear [5] | 2.39 | 2.38 | [-2.50, 7.29] | 1.00 | 0.324
am [0] | 0.00 | | | |
am [1] | 4.14 | 1.81 | [ 0.42, 7.85] | 2.29 | 0.030
hp | -0.06 | 0.01 | [-0.09, -0.04] | -6.24 | < .001
Message
Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
using a Wald t-distribution approximation.

---

Code
print_md(out, engine = "tt")
Output
+--------------+----------------------+----------------------+
| Parameter | m1 | m2 |
+==============+======================+======================+
| (Intercept) | 27.48 (23.43, 31.53) | 27.48 (23.43, 31.53) |
+--------------+----------------------+----------------------+
| gear (3) | 0.00 | 0.00 |
+--------------+----------------------+----------------------+
| gear (4) | 0.08 (-3.68, 3.83) | 0.08 (-3.68, 3.83) |
+--------------+----------------------+----------------------+
| gear (5) | 2.39 (-2.50, 7.29) | 2.39 (-2.50, 7.29) |
+--------------+----------------------+----------------------+
| am (0) | 0.00 | 0.00 |
+--------------+----------------------+----------------------+
| am (1) | 4.14 (0.42, 7.85) | 4.14 (0.42, 7.85) |
+--------------+----------------------+----------------------+
| hp | -0.06 (-0.09, -0.04) | -0.06 (-0.09, -0.04) |
+--------------+----------------------+----------------------+
| | | |
+--------------+----------------------+----------------------+
| Observations | 32 | 32 |
+--------------+----------------------+----------------------+

44 changes: 44 additions & 0 deletions tests/testthat/test-include_reference.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
skip_if_not_installed("tinytable")

test_that("include_reference, on-the-fly factors", {
data(mtcars)
d <- as.data.frame(mtcars)
d$gear <- as.factor(d$gear)
d$am <- as.factor(d$am)

m1 <- lm(mpg ~ as.factor(gear) + factor(am) + hp, data = mtcars)
m2 <- lm(mpg ~ gear + am + hp, data = d)

out1 <- model_parameters(m1, include_reference = TRUE)
out2 <- model_parameters(m2, include_reference = TRUE)

expect_snapshot(print(out1))
expect_snapshot(print(out2))

expect_equal(attributes(out1)$pretty_names, attributes(out2)$pretty_names, ignore_attr = TRUE)
expect_equal(out1$Coefficient, out2$Coefficient, tolerance = 1e-4)

out <- compare_parameters(m1, m2, include_reference = TRUE)
expect_snapshot(print_md(out, engine = "tt"))
})

skip_if(getRversion() < "4.3.3")
skip_if_not_installed("datawizard")

test_that("include_reference, on-the-fly factors", {
data(mtcars)
d <- as.data.frame(mtcars)
d$gear <- as.factor(d$gear)
d$am <- as.factor(d$am)

m1 <- lm(mpg ~ as.factor(gear) + factor(am) + hp, data = mtcars)
m2 <- lm(mpg ~ gear + am + hp, data = d)

out1 <- model_parameters(m1, include_reference = TRUE)
out3 <- mtcars |>
datawizard::data_modify(gear = factor(gear), am = as.factor(am)) |>
lm(mpg ~ gear + am + hp, data = _) |>
model_parameters(include_reference = TRUE)

expect_equal(attributes(out1)$pretty_names, attributes(out3)$pretty_names, ignore_attr = TRUE)
})

0 comments on commit 00b5ec8

Please sign in to comment.