Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix anova type 3 #460

Merged
merged 19 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
### Bug Fixes

- Previously `emmeans` will return `NA` for spatial covariance structure. This is fixed now.
- Previously `car::Anova` will give incorrect results if an interaction term is included and the order of the covariate of interest is not the first categorical variable. This is fixed now.
- Previously `car::Anova` will fail if the model does not contain intercept. This is fixed now.
- Previously, `mmrm` will ignore contrasts defined for covariates in the input data set. This is fixed now.

# mmrm 0.3.12
Expand Down
118 changes: 106 additions & 12 deletions R/interop-car.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,35 +40,51 @@ h_get_contrast <- function(object, effect, type = c("II", "III", "2", "3"), tol
assert_subset(effect, colnames(fcts))
idx <- which(effect == colnames(fcts))
cols <- which(asg == idx)
data <- model.frame(object)
var_numeric <- vapply(data, is.numeric, FUN.VALUE = TRUE)
coef_rows <- length(cols)
xlev <- component(object, "xlev")
contains_intercept <- (!0 %in% asg) && h_first_contain_categorical(effect, fcts, names(xlev))
coef_rows <- length(cols) - as.integer(contains_intercept)
l_mx <- matrix(0, nrow = coef_rows, ncol = length(asg))
if (coef_rows == 0L) {
return(l_mx)
}
l_mx[, cols] <- diag(rep(1, coef_rows))
if (contains_intercept) {
l_mx[, cols] <- cbind(-1, diag(rep(1, coef_rows)))
} else {
l_mx[, cols] <- diag(rep(1, coef_rows))
}
for (i in setdiff(seq_len(ncol(fcts)), idx)) {
x1 <- mx[, cols, drop = FALSE]
additional_vars <- names(which(fcts[, i] > fcts[, idx]))
additional_numeric <- any(var_numeric[additional_vars])
additional_numeric <- any(!additional_vars %in% names(xlev))
current_col <- which(asg == i)
if (ods[i] >= ods[idx] && all(fcts[, i] >= fcts[, idx]) && !additional_numeric) {
sub_mat <- switch(type,
"2" = ,
"II" = {
x1 <- mx[, cols, drop = FALSE]
x0 <- mx[, -c(cols, current_col), drop = FALSE]
x2 <- mx[, current_col, drop = FALSE]
m <- diag(rep(1, nrow(x0))) - x0 %*% solve(t(x0) %*% x0) %*% t(x0)
solve(t(x1) %*% m %*% x1) %*% t(x1) %*% m %*% x2
ret <- solve(t(x1) %*% m %*% x1) %*% t(x1) %*% m %*% x2
if (contains_intercept) {
ret[-1, ] - ret[1, ]
} else {
ret
}
},
"3" = ,
"III" = {
additional_levels <- vapply(data[additional_vars], function(x) {
if (is.factor(x)) nlevels(x) else length(unique(x))
}, FUN.VALUE = 1L)
t_levels <- prod(additional_levels)
l_mx[, cols] / t_levels
lvls <- h_obtain_lvls(effect, additional_vars, xlev)
t_levels <- lvls$total
nms_base <- colnames(mx)[cols]
nms <- colnames(mx)[current_col]
nms_base_split <- strsplit(nms_base, ":")
nms_split <- strsplit(nms, ":")
base_idx <- h_get_index(nms_split, nms_base_split)
mt <- l_mx[, cols, drop = FALSE] / t_levels
ret <- mt[, base_idx, drop = FALSE]
# if there is extra levels, replace it with -1/t_levels
ret[is.na(ret)] <- -1 / t_levels
ret
}
)
l_mx[, current_col] <- sub_mat
Expand Down Expand Up @@ -114,3 +130,81 @@ Anova.mmrm <- function(mod, type = c("II", "III", "2", "3"), tol = sqrt(.Machine
)
ret_df
}


#' Obtain Levels Prior and Posterior
#' @param var (`string`) name of the effect.
#' @param additional_vars (`character`) names of additional variables.
#' @param xlev (`list`) named list of character levels.
#' @param factors (`matrix`) the factor matrix.
#' @keywords internal
h_obtain_lvls <- function(var, additional_vars, xlev, factors) {
assert_string(var)
assert_character(additional_vars)
assert_list(xlev, types = "character")
nms <- names(xlev)
assert_subset(additional_vars, nms)
if (var %in% nms) {
prior_vars <- intersect(nms[seq_len(match(var, nms) - 1)], additional_vars)
prior_lvls <- vapply(xlev[prior_vars], length, FUN.VALUE = 1L)
post_vars <- intersect(nms[seq(match(var, nms) + 1, length(nms))], additional_vars)
post_lvls <- vapply(xlev[post_vars], length, FUN.VALUE = 1L)
total_lvls <- prod(prior_lvls) * prod(post_lvls)
} else {
prior_lvls <- vapply(xlev[additional_vars], length, FUN.VALUE = 1L)
post_lvls <- 2L
total_lvls <- prod(prior_lvls)
}
list(
prior = prior_lvls,
post = post_lvls,
total = total_lvls
)
}

#' Check if the Effect is the First Categorical Effect
#' @param effect (`string`) name of the effect.
#' @param categorical (`character`) names of the categorical values.
#' @param factors (`matrix`) the factor matrix.
#' @keywords internal
h_first_contain_categorical <- function(effect, factors, categorical) {
assert_string(effect)
assert_matrix(factors)
assert_character(categorical)
mt <- match(effect, colnames(factors))
varnms <- row.names(factors)
# if the effect is not categorical in any value, return FALSE
if (!any(varnms[factors[, mt] > 0] %in% categorical)) {
return(FALSE)
}
# keep only categorical rows that is in front of the current factor
factors <- factors[row.names(factors) %in% categorical, seq_len(mt - 1L), drop = FALSE]
# if previous cols are all numerical, return TRUE
if (ncol(factors) < 1L) {
return(TRUE)
}
col_ind <- apply(factors, 2, prod)
# if any of the previous cols are categorical, return FALSE
return(!any(col_ind > 0))
}

#' Test if the First Vector is Subset of the Second Vector
#' @param x (`vector`) the first list.
#' @param y (`vector`) the second list.
#' @keywords internal
h_get_index <- function(x, y) {
assert_list(x)
assert_list(y)
vapply(
x,
\(i) {
r <- vapply(y, \(j) test_subset(j, i), FUN.VALUE = TRUE)
if (sum(r) == 1L) {
which(r)
} else {
NA_integer_
}
},
FUN.VALUE = 1L
)
}
19 changes: 19 additions & 0 deletions man/h_first_contain_categorical.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/h_get_index.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/h_obtain_lvls.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions tests/testthat/_snaps/car.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
166.132390274818, 168.517347862691, 148.115819022518, 147.9145531646
), "F Statistic" = c(36.9114313439307, 0.375714493693334, 31.6630556880233,
142.112225163932, 0.258059683823067), "Pr(>=F)" = c(5.54457484361028e-14,
0.54074393045656, 7.50169682382158e-08, 2.14434380222247e-43,
0.54074393045656, 7.5016968238216e-08, 2.14434380222259e-43,
0.855493665048709)), class = c("anova", "data.frame"), row.names = c("RACE",
"SEX", "ARMCD", "AVISIT", "ARMCD:AVISIT"), heading = "Analysis of Fixed Effect Table (Type III F tests)")

Expand All @@ -14,7 +14,7 @@
174.508554247636, 174.459299901183, 143.379480366266, 143.114582736984
), "F Statistic" = c(43.8669479408289, 1.16753326235884, 28.5214961211129,
146.699440440163, 0.148199798372577), "Pr(>=F)" = c(3.70573055665334e-16,
0.281399695369735, 2.86006523470328e-07, 1.67727491048864e-43,
0.281399695369735, 2.86006523470328e-07, 1.67727491048869e-43,
0.930700208603909)), class = c("anova", "data.frame"), row.names = c("RACE",
"SEX", "ARMCD", "AVISIT", "ARMCD:AVISIT"), heading = "Analysis of Fixed Effect Table (Type III F tests)")

Expand All @@ -24,7 +24,7 @@
166.132390274818, NA, 168.517347862691, 148.115819022518, 147.9145531646
), "F Statistic" = c(36.9114313439307, 0.375714493693334, NA,
31.6630556880233, 142.112225163932, 0.258059683823067), "Pr(>=F)" = c(5.54457484361028e-14,
0.54074393045656, NA, 7.50169682382158e-08, 2.14434380222247e-43,
0.54074393045656, NA, 7.5016968238216e-08, 2.14434380222259e-43,
0.855493665048709)), class = c("anova", "data.frame"), row.names = c("RACE",
"SEX", "SEX2", "ARMCD", "AVISIT", "ARMCD:AVISIT"), heading = "Analysis of Fixed Effect Table (Type III F tests)")

Expand Down
Loading
Loading