fix anova type 3 (#460)

* fix anova type 3 * fix anova * Update NEWS.md Co-authored-by: Daniel Sabanes Bove <[email protected]> * fix * [skip style] [skip vbump] Restyle files * fix lint issues * [skip style] [skip vbump] Restyle files * fix tests * fix checks * Update R/interop-car.R Co-authored-by: Daniel Sabanes Bove <[email protected]> * Update tests/testthat/test-car.R Co-authored-by: Daniel Sabanes Bove <[email protected]> * Update R/interop-car.R Co-authored-by: Daniel Sabanes Bove <[email protected]> * Empty * rename symbol * fix anova * [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update * Empty * add internal to h_get_index function --------- Co-authored-by: Daniel Sabanes Bove <[email protected]> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
openpharma · Sep 13, 2024 · 17e417b · 17e417b
1 parent 2de9c87
commit 17e417b
Show file tree

Hide file tree

Showing 7 changed files with 353 additions and 15 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -3,6 +3,8 @@
 ### Bug Fixes
 
 - Previously `emmeans` will return `NA` for spatial covariance structure. This is fixed now.
+- Previously `car::Anova` will give incorrect results if an interaction term is included and the order of the covariate of interest is not the first categorical variable. This is fixed now.
+- Previously `car::Anova` will fail if the model does not contain intercept. This is fixed now.
 - Previously, `mmrm` will ignore contrasts defined for covariates in the input data set. This is fixed now.
 
 # mmrm 0.3.12

diff --git a/R/interop-car.R b/R/interop-car.R
@@ -40,35 +40,51 @@ h_get_contrast <- function(object, effect, type = c("II", "III", "2", "3"), tol
   assert_subset(effect, colnames(fcts))
   idx <- which(effect == colnames(fcts))
   cols <- which(asg == idx)
-  data <- model.frame(object)
-  var_numeric <- vapply(data, is.numeric, FUN.VALUE = TRUE)
-  coef_rows <- length(cols)
+  xlev <- component(object, "xlev")
+  contains_intercept <- (!0 %in% asg) && h_first_contain_categorical(effect, fcts, names(xlev))
+  coef_rows <- length(cols) - as.integer(contains_intercept)
   l_mx <- matrix(0, nrow = coef_rows, ncol = length(asg))
   if (coef_rows == 0L) {
     return(l_mx)
   }
-  l_mx[, cols] <- diag(rep(1, coef_rows))
+  if (contains_intercept) {
+    l_mx[, cols] <- cbind(-1, diag(rep(1, coef_rows)))
+  } else {
+    l_mx[, cols] <- diag(rep(1, coef_rows))
+  }
   for (i in setdiff(seq_len(ncol(fcts)), idx)) {
-    x1 <- mx[, cols, drop = FALSE]
     additional_vars <- names(which(fcts[, i] > fcts[, idx]))
-    additional_numeric <- any(var_numeric[additional_vars])
+    additional_numeric <- any(!additional_vars %in% names(xlev))
     current_col <- which(asg == i)
     if (ods[i] >= ods[idx] && all(fcts[, i] >= fcts[, idx]) && !additional_numeric) {
       sub_mat <- switch(type,
         "2" = ,
         "II" = {
+          x1 <- mx[, cols, drop = FALSE]
           x0 <- mx[, -c(cols, current_col), drop = FALSE]
           x2 <- mx[, current_col, drop = FALSE]
           m <- diag(rep(1, nrow(x0))) - x0 %*% solve(t(x0) %*% x0) %*% t(x0)
-          solve(t(x1) %*% m %*% x1) %*% t(x1) %*% m %*% x2
+          ret <- solve(t(x1) %*% m %*% x1) %*% t(x1) %*% m %*% x2
+          if (contains_intercept) {
+            ret[-1, ] - ret[1, ]
+          } else {
+            ret
+          }
         },
         "3" = ,
         "III" = {
-          additional_levels <- vapply(data[additional_vars], function(x) {
-            if (is.factor(x)) nlevels(x) else length(unique(x))
-          }, FUN.VALUE = 1L)
-          t_levels <- prod(additional_levels)
-          l_mx[, cols] / t_levels
+          lvls <- h_obtain_lvls(effect, additional_vars, xlev)
+          t_levels <- lvls$total
+          nms_base <- colnames(mx)[cols]
+          nms <- colnames(mx)[current_col]
+          nms_base_split <- strsplit(nms_base, ":")
+          nms_split <- strsplit(nms, ":")
+          base_idx <- h_get_index(nms_split, nms_base_split)
+          mt <- l_mx[, cols, drop = FALSE] / t_levels
+          ret <- mt[, base_idx, drop = FALSE]
+          # if there is extra levels, replace it with -1/t_levels
+          ret[is.na(ret)] <- -1 / t_levels
+          ret
         }
       )
       l_mx[, current_col] <- sub_mat
@@ -114,3 +130,81 @@ Anova.mmrm <- function(mod, type = c("II", "III", "2", "3"), tol = sqrt(.Machine
   )
   ret_df
 }
+
+
+#' Obtain Levels Prior and Posterior
+#' @param var (`string`) name of the effect.
+#' @param additional_vars (`character`) names of additional variables.
+#' @param xlev (`list`) named list of character levels.
+#' @param factors (`matrix`) the factor matrix.
+#' @keywords internal
+h_obtain_lvls <- function(var, additional_vars, xlev, factors) {
+  assert_string(var)
+  assert_character(additional_vars)
+  assert_list(xlev, types = "character")
+  nms <- names(xlev)
+  assert_subset(additional_vars, nms)
+  if (var %in% nms) {
+    prior_vars <- intersect(nms[seq_len(match(var, nms) - 1)], additional_vars)
+    prior_lvls <- vapply(xlev[prior_vars], length, FUN.VALUE = 1L)
+    post_vars <- intersect(nms[seq(match(var, nms) + 1, length(nms))], additional_vars)
+    post_lvls <- vapply(xlev[post_vars], length, FUN.VALUE = 1L)
+    total_lvls <- prod(prior_lvls) * prod(post_lvls)
+  } else {
+    prior_lvls <- vapply(xlev[additional_vars], length, FUN.VALUE = 1L)
+    post_lvls <- 2L
+    total_lvls <- prod(prior_lvls)
+  }
+  list(
+    prior = prior_lvls,
+    post = post_lvls,
+    total = total_lvls
+  )
+}
+
+#' Check if the Effect is the First Categorical Effect
+#' @param effect (`string`) name of the effect.
+#' @param categorical (`character`) names of the categorical values.
+#' @param factors (`matrix`) the factor matrix.
+#' @keywords internal
+h_first_contain_categorical <- function(effect, factors, categorical) {
+  assert_string(effect)
+  assert_matrix(factors)
+  assert_character(categorical)
+  mt <- match(effect, colnames(factors))
+  varnms <- row.names(factors)
+  # if the effect is not categorical in any value, return FALSE
+  if (!any(varnms[factors[, mt] > 0] %in% categorical)) {
+    return(FALSE)
+  }
+  # keep only categorical rows that is in front of the current factor
+  factors <- factors[row.names(factors) %in% categorical, seq_len(mt - 1L), drop = FALSE]
+  # if previous cols are all numerical, return TRUE
+  if (ncol(factors) < 1L) {
+    return(TRUE)
+  }
+  col_ind <- apply(factors, 2, prod)
+  # if any of the previous cols are categorical, return FALSE
+  return(!any(col_ind > 0))
+}
+
+#' Test if the First Vector is Subset of the Second Vector
+#' @param x (`vector`) the first list.
+#' @param y (`vector`) the second list.
+#' @keywords internal
+h_get_index <- function(x, y) {
+  assert_list(x)
+  assert_list(y)
+  vapply(
+    x,
+    \(i) {
+      r <- vapply(y, \(j) test_subset(j, i), FUN.VALUE = TRUE)
+      if (sum(r) == 1L) {
+        which(r)
+      } else {
+        NA_integer_
+      }
+    },
+    FUN.VALUE = 1L
+  )
+}
diff --git a/man/h_first_contain_categorical.Rd b/man/h_first_contain_categorical.Rd
diff --git a/man/h_get_index.Rd b/man/h_get_index.Rd
diff --git a/man/h_obtain_lvls.Rd b/man/h_obtain_lvls.Rd
diff --git a/tests/testthat/_snaps/car.md b/tests/testthat/_snaps/car.md
@@ -4,7 +4,7 @@
     166.132390274818, 168.517347862691, 148.115819022518, 147.9145531646
     ), "F Statistic" = c(36.9114313439307, 0.375714493693334, 31.6630556880233, 
     142.112225163932, 0.258059683823067), "Pr(>=F)" = c(5.54457484361028e-14, 
-    0.54074393045656, 7.50169682382158e-08, 2.14434380222247e-43, 
+    0.54074393045656, 7.5016968238216e-08, 2.14434380222259e-43, 
     0.855493665048709)), class = c("anova", "data.frame"), row.names = c("RACE", 
     "SEX", "ARMCD", "AVISIT", "ARMCD:AVISIT"), heading = "Analysis of Fixed Effect Table (Type III F tests)")
 
@@ -14,7 +14,7 @@
     174.508554247636, 174.459299901183, 143.379480366266, 143.114582736984
     ), "F Statistic" = c(43.8669479408289, 1.16753326235884, 28.5214961211129, 
     146.699440440163, 0.148199798372577), "Pr(>=F)" = c(3.70573055665334e-16, 
-    0.281399695369735, 2.86006523470328e-07, 1.67727491048864e-43, 
+    0.281399695369735, 2.86006523470328e-07, 1.67727491048869e-43, 
     0.930700208603909)), class = c("anova", "data.frame"), row.names = c("RACE", 
     "SEX", "ARMCD", "AVISIT", "ARMCD:AVISIT"), heading = "Analysis of Fixed Effect Table (Type III F tests)")
 
@@ -24,7 +24,7 @@
     166.132390274818, NA, 168.517347862691, 148.115819022518, 147.9145531646
     ), "F Statistic" = c(36.9114313439307, 0.375714493693334, NA, 
     31.6630556880233, 142.112225163932, 0.258059683823067), "Pr(>=F)" = c(5.54457484361028e-14, 
-    0.54074393045656, NA, 7.50169682382158e-08, 2.14434380222247e-43, 
+    0.54074393045656, NA, 7.5016968238216e-08, 2.14434380222259e-43, 
     0.855493665048709)), class = c("anova", "data.frame"), row.names = c("RACE", 
     "SEX", "SEX2", "ARMCD", "AVISIT", "ARMCD:AVISIT"), heading = "Analysis of Fixed Effect Table (Type III F tests)")