openpharma · danielinteractive · Sep 20, 2024 · Sep 3, 2024 · Sep 14, 2024 · Sep 14, 2024
@@ -58,6 +58,7 @@ Imports:
     TMB (>= 1.9.1),
     utils
 Suggests:
+    broom.helpers,
     car (>= 3.1.2),
     cli,
     clubSandwich,
@@ -81,6 +82,7 @@ Suggests:
     scales,
     testthat (>= 3.0.0),
     tidymodels,
+    withr,
     xml2
 LinkingTo:
     Rcpp,

@@ -6,11 +6,15 @@
 - Previously `car::Anova` will give incorrect results if an interaction term is included and the order of the covariate of interest is not the first categorical variable. This is fixed now.
 - Previously `car::Anova` will fail if the model does not contain intercept. This is fixed now.
 - Previously, `mmrm` will ignore contrasts defined for covariates in the input data set. This is fixed now.
+- Previously, `predict` will always require the response to be valid, even for unconditional predictions. This is fixed now and unconditional prediction do not require the response to be valid anymore.
 - When running with `TMB` package versions below 1.9.15, MMRM fit results are not completely reproducible. While this may not be relevant for most applications, because the numerical differences are very small, we now issue a warning to the user if this is the case. We advise users to upgrade their `TMB` package versions to 1.9.15 or higher to ensure reproducibility.
 
 ### Miscellaneous
 
 - Upon fitting an MMRM, it is checked whether a not reproducible optimization feature of `TMB` is turned on. If so, a warning is issued to the user once per session.
+- `model.matrix` is updated to ensure that the `NA` values are dropped. Additionally, an argument `use_response` is added to decide whether records with `NA` values in the response should be discarded.
+- `model.frame` is updated to ensure that the `na.action` works.
+- `predict` is updated to allow duplicated subject IDs for unconditional prediction.
 
 # mmrm 0.3.12
 

@@ -67,7 +67,7 @@ predict.mmrm_tmb <- function(object,
                              interval = c("none", "confidence", "prediction"),
                              level = 0.95,
                              nsim = 1000L,
-                             conditional = TRUE,
+                             conditional = FALSE,
                              ...) {
   if (missing(newdata)) {
     newdata <- object$data
@@ -79,8 +79,51 @@ predict.mmrm_tmb <- function(object,
   assert_count(nsim, positive = TRUE)
   assert_flag(conditional)
   interval <- match.arg(interval)
+  formula_parts <- object$formula_parts
+  if (any(object$tmb_data$x_cols_aliased)) {
+    warning(
+      "In fitted object there are co-linear variables and therefore dropped terms, ",
+      "and this could lead to incorrect prediction on new data."
+    )
+  }
+  colnames <- names(Filter(isFALSE, object$tmb_data$x_cols_aliased))
+  if (!conditional && interval %in% c("none", "confidence")) {
+    # model.matrix always return a complete matrix (no NA allowed)
+    x_mat <- stats::model.matrix(object, data = newdata, use_response = FALSE)[, colnames, drop = FALSE]
+    x_mat_full <- matrix(
+      NA,
+      nrow = nrow(newdata), ncol = ncol(x_mat),
+      dimnames = list(row.names(newdata), colnames(x_mat))
+    )
+    x_mat_full[row.names(x_mat), ] <- x_mat
+    predictions <- (x_mat_full %*% component(object, "beta_est"))[, 1]
+    predictions_raw <- stats::setNames(rep(NA_real_, nrow(newdata)), row.names(newdata))
+    predictions_raw[names(predictions)] <- predictions
+    if (identical(interval, "none")) {
+      return(predictions_raw)
+    }
+    se <- switch(interval,
+      # can be NA if there are aliased cols
+      "confidence" = diag(x_mat_full %*% component(object, "beta_vcov") %*% t(x_mat_full)),
+      "none" = NA_real_
+    )
+    res <- cbind(
+      fit = predictions, se = se,
+      lwr = predictions - stats::qnorm(1 - level / 2) * se, upr = predictions + stats::qnorm(1 - level / 2) * se
+    )
+    if (!se.fit) {
+      res <- res[, setdiff(colnames(res), "se")]
+    }
+    res_raw <- matrix(
+      NA_real_,
+      ncol = ncol(res), nrow = nrow(newdata),
+      dimnames = list(row.names(newdata), colnames(res))
+    )
+    res_raw[row.names(res), ] <- res
+    return(res_raw)
+  }
   tmb_data <- h_mmrm_tmb_data(
-    object$formula_parts, newdata,
+    formula_parts, newdata,
     weights = rep(1, nrow(newdata)),
     reml = TRUE,
     singular = "keep",
@@ -90,16 +133,6 @@ predict.mmrm_tmb <- function(object,
     xlev = component(object, "xlev"),
     contrasts = component(object, "contrasts")
   )
-  if (!conditional) {
-    tmb_data$y_vector[] <- NA_real_
-  }
-  if (any(object$tmb_data$x_cols_aliased)) {
-    warning(
-      "In fitted object there are co-linear variables and therefore dropped terms, ",
-      "and this could lead to incorrect prediction on new data."
-    )
-  }
-  colnames <- names(Filter(isFALSE, object$tmb_data$x_cols_aliased))
   tmb_data$x_matrix <- tmb_data$x_matrix[, colnames, drop = FALSE]
   predictions <- h_get_prediction(
     tmb_data, object$theta_est, object$beta_est, component(object, "beta_vcov")
@@ -221,29 +254,25 @@ model.frame.mmrm_tmb <- function(formula, data, include = c("subject_var", "visi
       include = include,
       full = full
     )
-
-  # Construct data frame to return to users.
-  ret <-
-    stats::model.frame(
-      formula = lst_formula_and_data$formula,
-      data = lst_formula_and_data$data,
-      na.action = na.action,
-      xlev = stats::.getXlevels(terms(formula), formula$tmb_data$full_frame)
-    )
-  # We need the full formula obs, so recalculating if not already full.
-  ret_full <- if (lst_formula_and_data$is_full) {
+  # Only if include is default (full) and also data is missing, and also na.action is na.omit we will
+  # use the model frame from the tmb_data.
+  include_choice <- c("subject_var", "visit_var", "group_var", "response_var")
+  if (missing(data) && setequal(include, include_choice) && identical(h_get_na_action(na.action), stats::na.omit)) {
+    ret <- formula$tmb_data$full_frame
+    # Remove weights column.
+    ret[, "(weights)"] <- NULL
     ret
   } else {
-    stats::model.frame(
-      formula = lst_formula_and_data$formula_full,
-      data = lst_formula_and_data$data,
-      na.action = na.action,
-      xlev = stats::.getXlevels(terms(formula), formula$tmb_data$full_frame)
-    )
+    # Construct data frame to return to users.
+    ret <-
+      stats::model.frame(
+        formula = lst_formula_and_data$formula,
+        data = h_get_na_action(na.action)(lst_formula_and_data$data),
+        na.action = na.action,
+        xlev = stats::.getXlevels(terms(formula), formula$tmb_data$full_frame)
+      )
   }
-
-  # Lastly, subsetting returned data frame to only include obs utilized in model.
-  ret[rownames(ret) %in% rownames(ret_full), , drop = FALSE]
+  ret
 }
 
 
@@ -265,9 +294,7 @@ model.frame.mmrm_tmb <- function(formula, data, include = c("subject_var", "visi
 #'
 #' @return named list with four elements:
 #' - `"formula"`: the formula including the columns requested in the `include=` argument.
-#' - `"formula_full"`: the formula including all columns
 #' - `"data"`: a data frame including all columns needed in the formula.
-#' - `"is_full"`: a logical scalar indicating if the formula and
 #'   full formula are identical
 #' @keywords internal
 h_construct_model_frame_inputs <- function(formula,
@@ -299,51 +326,34 @@ h_construct_model_frame_inputs <- function(formula,
   # Update data based on the columns in the full formula return.
   all_vars <- all.vars(new_formula_full)
   assert_names(colnames(data), must.include = all_vars)
-  full_frame <- formula$tmb_data$full_frame
+  data <- data[, all_vars, drop = FALSE]
 
-  # Return list with updated formula, full formula, data, and full formula flag.
+  # Return list with updated formula, data.
   list(
     formula = new_formula,
-    formula_full = new_formula_full,
-    data = data,
-    is_full = setequal(include, include_choice)
+    data = data
   )
 }
 
 #' @describeIn mmrm_tmb_methods obtains the model matrix.
 #' @exportS3Method
+#' @param use_response (`flag`)\cr whether to use the response for complete rows.
 #'
 #' @examples
 #' # Model matrix:
 #' model.matrix(object)
-model.matrix.mmrm_tmb <- function(object, data, include = NULL, ...) { # nolint
-  # Construct updated formula and data arguments.
-  assert_subset(include, c("subject_var", "visit_var", "group_var"))
-  lst_formula_and_data <-
-    h_construct_model_frame_inputs(formula = object, data = data, include = include)
-
-  # Construct matrix to return to users.
-  ret <-
-    stats::model.matrix(
-      object = lst_formula_and_data$formula,
-      data = lst_formula_and_data$data,
-      contrasts.arg = attr(object$tmb_data$x_matrix, "contrasts"),
-      ...
-    )
-
-  # We need the full formula obs, so recalculating if not already full.
-  ret_full <- if (lst_formula_and_data$is_full) {
-    ret
-  } else {
-    stats::model.matrix(
-      object = lst_formula_and_data$formula_full,
-      data = lst_formula_and_data$data,
-      ...
-    )
+model.matrix.mmrm_tmb <- function(object, data, use_response = TRUE, ...) { # nolint
+  # Always return the utilized model matrix if data not provided.
+  if (missing(data)) {
+    return(object$tmb_data$x_matrix)
   }
-
-  # Subset data frame to only include obs utilized in model.
-  ret[rownames(ret) %in% rownames(ret_full), , drop = FALSE]
+  stats::model.matrix(
+    h_add_terms(object$formula_parts$model_formula, NULL, drop_response = !use_response),
+    data = data,
+    contrasts.arg = attr(object$tmb_data$x_matrix, "contrasts"),
+    xlev = component(object, "xlev"),
+    ...
+  )
 }
 
 #' @describeIn mmrm_tmb_methods obtains the terms object.

@@ -154,8 +154,9 @@ h_mmrm_tmb_data <- function(formula_parts,
   if (drop_levels) {
     full_frame <- h_drop_levels(full_frame, formula_parts$subject_var, formula_parts$visit_var, names(xlev))
   }
-  keep_ind <- if (allow_na_response) {
-    # Note that response is always the first column.
+  has_response <- !identical(attr(attr(full_frame, "terms"), "response"), 0L)
+  keep_ind <- if (allow_na_response && has_response) {
+    # Note that response is always the first column if there is response.
     stats::complete.cases(full_frame[, -1L, drop = FALSE])
   } else {
     stats::complete.cases(full_frame)
@@ -199,7 +200,11 @@ h_mmrm_tmb_data <- function(formula_parts,
       attr(x_matrix, "contrasts") <- contrasts_attr
     }
   }
-  y_vector <- as.numeric(stats::model.response(full_frame))
+  y_vector <- if (has_response) {
+    as.numeric(stats::model.response(full_frame))
+  } else {
+    rep(NA_real_, nrow(full_frame))
+  }
   weights_vector <- as.numeric(stats::model.weights(full_frame))
   n_subjects <- length(unique(full_frame[[formula_parts$subject_var]]))
   subject_zero_inds <- which(!duplicated(full_frame[[formula_parts$subject_var]])) - 1L

@@ -314,6 +314,18 @@ h_warn_na_action <- function() {
   }
 }
 
+#' Obtain `na.action` as Function
+#' @keywords internal
+h_get_na_action <- function(na_action) {
+  if (is.function(na_action) && identical(methods::formalArgs(na_action), c("object", "..."))) {
+    return(na_action)
+  }
+  if (is.character(na_action) && length(na_action) == 1L) {
+    assert_subset(na_action, c("na.omit", "na.exclude", "na.fail", "na.pass", "na.contiguous"))
+    return(get(na_action, mode = "function", pos = "package:stats"))
+  }
+}
+
 #' Validate mmrm Formula
 #' @param formula (`formula`)\cr to check.
 #'

@@ -5,7 +5,7 @@
 #' @noRd
 .onLoad <- function(libname, pkgname) { # nolint
   if (utils::packageVersion("TMB") < "1.9.15") {
-    warning("TMB version 1.9.15 or higher is required for reproducible model fits")
+    warning("TMB version 1.9.15 or higher is required for reproducible model fits", call. = FALSE)
   }
 
   register_on_load(

@@ -19,14 +19,14 @@ knitr::opts_chunk$set(
 [![CRAN status](https://www.r-pkg.org/badges/version-last-release/mmrm)](https://www.r-pkg.org/badges/version-last-release/mmrm)
 [![CRAN monthly downloads](https://cranlogs.r-pkg.org/badges/mmrm)](https://cranlogs.r-pkg.org/badges/mmrm)
 [![CRAN total downloads](https://cranlogs.r-pkg.org/badges/grand-total/mmrm)](https://cranlogs.r-pkg.org/badges/grand-total/mmrm)
-[![Code Coverage](https://raw.githubusercontent.com/openpharma/mmrm/_xml_coverage_reports/data/main/badge.svg)](https://raw.githubusercontent.com/openpharma/mmrm/_xml_coverage_reports/data/main/coverage.xml)
+[![Code Coverage](https://raw.githubusercontent.com/openpharma/mmrm/_xml_coverage_reports/data/main/badge.svg)](https://openpharma.github.io/mmrm/latest-tag/coverage-report/)
 <!-- badges: end -->
 \
 
 Mixed models for repeated measures (MMRM) are a popular
 choice for analyzing longitudinal continuous outcomes in randomized
 clinical trials and beyond; see
-[Cnaan, Laird and Slasor (1997)](https://doi.org/10.1002/(SICI)1097-0258(19971030)16:20<2349::AID-SIM667>3.0.CO;2-E)
+[Cnaan, Laird and Slasor (1997)](https://doi.org/10.1002%2f%28SICI%291097-0258%2819971030%2916%3a20%3c2349%3a%3aAID-SIM667%3e3.0.CO%3b2-E)
 for a tutorial and
 [Mallinckrodt, Lane and Schnell (2008)](https://doi.org/10.1177/009286150804200402)
 for a review. This package implements
@@ -115,7 +115,7 @@ because compilation of the `C++` sources is required.
 See also the [introductory vignette](https://openpharma.github.io/mmrm/main/articles/introduction.html)
 or get started by trying out the example:
 
-```{r, child='vignettes/subsections/intro-getting_started.Rmd'}
+```{r, child='vignettes/subsections/_intro-getting_started.Rmd'}
 ```
 
 ## Citing `mmrm`

@@ -22,7 +22,7 @@ Coverage](https://raw.githubusercontent.com/openpharma/mmrm/_xml_coverage_report
 Mixed models for repeated measures (MMRM) are a popular choice for
 analyzing longitudinal continuous outcomes in randomized clinical trials
 and beyond; see [Cnaan, Laird and Slasor
-(1997)](https://doi.org/10.1002/(SICI)1097-0258(19971030)16:20%3C2349::AID-SIM667%3E3.0.CO;2-E)
+(1997)](https://doi.org/10.1002%2f%28SICI%291097-0258%2819971030%2916%3a20%3c2349%3a%3aAID-SIM667%3e3.0.CO%3b2-E)
 for a tutorial and [Mallinckrodt, Lane and Schnell
 (2008)](https://doi.org/10.1177/009286150804200402) for a review. This
 package implements MMRM based on the marginal linear model without