... minor fix in documentation and weights for wglm ...

tagteam · Oct 21, 2024 · 1de94df · 1de94df
1 parent a1db207
commit 1de94df
Show file tree

Hide file tree

Showing 7 changed files with 70 additions and 34 deletions.
diff --git a/R/ate.R b/R/ate.R
@@ -3,9 +3,9 @@
 ## author: Thomas Alexander Gerds
 ## created: Oct 23 2016 (08:53) 
 ## Version: 
-## last-updated: Oct 21 2024 (17:18) 
+## last-updated: Oct 21 2024 (18:04) 
 ##           By: Brice Ozenne
-##     Update #: 2566
+##     Update #: 2572
 #----------------------------------------------------------------------
 ## 
 ### Commentary: 
@@ -76,31 +76,31 @@
 #' @author Brice Ozenne \email{broz@@sund.ku.dk}
 #' and Thomas Alexander Gerds \email{tag@@biostat.ku.dk}
 #'
-#' @details \bold{event}: \itemize{
-#' \item IPTW estimate: a character vector indicating the event and status variables.
+#' @details Argument \bold{event}: \itemize{
+#' \item IPTW estimator: a character vector indicating the event and status variables.
 #' \item G-formula or AIPTW estimator: a survival model (e.g. Cox \code{"survival::coxph"}, Kaplan Meier \code{"prodlim::prodlim"}),
 #' a competing risk model (e.g. cause-specific Cox \code{"riskRegression::CSC"}),
 #' a logistic model (e.g. \code{"stats::glm"} when argument \code{censor} is \code{NULL} otherwise \code{"riskRegression::wglm"}).
 #' Other models can be specified, provided a suitable \code{predictRisk} method exists, but the standard error should be computed using non-parametric bootstrap,
 #' as the influence function of the estimator will typically not be available.
 #' }
-#' \bold{treatment}: \itemize{
+#' Argument \bold{treatment}: \itemize{
 #' \item G-formula estimator: a character indicating the treatment variable.
 #' \item IPTW or AIPTW estimator: a \code{"stats::glm"} model with family \code{"binomial"} (two treatment options) or a \code{"nnet::multinom"} (more than two treatment options).
 #' }
-#' \bold{censor}: \itemize{
+#' Argument \bold{censor}: \itemize{
 #' \item G-formula estimator: NULL
 #' \item IPTW or AIPTW estimator: NULL if no censoring and otherwise a survival model (e.g. Cox \code{"survival::coxph"}, Kaplan Meier \code{"prodlim::prodlim"}) 
 #' }
 #' 
-#' \bold{estimator}: when using a IPCW logistic model (\code{"riskRegression::wglm"}), the integral term w.r.t. to the martingale of the censoring process is not computed for augmented estimator,
-#' i.e. AIPTW,IPCW estimator instead AIPTW,AIPCW with the notation of Ozenne et al. 2020. \cr
+#' Argument \bold{estimator}: when set to \code{"AIPTW"} with argument \code{event} being IPCW logistic model (\code{"riskRegression::wglm"}), the integral term w.r.t. to the martingale of the censoring process is not computed,
+#' i.e. using the notation of Ozenne et al. (2020) an AIPTW,IPCW estimator instead of an AIPTW,AIPCW is evaluated. \cr
 #'
 #' In presence of censoring, the computation time and memory usage for the evaluation of the AIPTW estimator and its uncertainty do not scale well with the number of observations (n) or the number of unique timepoints (T).
-#' Point estimation involves n by T matrices and influence function n by T by n arrays. \itemize{
-#' \item for large datasets (e.g. n>5000), bootstrap is recommended as the memory need for the influence function may be prohibitive.
+#' Point estimation involves n by T matrices, influence function involves n by T by n arrays. \itemize{
+#' \item for large datasets (e.g. n>5000), bootstrap is recommended as the memory need for the influence function is likely prohibitive.
 #' \item it is possible to decrease the memory usage for the point estimation by setting the (hidden) argument \code{store=c(size.split=1000)}. The integral term of the AIPTW estimator is then evaluated for 1000 observations at a time, i.e. involing matrices of size 1000 by T instead of n by T. This may lead to increased computation time.
-#' \item reducing the number of unique timepoints (e.g. by rounding them) will lead to a less efficient but faster and less memory demanding estimation procedure.
+#' \item reducing the number of unique timepoints (e.g. by rounding them) will lead to an approximate estimation procedure that is less demanding both in term of computation and memory. The resulting estimator will be more variable than the one based on the original timepoints (i.e. wider confidence intervals).
 #' }
 #'  
 #' 

diff --git a/R/wglm.R b/R/wglm.R
@@ -3,9 +3,9 @@
 ## Author: Brice Ozenne
 ## Created: sep  1 2020 (14:58) 
 ## Version: 
-## Last-Updated: Oct 20 2024 (13:59) 
+## Last-Updated: Oct 21 2024 (18:23) 
 ##           By: Brice Ozenne
-##     Update #: 774
+##     Update #: 782
 ##----------------------------------------------------------------------
 ## 
 ### Commentary: 
@@ -40,6 +40,13 @@
 #' 
 #' @return an object of class \code{"wglm"}.
 #'
+#' @seealso
+#' \code{\link{coef.wglm}} to output the estimated parameters from the logistic regression. \cr
+#' \code{\link{confint.wglm}} to output the estimated parameters from the logistic regression with their confidence interval. \cr
+#' \code{\link{model.tables.wglm}} to output a data.frame containing the estimated parameters from the logistic regression with its confidence intervals and p-values.   \cr
+#' \code{\link{predictRisk.wglm}} to evaluate event probabilities (e.g. survival probabilities) conditional on covariates. \cr
+#' \code{summary.wglm} for displaying in the console a summary of the results. \cr
+#' \code{\link{weights.wglm}} to extract the IPCW weights. 
 
 ## * wglm (examples)
 #' @examples
@@ -56,15 +63,21 @@
 #' #### no censoring ####
 #' e.wglm <- wglm(Surv(time,event) ~ X1, 
 #'                times = tau, data = dFull, product.limit = TRUE)
-#' e.wglm ## same as a logistic regression
+#' e.wglm ## same as a logistic regression at each timepoint
+#' 
+#' coef(e.wglm)
+#' confint(e.wglm)
+#' model.tables(e.wglm)
 #'
 #' summary(ate(e.wglm, data = dFull, times = tau, treatment = "X1", verbose = FALSE))
 #'
 #' #### right-censoring ####
 #' ## no covariante in the censoring model (independent censoring)
 #' eC.wglm <- wglm(Surv(time,event) ~ X1,
 #'                times = tau, data = dSurv, product.limit = TRUE)
-#' eC.wglm
+#' summary(eC.wglm)
+#'
+#' weights(eC.wglm)
 #'
 #' ## with covariates in the censoring model
 #' eC2.wglm <- wglm(Surv(time,event) ~ X1 + X8, formula.censor = ~ X1*X8,
@@ -824,11 +837,12 @@ iid.wglm <- function(x, times = NULL, simplify = TRUE, store = NULL, ...){
 #'
 #' @param object a wglm object.
 #' @param times [numeric vector] time points at which the weights should be output. 
+#' @param prefix [character] used to name the columns. Can be \code{NA} to keep the original names.
 #' @param simplify [logical] should the ouput be converted to a vector when only one timepoint is requested. Otherwise will always return a matrix.
 #' @param ... Not used.
 #' @export
 #'
-weights.wglm <- function(object, times = NULL, simplify = TRUE, ...){
+weights.wglm <- function(object, times = NULL, prefix = "t", simplify = TRUE, ...){
 
     ## ** normalize user input
     dots <- list(...)
@@ -840,10 +854,16 @@ weights.wglm <- function(object, times = NULL, simplify = TRUE, ...){
             stop("Unknown timepoint ",paste(setdiff(times,object$time), collapse = ", ")," in argument \'times\'. \n",
                  "Valid timepoints: ",paste(object$time, collapse = ", "),". \n")
         }
-    }
-
+    }    
     ## ** extract weights
-    out <- object$data[object$name.IPCW]
+    if(is.data.table(object$data)){
+        out <- as.data.frame(object$data[,.SD,.SDcols = object$name.IPCW])
+    }else{
+        out <- object$data[object$name.IPCW]
+    }
+    if(!is.na(prefix)){
+        names(out) <- paste0(prefix,object$time)
+    }
     if(!is.null(times)){
         out <- out[match(times,object$time)]
     }

diff --git a/man/anova.ate.Rd b/man/anova.ate.Rd
diff --git a/man/ate.Rd b/man/ate.Rd
diff --git a/man/autoplot.ate.Rd b/man/autoplot.ate.Rd
diff --git a/man/weights.wglm.Rd b/man/weights.wglm.Rd
diff --git a/man/wglm.Rd b/man/wglm.Rd