diff --git a/R/ate.R b/R/ate.R index 879524a..29128ac 100644 --- a/R/ate.R +++ b/R/ate.R @@ -3,9 +3,9 @@ ## author: Thomas Alexander Gerds ## created: Oct 23 2016 (08:53) ## Version: -## last-updated: Oct 21 2024 (17:18) +## last-updated: Oct 21 2024 (18:04) ## By: Brice Ozenne -## Update #: 2566 +## Update #: 2572 #---------------------------------------------------------------------- ## ### Commentary: @@ -76,31 +76,31 @@ #' @author Brice Ozenne \email{broz@@sund.ku.dk} #' and Thomas Alexander Gerds \email{tag@@biostat.ku.dk} #' -#' @details \bold{event}: \itemize{ -#' \item IPTW estimate: a character vector indicating the event and status variables. +#' @details Argument \bold{event}: \itemize{ +#' \item IPTW estimator: a character vector indicating the event and status variables. #' \item G-formula or AIPTW estimator: a survival model (e.g. Cox \code{"survival::coxph"}, Kaplan Meier \code{"prodlim::prodlim"}), #' a competing risk model (e.g. cause-specific Cox \code{"riskRegression::CSC"}), #' a logistic model (e.g. \code{"stats::glm"} when argument \code{censor} is \code{NULL} otherwise \code{"riskRegression::wglm"}). #' Other models can be specified, provided a suitable \code{predictRisk} method exists, but the standard error should be computed using non-parametric bootstrap, #' as the influence function of the estimator will typically not be available. #' } -#' \bold{treatment}: \itemize{ +#' Argument \bold{treatment}: \itemize{ #' \item G-formula estimator: a character indicating the treatment variable. #' \item IPTW or AIPTW estimator: a \code{"stats::glm"} model with family \code{"binomial"} (two treatment options) or a \code{"nnet::multinom"} (more than two treatment options). #' } -#' \bold{censor}: \itemize{ +#' Argument \bold{censor}: \itemize{ #' \item G-formula estimator: NULL #' \item IPTW or AIPTW estimator: NULL if no censoring and otherwise a survival model (e.g. Cox \code{"survival::coxph"}, Kaplan Meier \code{"prodlim::prodlim"}) #' } #' -#' \bold{estimator}: when using a IPCW logistic model (\code{"riskRegression::wglm"}), the integral term w.r.t. to the martingale of the censoring process is not computed for augmented estimator, -#' i.e. AIPTW,IPCW estimator instead AIPTW,AIPCW with the notation of Ozenne et al. 2020. \cr +#' Argument \bold{estimator}: when set to \code{"AIPTW"} with argument \code{event} being IPCW logistic model (\code{"riskRegression::wglm"}), the integral term w.r.t. to the martingale of the censoring process is not computed, +#' i.e. using the notation of Ozenne et al. (2020) an AIPTW,IPCW estimator instead of an AIPTW,AIPCW is evaluated. \cr #' #' In presence of censoring, the computation time and memory usage for the evaluation of the AIPTW estimator and its uncertainty do not scale well with the number of observations (n) or the number of unique timepoints (T). -#' Point estimation involves n by T matrices and influence function n by T by n arrays. \itemize{ -#' \item for large datasets (e.g. n>5000), bootstrap is recommended as the memory need for the influence function may be prohibitive. +#' Point estimation involves n by T matrices, influence function involves n by T by n arrays. \itemize{ +#' \item for large datasets (e.g. n>5000), bootstrap is recommended as the memory need for the influence function is likely prohibitive. #' \item it is possible to decrease the memory usage for the point estimation by setting the (hidden) argument \code{store=c(size.split=1000)}. The integral term of the AIPTW estimator is then evaluated for 1000 observations at a time, i.e. involing matrices of size 1000 by T instead of n by T. This may lead to increased computation time. -#' \item reducing the number of unique timepoints (e.g. by rounding them) will lead to a less efficient but faster and less memory demanding estimation procedure. +#' \item reducing the number of unique timepoints (e.g. by rounding them) will lead to an approximate estimation procedure that is less demanding both in term of computation and memory. The resulting estimator will be more variable than the one based on the original timepoints (i.e. wider confidence intervals). #' } #' #' diff --git a/R/wglm.R b/R/wglm.R index 36d2261..c121c63 100644 --- a/R/wglm.R +++ b/R/wglm.R @@ -3,9 +3,9 @@ ## Author: Brice Ozenne ## Created: sep 1 2020 (14:58) ## Version: -## Last-Updated: Oct 20 2024 (13:59) +## Last-Updated: Oct 21 2024 (18:23) ## By: Brice Ozenne -## Update #: 774 +## Update #: 782 ##---------------------------------------------------------------------- ## ### Commentary: @@ -40,6 +40,13 @@ #' #' @return an object of class \code{"wglm"}. #' +#' @seealso +#' \code{\link{coef.wglm}} to output the estimated parameters from the logistic regression. \cr +#' \code{\link{confint.wglm}} to output the estimated parameters from the logistic regression with their confidence interval. \cr +#' \code{\link{model.tables.wglm}} to output a data.frame containing the estimated parameters from the logistic regression with its confidence intervals and p-values. \cr +#' \code{\link{predictRisk.wglm}} to evaluate event probabilities (e.g. survival probabilities) conditional on covariates. \cr +#' \code{summary.wglm} for displaying in the console a summary of the results. \cr +#' \code{\link{weights.wglm}} to extract the IPCW weights. ## * wglm (examples) #' @examples @@ -56,7 +63,11 @@ #' #### no censoring #### #' e.wglm <- wglm(Surv(time,event) ~ X1, #' times = tau, data = dFull, product.limit = TRUE) -#' e.wglm ## same as a logistic regression +#' e.wglm ## same as a logistic regression at each timepoint +#' +#' coef(e.wglm) +#' confint(e.wglm) +#' model.tables(e.wglm) #' #' summary(ate(e.wglm, data = dFull, times = tau, treatment = "X1", verbose = FALSE)) #' @@ -64,7 +75,9 @@ #' ## no covariante in the censoring model (independent censoring) #' eC.wglm <- wglm(Surv(time,event) ~ X1, #' times = tau, data = dSurv, product.limit = TRUE) -#' eC.wglm +#' summary(eC.wglm) +#' +#' weights(eC.wglm) #' #' ## with covariates in the censoring model #' eC2.wglm <- wglm(Surv(time,event) ~ X1 + X8, formula.censor = ~ X1*X8, @@ -824,11 +837,12 @@ iid.wglm <- function(x, times = NULL, simplify = TRUE, store = NULL, ...){ #' #' @param object a wglm object. #' @param times [numeric vector] time points at which the weights should be output. +#' @param prefix [character] used to name the columns. Can be \code{NA} to keep the original names. #' @param simplify [logical] should the ouput be converted to a vector when only one timepoint is requested. Otherwise will always return a matrix. #' @param ... Not used. #' @export #' -weights.wglm <- function(object, times = NULL, simplify = TRUE, ...){ +weights.wglm <- function(object, times = NULL, prefix = "t", simplify = TRUE, ...){ ## ** normalize user input dots <- list(...) @@ -840,10 +854,16 @@ weights.wglm <- function(object, times = NULL, simplify = TRUE, ...){ stop("Unknown timepoint ",paste(setdiff(times,object$time), collapse = ", ")," in argument \'times\'. \n", "Valid timepoints: ",paste(object$time, collapse = ", "),". \n") } - } - + } ## ** extract weights - out <- object$data[object$name.IPCW] + if(is.data.table(object$data)){ + out <- as.data.frame(object$data[,.SD,.SDcols = object$name.IPCW]) + }else{ + out <- object$data[object$name.IPCW] + } + if(!is.na(prefix)){ + names(out) <- paste0(prefix,object$time) + } if(!is.null(times)){ out <- out[match(times,object$time)] } diff --git a/man/anova.ate.Rd b/man/anova.ate.Rd index b200375..69fa57f 100644 --- a/man/anova.ate.Rd +++ b/man/anova.ate.Rd @@ -49,6 +49,7 @@ Experimental!!! \examples{ library(survival) library(data.table) +library(ggplot2) \dontrun{ ## simulate data @@ -59,7 +60,7 @@ dtS$X12 <- LETTERS[as.numeric(as.factor(paste0(dtS$X1,dtS$X2)))] dtS <- dtS[dtS$X12!="D"] ## model fit -fit <- cph(formula = Surv(time,event)~ X1+X6,data=dtS,y=TRUE,x=TRUE) +fit <- coxph(formula = Surv(time,event)~ X1+X6,data=dtS,y=TRUE,x=TRUE) seqTime <- 1:10 ateFit <- ate(fit, data = dtS, treatment = "X1", contrasts = NULL, times = seqTime, B = 0, iid = TRUE, se = TRUE, verbose = TRUE, band = TRUE) diff --git a/man/ate.Rd b/man/ate.Rd index b0149f0..8171ea4 100644 --- a/man/ate.Rd +++ b/man/ate.Rd @@ -111,31 +111,31 @@ to estimate the average treatment based on Cox regression with or without competing risks. } \details{ -\bold{event}: \itemize{ -\item IPTW estimate: a character vector indicating the event and status variables. +Argument \bold{event}: \itemize{ +\item IPTW estimator: a character vector indicating the event and status variables. \item G-formula or AIPTW estimator: a survival model (e.g. Cox \code{"survival::coxph"}, Kaplan Meier \code{"prodlim::prodlim"}), a competing risk model (e.g. cause-specific Cox \code{"riskRegression::CSC"}), a logistic model (e.g. \code{"stats::glm"} when argument \code{censor} is \code{NULL} otherwise \code{"riskRegression::wglm"}). Other models can be specified, provided a suitable \code{predictRisk} method exists, but the standard error should be computed using non-parametric bootstrap, as the influence function of the estimator will typically not be available. } -\bold{treatment}: \itemize{ +Argument \bold{treatment}: \itemize{ \item G-formula estimator: a character indicating the treatment variable. \item IPTW or AIPTW estimator: a \code{"stats::glm"} model with family \code{"binomial"} (two treatment options) or a \code{"nnet::multinom"} (more than two treatment options). } -\bold{censor}: \itemize{ +Argument \bold{censor}: \itemize{ \item G-formula estimator: NULL \item IPTW or AIPTW estimator: NULL if no censoring and otherwise a survival model (e.g. Cox \code{"survival::coxph"}, Kaplan Meier \code{"prodlim::prodlim"}) } -\bold{estimator}: when using a IPCW logistic model (\code{"riskRegression::wglm"}), the integral term w.r.t. to the martingale of the censoring process is not computed for augmented estimator, -i.e. AIPTW,IPCW estimator instead AIPTW,AIPCW with the notation of Ozenne et al. 2020. \cr +Argument \bold{estimator}: when set to \code{"AIPTW"} with argument \code{event} being IPCW logistic model (\code{"riskRegression::wglm"}), the integral term w.r.t. to the martingale of the censoring process is not computed, +i.e. using the notation of Ozenne et al. (2020) an AIPTW,IPCW estimator instead of an AIPTW,AIPCW is evaluated. \cr In presence of censoring, the computation time and memory usage for the evaluation of the AIPTW estimator and its uncertainty do not scale well with the number of observations (n) or the number of unique timepoints (T). -Point estimation involves n by T matrices and influence function n by T by n arrays. \itemize{ -\item for large datasets (e.g. n>5000), bootstrap is recommended as the memory need for the influence function may be prohibitive. +Point estimation involves n by T matrices, influence function involves n by T by n arrays. \itemize{ +\item for large datasets (e.g. n>5000), bootstrap is recommended as the memory need for the influence function is likely prohibitive. \item it is possible to decrease the memory usage for the point estimation by setting the (hidden) argument \code{store=c(size.split=1000)}. The integral term of the AIPTW estimator is then evaluated for 1000 observations at a time, i.e. involing matrices of size 1000 by T instead of n by T. This may lead to increased computation time. -\item reducing the number of unique timepoints (e.g. by rounding them) will lead to a less efficient but faster and less memory demanding estimation procedure. +\item reducing the number of unique timepoints (e.g. by rounding them) will lead to an approximate estimation procedure that is less demanding both in term of computation and memory. The resulting estimator will be more variable than the one based on the original timepoints (i.e. wider confidence intervals). } } \examples{ diff --git a/man/autoplot.ate.Rd b/man/autoplot.ate.Rd index 67a03cb..84e6e67 100644 --- a/man/autoplot.ate.Rd +++ b/man/autoplot.ate.Rd @@ -65,7 +65,6 @@ Plot average risks. } \examples{ library(survival) -library(rms) library(ggplot2) #### simulate data #### @@ -75,7 +74,7 @@ dtS <- sampleData(n,outcome="survival") seqTimes <- c(0,sort(dtS$time[dtS$event==1]),max(dtS$time)) #### Cox model #### -fit <- cph(formula = Surv(time,event)~ X1+X2,data=dtS,y=TRUE,x=TRUE) +fit <- coxph(formula = Surv(time,event)~ X1+X2,data=dtS,y=TRUE,x=TRUE) #### plot.type = 1: for few timepoints #### ateFit <- ate(fit, data = dtS, treatment = "X1", diff --git a/man/weights.wglm.Rd b/man/weights.wglm.Rd index f8d9939..e3f3c8d 100644 --- a/man/weights.wglm.Rd +++ b/man/weights.wglm.Rd @@ -4,13 +4,15 @@ \alias{weights.wglm} \title{Extract IPCW Weights} \usage{ -\method{weights}{wglm}(object, times = NULL, simplify = TRUE, ...) +\method{weights}{wglm}(object, times = NULL, prefix = "t", simplify = TRUE, ...) } \arguments{ \item{object}{a wglm object.} \item{times}{[numeric vector] time points at which the weights should be output.} +\item{prefix}{[character] used to name the columns. Can be \code{NA} to keep the original names.} + \item{simplify}{[logical] should the ouput be converted to a vector when only one timepoint is requested. Otherwise will always return a matrix.} \item{...}{Not used.} diff --git a/man/wglm.Rd b/man/wglm.Rd index 7a2e4c0..003d8db 100644 --- a/man/wglm.Rd +++ b/man/wglm.Rd @@ -69,7 +69,11 @@ dSurv <- d[event!=2] ## (artificially) remove competing risk #### no censoring #### e.wglm <- wglm(Surv(time,event) ~ X1, times = tau, data = dFull, product.limit = TRUE) -e.wglm ## same as a logistic regression +e.wglm ## same as a logistic regression at each timepoint + +coef(e.wglm) +confint(e.wglm) +model.tables(e.wglm) summary(ate(e.wglm, data = dFull, times = tau, treatment = "X1", verbose = FALSE)) @@ -77,7 +81,9 @@ summary(ate(e.wglm, data = dFull, times = tau, treatment = "X1", verbose = FALSE ## no covariante in the censoring model (independent censoring) eC.wglm <- wglm(Surv(time,event) ~ X1, times = tau, data = dSurv, product.limit = TRUE) -eC.wglm +summary(eC.wglm) + +weights(eC.wglm) ## with covariates in the censoring model eC2.wglm <- wglm(Surv(time,event) ~ X1 + X8, formula.censor = ~ X1*X8, @@ -93,3 +99,11 @@ summary(eCR.wglm) eCR.wglm <- wglm(Surv(time,event) ~ X1, formula.censor = ~X1, times = tau, data = d) } +\seealso{ +\code{\link{coef.wglm}} to output the estimated parameters from the logistic regression. \cr +\code{\link{confint.wglm}} to output the estimated parameters from the logistic regression with their confidence interval. \cr +\code{\link{model.tables.wglm}} to output a data.frame containing the estimated parameters from the logistic regression with its confidence intervals and p-values. \cr +\code{\link{predictRisk.wglm}} to evaluate event probabilities (e.g. survival probabilities) conditional on covariates. \cr +\code{summary.wglm} for displaying in the console a summary of the results. \cr +\code{\link{weights.wglm}} to extract the IPCW weights. +}