Merge pull request #130 from timcadman/dev

Dev
timcadman · Oct 31, 2023 · cf07095 · cf07095
2 parents a9c3ef3 + f3f2439
commit cf07095
Show file tree

Hide file tree

Showing 61 changed files with 4,302 additions and 2,281 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -11,3 +11,5 @@
 ^cran-comments\.md$
 ^codecov\.yml$
 ^man-roxygen
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,6 @@
 .Rhistory
 .RData
 .Ruserdata
-**/**/.DS_Store
+**/**/.DS_Store
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: dsHelper
 Type: Package
 Title: Helper Functions for Use with 'DataSHIELD'
-Version: 1.0.3.9000
+Version: 1.1.0
 Description: Often we need to automate things with 'DataSHIELD'. These functions help to do that.
 Authors@R: 
   c(person(given= "Tim", 
@@ -35,7 +35,8 @@ Imports:
   checkmate,
   withr,
   lme4, 
-  webmockr
+  webmockr,
+  tidyselect
 Suggests:
   knitr,
   rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,18 +1,21 @@
 # Generated by roxygen2: do not edit by hand
 
 export(dh.anyData)
+export(dh.buildModels)
 export(dh.classDiscrepancy)
 export(dh.columnCast)
 export(dh.createTableOne)
 export(dh.defineCases)
 export(dh.dropCols)
 export(dh.findVarsIndex)
 export(dh.getAnonPlotData)
+export(dh.getRmStats)
 export(dh.getStats)
 export(dh.lmTab)
 export(dh.lmeMultPoly)
 export(dh.localProxy)
 export(dh.makeAgePolys)
+export(dh.makeExcludedDf)
 export(dh.makeIQR)
 export(dh.makeLmerForm)
 export(dh.makeOutcome)
@@ -25,26 +28,24 @@ export(dh.multGLM)
 export(dh.predictLmer)
 export(dh.quartileSplit)
 export(dh.renameVars)
+export(dh.stablisedWeights)
 export(dh.subjHasData)
 export(dh.tidyEnv)
 export(dh.trimPredData)
 export(dh.zByGroup)
-export(dt.makeExcludedDf)
 import(lme4)
 importFrom(DSI,datashield.aggregate)
 importFrom(DSI,datashield.assign)
 importFrom(DSI,datashield.connections_find)
-importFrom(checkmate,assert)
 importFrom(checkmate,assert_character)
 importFrom(checkmate,assert_choice)
 importFrom(checkmate,assert_data_frame)
 importFrom(checkmate,assert_list)
 importFrom(checkmate,assert_logical)
+importFrom(checkmate,assert_set_equal)
+importFrom(checkmate,assert_string)
 importFrom(checkmate,assert_subset)
-importFrom(checkmate,check_choice)
-importFrom(checkmate,check_list)
-importFrom(checkmate,check_set_equal)
-importFrom(checkmate,check_string)
+importFrom(checkmate,assert_true)
 importFrom(checkmate,makeAssertCollection)
 importFrom(checkmate,reportAssertions)
 importFrom(dplyr,"%>%")
@@ -83,6 +84,7 @@ importFrom(dplyr,vars)
 importFrom(dsBaseClient,ds.Boole)
 importFrom(dsBaseClient,ds.asCharacter)
 importFrom(dsBaseClient,ds.asFactor)
+importFrom(dsBaseClient,ds.asFactorSimple)
 importFrom(dsBaseClient,ds.asInteger)
 importFrom(dsBaseClient,ds.asNumeric)
 importFrom(dsBaseClient,ds.assign)
@@ -94,6 +96,8 @@ importFrom(dsBaseClient,ds.dataFrame)
 importFrom(dsBaseClient,ds.dataFrameSort)
 importFrom(dsBaseClient,ds.dataFrameSubset)
 importFrom(dsBaseClient,ds.dim)
+importFrom(dsBaseClient,ds.exists)
+importFrom(dsBaseClient,ds.glmPredict)
 importFrom(dsBaseClient,ds.glmSLMA)
 importFrom(dsBaseClient,ds.isNA)
 importFrom(dsBaseClient,ds.length)
@@ -113,7 +117,10 @@ importFrom(dsBaseClient,ds.rep)
 importFrom(dsBaseClient,ds.replaceNA)
 importFrom(dsBaseClient,ds.rm)
 importFrom(dsBaseClient,ds.scatterPlot)
+importFrom(dsBaseClient,ds.summary)
 importFrom(dsBaseClient,ds.table)
+importFrom(dsBaseClient,ds.tapply)
+importFrom(dsBaseClient,ds.tapply.assign)
 importFrom(magrittr,"%<>%")
 importFrom(metafor,rma)
 importFrom(metafor,rma.uni)
@@ -154,6 +161,7 @@ importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
 importFrom(tidyr,separate)
 importFrom(tidyr,tibble)
+importFrom(tidyselect,contains)
 importFrom(utils,capture.output)
 importFrom(utils,combn)
 importFrom(utils,getFromNamespace)

diff --git a/R/any-data.R b/R/any-data.R
@@ -42,7 +42,7 @@ dh.anyData <- function(df = NULL, vars = NULL, conns = NULL, checks = TRUE) {
   }
 
   if (checks == TRUE) {
-    .isDefined(df = df, vars = vars, conns = conns)
+    .isDefined(df = df, conns = conns)
   }
 
   # get the lengths

diff --git a/R/build-models.R b/R/build-models.R
@@ -0,0 +1,176 @@
+#' Build Exposure-Outcome Models with Optional Covariates
+#'
+#' This function constructs exposure-outcome models for each cohort using
+#' available exposure and outcome variables, and optionally available covariates.
+#'
+#' @param avail_exp Output from `dh.anyData` detailing available exposures for each cohort.
+#' @param avail_out Output from `dh.anyData` detailing available outcomes  for each cohort.
+#' @param avail_cov Output from `dh.anyData` detailing available exposures for each cohort (optional).
+#' @details The function performs the following steps:
+#'   1. Summarizes available exposure and outcome variables.
+#'   2. Combines available exposure and outcome variables for all cohorts.
+#'   3. Tidies up the combinations.
+#'   4. If covariates are available, it splits combinations by cohort, adds covariates, and creates
+#'   formulas.
+#'   5. Returns the constructed models or formulas.
+#' @return A tibble containing four columns: (i) cohort, (ii) exposure, (iii) outcome, (iv)
+#' regression formula with these combinations. Optionally contains fifth column with list of
+#' covariates if `avail_cov` is not null.
+#' @importFrom dplyr %>% group_by group_split pull rename mutate bind_rows
+#' @importFrom purrr pmap set_names map
+#' @importFrom tibble as_tibble
+#'
+#' @export
+dh.buildModels <- function(avail_exp = NULL, avail_cov = NULL, avail_out = NULL) {
+  exp_avail <- summarise_available(avail_exp)
+  out_avail <- summarise_available(avail_out)
+
+  all_combinations <- combine_exposure_outcome(exp_avail, out_avail)
+
+  tidy_combinations <- tidy_all_combinations(all_combinations)
+
+  if (is.null(avail_cov)) {
+    out <- make_exp_out_form(tidy_combinations)
+  } else {
+    cov_avail <- summarise_available(avail_cov)
+    tidy_combinations_split <- split_combinations_by_cohort(tidy_combinations)
+    comb_with_covariates <- join_covariates(tidy_combinations_split, cov_avail)
+    out <- make_exp_out_cov_form(comb_with_covariates)
+  }
+
+  return(out)
+}
+
+#' Summarise Available Exposure and Outcome Variables
+#'
+#' This subfunction converts the input tibble showing availability of variables into a list
+#' containing the available variables for each cohort.
+#'
+#' @param anydata_obj Output from `dh.anyData`.
+#' @details The function extracts available variables and organizes them by cohort.
+#' @return A list of available variables grouped by cohort.
+#' @noRd
+summarise_available <- function(anydata_obj) {
+  value <- cohort <- . <- NULL
+
+  anydata_obj %>%
+    pmap(function(...) {
+      tmp <- c(...)
+      tmp[which(tmp == TRUE)] %>%
+        names()
+    }) %>%
+    set_names(anydata_obj$variable) %>%
+    map(as_tibble) %>%
+    bind_rows(.id = "variable") %>%
+    dplyr::rename(cohort = value) %>%
+    group_by(cohort) %>%
+    group_split() %>%
+    set_names(map(., ~ .x$cohort[1])) %>%
+    map(~ pull(., variable))
+}
+
+#' Combine Available Exposure and Outcome Variables
+#'
+#' This subfunction takes the output from `summarise_available` and returns a list of dataframes
+#' (length number of cohorts) with all combinations of exposures and outcomes for each cohort
+#'
+#' @param exp_avail A list of available exposure variables for each cohort.
+#' @param out_avail A list of available outcome variables for each cohort.
+#' @return A list of combinations of exposure and outcome variables.
+#' @noRd
+combine_exposure_outcome <- function(exp_avail, out_avail) {
+  combinations <- list(exp_avail, out_avail) %>%
+    pmap(function(.x, .y) {
+      expand.grid(.x, .y)
+    })
+
+  return(combinations)
+}
+
+#' Tidy Up Combinations
+#'
+#' This subfunction tidies up the output of `combine_exposure_outcome`.
+#'
+#' @param all_combinations Output from `combine_exposure_outcome`
+#' @return A tidier tibble
+#' @noRd
+tidy_all_combinations <- function(all_combinations) {
+  cohort <- Var1 <- Var2 <- NULL
+
+  all_combinations %>%
+    bind_rows(.id = "cohort") %>%
+    as_tibble() %>%
+    dplyr::rename(cohort = cohort, exposure = Var1, outcome = Var2)
+}
+
+#' Create Exposure-Outcome Models
+#'
+#' This subfunction creates a character string formula using for exposure-outcome models without
+#' covariates which can serve as input into ds.glmSLMA or ds.glm.
+#'
+#' @param tidy_comb A tibble with tidy combinations of exposure and outcome variables.
+#' @return A tibble containing three columns: (i) cohort, (ii) exposure, (iii) regression formula
+#' with all combinations of available exposure and outcome for each cohort.
+#' @noRd
+make_exp_out_form <- function(tidy_comb) {
+  outcome <- exposure <- NULL
+
+  formula <- tidy_comb %>%
+    mutate(formula = paste0(outcome, "~1+", exposure))
+
+  return(formula)
+}
+
+#' Split Combinations by Cohort
+#'
+#' This subfunction splits the ouput of `tidy_all_combinations` into a list of tibbles grouped by
+#' cohort. We do this because each cohort may have different covariates, so we need to be join the cohort
+#' specific covariates to the cohort-specific models.
+#'
+#' @param tidy_combinations Tidy combinations of exposure and outcome variables.
+#' @return A list tibbles of exposure-outcome combinations split by cohort.
+#' @noRd
+split_combinations_by_cohort <- function(tidy_combinations) {
+  cohort <- . <- NULL
+
+  combinations_split <- tidy_combinations %>%
+    group_by(cohort) %>%
+    group_split() %>%
+    set_names(map(., ~ .x$cohort[1]))
+}
+
+#' Join Covariates with exposure-outcome tibble
+#'
+#' This subfunction joins covariates with exposure-outcome combinations.
+#'
+#' @param comb_split Output from `split_combinations_by_cohort`.
+#' @param cov_avail Output from `summarise_available`, ie list showing available covariates.
+#' @return A tibble containing four columns: (i) cohort, (ii) exposure, (iii) outcome, and (iv)
+#' a list of covariates. This represents all combinations of exposures and outcomes, and the same
+#' list of covariates for each cohort repeated.
+#' @noRd
+join_covariates <- function(comb_split, cov_avail) {
+  list(comb_split, cov_avail) %>%
+    pmap(function(.x, .y) {
+      .x %>% mutate(covariates = list(paste0(.y)))
+    }) %>%
+    bind_rows(.id = "cohort")
+}
+
+#' Create final Exposure-Outcome Models with Covariates
+#'
+#' This subfunction adds formula to output tibble where covariates are present.
+#'
+#' @param comb_with_covariates Output from `join_covariates`.
+#' @return A tibble containing five columns: (i) cohort, (ii) exposure, (iii) outcome, (iv)
+#' list of covariates for each cohort, and (v) regression formula including exposure, outcome and
+#' covariates.
+#' @noRd
+make_exp_out_cov_form <- function(comb_with_covariates) {
+  outcome <- exposure <- covariates <- NULL
+
+  formula <- comb_with_covariates %>%
+    mutate(formula = paste0(outcome, "~1+", exposure, "+", paste0(unlist(covariates), collapse = "+")))
+
+  return(formula)
+}