diff --git a/DESCRIPTION b/DESCRIPTION index 423ed921..44ce6370 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dsHelper Type: Package Title: Helper Functions for Use with 'DataSHIELD' -Version: 0.4.19.9000 +Version: 0.4.19.9001 Description: Often we need to automate things with 'DataSHIELD'. These functions help to do that. Authors@R: c(person(given= "Tim", diff --git a/NAMESPACE b/NAMESPACE index 59aad10a..abd1b145 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(dh.anyData) export(dh.classDiscrepancy) export(dh.columnCast) +export(dh.createTableOne) export(dh.defineCases) export(dh.dropCols) export(dh.findVarsIndex) @@ -19,12 +20,16 @@ export(dh.makeStrata) export(dh.meanByAge) export(dh.meanByGroup) export(dh.metaManual) +export(dh.metaSepModels) +export(dh.multGLM) export(dh.predictLmer) export(dh.quartileSplit) export(dh.renameVars) export(dh.subjHasData) export(dh.tidyEnv) export(dh.trimPredData) +export(dh.zByGroup) +export(dt.makeExcludedDf) importFrom(DSI,datashield.aggregate) importFrom(DSI,datashield.assign) importFrom(DSI,datashield.connections_find) @@ -75,6 +80,7 @@ importFrom(dsBaseClient,ds.dataFrame) importFrom(dsBaseClient,ds.dataFrameSort) importFrom(dsBaseClient,ds.dataFrameSubset) importFrom(dsBaseClient,ds.dim) +importFrom(dsBaseClient,ds.glmSLMA) importFrom(dsBaseClient,ds.isNA) importFrom(dsBaseClient,ds.length) importFrom(dsBaseClient,ds.levels) @@ -96,6 +102,7 @@ importFrom(dsBaseClient,ds.scatterPlot) importFrom(dsBaseClient,ds.table) importFrom(magrittr,"%<>%") importFrom(metafor,rma) +importFrom(metafor,rma.uni) importFrom(purrr,cross2) importFrom(purrr,flatten_chr) importFrom(purrr,flatten_dbl) @@ -115,6 +122,7 @@ importFrom(purrr,pmap_df) importFrom(purrr,set_names) importFrom(rlang,":=") importFrom(rlang,arg_match) +importFrom(rlang,is_bool) importFrom(rlang,quo_name) importFrom(rlang,set_names) importFrom(rlang,sym) @@ -129,8 +137,11 @@ importFrom(tibble,tibble) importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) importFrom(tidyr,replace_na) +importFrom(tidyr,separate) importFrom(tidyr,tibble) importFrom(utils,capture.output) importFrom(utils,combn) importFrom(utils,getFromNamespace) importFrom(utils,globalVariables) +importFrom(utils,head) +importFrom(utils,tail) diff --git a/R/cast-cols.R b/R/cast-cols.R index c47a911c..ccf51bfe 100644 --- a/R/cast-cols.R +++ b/R/cast-cols.R @@ -13,6 +13,7 @@ #' #' @importFrom dsBaseClient ds.asFactor ds.asCharacter ds.asNumeric ds.asInteger ds.colnames ds.dim ds.rep ds.dataFrameSubset ds.cbind #' @importFrom DSI datashield.connections_find +#' @importFrom utils tail #' #' @return Tibble with a summary of the successful and failed casts #' @export diff --git a/R/create-table-one.R b/R/create-table-one.R new file mode 100644 index 00000000..0417a894 --- /dev/null +++ b/R/create-table-one.R @@ -0,0 +1,658 @@ +#' Creates tables in useful formats for including in manuscripts +#' +#' dh.getStats extracts key statistics and stores them in a clientside list. +#' dh.createTableOne builds on this by formatting the extract stats into a +#' table which can then be included in a manuscript. Flexible formatting options +#' are included. +#' +#' @param stats Exported object from dh.getStats. +#' @param vars Variable to be included in table. +#' @param var_labs Tibble with two columns: 'variable' containing the +#' names of the variables specified in `vars`, and 'var_label' containing the +#' replacement labels for these variables. +#' @param cat_labs Tibble with three columns: 'variable' containing the +#' names of the categorical variables specified in `vars`, 'category' +#' containing the categories of these variabels, and "cat_label" containing +#' the replacement category labels for these variables. +#' @param coh_labs Tibble with two columns: 'cohort' containing the names +#' of all cohorts included in `stats`, and 'cohort_labs' containing the +#' replacement labels for these cohorts. +#' @param type Character specifying which cohorts to include in the table. If +#' "combined" then only combined stats will be returned, if "cohort" then only +#' cohort-specific stats will be returned, if "both" then everything will be +#' returned. +#' @param coh_direction Character specifying direction of data if `type` is +#' 'cohort' or 'both'. Use 'rows' to return cohorts as rows and variable as +#' columns, or use 'cols' to return cohorts as columns and variables as rows. +#' Defauls is "col". +#' @param cont_stats Character specifying which summary statistic to return +#' for continuous stats. Use 'med_iqr' to return the median and interquartile +#' range, use 'mean_sd' to return the mean and standard deviation. Default is +#' "med_iqr". +#' @param inc_missing Boolean specifying whether to return missing values in +#' the output. Use TRUE for yes and FALSE for no. +#' @param round_digits Optionally, the number of decimal places to round output +#' to. Default is 2. +#' @param perc_denom The denominator for percentages. Either 'valid' for valid +#' cases or 'total' for total cases. +#' +#' @return Tibble containing formatted summary statistics. If `coh_direction` is +#' 'cols', the tibble will contain four columns: 'cohort', 'variable', +#' 'category' & value. If `coh_direction` is rows, the tibble will contain the +#' column 'cohort' as well as as columns for all continuous variables and all +#' categories of categorical variables. +#' +#' @family descriptive functions +#' +#' @importFrom dplyr %>% left_join mutate select filter bind_rows +#' @importFrom tidyr pivot_wider pivot_longer +#' @importFrom rlang arg_match is_bool +#' +#' @export +dh.createTableOne <- function(stats = NULL, vars = NULL, var_labs = NULL, + cat_labs = NULL, type = NULL, coh_labs = NULL, + coh_direction = "cols", cont_stats = "med_iqr", + inc_missing = NULL, round_digits = 2, + perc_denom = NULL){ + + variable <- . <- cat_label <- var_label <- cohort <- value <- data_type <- + miss_n_perc <- category <- coh_label <- avail_stats <- vars_list <- + stats_cat <- stats_cont <- old_var <- NULL + + if (is.null(stats)) { + stop("`stats` must not be NULL.", call. = FALSE) + } + + if (is.null(vars)) { + stop("`vars` must not be NULL.", call. = FALSE) + } + + if (is.null(type)) { + stop("`type` must not be NULL.", call. = FALSE) + } + + type <- arg_match(type, c("cohort", "combined", "both")) + + if (is.null(coh_direction)) { + stop("`coh_direction` must not be NULL.", call. = FALSE) + } + + coh_direction <- arg_match(coh_direction, c("rows", "cols")) + + if (is.null(cont_stats)) { + stop("`cont_stats` must not be NULL.", call. = FALSE) + } + + cont_stats <- arg_match(cont_stats, c("med_iqr", "mean_sd")) + + if (is.null(inc_missing)) { + stop("`inc_missing` must not be NULL.", call. = FALSE) + } + + if (is_bool(inc_missing) == FALSE){ + stop("`inc_missing` must be either TRUE or FALSE") + } + + if (is.null(perc_denom)) { + stop("`perc_denom` must not be NULL.", call. = FALSE) + } + + perc_denom <- arg_match(perc_denom, c("valid", "total")) + + stats_types <- .checkAvailStats(stats) + + avail_stats <- .splitStatsVars(stats, stats_types) + + .checkVarsAvail(unlist(avail_stats), vars) + + vars_list <- .splitTargetVars(avail_stats, vars) + + if(length(stats$categorical) > 0) { + + stats_cat <- .formatCatStats(stats, vars_list$categorical, inc_missing, + round_digits, perc_denom) + + } + + if(length(stats$continuous) > 0) { + + stats_cont <- .formatContStats(stats, vars_list$continuous, cont_stats, + inc_missing, round_digits) + + } + + out <- bind_rows(stats_cat, stats_cont) %>% + mutate(variable = factor(variable, levels = vars, ordered = T)) %>% + arrange(variable) + + if(!is.null(cat_labs)){ + + .checkLabType(cat_labs) + .checkLabCols(cat_labs, 3, c("variable", "category", "cat_label")) + .checkLabsMatchCats(stats_cat, cat_labs) + + out <- out %>% + left_join(., cat_labs, by = c("variable", "category")) %>% + mutate(category = cat_label) %>% + dplyr::select(-cat_label) + + } + + if(!is.null(var_labs)){ + + .checkLabType(var_labs) + .checkLabCols(var_labs, 2, c("variable", "var_label")) + .checkLabsMatchVars(vars, var_labs) + + out <- out %>% + left_join(., + var_labs %>% + dplyr::select(variable, var_label) %>% + distinct, by = "variable") %>% + mutate(old_var = variable) %>% + mutate(variable = var_label) %>% + dplyr::select(-var_label) + + } + + if(!is.null(coh_labs)){ + + .checkLabType(coh_labs) + .checkLabCols(coh_labs, 2, c("cohort", "coh_label")) + .checkLabsMatchCoh(stats, coh_labs, stats_types) + + out <- left_join(out, coh_labs, by = "cohort") %>% + dplyr::select(-cohort) %>% + dplyr::rename(cohort = coh_label) %>% + dplyr::select(cohort, everything()) + + } + + if(type == "combined"){ + + out <- out %>% + dplyr::filter(cohort == "combined") %>% + dplyr::select(-data_type, -old_var) + + } else if(type == "cohort"){ + + out <- out %>% + dplyr::filter(cohort != "combined") + + } + + if(type %in% c("cohort", "both")){ + + if(coh_direction == "cols"){ + + if(inc_missing == FALSE){ + + out <- out %>% + pivot_wider( + names_from = cohort, + values_from = value) + + } else if(inc_missing == TRUE){ + + out_cat <- out %>% + dplyr::filter(data_type == "cat") %>% + dplyr::select(-miss_n_perc) %>% + mutate(category = ifelse(is.na(category), "miss_n_perc", category)) %>% + pivot_wider( + names_from = cohort, + values_from = value) + + out_cont <- out %>% + dplyr::filter(data_type == "cont") %>% + pivot_wider( + names_from = cohort, + values_from = value) + + out <- bind_rows(out_cat, out_cont) + + } + + out <- out %>% + dplyr::select(-data_type) + + } else if(coh_direction == "rows"){ + + if(inc_missing == FALSE){ + + out <- out %>% + dplyr::select(-data_type) %>% + pivot_wider( + names_from = c(variable, category), + values_from = value) + + } else if(inc_missing == TRUE){ + + out_cat <- out %>% + dplyr::filter(data_type == "cat") %>% + mutate(category = ifelse(is.na(category), "miss_n_perc", category)) %>% + dplyr::select(-miss_n_perc) + + out_cont <- out %>% + dplyr::filter(data_type == "cont") %>% + dplyr::select(-category) %>% + pivot_longer( + cols = c(value, miss_n_perc), + names_to = "category", + values_to = "value") + + out <- bind_rows(out_cat, out_cont) %>% + dplyr::select(-data_type) %>% + mutate(old_var = factor(old_var, levels = vars, ordered = TRUE)) %>% + arrange(old_var) + + } + + } + + } + + avail_coh <- .availCoh(stats, stats_types) + + avail_coh <- c(avail_coh[!avail_coh == "combined"], "combined") + + if(coh_direction == "rows"){ + + out <- out %>% + mutate(cohort = factor(cohort, levels = avail_coh, ordered = TRUE)) + + } + + # out <- out %>% +# dplyr::select(-old_var) + + return(out) + +} + +#' Identifies whether there are both categorical and continuous stats +#' +#' @param Exported object from dh.getStats +#' +#' @return Character vector containing the names of available stats objects +#' +#' @noRd +.checkAvailStats <- function(stats){ + + var_types <- NULL + +if(length(stats$categorical) > 0 & length(stats$continuous) > 0 ){ + + var_types <- c("categorical", "continuous") + +} else if (length(stats$categorical) > 0 & length(stats$continuous) == 0 ){ + + var_types <- "categorical" + +} else if (length(stats$categorical) == 0 & length(stats$continuous) > 0 ){ + + var_types <- "continuous" + +} + +} + + +#' Extracts variables names from `stats` object. +#' +#' @param Exported object from dh.getStats +#' +#' @return List with two elements named 'categorical' and 'continuous' +#' containing character vectors of respective variable names. +#' +#' @importFrom dplyr %>% pull +#' @importFrom purrr map set_names +#' +#' @noRd +.splitStatsVars <- function(stats, var_types){ + + variable <- NULL + + + + out <- var_types %>% + map(function(x){ + + stats[[x]] %>% pull(variable) %>% unique + + }) %>% + set_names(var_types) + + return(out) + +} + +#' Checks that all the variable names provided to `vars` are available in the +#' object provided to `stats` +#' +#' @param source_vars Vector of variables names extracted from `stats` +#' @param target_vars Vector of variable names provided to `vars` +#' +#' @return Returns an error if all variables in `target_vars` are not included +#' in `source_vars`, otherwise nothing is returned. +#' +#' @noRd +.checkVarsAvail <- function(source_vars, target_vars){ + + avail <- target_vars[target_vars %in% source_vars] + not_avail <- target_vars[!target_vars %in% source_vars] + + if(length(not_avail > 0)){ + + stop( + paste0( + "The following variables provided in `vars` are not present in the + statistics provided in `stats`\n\n", + paste0(not_avail, collapse = ", ")) + ) + + } + +} + +#' Divides variable names provided to `vars` into categorical and continuous +#' using the variable types in `stats` +#' +#' @param stats_vars Output from .splitStatsVars +#' @param target_vars Vector of variable names provided to `vars` +#' +#' @return List with two elements named 'categorical' and 'continuous' +#' containing character vectors of respective variable names. +#' +#' @importFrom dplyr %>% +#' @importFrom purrr map set_names +#' +#' @noRd +.splitTargetVars <- function(stats_vars, target_vars){ + +var_types <- c("categorical", "continuous") + + out <- var_types %>% + map(function(x){ + + stats_vars[[x]][stats_vars[[x]] %in% target_vars] + + }) %>% + set_names(var_types) + + return(out) + +} + +#' Performs the initial formatting of categorical statistics +#' +#' @param stats Exported object from dh.getStats +#' @param vars Character vector of variable names +#' @param inc_missing Boolean specifying whether to return missing values in +#' the output. Use TRUE for yes and FALSE for no. +#' +#' @return Tibble containing 5 columns: 'cohort', 'variable', 'category', +#' 'value' and 'data_type' +#' +#' @importFrom dplyr %>% filter mutate select +#' +#' @noRd +#' +.formatCatStats <- function(stats, vars, inc_missing, round_digits, perc_denom){ + + variable <- value <- perc_valid <- cohort <- category <- perc_total <- NULL + + out <- stats$categorical %>% + dplyr::filter(variable %in% vars) %>% + mutate(perc_valid = signif(perc_valid, round_digits)) + + if(perc_denom == "total"){ + + out <- out %>% + mutate(value = paste0(value, " (", perc_total, ")")) + + } else if(perc_denom == "valid"){ + + out <- out %>% + mutate(value = ifelse( + !is.na(category), + paste0(value, " (", perc_valid, ")") , + paste0(value, " (", perc_total, ")"))) + + } + + if(inc_missing == FALSE){ + + out <- out %>% + dplyr::filter(!is.na(category)) + + } + + out <- out %>% + dplyr::select(cohort, variable, category, value) %>% + mutate(data_type = "cat") %>% + mutate(category = ifelse(is.na(category), "missing", as.character(category))) + + return(out) + +} + +#' Performs the initial formatting of continuous statistics +#' +#' @param stats Exported object from dh.getStats +#' @param vars Character vector of variable names +#' @param cont_stats Character specifying which summary statistic to return +#' for continuous stats. Use 'med_iqr' to return the median and interquartile +#' range, use 'mean_sd' to return the mean and standard deviation. +#' @param inc_missing Boolean specifying whether to return missing values in +#' the output. Use TRUE for yes and FALSE for no. +#' +#' @return Tibble containing 5 columns: 'cohort', 'variable', 'category', +#' 'value' and 'data_type'. If `inc_missing` is TRUE contains an sixth column +#' 'miss_n_perc' +#' +#' @importFrom dplyr %>% filter mutate select +#' +#' @noRd +.formatContStats <- function(stats, vars, cont_stats, inc_missing, round_digits){ + + variable <- std.dev <- perc_50 <- perc_25 <- perc_75 <- perc_95 <- missing_n <- + missing_perc <- cohort <- category <- value <- miss_n_perc <- + data_type <- valid_n <- NULL + + out <- stats$continuous %>% + dplyr::filter(variable %in% vars) %>% + mutate(across(c(mean:perc_95, missing_perc), ~signif(., round_digits))) %>% + mutate(data_type = "cont") %>% + pivot_longer( + cols = c(valid_n, missing_n), + names_to = "category", + values_to = "missing") + + if(cont_stats == "mean_sd"){ + + out <- out %>% + mutate(value = paste0(mean, " \u00b1 ", std.dev)) + + } else if(cont_stats == "med_iqr"){ + + out <- out %>% + mutate(value = paste0(perc_50, " (", perc_25, ",", perc_75, ")")) + + } + + if(inc_missing == TRUE){ + + out <- out %>% + mutate(value = ifelse(category == "missing_n", + paste0(missing, " (", missing_perc, ")"), value)) %>% + dplyr::select(cohort, variable, category, value, miss_n_perc, data_type) %>% + mutate(category = case_when( + category == "valid_n" ~ "value", + category == "missing_n" ~ "missing")) + + } else if(inc_missing == FALSE){ + + out <- out %>% + dplyr::select(cohort, variable, category, value, data_type) + + } + + return(out) + +} + +#' Checks whether the object provided is either a tibble or data frame. +#' +#' @param labs Object provided either to var_labs, cat_labs or coh_labs +#' +#' @return Returns an error of provided variables is not one of these types. +#' Otherwise nothing is returned. +#' +#' @noRd +.checkLabType <- function(labs){ + + check_type <- class(labs) + + if(!any(check_type %in% c("tbl_df", "tbl", "data.frame") == TRUE) == TRUE){ + + stop("Label object must be class tibble or data frame") + + } + +} + +#' Checks that all variables provided to `vars` exist in `var_labs`. +#' +#' @param vars Character vector of variable names +#' @param var_labs Tibble with two columns: 'variable' containing the +#' names of the variables specified in `vars`, and 'var_label' containing the +#' replacement labels for these variables. +#' +#' @return Returns an error if all variables provided to `vars` do not exist in +#' `var_labs`. Otherwise nothing is returned. +#' +#' @noRd +.checkLabsMatchVars <- function(vars, var_labs){ + + missing_vars <- vars[!vars %in% var_labs$variable] + + if(length(missing_vars > 0)){ + + stop( + cat("The following variables are specifed in `vars` but do not have a + corresponding label provided in `var_labs`\n\n", missing_vars)) + + } + +} + +#' Checks that all categorical variables provided to `stats_cat` have +#' corresponding labels provided in `cat_labs` +#' +#' @param stats_cat Object returned from .formatCatStats +#' @param cat_labs Tibble with three columns: 'variable' containing the +#' names of the categorical variables specified in `vars`, 'category' +#' containing the categories of these variabels, and "cat_label" containing +#' the replacement category labels for these variables. +#' +#' @return Returns an error if labels have not been provided for all levels of +#' all categorical variables. +#' +#' @importFrom dplyr left_join %>% filter +#' +#' @noRd +.checkLabsMatchCats <- function(stats_cat, cat_labs){ + + category <- cat_label <- NULL + + test_cats <- left_join(stats_cat, cat_labs, by = c("variable", "category")) + + missing_cats <- test_cats %>% + dplyr::filter(!is.na(category) & is.na(cat_label)) + + if(length(missing_cats > 0)){ + + stop( + "The following categorical variables are included in 'vars' + but do not have a corresponding labels for all categories provided in + `cat_labs`\n\n") + + print(missing_cats) + + } + +} + +#' Checks that the columns provided in labs contain all and only those provided +#' in `required_cols` +#' +#' @param labs Object provided either to var_labs, cat_labs or coh_labs +#' @param n_col Integer specifying the required number of columns +#' @param required_cols Character vector specifying the required column names +#' +#' @return Returns an error if number and names of columns does not match that +#' specified +#' +#' noRd +.checkLabCols <- function(labs, n_col, required_cols){ + + check_cols <- all(required_cols %in% colnames(labs)) + + if(check_cols == FALSE){ + + stop( + paste0("Labels object must be a tibble containing ", n_col, + " columns: ", paste0(required_cols, collapse = ", "))) + + } + +} + +#' Makes character vector of cohorts present within `stats` +#' +#' @param stats Exported object from dh.getStats +#' +#' @return Character vector of cohort names +#' +#' @importFrom dplyr %>% pull +#' +#' @noRd +.availCoh <- function(stats, stats_types){ + + cohort <- NULL + +out <- stats_types %>% + map(function(x){stats[[x]] %>% pull(cohort) %>% unique}) %>% + unlist %>% + unique + +return(out) + +} + +#' Checks that all cohorts contained within `stats` exist in `coh_labs`. +#' +#' @param stats Exported object from dh.getStats +#' @param coh_labs Tibble with two columns: 'cohort' containing the +#' names of the cohorts present in `stats`, and 'coh_label' containing the +#' replacement labels for these cohorts +#' +#' @return Returns an error if all cohorts present in `stats` do not exist in +#' `coh_labs`. Otherwise nothing is returned. +#' +#' @noRd +.checkLabsMatchCoh <- function(stats, coh_labs, stats_types){ + + avail_coh <- .availCoh(stats, stats_types) + + missing_coh_labs <- avail_coh[!avail_coh %in% coh_labs$cohort] + + if(length(missing_coh_labs > 0)){ + + stop( + cat("The following cohorts are contained in `stats` but do not have a + corresponding label provided in `coh_labs`\n\n", missing_coh_labs)) + + } + +} diff --git a/R/define-cases.R b/R/define-cases.R index 9552cea9..648f0f6b 100644 --- a/R/define-cases.R +++ b/R/define-cases.R @@ -33,7 +33,7 @@ #' @export dh.defineCases <- function(df = NULL, vars = NULL, type = NULL, new_obj = NULL, conns = NULL, checks = FALSE, newobj = NULL) { - if (is.null(df)) { + if (is.null(df)) { stop("`df` must not be NULL.", call. = FALSE) } diff --git a/R/drop-cols.R b/R/drop-cols.R index 1aa84a91..8e82bb3c 100644 --- a/R/drop-cols.R +++ b/R/drop-cols.R @@ -27,13 +27,17 @@ #' @importFrom stringr str_subset #' #' @export -dh.dropCols <- function(df = NULL, vars = NULL, new_obj = df, type = NULL, +dh.dropCols <- function(df = NULL, vars = NULL, new_obj = NULL, type = NULL, conns = NULL, checks = TRUE, new_df_name = NULL) { . <- NULL if (is.null(conns)) { conns <- datashield.connections_find() } + + if (is.null(new_obj)) { + new_obj <- df + } if (checks == TRUE) { .isDefined(df = df, vars = vars, conns = conns) diff --git a/R/get-stats.R b/R/get-stats.R index b3c03e19..2bceee92 100644 --- a/R/get-stats.R +++ b/R/get-stats.R @@ -177,6 +177,7 @@ check with ds.class \n\n", ## ---- Check whether these levels are identical for all cohorts --------------- level_ref <- check_levels %>% map(function(x){x[!is.na(x)]}) %>% + map_depth(2, sort) %>% map(unique) %>% map(length) %>% bind_rows() %>% @@ -205,14 +206,19 @@ check with ds.class \n\n", ################################################################################ # 5. Get maximum ns for each cohort ################################################################################ - cohort_ns <- ds.dim(df, type = "split", datasources = conns) %>% + cohort_ns_sep <- ds.dim(df, type = "split", datasources = conns) %>% map_df(~ .[1]) %>% set_names(names(conns)) %>% pivot_longer( cols = everything(), names_to = "cohort", - values_to = "cohort_n" - ) + values_to = "cohort_n") + + cohort_n_comb <- tibble( + cohort = "combined", + cohort_n = sum(cohort_ns_sep$cohort_n)) + + cohort_ns <- bind_rows(cohort_ns_sep, cohort_n_comb) ################################################################################ # 6. Identify variable classes @@ -482,7 +488,6 @@ check with ds.class \n\n", mutate( std.dev = sqrt(EstimatedVar), valid_n = replace_na(Nvalid, 0), - cohort_n = Ntotal, missing_n = cohort_n - valid_n, missing_perc = (missing_n / cohort_n) * 100 ) %>% diff --git a/R/lm-tab.R b/R/lm-tab.R index ece2240c..6a24ffb9 100644 --- a/R/lm-tab.R +++ b/R/lm-tab.R @@ -153,8 +153,22 @@ dh.lmTab <- function(model = NULL, type = NULL, coh_names = NULL, map(~ as_tibble(x = ., rownames = "variable")) %>% bind_rows(.id = "cohort") %>% rename(est = Estimate) %>% - rename(se = "Std. Error") %>% - dplyr::select(cohort, variable, est, se) %>% + rename(se = "Std. Error") + + if(type == "glm_slma"){ + + separate <- separate %>% + dplyr::select(cohort, variable, est, se, "Pr(>|z|)") %>% + dplyr::rename(pvalue = "Pr(>|z|)") + + } else if(type == "lmer_slma"){ + + separate <- separate %>% + mutate(pvalue = NA) + + } + + separate <- separate %>% left_join(., ns, by = "cohort") %>% mutate(n_coh = 1) @@ -236,7 +250,7 @@ dh.lmTab <- function(model = NULL, type = NULL, coh_names = NULL, values_from = value ) %>% mutate(est = paste0(est, " (", lowci, ",", uppci, ")")) %>% - dplyr::select(variable, se, est, pvalue) + dplyr::select(cohort, variable, se, est, pvalue) } if (type == "lmer_slma") { diff --git a/R/lme-mult-poly.R b/R/lme-mult-poly.R index af9ca728..05832a38 100644 --- a/R/lme-mult-poly.R +++ b/R/lme-mult-poly.R @@ -35,8 +35,10 @@ #' @export dh.lmeMultPoly <- function(df = NULL, formulae = NULL, poly_names = NULL, conns = NULL, checks = TRUE) { - sum_log <- NULL + loglik <- model <- study <- log_rank <- . <- av_rank <- loglik_study1 <- + loglik_study2 <- sum_log <- NULL + if (is.null(df)) { stop("`df` must not be NULL.", call. = FALSE) } @@ -57,9 +59,6 @@ dh.lmeMultPoly <- function(df = NULL, formulae = NULL, poly_names = NULL, .isDefined(df = df, conns = conns) } - loglik <- model <- study <- log_rank <- . <- av_rank <- loglik_study1 <- - loglik_study2 <- NULL - ## ---- Run the models --------------------------------------------------------- suppressWarnings( models <- formulae %>% @@ -166,6 +165,9 @@ dh.lmeMultPoly <- function(df = NULL, formulae = NULL, poly_names = NULL, add_row(model = poly_names[fail_tmp == FALSE]) } + ## Set names for the models + models <- models %>% set_names(poly_names) + out <- list(models = models, convergence = problems, fit = fit.tab) return(out) diff --git a/R/make-age-polys.R b/R/make-age-polys.R index 52cee04a..3761f891 100644 --- a/R/make-age-polys.R +++ b/R/make-age-polys.R @@ -31,6 +31,8 @@ dh.makeAgePolys <- function(df = NULL, age_var = NULL, poly_form = c("^-2", "^-1", "^-0.5", "log", "^0.5", "^2", "^3"), poly_names = c("_m_2", "_m_1", "_m_0_5", "log", "_0_5", "_2", "_3"), conns = NULL, checks = TRUE, agevars = NULL) { + + df <- df if (is.null(df)) { stop("`df` must not be NULL.", call. = FALSE) } @@ -63,12 +65,11 @@ dh.makeAgePolys <- function(df = NULL, age_var = NULL, poly_names <- poly_names[str_detect(poly_names, "log") == FALSE] poly_form <- poly_form[str_detect(poly_form, "log") == FALSE] } - df_age <- c(paste0(df, "$", age_var)) + df_age <- paste0(df, "$", age_var) polys <- tibble( poly = cross2(age_var, poly_names) %>% map_chr(paste, sep = "", collapse = ""), - form = cross2(df_age, poly_form) %>% map_chr(paste, sep = "", collapse = "") - ) + form = cross2(df_age, poly_form) %>% map_chr(paste, sep = "", collapse = "")) if (log_yn == TRUE) { polys <- add_row( @@ -87,8 +88,12 @@ dh.makeAgePolys <- function(df = NULL, age_var = NULL, ) }) - ds.cbind(x = c(df, polys$poly), newobj = df) + ds.cbind(x = c(df, polys$poly), newobj = df, datasources = conns) + dh.tidyEnv( + obj = polys$poly, + type = "remove") + cat("\nThe following transformations of age have been created in dataframe:", df, "\n\n", polys$poly) } diff --git a/R/make-excluded-df.R b/R/make-excluded-df.R new file mode 100644 index 00000000..5f28e060 --- /dev/null +++ b/R/make-excluded-df.R @@ -0,0 +1,91 @@ +#' Given df-A & df-B, creates a new df which is the rows in A but not in B +#' +#' When writing a paper often we need to exclude various participants for +#' various reasons. Then we will need a df with all of these excluded +#' participants. This is one way to do it. +#' +#' @param original_df Dataframe containing the full sample +#' @param final_df Dataframe containing the included sample +#' @param type Specifies type of dataframe in `original_df`. Either 'long' or +#' 'wide'. NOTE NOT CURRENTLY FUNCTIONAL - ONLY WORKS FOR WIDE. +#' @template new_obj +#' @template conns +#' +#' @return Creates a serverside dataframe containing the rows from `original_df` +#' that are not contained in `final_df` +#' +#' @importFrom utils head +#' +#' @export +dt.makeExcludedDf <- function(original_df, final_df, new_obj, + type = "wide", conns = NULL){ + + if (is.null(conns)) { + conns <- datashield.connections_find() + } + + dims <- ds.dim(final_df) + + length.ref <- tibble( + cohort = names(conns), + length = dims %>% map(~.x[[1]]) %>% unlist() %>% head(-1) %>% as.character) + + length.ref %>% + pmap(function(cohort, length){ + ds.rep( + x1 = 1, + times = length, + source.x1 = "clientside", + source.times = "c", + newobj = "final_ones", + datasources = conns[cohort]) + }) + + ds.dataFrame( + x = c(paste0(final_df, "$child_id"), "final_ones"), + newobj = "final_tmp") + +## ---- Now merge this vector with baseline_df --------------------------------- + +if(type == "wide"){ + + ds.merge( + x.name = original_df, + y.name = "final_tmp", + by.x.names = "child_id", + by.y.names = "child_id", + all.x = TRUE, + all.y = FALSE, + newobj = "orig_w_vec") + +} else if(type == "long"){ + + ds.merge( + x.name = original_df, + y.name = "final_tmp", + by.x.names = c("child_id", "age"), + by.y.names = c("child_id", "age"), + all.x = TRUE, + all.y = FALSE, + newobj = "orig_w_vec") + +} + +## ---- Transform case_def to binary vector ------------------------------------ +ds.Boole( + V1 = paste0("orig_w_vec$final_ones"), + V2 = 1, + Boolean.operator = "==", + na.assign = "0", + newobj = "exc_vec") + +## ---- Create excluded subset based on this vector ---------------------------- +ds.dataFrameSubset( + df.name = original_df, + V1.name = "exc_vec", + V2.name = "0", + Boolean.operator = "==", + keep.NAs = FALSE, + newobj = new_obj) + +} diff --git a/R/make-lmer-form.R b/R/make-lmer-form.R index 8233093f..dde3e7bb 100644 --- a/R/make-lmer-form.R +++ b/R/make-lmer-form.R @@ -32,7 +32,7 @@ #' #' @export dh.makeLmerForm <- function(outcome = NULL, id_var = NULL, age_vars = NULL, - random = NULL, fixed = NULL, age_interactions = FALSE) { + random = NULL, fixed = NULL, age_interactions = NULL) { if (is.null(outcome)) { stop("`outcome` must not be NULL.", call. = FALSE) } diff --git a/R/mean-by-group.R b/R/mean-by-group.R index c1563488..2677ff93 100644 --- a/R/mean-by-group.R +++ b/R/mean-by-group.R @@ -10,7 +10,8 @@ #' @importFrom dsBaseClient ds.meanSdGp #' @importFrom purrr map #' @importFrom tibble as_tibble -#' @importFrom dplyr bind_rows %>% slice +#' @importFrom dplyr bind_rows %>% slice +#' @importFrom tidyr separate #' #' @template conns #' @template df @@ -30,7 +31,8 @@ dh.meanByGroup <- function(df = NULL, outcome = NULL, group_var = NULL, intervals = NULL, conns = NULL, checks = FALSE) { value <- op <- tmp <- varname <- new_df_name <- age <- group <- cohort <- - . <- enough_obs <- variable <- NULL + . <- enough_obs <- variable <- level <- std.dev <- nvalid <- ntotal <- + x <- nvalid_2 <- nvalid_1 <- mean_2 <- std.dev_2 <- nmissing <- NULL if (is.null(df)) { stop("`df` must not be NULL.", call. = FALSE) @@ -184,21 +186,32 @@ paste0(warnings$issues$cohort, collapse = ", ") datasources = conns[cohort]) }) + ## Now we take these values and put them into a neater table out <- obs_by_agecat_comb %>% - map(function(x) { - x$Mean_gp %>% - as_tibble(rownames = "group") %>% - slice(2) %>% - pivot_longer( - cols = -group, - names_to = "cohort", - values_to = "mean")}) %>% + map(function(x){ + + tibble( + group = dimnames(x$Mean_gp_study)[[1]], + mean = as.numeric(x$Mean_gp_study), + std.dev = as.numeric(x$StDev_gp_study), + nvalid = as.numeric(x$Nvalid_gp_study), + cohort = colnames(x$Mean_gp_study)) + }) %>% bind_rows() %>% - mutate(group = str_remove(group, "_[^_]+$")) %>% + separate(group, into = c("group", "level"), sep="_(?=[^_]+$)") %>% + pivot_wider( + names_from = "level", + values_from = c("nvalid", "mean", "std.dev")) %>% + dplyr::rename( + "nvalid" = nvalid_2, + "nmissing" = nvalid_1, + "mean" = mean_2, + "std.dev" = std.dev_2) %>% mutate(group = str_remove(group, "grp_")) %>% - dplyr::select(cohort, group, mean) - + dplyr::select(cohort, group, mean, std.dev, nvalid, nmissing) + + ## ---- Remove temporary objects ------------------------------------------- dh.tidyEnv( obj = c(cats$new_df_name, assign_conditions$varname), diff --git a/R/meta-sep-models.R b/R/meta-sep-models.R new file mode 100644 index 00000000..d9f6954d --- /dev/null +++ b/R/meta-sep-models.R @@ -0,0 +1,170 @@ +#' Function in progress to meta-analyse separate models. +#' +#' @param ref Tibble, output from dh.multGlm. +#' @param exp Logical, whether to exponentiate coefficients after meta-analysis +#' @param method Method of meta-analysing coefficients. +#' @param output Character; "cohort" to return cohort coefficients only, "meta" +#' to return meta-analysed coefficients only, "both" to return both. +#' +#' @return A tibble +#' +#' @family model building +#' +#' @importFrom rlang arg_match +#' @importFrom purrr pmap set_names map pmap_df map_int +#' @importFrom dplyr %>% bind_rows group_by group_keys group_split mutate select +#' left_join +#' @importFrom metafor rma.uni +#' @importFrom tibble tibble +#' +#' @export +dh.metaSepModels <- function(ref = NULL, exp = NULL, method = NULL, + output = "both"){ + + exposure <- variable <- cohort <- . <- est <- lowci <- uppci <- + model_id <- n_obs <- se <- NULL + + method <- arg_match( + arg = method, + values = c("DL", "HE", "HS", "HSk", "SJ", "ML", "REML", "EB", "PM", "GENQ")) + + output <- arg_match( + arg = output, + values = c("meta", "cohort", "both")) + + if(output %in% c("meta", "both") == TRUE){ + ## ---- Get coefficients ----------------------------------------------------- + model_coefs <- ref %>% + pmap(function(cohort, fit, ...){ + + dh.lmTab( + model = fit, + coh_names = cohort, + type = "glm_slma", + ci_format = "separate", + direction = "wide", + family = "binomial", + digits = 50) %>% + dplyr::filter(cohort != "combined") + }) %>% + set_names(ref$model_id) %>% + bind_rows(.id = "model_id") + + ## ---- Create tibble respecting grouping order ------------------------------ + model_holder <- model_coefs %>% + group_by(model_id, variable) %>% + group_keys + + ## ---- Meta-analyse --------------------------------------------------------- + ma.fit <- model_coefs %>% + group_by(model_id, variable) %>% + group_split() %>% + map( + ~rma.uni( + yi = .x$est, + sei = .x$se, + method = method, + control=list(stepadj=0.5, maxiter=1000))) + + ## ---- Put back together ---------------------------------------------------- + model_holder <- model_holder %>% + mutate(meta = ma.fit) + + ma.out <- model_holder %>% + pmap_df(function(model_id, variable, meta){ + + tibble( + model_id = model_id, + variable = variable, + est = meta$beta[1], + se = meta$se, + lowci = meta$ci.lb, + uppci = meta$ci.ub, + i2 = round(meta$I2, 2), + t2 = round(meta$tau2, 2), + q = round(meta$QE, 2), + q_p = round(meta$QEp, 2), + k = round(meta$k, 2), + p = round(meta$p, 2), + t2_se = round(meta$se.tau2, 2), + n_coh = meta$k) + + }) %>% + mutate(metafor_obj = model_holder$meta) + + } + + ## ---- Calculate combined sample size --------------------------------------- + sample_n_comb <- model_coefs %>% + group_by(model_id, variable) %>% + group_split %>% + map(function(x){ + + x %>% + mutate(n_obs = sum(x$n_obs)) %>% + dplyr::select(model_id, variable, n_obs) %>% + slice(1) + }) %>% + bind_rows + + ma.out <- left_join(ma.out, sample_n_comb, by = c("model_id", "variable")) %>% + mutate(cohort = "combined") + + if(output %in% c("cohort", "both") == TRUE){ + + coh.out <- ref %>% + pmap(function(model, cohort, fit, ...){ + + dh.lmTab( + model = fit, + coh_names = cohort, + type = "glm_slma", + direction = "wide", + family = "binomial", + ci_format = "separate", + digits = 50) %>% + dplyr::filter(cohort != "combined") + + }) %>% + set_names(ref$model_id) %>% + bind_rows(.id = "model_id") + + } + + if(output == "both"){ + + both.out <- coh.out %>% + mutate( + i2 = NA, + metafor_obj = NA, + weight = 1/(se^2)) %>% + group_by(model_id, variable) %>% + group_split() %>% + map(~mutate(., weight_scaled = (weight / sum(weight)*100))) %>% + bind_rows %>% + ungroup + + out <- bind_rows(both.out, ma.out) + + } else if(output == "cohort"){ + + out <- coh.out + + } else if(output == "meta"){ + + out <- ma.out + + } + + if(exp == TRUE){ + + out <- out %>% + mutate(across(c(est, lowci, uppci), ~exp(.))) + + } + + out <- left_join(out, ref, by = c("model_id", "cohort")) + + return(out) + +} \ No newline at end of file diff --git a/R/multGlm.R b/R/multGlm.R new file mode 100644 index 00000000..9ed55e60 --- /dev/null +++ b/R/multGlm.R @@ -0,0 +1,101 @@ +#' Loop multiple GLM models and handle errors & non-convergence +#' +#' This function allows you to fit different glm models to different cohorts, +#' for example if you want to include different covariates for different +#' cohorts. +#' +#' @importFrom dplyr %>% bind_cols mutate filter +#' @importFrom dsBaseClient ds.glmSLMA +#' @importFrom purrr map pmap +#' @importFrom stringr str_detect +#' +#' @template df +#' @param ref reference tibble, output from dh.buildModels. +#' @param vary_df Option to provide different df for different models. Default +#' is FALSE. +#' @param family Family to use in glm models. Default is "gaussian". +#' @template checks +#' @template conns +#' +#' @return Tibble containing five columns: +#' * model = Description of model fit, taken from `model_name`. +#' * formula = Formula for this model, taken from `formulae`. +#' * cohort = Cohort for which this model was fit, taken from `cohort`. +#' * fit = Output from glm model. +#' * converged = Logical; whether or not the model converged/fit'. +#' +#' @family analysis functions +#' @md +#' +#' @export +dh.multGLM <- function(df = NULL, ref = NULL, checks = TRUE, conns = NULL, + vary_df = F, family = "gaussian"){ + formulae <- model_names <- cohort <- converged <- NULL + + if (vary_df == F & is.null(df)) { + stop("`df` must not be NULL.", call. = FALSE) + } + + if (is.null(ref)) { + stop("`ref` must not be NULL.", call. = FALSE) + } + + if (is.null(conns)) { + conns <- datashield.connections_find() + } + + ## ---- Sort out df ---------------------------------------------------------- + if(vary_df == F){ + + ref <- ref %>% mutate(df = df) + + } + + ## ---- Run the models ------------------------------------------------------- + suppressWarnings( + models <- ref %>% + pmap(function(formula, cohort, df, ...){ + + tryCatch( + { + dsBaseClient::ds.glmSLMA( + formula = unlist(formula), + dataName = df, + family = family, + combine.with.metafor = TRUE, + datasources = conns[cohort] + ) + }, + error = function(error_message) { + out <- list("failed", error_message) + return(out) + } + ) + }) + ) + + ## ---- Identify models which failed completely ------------------------------ + fail_tmp <- models %>% + map(~ .[[1]][[1]][[1]]) %>% + str_detect("POTENTIALLY DISCLOSIVE|failed", negate = TRUE) + + out <- ref %>% + mutate( + fit = models, + converged = fail_tmp) + + problems <- out %>% + dplyr::filter(converged == FALSE) + + ## ---- Summarise convergence info --------------------------------------------- + if (all(out$converged == FALSE)) { + warning("All models failed. Check 'convergence' table for more details") + } + + if (any(out$converged == TRUE) & any(out$converged == FALSE)) { + warning("Some models threw an error message. Check 'converged' column for + more details") + } + + return(out) +} diff --git a/R/predict-lmer.R b/R/predict-lmer.R index ce0d7c84..65bf8ada 100644 --- a/R/predict-lmer.R +++ b/R/predict-lmer.R @@ -47,18 +47,21 @@ dh.predictLmer <- function(model = NULL, new_data = NULL, coh_names = NULL, } ## ---- First we add a column to the new data for the intercept ---------------- + if("intercept" %in% colnames(new_data) == FALSE){ + new_data <- new_data %>% mutate(intercept = 1) %>% select(intercept, everything()) - ## ---- First we extract coefficients ------------------------------------------ + } + + ## ---- Then we extract coefficients ------------------------------------------ coefs <- dh.lmTab( model = model, type = "lmer_slma", coh_names = coh_names, direction = "long", - ci_format = "separate" - ) + ci_format = "separate") ## ---- Now we get the names of coefficients which aren't the intercept -------- coef_names <- coefs$fixed %>% @@ -67,10 +70,10 @@ dh.predictLmer <- function(model = NULL, new_data = NULL, coh_names = NULL, ## ---- Now we get the coefficients for each cohort ---------------------------- coefs_by_cohort <- coefs$fixed %>% + dplyr::select(cohort, variable, coefficient, value) %>% pivot_wider( names_from = variable, - values_from = value - ) %>% + values_from = value) %>% dplyr::filter(coefficient == "est") %>% select(-coefficient) %>% group_by(cohort) @@ -103,7 +106,6 @@ dh.predictLmer <- function(model = NULL, new_data = NULL, coh_names = NULL, mutate(predicted = rowSums(x)) }) - ## ---- Now get the standard errors ------------------------------------------------ nstudy <- seq(1, model$num.valid.studies, 1) study_ref <- paste0("study", nstudy) diff --git a/R/utils.R b/R/utils.R index 9482c289..310bb9c2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -133,7 +133,7 @@ isDefined <- function(datasources = NULL, obj = NULL, error.message = TRUE) { #' Convert strings indicating boolean operators to symbols. Used in functions #' make-strata and quantile-split. #' -#' @param boole_string +#' @param boole_string Argument string to be converted to operators. #' @importFrom dplyr case_when #' @noRd .convertBooleText <- function(boole_string){ diff --git a/R/z-score-by-group.R b/R/z-score-by-group.R new file mode 100644 index 00000000..c9975e80 --- /dev/null +++ b/R/z-score-by-group.R @@ -0,0 +1,69 @@ +#' Creates z-scores within specified bands +#' +#' Especially with mental health outcomes, we often want to transform raw scores +#' into z-scores, but within certain age bands (e.g. measurement occasion or +#' per year). This function does this. +#' +#' @template df +#' @param out_var Variable to make z-scores for +#' @param age_var Age variable +#' @param low_band Lower band for z score +#' @param upp_band Upper band for z score +#' @template new_obj +#' @template conns +#' +#' @return Z score serverside within specified bands +#' +#' @importFrom tibble tibble +#' @importFrom dplyr %>% +#' @importFrom purrr pmap cross2 map_chr +#' @importFrom dsBaseClient ds.cbind +#' @importFrom DSI datashield.connections_find +#' +#' @family trajectory functions +#' @family data manipulation functions +#' +#' @export +dh.zByGroup <- function(df, out_var, age_var, low_band, upp_band, conns, new_obj){ + + .BooleTwoConditions( + df = df, + var = "age", + value_1 = low_band, + value_2 = upp_band, + op_1 = ">=", + op_2 = "<", + newobj = "tmp_1", + conns = conns) + + ds.recodeValues( + var.name = "tmp_1", + values2replace.vector = 0, + new.values.vector = NA, + newobj = "tmp_2", + datasources = conns) + + ref <- dh.meanByGroup( + df = df, + outcome = out_var, + group_var = age_var, + intervals = c(low_band, upp_band), + checks = F, + conns = conns) + + assign_form <- paste0( + "((", + paste0(df, "$", out_var), + "-", ref$mean, ")", "/", ref$std.dev, ")") + + ds.assign( + toAssign = assign_form, + newobj = "tmp_3", + datasources = conns) + + ds.assign( + toAssign = paste0("tmp_2*tmp_3"), + newobj = new_obj, + datasources = conns) + +} \ No newline at end of file diff --git a/man/dh.anyData.Rd b/man/dh.anyData.Rd index 42f27d72..acdae9d6 100644 --- a/man/dh.anyData.Rd +++ b/man/dh.anyData.Rd @@ -30,6 +30,7 @@ generates a look-up table with this information. \seealso{ Other descriptive functions: \code{\link{dh.classDiscrepancy}()}, +\code{\link{dh.createTableOne}()}, \code{\link{dh.defineCases}()}, \code{\link{dh.getStats}()}, \code{\link{dh.lmTab}()}, diff --git a/man/dh.classDiscrepancy.Rd b/man/dh.classDiscrepancy.Rd index c436ec6d..7795ca24 100644 --- a/man/dh.classDiscrepancy.Rd +++ b/man/dh.classDiscrepancy.Rd @@ -33,6 +33,7 @@ the class of each variable. \seealso{ Other descriptive functions: \code{\link{dh.anyData}()}, +\code{\link{dh.createTableOne}()}, \code{\link{dh.defineCases}()}, \code{\link{dh.getStats}()}, \code{\link{dh.lmTab}()}, diff --git a/man/dh.createTableOne.Rd b/man/dh.createTableOne.Rd new file mode 100644 index 00000000..c6ca3597 --- /dev/null +++ b/man/dh.createTableOne.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create-table-one.R +\name{dh.createTableOne} +\alias{dh.createTableOne} +\title{Creates tables in useful formats for including in manuscripts} +\usage{ +dh.createTableOne( + stats = NULL, + vars = NULL, + var_labs = NULL, + cat_labs = NULL, + type = NULL, + coh_labs = NULL, + coh_direction = "cols", + cont_stats = "med_iqr", + inc_missing = NULL, + round_digits = 2, + perc_denom = NULL +) +} +\arguments{ +\item{stats}{Exported object from dh.getStats.} + +\item{vars}{Variable to be included in table.} + +\item{var_labs}{Tibble with two columns: 'variable' containing the +names of the variables specified in `vars`, and 'var_label' containing the +replacement labels for these variables.} + +\item{cat_labs}{Tibble with three columns: 'variable' containing the +names of the categorical variables specified in `vars`, 'category' +containing the categories of these variabels, and "cat_label" containing +the replacement category labels for these variables.} + +\item{type}{Character specifying which cohorts to include in the table. If +"combined" then only combined stats will be returned, if "cohort" then only +cohort-specific stats will be returned, if "both" then everything will be +returned.} + +\item{coh_labs}{Tibble with two columns: 'cohort' containing the names +of all cohorts included in `stats`, and 'cohort_labs' containing the +replacement labels for these cohorts.} + +\item{coh_direction}{Character specifying direction of data if `type` is +'cohort' or 'both'. Use 'rows' to return cohorts as rows and variable as +columns, or use 'cols' to return cohorts as columns and variables as rows. +Defauls is "col".} + +\item{cont_stats}{Character specifying which summary statistic to return +for continuous stats. Use 'med_iqr' to return the median and interquartile +range, use 'mean_sd' to return the mean and standard deviation. Default is +"med_iqr".} + +\item{inc_missing}{Boolean specifying whether to return missing values in +the output. Use TRUE for yes and FALSE for no.} + +\item{round_digits}{Optionally, the number of decimal places to round output +to. Default is 2.} + +\item{perc_denom}{The denominator for percentages. Either 'valid' for valid +cases or 'total' for total cases.} +} +\value{ +Tibble containing formatted summary statistics. If `coh_direction` is +'cols', the tibble will contain four columns: 'cohort', 'variable', +'category' & value. If `coh_direction` is rows, the tibble will contain the +column 'cohort' as well as as columns for all continuous variables and all +categories of categorical variables. +} +\description{ +dh.getStats extracts key statistics and stores them in a clientside list. +dh.createTableOne builds on this by formatting the extract stats into a +table which can then be included in a manuscript. Flexible formatting options +are included. +} +\seealso{ +Other descriptive functions: +\code{\link{dh.anyData}()}, +\code{\link{dh.classDiscrepancy}()}, +\code{\link{dh.defineCases}()}, +\code{\link{dh.getStats}()}, +\code{\link{dh.lmTab}()}, +\code{\link{dh.meanByGroup}()} +} +\concept{descriptive functions} diff --git a/man/dh.defineCases.Rd b/man/dh.defineCases.Rd index c5ef8945..e4613a3b 100644 --- a/man/dh.defineCases.Rd +++ b/man/dh.defineCases.Rd @@ -51,6 +51,7 @@ This function replaces the deprecated dh.subjHasData. Other descriptive functions: \code{\link{dh.anyData}()}, \code{\link{dh.classDiscrepancy}()}, +\code{\link{dh.createTableOne}()}, \code{\link{dh.getStats}()}, \code{\link{dh.lmTab}()}, \code{\link{dh.meanByGroup}()} diff --git a/man/dh.dropCols.Rd b/man/dh.dropCols.Rd index 99c4e66c..70b55089 100644 --- a/man/dh.dropCols.Rd +++ b/man/dh.dropCols.Rd @@ -7,7 +7,7 @@ dh.dropCols( df = NULL, vars = NULL, - new_obj = df, + new_obj = NULL, type = NULL, conns = NULL, checks = TRUE, @@ -49,6 +49,7 @@ Other data manipulation functions: \code{\link{dh.makeStrata}()}, \code{\link{dh.quartileSplit}()}, \code{\link{dh.renameVars}()}, -\code{\link{dh.tidyEnv}()} +\code{\link{dh.tidyEnv}()}, +\code{\link{dh.zByGroup}()} } \concept{data manipulation functions} diff --git a/man/dh.getStats.Rd b/man/dh.getStats.Rd index 49c251a4..3ebf73cd 100644 --- a/man/dh.getStats.Rd +++ b/man/dh.getStats.Rd @@ -70,6 +70,7 @@ dh.getStats will return the variable for that cohort with all NAs. See Other descriptive functions: \code{\link{dh.anyData}()}, \code{\link{dh.classDiscrepancy}()}, +\code{\link{dh.createTableOne}()}, \code{\link{dh.defineCases}()}, \code{\link{dh.lmTab}()}, \code{\link{dh.meanByGroup}()} diff --git a/man/dh.lmTab.Rd b/man/dh.lmTab.Rd index ca639920..96f0d34d 100644 --- a/man/dh.lmTab.Rd +++ b/man/dh.lmTab.Rd @@ -74,6 +74,7 @@ ds.glm, ds.glmSLMA and ds.lmerSLMA objects. Other descriptive functions: \code{\link{dh.anyData}()}, \code{\link{dh.classDiscrepancy}()}, +\code{\link{dh.createTableOne}()}, \code{\link{dh.defineCases}()}, \code{\link{dh.getStats}()}, \code{\link{dh.meanByGroup}()} diff --git a/man/dh.lmeMultPoly.Rd b/man/dh.lmeMultPoly.Rd index 552c73a8..df7df7a3 100644 --- a/man/dh.lmeMultPoly.Rd +++ b/man/dh.lmeMultPoly.Rd @@ -46,6 +46,7 @@ Other trajectory functions: \code{\link{dh.makeAgePolys}()}, \code{\link{dh.makeLmerForm}()}, \code{\link{dh.predictLmer}()}, -\code{\link{dh.trimPredData}()} +\code{\link{dh.trimPredData}()}, +\code{\link{dh.zByGroup}()} } \concept{trajectory functions} diff --git a/man/dh.makeAgePolys.Rd b/man/dh.makeAgePolys.Rd index 47470799..6623c8b1 100644 --- a/man/dh.makeAgePolys.Rd +++ b/man/dh.makeAgePolys.Rd @@ -46,7 +46,8 @@ Other trajectory functions: \code{\link{dh.lmeMultPoly}()}, \code{\link{dh.makeLmerForm}()}, \code{\link{dh.predictLmer}()}, -\code{\link{dh.trimPredData}()} +\code{\link{dh.trimPredData}()}, +\code{\link{dh.zByGroup}()} Other data manipulation functions: \code{\link{dh.dropCols}()}, @@ -54,7 +55,8 @@ Other data manipulation functions: \code{\link{dh.makeStrata}()}, \code{\link{dh.quartileSplit}()}, \code{\link{dh.renameVars}()}, -\code{\link{dh.tidyEnv}()} +\code{\link{dh.tidyEnv}()}, +\code{\link{dh.zByGroup}()} } \concept{data manipulation functions} \concept{trajectory functions} diff --git a/man/dh.makeIQR.Rd b/man/dh.makeIQR.Rd index 502570af..254b8562 100644 --- a/man/dh.makeIQR.Rd +++ b/man/dh.makeIQR.Rd @@ -49,6 +49,7 @@ Other data manipulation functions: \code{\link{dh.makeStrata}()}, \code{\link{dh.quartileSplit}()}, \code{\link{dh.renameVars}()}, -\code{\link{dh.tidyEnv}()} +\code{\link{dh.tidyEnv}()}, +\code{\link{dh.zByGroup}()} } \concept{data manipulation functions} diff --git a/man/dh.makeLmerForm.Rd b/man/dh.makeLmerForm.Rd index 1b19898f..c74fbef5 100644 --- a/man/dh.makeLmerForm.Rd +++ b/man/dh.makeLmerForm.Rd @@ -10,7 +10,7 @@ dh.makeLmerForm( age_vars = NULL, random = NULL, fixed = NULL, - age_interactions = FALSE + age_interactions = NULL ) } \arguments{ @@ -50,6 +50,7 @@ Other trajectory functions: \code{\link{dh.lmeMultPoly}()}, \code{\link{dh.makeAgePolys}()}, \code{\link{dh.predictLmer}()}, -\code{\link{dh.trimPredData}()} +\code{\link{dh.trimPredData}()}, +\code{\link{dh.zByGroup}()} } \concept{trajectory functions} diff --git a/man/dh.makeStrata.Rd b/man/dh.makeStrata.Rd index 39ab414a..8ccee904 100644 --- a/man/dh.makeStrata.Rd +++ b/man/dh.makeStrata.Rd @@ -113,6 +113,7 @@ Other data manipulation functions: \code{\link{dh.makeIQR}()}, \code{\link{dh.quartileSplit}()}, \code{\link{dh.renameVars}()}, -\code{\link{dh.tidyEnv}()} +\code{\link{dh.tidyEnv}()}, +\code{\link{dh.zByGroup}()} } \concept{data manipulation functions} diff --git a/man/dh.meanByGroup.Rd b/man/dh.meanByGroup.Rd index 5b38303f..8c856e31 100644 --- a/man/dh.meanByGroup.Rd +++ b/man/dh.meanByGroup.Rd @@ -44,6 +44,7 @@ variable, which may not always be what is required. Other descriptive functions: \code{\link{dh.anyData}()}, \code{\link{dh.classDiscrepancy}()}, +\code{\link{dh.createTableOne}()}, \code{\link{dh.defineCases}()}, \code{\link{dh.getStats}()}, \code{\link{dh.lmTab}()} diff --git a/man/dh.metaSepModels.Rd b/man/dh.metaSepModels.Rd new file mode 100644 index 00000000..c8ae45dd --- /dev/null +++ b/man/dh.metaSepModels.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta-sep-models.R +\name{dh.metaSepModels} +\alias{dh.metaSepModels} +\title{Function in progress to meta-analyse separate models.} +\usage{ +dh.metaSepModels(ref = NULL, exp = NULL, method = NULL, output = "both") +} +\arguments{ +\item{ref}{Tibble, output from dh.multGlm.} + +\item{exp}{Logical, whether to exponentiate coefficients after meta-analysis} + +\item{method}{Method of meta-analysing coefficients.} + +\item{output}{Character; "cohort" to return cohort coefficients only, "meta" +to return meta-analysed coefficients only, "both" to return both.} +} +\value{ +A tibble +} +\description{ +Function in progress to meta-analyse separate models. +} +\concept{model building} diff --git a/man/dh.multGLM.Rd b/man/dh.multGLM.Rd new file mode 100644 index 00000000..7b9d0e71 --- /dev/null +++ b/man/dh.multGLM.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multGlm.R +\name{dh.multGLM} +\alias{dh.multGLM} +\title{Loop multiple GLM models and handle errors & non-convergence} +\usage{ +dh.multGLM( + df = NULL, + ref = NULL, + checks = TRUE, + conns = NULL, + vary_df = F, + family = "gaussian" +) +} +\arguments{ +\item{df}{Character specifying a server-side data frame.} + +\item{ref}{reference tibble, output from dh.buildModels.} + +\item{checks}{Logical; if TRUE checks are performed prior to running the +function. Default is TRUE.} + +\item{conns}{DataSHIELD connections object.} + +\item{vary_df}{Option to provide different df for different models. Default +is FALSE.} + +\item{family}{Family to use in glm models. Default is "gaussian".} +} +\value{ +Tibble containing five columns: +\itemize{ +\item model = Description of model fit, taken from \code{model_name}. +\item formula = Formula for this model, taken from \code{formulae}. +\item cohort = Cohort for which this model was fit, taken from \code{cohort}. +\item fit = Output from glm model. +\item converged = Logical; whether or not the model converged/fit'. +} +} +\description{ +This function allows you to fit different glm models to different cohorts, +for example if you want to include different covariates for different +cohorts. +} +\concept{analysis functions} diff --git a/man/dh.predictLmer.Rd b/man/dh.predictLmer.Rd index efcf12b6..1368db9c 100644 --- a/man/dh.predictLmer.Rd +++ b/man/dh.predictLmer.Rd @@ -34,6 +34,7 @@ Other trajectory functions: \code{\link{dh.lmeMultPoly}()}, \code{\link{dh.makeAgePolys}()}, \code{\link{dh.makeLmerForm}()}, -\code{\link{dh.trimPredData}()} +\code{\link{dh.trimPredData}()}, +\code{\link{dh.zByGroup}()} } \concept{trajectory functions} diff --git a/man/dh.quartileSplit.Rd b/man/dh.quartileSplit.Rd index 599bc625..c5535f8e 100644 --- a/man/dh.quartileSplit.Rd +++ b/man/dh.quartileSplit.Rd @@ -62,6 +62,7 @@ Other data manipulation functions: \code{\link{dh.makeIQR}()}, \code{\link{dh.makeStrata}()}, \code{\link{dh.renameVars}()}, -\code{\link{dh.tidyEnv}()} +\code{\link{dh.tidyEnv}()}, +\code{\link{dh.zByGroup}()} } \concept{data manipulation functions} diff --git a/man/dh.renameVars.Rd b/man/dh.renameVars.Rd index 67531fe3..79290106 100644 --- a/man/dh.renameVars.Rd +++ b/man/dh.renameVars.Rd @@ -40,6 +40,7 @@ Other data manipulation functions: \code{\link{dh.makeIQR}()}, \code{\link{dh.makeStrata}()}, \code{\link{dh.quartileSplit}()}, -\code{\link{dh.tidyEnv}()} +\code{\link{dh.tidyEnv}()}, +\code{\link{dh.zByGroup}()} } \concept{data manipulation functions} diff --git a/man/dh.tidyEnv.Rd b/man/dh.tidyEnv.Rd index bc4fd3c4..2c06f233 100644 --- a/man/dh.tidyEnv.Rd +++ b/man/dh.tidyEnv.Rd @@ -28,6 +28,7 @@ Other data manipulation functions: \code{\link{dh.makeIQR}()}, \code{\link{dh.makeStrata}()}, \code{\link{dh.quartileSplit}()}, -\code{\link{dh.renameVars}()} +\code{\link{dh.renameVars}()}, +\code{\link{dh.zByGroup}()} } \concept{data manipulation functions} diff --git a/man/dh.trimPredData.Rd b/man/dh.trimPredData.Rd index 454882e7..7db043d3 100644 --- a/man/dh.trimPredData.Rd +++ b/man/dh.trimPredData.Rd @@ -42,6 +42,7 @@ Other trajectory functions: \code{\link{dh.lmeMultPoly}()}, \code{\link{dh.makeAgePolys}()}, \code{\link{dh.makeLmerForm}()}, -\code{\link{dh.predictLmer}()} +\code{\link{dh.predictLmer}()}, +\code{\link{dh.zByGroup}()} } \concept{trajectory functions} diff --git a/man/dh.zByGroup.Rd b/man/dh.zByGroup.Rd new file mode 100644 index 00000000..dd456502 --- /dev/null +++ b/man/dh.zByGroup.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/z-score-by-group.R +\name{dh.zByGroup} +\alias{dh.zByGroup} +\title{Creates z-scores within specified bands} +\usage{ +dh.zByGroup(df, out_var, age_var, low_band, upp_band, conns, new_obj) +} +\arguments{ +\item{df}{Character specifying a server-side data frame.} + +\item{out_var}{Variable to make z-scores for} + +\item{age_var}{Age variable} + +\item{low_band}{Lower band for z score} + +\item{upp_band}{Upper band for z score} + +\item{conns}{DataSHIELD connections object.} + +\item{new_obj}{Character specifying name for created serverside object.} +} +\value{ +Z score serverside within specified bands +} +\description{ +Especially with mental health outcomes, we often want to transform raw scores +into z-scores, but within certain age bands (e.g. measurement occasion or +per year). This function does this. +} +\seealso{ +Other trajectory functions: +\code{\link{dh.lmeMultPoly}()}, +\code{\link{dh.makeAgePolys}()}, +\code{\link{dh.makeLmerForm}()}, +\code{\link{dh.predictLmer}()}, +\code{\link{dh.trimPredData}()} + +Other data manipulation functions: +\code{\link{dh.dropCols}()}, +\code{\link{dh.makeAgePolys}()}, +\code{\link{dh.makeIQR}()}, +\code{\link{dh.makeStrata}()}, +\code{\link{dh.quartileSplit}()}, +\code{\link{dh.renameVars}()}, +\code{\link{dh.tidyEnv}()} +} +\concept{data manipulation functions} +\concept{trajectory functions} diff --git a/man/dot-checkLabCols.Rd b/man/dot-checkLabCols.Rd new file mode 100644 index 00000000..d55f6431 --- /dev/null +++ b/man/dot-checkLabCols.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create-table-one.R +\name{.checkLabCols} +\alias{.checkLabCols} +\title{Checks that the columns provided in labs contain all and only those provided +in `required_cols`} +\usage{ +.checkLabCols(labs, n_col, required_cols) +} +\arguments{ +\item{labs}{Object provided either to var_labs, cat_labs or coh_labs} + +\item{n_col}{Integer specifying the required number of columns} + +\item{required_cols}{Character vector specifying the required column names} +} +\value{ +Returns an error if number and names of columns does not match that +specified + +noRd +} +\description{ +Checks that the columns provided in labs contain all and only those provided +in `required_cols` +} diff --git a/man/dt.makeExcludedDf.Rd b/man/dt.makeExcludedDf.Rd new file mode 100644 index 00000000..2a6bc13f --- /dev/null +++ b/man/dt.makeExcludedDf.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make-excluded-df.R +\name{dt.makeExcludedDf} +\alias{dt.makeExcludedDf} +\title{Given df-A & df-B, creates a new df which is the rows in A but not in B} +\usage{ +dt.makeExcludedDf(original_df, final_df, new_obj, type = "wide", conns = NULL) +} +\arguments{ +\item{original_df}{Dataframe containing the full sample} + +\item{final_df}{Dataframe containing the included sample} + +\item{new_obj}{Character specifying name for created serverside object.} + +\item{type}{Specifies type of dataframe in `original_df`. Either 'long' or +'wide'. NOTE NOT CURRENTLY FUNCTIONAL - ONLY WORKS FOR WIDE.} + +\item{conns}{DataSHIELD connections object.} +} +\value{ +Creates a serverside dataframe containing the rows from `original_df` +that are not contained in `final_df` +} +\description{ +When writing a paper often we need to exclude various participants for +various reasons. Then we will need a df with all of these excluded +participants. This is one way to do it. +}