From a4ce14a34040335681b2b897c3b8de96c57a860e Mon Sep 17 00:00:00 2001 From: Marton Kovacs Date: Wed, 15 May 2024 15:27:34 +0200 Subject: [PATCH] updated bayes anova precalculation code --- R/ssp_bayesian_anova.R | 115 +- R/ssp_eq_anova.R | 110 + R/ssp_rope_anova.R | 99 + data-raw/bf_anova_precalculations.r | 89 +- data-raw/options/anova_options_bayesian.R | 39 + data-raw/options/anova_options_eq.R | 40 + data-raw/options/anova_options_rope.R | 40 + data/options/ssp_anova_options_bayesian.csv | 1666 ++++++ data/options/ssp_anova_options_eq.csv | 5551 +++++++++++++++++++ data/options/ssp_anova_options_rope.csv | 5551 +++++++++++++++++++ 10 files changed, 13180 insertions(+), 120 deletions(-) create mode 100644 R/ssp_eq_anova.R create mode 100644 R/ssp_rope_anova.R create mode 100644 data-raw/options/anova_options_bayesian.R create mode 100644 data-raw/options/anova_options_eq.R create mode 100644 data-raw/options/anova_options_rope.R create mode 100644 data/options/ssp_anova_options_bayesian.csv create mode 100644 data/options/ssp_anova_options_eq.csv create mode 100644 data/options/ssp_anova_options_rope.csv diff --git a/R/ssp_bayesian_anova.R b/R/ssp_bayesian_anova.R index 80d9c7c..fe9405c 100644 --- a/R/ssp_bayesian_anova.R +++ b/R/ssp_bayesian_anova.R @@ -1,10 +1,11 @@ + #' Determine sample size with Bayes Factor Design Analysis (BFDA) -#' +#' #' The present method provides an expected sample size such that #' compelling evidence in the form of a Bayes factor can be collected #' for a given effect size with a certain long-run probability when #' allowing for sequential testing. -#' +#' #' @param mu Numeric. The expected population mean values. #' @param thresh Integer. The Bayes factor threshold for inference. #' @param tpr Numeric. The long-run probability of obtaining a Bayes factor at least @@ -12,7 +13,7 @@ #' @param iter Integer. The number of simulations. #' @param prior_scale Numeric. Scale of the Cauchy prior distribution. #' @param max_n Integer. The maximum number of participants per group (all groups are assumed to have equal sample size). -#' +#' #' @return The function returns a list of four named numeric vectors. #' The first `n1` is the range of determined sample sizes for the given design. #' The second `tpr_out` is the range of TPRs that were provided as a parameter. @@ -21,34 +22,29 @@ #' @examples #' \dontrun{ #' SampleSizePlanner::ssp_anova_bf( -#' tpr = 0.8, max_n = 400, mu = c(1, 1.2, 1.5, 1.3), sigma = 2, +#' effect = "Interaction Effect", tpr = 0.8, max_n = 10001, mu = c(1.5, 1.5, 0, 1), sigma = 2, #' seed = NULL, thresh = 10, prior_scale = 1 / sqrt(2) #' ) #' } -ssp_anova_bf <- function( - effect = c("Main Effect 1", "Main Effect 2", "Interaction Effect"), - iter = 1000, max_n, mu, sigma, seed = NULL, tpr, thresh = 10, prior_scale = 1 / sqrt(2)) { - +ssp_anova_bf <- function(effect, mu, tpr, thresh , prior_scale, iter, max_n = 10001, max_bf = 1e8, sigma = 1, seed = NULL) { + tpr_optim_res <- tpr_optim( fun = twoway_ANOVA_bf_pwr, + range = c(4, max_n), + tpr = tpr, effect = effect, iter = iter, - range = c(4, max_n), mu = mu, sigma = sigma, seed = seed, - tpr = tpr, thresh = thresh, + max_bf = max_bf, prior_scale = prior_scale ) - + return( - list( - n1 = tpr_optim_res$n1, - tpr_out = tpr_optim_res$tpr_out, - effect = effect - ) + tpr_optim_res ) } @@ -59,69 +55,60 @@ twoway_ANOVA_bf_pwr <- function( n = n1, mu = mu, sigma = sigma, - thresh = 10, - prior_scale = 1 / sqrt(2), + thresh = thresh, + max_bf = max_bf, + prior_scale = prior_scale, seed = NULL) { - - # Evaluate if effects are specified - effects <- c("Main Effect 1", "Main Effect 2", "Interaction Effect") - if (is.na(effect)||!effect %in% effects) { - message("Specify which effect: Main or interaction") - stop(call. = FALSE) - } - - # Evaluate if parameters input are correct - if (!is.vector(mu)||length(mu) != 4) { - message("Mean group must be a vector of four!") - stop(call. = FALSE) - } - - # Arrange order so that m1_1 < m1_2 & m2_1 < m2_2 - sorted_mu <- c(sort(mu[1:2]), sort(mu[3:4])) - # sort that m1_1 < m2_1 - if (sorted_mu[3] < sorted_mu[1]) { - sorted_mu <- c(sorted_mu[c(3:4, 1:2)]) - } - - # Re-scale the mu and sigma before calculating the power - while (sorted_mu[1] != 0 | sigma != 1) { - sorted_mu = sorted_mu/sigma # scale mu by sigma - sigma = sigma/sigma # scale sigma to 1 - sorted_mu = sorted_mu - sorted_mu[1] # scale the mean of first group to 0 - } - - # Create a data frame to store the p-values from each iteration + + # Create a data frame to store the bf-values from each iteration bf_value_data <- data.frame(matrix(NA, nrow = iter, ncol = 3)) colnames(bf_value_data) <- c("bf_grp1", "bf_grp2", "bf_int") - + # Set seeds set.seed(seed) - - # For each iteration, generate data, do ANOVA, and record p-values + + # For each iteration, generate data, do ANOVA, and record bf-values for (i in 1:iter) { # Generate data - + grp1 <- as.factor(rep(c(0, 1), each = 2*n)) grp2 <- as.factor(rep(c(0, 1), each = n, times = 2)) - - value <- c(rnorm(n = n, mean = sorted_mu[1], sd = sigma), # grp1 = 0; grp2 = 0 - rnorm(n = n, mean = sorted_mu[2], sd = sigma), # grp1 = 0; grp2 = 1 - rnorm(n = n, mean = sorted_mu[3], sd = sigma), # grp1 = 1; grp2 = 0 - rnorm(n = n, mean = sorted_mu[4], sd = sigma)) # grp1 = 1; grp2 = 1 + + value <- c(rnorm(n = n, mean = mu[1], sd = sigma), # grp1 = 0; grp2 = 0 + rnorm(n = n, mean = mu[2], sd = sigma), # grp1 = 0; grp2 = 1 + rnorm(n = n, mean = mu[3], sd = sigma), # grp1 = 1; grp2 = 0 + rnorm(n = n, mean = mu[4], sd = sigma)) # grp1 = 1; grp2 = 1 data <- data.frame(grp1, grp2, value) - + # Anova analysis - results <- BayesFactor::anovaBF(value ~ grp1 * grp2, data = data, + results <- BayesFactor::anovaBF(value ~ grp1 * grp2, data = data, whichModels = "withmain", rscaleEffects = prior_scale, progress = FALSE) bf_value <- BayesFactor::extractBF(results)[, 1] - - # Store p-values of each iteration into the data frame + # print(n) + # print(bf_value) + # Store bf-values of each iteration into the data frame + if (bf_value[3] == Inf) { # fixing infinite bf_value in the denominator to max 10e6 + bf_value[3] <- .Machine$integer.max} bf_value_data[i, ] <- c( - bf_value[1], # p-value for main effect (group 1) - bf_value[2], # p-value for main effect (group 2) - bf_value[4] / bf_value[3] # p-value for interaction effect (grp1 + grp2 + grp1:grp2 vs. grp1 + grp2) + bf_value[1], # bf-value for main effect (group 1) + bf_value[2], # bf-value for main effect (group 2) + bf_value[4] / bf_value[3] # bf-value for interaction effect (grp1 + grp2 + grp1:grp2 vs. grp1 + grp2) ) + + # evaluating if upper bound of BF is exceeded repeatedly + if (i==11 & all(bf_value_data[1:10,1] > max_bf) & effect == "Main Effect 1") { + return(pwr_grp1 = 1) + break + } + if (i==11 & all(bf_value_data[1:10,2] > max_bf) & effect == "Main Effect 2") { + return(pwr_grp2 = 1) + break + } + if (i==11 & all(bf_value_data[1:10,3] > max_bf) & effect == "Interaction Effect") { + return(pwr_int = 1) + break + } } # Determine which effect that gets evaluated or shown @@ -132,5 +119,5 @@ twoway_ANOVA_bf_pwr <- function( } else if (effect == "Interaction Effect") { return(pwr_int = sum(bf_value_data$bf_int >= thresh) / iter) } - } + diff --git a/R/ssp_eq_anova.R b/R/ssp_eq_anova.R new file mode 100644 index 0000000..519907b --- /dev/null +++ b/R/ssp_eq_anova.R @@ -0,0 +1,110 @@ +######################################################################################### +#' Determine sample size with the Bayesian Equivalence Interval Method +#' +#' Script for pre-calculation. +#' The present method provides an expected sample size such that +#' compelling evidence in the form of a Bayes factor can be collected +#' for a given eq band with a certain long-run probability. +#' +#' + +############################SSP Function for EQ########################################## +# function to calculate the minimum sample size for a 2-way anova with the EQ method +ssp_twoway_ANOVA_eq_pwr <- function(mu, effect, eq_band, tpr, thresh, prior_scale, iter, post_iter = 1000, sigma = 1, prior_location = 0, max_n = 10001) { + delta <- ifelse(effect == "Main Effect 1", + (base::mean(mu[1],mu[2])-base::mean(mu[3], mu[4])), # Main Effect 1 + (base::mean(mu[1],mu[3])-base::mean(mu[2], mu[4]))) # Main Effect 2 + est <- ssp_tost(tpr = tpr, eq_band = eq_band, delta = delta, alpha = 0.05, max_n = 10001) + result <- tpr_optim( + fun = twoway_ANOVA_eq_pwr, + range = c(10,round(est$n1/2)), + mu = mu, + effect = effect, + eq_band = eq_band, + tpr = tpr, + thresh = thresh, + prior_scale = prior_scale, + sigma = sigma, + iter = iter, + post_iter = post_iter, + prior_location = prior_location + ) + + return(result) +} + + + +############################Equivalence Interval Function################################ +# function for pre-calculations to calculate the tpr for a 2-way anova with the +# Bayesian Equivalence Interval Method + +twoway_ANOVA_eq_pwr <- function( + n = n1, + effect = effect, + eq_band = eq_band, + iter = iter, + post_iter = post_iter, + mu = mu, + sigma = sigma, + thresh = thresh, + prior_scale = prior_scale, + prior_location = prior_location, + seed = NULL) { + + # Create a data frame to store the bayes factors from each iteration + bf_data <- data.frame(matrix(NA, nrow = iter, ncol = 2)) + colnames(bf_data) <- c("bf_grp1", "bf_grp2") + + # For each iteration, generate data, do ANOVA, and calculate Equivalence Interval Bayes Factor + for (i in 1:iter) { + + # Generate data + grp1 <- as.factor(rep(c(0, 1), each = 2*n)) + grp2 <- as.factor(rep(c(0, 1), each = n, times = 2)) + value <- c(rnorm(n = n, mean = mu[1], sd = sigma), # grp1 = 0; grp2 = 0 + rnorm(n = n, mean = mu[2], sd = sigma), # grp1 = 0; grp2 = 1 + rnorm(n = n, mean = mu[3], sd = sigma), # grp1 = 1; grp2 = 0 + rnorm(n = n, mean = mu[4], sd = sigma)) # grp1 = 1; grp2 = 1 + data <- data.frame(grp1, grp2, value) + + # Equivalence Interval Bayes Factor Method + # Main Effects + results1 <- BayesFactor::lmBF(value ~ grp1, data = data, rscaleEffects = prior_scale, progress = FALSE) + results2 <- BayesFactor::lmBF(value ~ grp2, data = data, rscaleEffects = prior_scale, progress = FALSE) + + # Sample from posterior distribution with for each main effect + posterior1 <- BayesFactor::posterior(results1, iterations = post_iter, progress = FALSE) + posterior2 <- BayesFactor::posterior(results2, iterations = post_iter, progress = FALSE) + + # grab the delta distributions for each main effect + delta1 <- (posterior1[,3]-posterior1[,2])/base::sqrt(posterior1[,4]) + delta2 <- (posterior2[,3]-posterior2[,2])/base::sqrt(posterior2[,4]) + + # area inside the equivalence interval of posterior distributions + post_1_dens <- (base::sum(delta1 thresh + if (effect == "Main Effect 1") { + return(pwr_grp1 = base::sum(bf_data[,1]> thresh) / iter) + } else if (effect == "Main Effect 2") { + return(pwr_grp2 = base::sum(bf_data[,2]> thresh) / iter) + } + +} diff --git a/R/ssp_rope_anova.R b/R/ssp_rope_anova.R new file mode 100644 index 0000000..175ff9d --- /dev/null +++ b/R/ssp_rope_anova.R @@ -0,0 +1,99 @@ +########################################################################################## +#' Determine sample size with the Region of Practical Equivalence Method (ROPE) +#' +#' Script for pre-calculations! +#' The present method provides an expected sample size such that +#' compelling evidence in the form of the Highest Density Interval can be collected +#' for a given eq band with a certain long-run probability. +#' + + +############################SSP Function for ROPE########################################## +# function to calculate the minimum sample size for a 2-way anova with the ROPE method +ssp_twoway_ANOVA_rope_pwr <- function(mu, effect, eq_band, tpr, ci, prior_scale, iter, post_iter = 1000, sigma = 1, prior_location = 0, max_n = 10001) { + + result <- tpr_optim( + fun = twoway_ANOVA_rope_pwr, + range = c(10, max_n), + mu = mu, + effect = effect, + eq_band = eq_band, + tpr = tpr, + ci = ci, + prior_scale = prior_scale, + sigma = sigma, + iter = iter, + post_iter = post_iter, + prior_location = prior_location + ) + + return(result) +} + + + +############################ROPE Function################################################## +# function to calculate the tpr for a 2-way anova with the ROPE method +twoway_ANOVA_rope_pwr <- function( + n = n1, + effect = effect, + eq_band = eq_band, + iter = iter, + post_iter = post_iter, + mu = mu, + sigma = sigma, + ci = ci, + prior_scale = prior_scale, + prior_location = prior_location, + seed = NULL) +{ + + # Create a data frame to store the bayes factors from each iteration + rope_data <- data.frame(matrix(NA, nrow = iter, ncol = 2)) + colnames(rope_data) <- c("rope_grp1", "rope_grp2") + + # For each iteration, generate data, do ANOVA, and calculate ROPE + for (i in 1:iter) { + + # Generate data + grp1 <- as.factor(rep(c(0, 1), each = 2*n)) + grp2 <- as.factor(rep(c(0, 1), each = n, times = 2)) + value <- c(rnorm(n = n, mean = mu[1], sd = sigma), # grp1 = 0; grp2 = 0 + rnorm(n = n, mean = mu[2], sd = sigma), # grp1 = 0; grp2 = 1 + rnorm(n = n, mean = mu[3], sd = sigma), # grp1 = 1; grp2 = 0 + rnorm(n = n, mean = mu[4], sd = sigma)) # grp1 = 1; grp2 = 1 + data <- data.frame(grp1, grp2, value) + + # ROPE Method + # Main Effects + results1 <- BayesFactor::lmBF(value ~ grp1, data = data, rscaleEffects = prior_scale, progress = FALSE) + results2 <- BayesFactor::lmBF(value ~ grp2, data = data, rscaleEffects = prior_scale, progress = FALSE) + + # Sample from posterior distribution with for each main effect + posterior1 <- BayesFactor::posterior(results1, iterations = post_iter, progress = FALSE) + posterior2 <- BayesFactor::posterior(results2, iterations = post_iter, progress = FALSE) + + # grab the delta distributions for each main effect + delta1 <- (posterior1[,3]-posterior1[,2])/base::sqrt(posterior1[,4]) + delta2 <- (posterior2[,3]-posterior2[,2])/base::sqrt(posterior2[,4]) + + # Calculate the 95% HDI + HDI_1 <- bayestestR::hdi(delta1, ci = ci) + HDI_2 <- bayestestR::hdi(delta2, ci = ci) + + # Check if HDI within eq band and store counts of each iteration into the data frame + rope_data[i, ] <- c( + ifelse(HDI_1[1,3]>-eq_band & HDI_1[1,4]-eq_band & HDI_2[1,4]% - dplyr::filter(m12 <= m21) %>% - dplyr::slice(-1) %>% - dplyr::rowwise() %>% - dplyr::mutate(mu = mapply(c, m11, m12, m21, m22, SIMPLIFY = FALSE)) %>% - dplyr::ungroup() %>% - dplyr::mutate(iter = row_number()) +# Dataframe is created with "./data-raw/options/anova_options_bayesian.R +bayes_anova_options <- readr::read_csv(here("data/options/ssp_anova_options_bayesian.csv")) |> + mutate(iter = row_number()) # Set file directory ----------------------------------------------------------- ifelse( - !dir.exists("../data/bayes-anova-res"), - dir.create("../data/bayes-anova-res"), + !dir.exists(here("data/bayes-anova-res")), + dir.create(here("data/bayes-anova-res")), "Directory already exists." ) @@ -77,30 +39,33 @@ ifelse( # As a safety net: # Instead of running a batch of 75 possible configurations, +n_batches <- 75 # First, we split all the iteration as list bayes_anova_options_split <- split(bayes_anova_options, bayes_anova_options$iter) # Since each iteration has 75 possible configuration, then we need: -n_saves <- ceiling(length(bayes_anova_options_split) / 75) +n_saves <- ceiling(length(bayes_anova_options_split) / n_batches) init <- 1 # Run iterations -for (i in 24:n_saves) { +for (i in 1:n_saves) { # Print the current iteration - print(paste("Iteration", i, "is running currently.")) + print(paste("Batch", i, "is running currently.")) # Slice the data - start_index <- (i - 1) * 75 + 1 - end_index <- min(i * 75, length(bayes_anova_options_split)) + start_index <- (i - 1) * n_batches + 1 + end_index <- min(i * n_batches, length(bayes_anova_options_split)) bayes_anova_options_sliced <- bayes_anova_options_split[start_index:end_index] # Calculate Bayesian ANOVA ssp_bayes_anova_res <- future.apply::future_lapply(bayes_anova_options_sliced, function(x) { - # print(paste("Running:", "tpr:", x$tpr, "effect:", x$effect, "thresh:", x$thresh, "prior:", x$prior_scale)) - safe_ssp_anova_bf( + # Print params of current iteration + print(paste("Running:", "tpr:", x$tpr, "effect:", x$effect, "thresh:", x$thresh, "prior:", x$prior_scale)) + # Calculate sample size + results <- safe_ssp_anova_bf( tpr = x$tpr, effect = x$effect, thresh = x$thresh, @@ -112,10 +77,22 @@ for (i in 24:n_saves) { mu = c(x$m11, x$m12, x$m21, x$m22), sigma = 1 ) + # Combine results with params to save + list( + parameters = list( + tpr = x$tpr, + effect = x$effect, + thresh = x$thresh, + prior_scale = x$prior_scale, + mu = c(x$m11, x$m12, x$m21, x$m22), + sigma = 1 + ), + output = results + ) }) - + # Save the results - saveRDS(ssp_bayes_anova_res, paste0("../data/bayes-anova-res/set-", i, ".rds")) + saveRDS(ssp_bayes_anova_res, here(paste0("data/bayes-anova-res/set-", i, ".rds"))) # Remove object rm(ssp_bayes_anova_res) diff --git a/data-raw/options/anova_options_bayesian.R b/data-raw/options/anova_options_bayesian.R new file mode 100644 index 0000000..2451d91 --- /dev/null +++ b/data-raw/options/anova_options_bayesian.R @@ -0,0 +1,39 @@ +############################Bayesian 2-way ANOVA########################################## +# Create a csv file containing all configurations used for the pre calculations +# Create a data frame storing all possible configurations for the method +bayes_anova_options <- + as.data.frame( + tidyr::expand_grid( + m11 = 0, + m12 = base::seq(0, 1.25, by = 0.25), + m21 = base::seq(0, 1.25, by = 0.25), + m22 = base::seq(0, 1.25, by = 0.25), + effect = c("Main Effect 1", "Main Effect 2", "Interaction Effect"), + tpr = seq(0.7, 0.9, by = 0.05), + thresh = c(10), + prior_scale = c(1 / sqrt(2)) + )) + +# sort the mean vectors +# create sorting function +sort_mu <- function (mu) { + if (mu[2]==base::min(mu)) {mu <- mu[c(2,1,4,3)]} # sort mu[1]=min(mu) + if (mu[3]==base::min(mu)) {mu <- mu[c(3,4,1,2)]} + if (mu[4]==base::min(mu)) {mu <- mu[c(4,3,2,1)]} + if (mu[1]==mu[2] & mu[3]>mu[4]) {mu <- mu[c(2,1,4,3)]}# if mu[1]=mu[2] or mu[1]=mu[3] we can apply operations again + if (mu[1]==mu[3] & mu[2]>mu[4]) {mu <- mu[c(3,4,1,2)]} + if (mu[2]>mu[3]) {mu <- mu[c(1,3,2,4)]} # sort mu[2]mu[4]) {mu <- mu[c(2,1,4,3)]}# if mu[1]=mu[2] or mu[1]=mu[3] we can apply operations again + if (mu[1]==mu[3] & mu[2]>mu[4]) {mu <- mu[c(3,4,1,2)]} + if (mu[2]>mu[3]) {mu <- mu[c(1,3,2,4)]} # sort mu[2]mu[4]) {mu <- mu[c(2,1,4,3)]}# if mu[1]=mu[2] or mu[1]=mu[3] we can apply operations again + if (mu[1]==mu[3] & mu[2]>mu[4]) {mu <- mu[c(3,4,1,2)]} + if (mu[2]>mu[3]) {mu <- mu[c(1,3,2,4)]} # sort mu[2]