Merge pull request #42 from egouldo/update-box-cox-weights

Investigate and fix rating analysis weights
egouldo · Jun 14, 2024 · 9d24c31 · 9d24c31
2 parents 77c89f6 + 9e7a50a
commit 9d24c31
Show file tree

Hide file tree

Showing 19 changed files with 3,149 additions and 2,187 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ManyEcoEvo
 Title: Meta-analyse data from 'Many-Analysts' style studies
-Version: 1.3.0
+Version: 1.5.0
 Authors@R: c(person(given = "Elliot",
                     family = "Gould", 
                     email =  "[email protected]", 

diff --git a/NAMESPACE b/NAMESPACE
@@ -92,6 +92,7 @@ export(summarise_variable_counts)
 export(validate_predictions)
 export(validate_predictions_df_blue_tit)
 export(validate_predictions_df_euc)
+import(cli)
 import(dplyr)
 import(ggbeeswarm)
 import(ggplot2)
@@ -120,9 +121,15 @@ importFrom(forcats,fct_relevel)
 importFrom(magrittr,"%>%")
 importFrom(pointblank,col_vals_not_null)
 importFrom(purrr,map)
+importFrom(purrr,map2)
+importFrom(purrr,map_chr)
 importFrom(purrr,map_dfr)
+importFrom(purrr,pluck)
+importFrom(purrr,pmap)
+importFrom(purrr,possibly)
 importFrom(purrr,set_names)
 importFrom(rlang,is_na)
+importFrom(rlang,is_null)
 importFrom(rlang,na_chr)
 importFrom(sae,bxcx)
 importFrom(tidyr,pivot_longer)

diff --git a/R/fit_boxcox_ratings_cat.R b/R/fit_boxcox_ratings_cat.R
@@ -37,20 +37,19 @@ fit_boxcox_ratings_cat <- function(.data, outcome, outcome_var, interceptless =
            obs_id = 1:n()) 
 
   if(interceptless == FALSE){
+
     f <- rlang::new_formula(rlang::ensym(outcome), 
                             expr(PublishableAsIs + 
                                    (1 | ReviewerId) # + (1 | study_id ) RE omitted due to convergence issues
                             ))
-    mod <- lme4::lmer(f,
-                      data = data_tbl ,
-                      weights = I(1/pull(data_tbl,{{outcome_var}}))
-                      )
+
+    mod <- lme4::lmer(formula = f, data = data_tbl)
+
   }else(#interceptless: for plotting
+
     mod <- lme4::lmer(rlang::new_formula(rlang::ensym(outcome), 
-                                         expr(-1 + PublishableAsIs + (1 | ReviewerId))), #+ (1 | study_id) #problem with the groups
-                      data = data_tbl #,
-                      # weights = I(1/pull(data_tbl,{{outcome_var}}))
-                      )
+                                         expr(-1 + PublishableAsIs + (1 | ReviewerId))),
+                      data = data_tbl)
   )
 
   return(mod)

diff --git a/R/generate_collinearity_subset.R b/R/generate_collinearity_subset.R
@@ -18,12 +18,16 @@
 #' - `generate_rating_subsets()`
 #' 
 #' `generate_collinearity_subset()` only creates collinearity subsets based on the full dataset where `exclusion_set == "complete"` and `publishable_subset == "All"` and `expertise_subset == "All"`.
+#' @import dplyr
+#' @importFrom purrr map
+#' @importFrom purrr map2
+#' @importFrom purrr pluck
 #' @examples
 #' ManyEcoEvo %>%
 #' prepare_response_variables(estimate_type = "Zr") |>
 #' generate_exclusion_subsets(estimate_type = "Zr") |>
 #' generate_rating_subsets() |>
-#' generate_expertise_subsets(expert_subset) |>
+#' generate_expertise_subsets(ManyEcoEvo:::expert_subset) |>
 #' generate_collinearity_subset(collinearity_subset = collinearity_subset)
 generate_collinearity_subset <- function(ManyEcoEvo, collinearity_subset) {
   # Check if the inputs are a dataframe
@@ -56,7 +60,7 @@ generate_collinearity_subset <- function(ManyEcoEvo, collinearity_subset) {
     mutate(diversity_data = 
              map2(.x = diversity_data, 
                   .y = data, 
-                  .f = ~ semi_join(.x, .y) %>% distinct),
+                  .f = ~ semi_join(.x, .y, join_by(id_col, dataset)) %>% distinct),
            collinearity_subset = "collinearity_removed")
 
   out <- bind_rows(

diff --git a/R/generate_expertise_subsets.R b/R/generate_expertise_subsets.R
@@ -16,13 +16,11 @@
 #' library(ManyEcoEvo)
 #' library(tidyverse)
 #' library(targets)
-#' targets::tar_load(ManyEcoEvo)
-#' targets::tar_load(expert_subset)
 #' ManyEcoEvo %>%
 #' prepare_response_variables(estimate_type = "Zr") |>
 #' generate_exclusion_subsets(estimate_type = "Zr") |>
 #' generate_rating_subsets() |>
-#' generate_expertise_subsets(expert_subset)
+#' generate_expertise_subsets(ManyEcoEvo:::expert_subset)
 generate_expertise_subsets <- function(ManyEcoEvo, expert_subset) {
   #TODO idea, allow ellipses arg in function and pass those expressions to filter.
   # that way isn't hardcoded in the function. Repeat for all other generate / exclude map funs

diff --git a/R/meta_analyse_datasets.R b/R/meta_analyse_datasets.R
@@ -14,6 +14,10 @@
 #'
 #' @return A nested dataframe with all columns of object passed to arg `MA_data`, but with additional columns for the results of each analysis: `MA_mod`, `sorensen_glm`, `box_cox_ratings_cont`, `box_cox_ratings_cat`, `box_cox_rating_cat_no_int`, `uni_mixed_effects`
 #' @export
+#' @importFrom purrr map_chr map2 map possibly pmap
+#' @import dplyr
+#' @import cli
+#' @importFrom rlang na_chr is_null
 #' @family Multi-dataset Wrapper Functions
 #'
 #' @examples
@@ -23,8 +27,7 @@
 #'   #   dplyr::filter(dataset == "eucalyptus",
 #'   #          (max(VZr, na.rm = TRUE) == VZr)) TODO, do we need to include now that INF's removed?
 meta_analyse_datasets <- function(MA_data){
-  #example:
-
+
   poss_fit_metafor_mv <- purrr::possibly(fit_metafor_mv,
                                          otherwise = NA,
                                          quiet = FALSE)
@@ -34,7 +37,7 @@ meta_analyse_datasets <- function(MA_data){
   fit_MA_mv <- function(effects_analysis, Z_colname, VZ_colname, estimate_type){
     Zr <- effects_analysis %>%  pull({{Z_colname}})
     VZr <- effects_analysis %>%  pull({{VZ_colname}})
-    mod <- fit_metafor_mv(estimate = Zr, 
+    mod <- poss_fit_metafor_mv(estimate = Zr, 
                           variance = VZr, 
                           estimate_type = estimate_type, 
                           data = effects_analysis)
@@ -45,7 +48,7 @@ meta_analyse_datasets <- function(MA_data){
     # Must group by cols else multiple "effects_analysis" elements
     # get passed to fit_MA_mv()
     MA_data <- MA_data %>% 
-      group_by(estimate_type, dataset, exclusion_set, publishable_subset, expertise_subset)
+      group_by(estimate_type, dataset, exclusion_set, publishable_subset, expertise_subset, collinearity_subset)
   } else {
     MA_data <- MA_data %>% 
       group_by(estimate_type, dataset, exclusion_set)

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/_targets.R b/_targets.R
@@ -65,9 +65,6 @@ list(tarchetypes::tar_file_read(name = euc_reviews,
      tarchetypes::tar_file_read(name = list_of_new_prediction_files,
                                 command = "data-raw/analyst_data/S2/list_of_new_csv_files.csv",
                                 read = readr::read_csv(!!.x)),
-     tarchetypes::tar_file_read(name = expert_subset,
-                                command = "data-raw/metadata_and_key_data/Good_Statistician_ResponseIds.csv",
-                                read = readr::read_csv(file = !!.x)),
      targets::tar_target(name = all_review_data,
                          command = prepare_review_data(bt_reviews,euc_reviews)),
      targets::tar_target(ManyEcoEvo,
@@ -79,7 +76,7 @@ list(tarchetypes::tar_file_read(name = euc_reviews,
                            prepare_response_variables(estimate_type = "Zr") |>  
                            generate_exclusion_subsets(estimate_type = "Zr") |> 
                            generate_rating_subsets() |> 
-                           generate_expertise_subsets(expert_subset) |>
+                           generate_expertise_subsets(ManyEcoEvo:::expert_subset) |>
                            generate_collinearity_subset(ManyEcoEvo:::collinearity_subset) |>
                            compute_MA_inputs(estimate_type = "Zr") |> 
                            generate_outlier_subsets() |> # TODO run before MA_inputs? diversity indices need to be recalculated!!