From 217dc48d1049f42cdc9b7b2f5b8f680e57f28021 Mon Sep 17 00:00:00 2001
From: egouldo <egouldo@users.noreply.github.com>
Date: Mon, 2 Sep 2024 12:43:33 +1000
Subject: [PATCH] - bug!: #42 #63 update eucalyptus yi results / viz extraction
 now that logged model fitting is in `ManyEcoEvo::`

---
 index.qmd | 327 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 199 insertions(+), 128 deletions(-)

diff --git a/index.qmd b/index.qmd
index 9168eee..f8aa52e 100644
--- a/index.qmd
+++ b/index.qmd
@@ -2858,123 +2858,194 @@ plot_forest_2 <- function(data, intercept = TRUE, MA_mean = TRUE, y_zoom = numer
   print(p)
 }
 
-back_transformed_predictions <- 
-  ManyEcoEvo_yi %>% 
-  dplyr::mutate(data = 
-                  purrr::map(data, 
-                             ~ dplyr::filter(.x,
-                                             stringr::str_detect(response_variable_type, "constructed",                                                                       negate = TRUE)))) %>% 
-  prepare_response_variables_yi(estimate_type = "yi",
-                                param_table = ManyEcoEvo:::analysis_data_param_tables) %>% 
-  generate_yi_subsets()
-
-
-raw_mod_data_logged <- 
-  back_transformed_predictions %>% 
-  filter(dataset == "eucalyptus") %>%
-  group_by(estimate_type) %>% 
-  select(estimate_type, data) %>% 
-  unnest(data) %>% 
-  rename(study_id = id_col) %>% 
-  hoist(params, param_mean = list("value", 1), param_sd = list("value", 2)) %>% 
-  rowwise() %>% 
-  mutate(exclusion_threshold = param_mean + 3*param_sd) %>% 
-  filter(fit < exclusion_threshold) %>% 
-  mutate(log_vals = map2(fit, se.fit, log_transform, 1000)) %>% 
-  unnest(log_vals) %>%
-  select(study_id, 
-         TeamIdentifier,
-         estimate_type, 
-         starts_with("response_"), 
-         -response_id_S2, 
-         ends_with("_log")) %>% 
-  group_by(estimate_type) %>% 
-  nest()
-  
+# ---- new code ----
+
+MA_yi_summary_stats <- 
+  bind_rows(
+    {ManyEcoEvo_yi_viz %>% 
+        filter(dataset == "eucalyptus", model_name == "MA_mod")  %>% 
+        unnest(cols = tidy_mod_summary) %>%  
+        mutate(response_scale = list(log_back(estimate, std.error, 1000)), 
+               .by = c(dataset, estimate_type, term, type), 
+               .keep = "used") %>% 
+        select(-estimate, -std.error) %>% 
+        unnest_wider(response_scale) %>% 
+        rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% 
+        nest(tidy_mod_summary = c(-dataset, -estimate_type))},
+    {ManyEcoEvo_yi_viz %>% 
+        filter(dataset == "blue tit", 
+               model_name == "MA_mod") %>%  
+        select(estimate_type, dataset, tidy_mod_summary)}
+  ) %>% 
+  mutate(MA_mean = map(tidy_mod_summary, filter, type == "summary")) %>%
+  hoist(MA_mean, 
+        mean = "estimate", 
+        MA_conf.low = "conf.low", 
+        MA_conf.high = "conf.high") %>% 
+  mutate(max_min_est = 
+           map(tidy_mod_summary, ~ filter(.x, type == "study") %>% 
+                 summarise(max_est = max(estimate), 
+                           min_est = min(estimate))), 
+         max_min_CI = 
+           map(tidy_mod_summary, ~ filter(.x, type == "study") %>%
+                 summarise(max_upper_CI = max(conf.high), 
+                           min_lower_CI = min(conf.low)))) %>% 
+  unnest_wider(col = c(max_min_est, max_min_CI)) %>% 
+  select(-MA_mean) %>% 
+  rename(MA_mean = mean)
 
-mod_data_logged <- raw_mod_data_logged %>% 
-  mutate(MA_mod = 
-           map(data, 
-               ~ ManyEcoEvo::fit_MA_mv(effects_analysis = .x, 
-                                      Z_colname = mean_log, 
-                                      VZ_colname = std.error_log, 
-                                      estimate_type = "yi")))
-
-
-plot_data_logged <- mod_data_logged %>% 
-  mutate(tidy_mod = map(.x = MA_mod,
-                        ~broom::tidy(.x,
-                                     conf.int = TRUE, 
-                                     include_studies = TRUE) %>% 
-                          rename(study_id = term))) %>% 
-  select(tidy_mod) %>% 
-  unnest(cols = c(tidy_mod)) 
-
-
-MA_yi_summary_stats <- # ALL ON logged RESPONSE SCALE for EUC, standardized response values for BT
-  plot_data_logged %>% 
-    mutate(response_scale = map2(estimate, std.error, log_back, 100)) %>% 
-    select(estimate_type, study_id, type, response_scale) %>% 
-    unnest(response_scale) %>% 
-    rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% 
-    nest(tidy_mod = -estimate_type) %>% 
-  mutate(dataset = "eucalyptus") %>% 
-  bind_rows({
-    ManyEcoEvo_yi_results %>% 
-    ungroup() %>% 
-    filter(exclusion_set == "complete", dataset == "blue tit") %>% 
-    select(dataset, estimate_type, MA_mod, effects_analysis, -exclusion_set) %>% 
-    group_by(estimate_type, dataset) %>% 
-    transmute(tidy_mod = map(.x = MA_mod,
-                                    ~broom::tidy(.x,
-                                                 conf.int = TRUE, 
-                                                 include_studies = TRUE) %>% 
-                                        rename(study_id = term)))
-  }) %>% 
-    mutate(MA_mean = map(tidy_mod, filter, type == "summary")) %>% 
-    hoist(MA_mean, 
-          mean = "estimate", 
-          MA_conf.low = "conf.low", 
-          MA_conf.high = "conf.high") %>% 
-    mutate(max_min_est = map(tidy_mod, 
-                             ~ filter(.x, type == "study") %>%
-                                 summarise(max_est = max(estimate),
-                                           min_est = min(estimate)))) %>% 
-    mutate(max_min_CI = map(tidy_mod, 
-                             ~ filter(.x, type == "study") %>%
-                                 summarise(max_upper_CI = max(conf.high),
-                                           min_lower_CI = min(conf.low)))) %>% 
-    unnest_wider(col = c(max_min_est, max_min_CI)) %>% 
-  ungroup %>% 
-  rows_update({plot_data_logged %>% #hells yes to this gem of a function!
-      mutate(dataset = "eucalyptus") %>% 
-    filter(type != "summary") %>% 
-    nest(tidy_mod = c(-estimate_type, -dataset))}, 
-    by = c("dataset", "estimate_type")) %>% 
+eucalyptus_yi_plot_data <- MA_yi_summary_stats %>% #extract euc data for plotting (on count scale, not log scale)
+  filter(dataset == "eucalyptus") %>% 
+  select(dataset, estimate_type, tidy_mod_summary)
+
+MA_yi_summary_stats <- 
+  MA_yi_summary_stats %>% 
+  left_join(., {MA_yi_summary_stats %>% 
+      select(dataset, estimate_type, tidy_mod_summary) %>% 
+      filter(dataset == "blue tit") %>% 
       mutate(no_effect = 
-                  map_int(tidy_mod, 
-                          ~ filter(.x, 
-                                   estimate >0 & conf.low <= 0 | estimate <0 & conf.high >= 0, 
-                                   type == "study") %>% 
-                            nrow() ),
-                pos_sign = 
-                  map_int(tidy_mod, 
-                          ~ filter(.x, estimate >0, conf.low > 0, 
-                                   type == "study") %>% 
-                            nrow()),
-                neg_sign = 
-                  map_int(tidy_mod, 
-                          ~ filter(.x, estimate < 0, conf.high < 0, 
-                                   type == "study") %>% 
-                            nrow()),
+               map_int(tidy_mod_summary, 
+                       ~ filter(.x, 
+                                estimate >0 & conf.low <= 0 | estimate <0 & conf.high >= 0, 
+                                type == "study") %>% 
+                         nrow() ),
+             pos_sign = 
+               map_int(tidy_mod_summary, 
+                       ~ filter(.x, estimate >0, conf.low > 0, 
+                                type == "study") %>% 
+                         nrow()),
+             neg_sign = 
+               map_int(tidy_mod_summary, 
+                       ~ filter(.x, estimate < 0, conf.high < 0, 
+                                type == "study") %>% 
+                         nrow()),
              total_effects = 
-               map_int(tidy_mod,
+               map_int(tidy_mod_summary,
                        ~ filter(.x, 
                                 type == "study") %>% 
-                                nrow()
-                       )) %>% 
-  select(-tidy_mod, -MA_mean) %>% 
-  rename(MA_mean = mean)
+                         nrow()
+               ), 
+             .by = c("dataset", "estimate_type"), 
+             .keep = "none")}, 
+      by = join_by(dataset, estimate_type)) %>% 
+  select(-tidy_mod_summary)
+
+
+# --- old code ----
+
+# 
+# back_transformed_predictions <- 
+#   ManyEcoEvo_yi %>% 
+#   prepare_response_variables_yi(estimate_type = "yi",
+#                                 param_table = ManyEcoEvo:::analysis_data_param_tables) %>% 
+#   generate_yi_subsets()
+# 
+# 
+# raw_mod_data_logged <- 
+#   back_transformed_predictions %>% 
+#   filter(dataset == "eucalyptus") %>%
+#   group_by(estimate_type) %>% 
+#   select(estimate_type, data) %>% 
+#   unnest(data) %>% 
+#   rename(study_id = id_col) %>% 
+#   hoist(params, param_mean = list("value", 1), param_sd = list("value", 2)) %>% 
+#   rowwise() %>% 
+#   mutate(exclusion_threshold = param_mean + 3*param_sd) %>% 
+#   filter(fit < exclusion_threshold) %>% 
+#   mutate(log_vals = map2(fit, se.fit, log_transform, 1000)) %>% 
+#   unnest(log_vals) %>%
+#   select(study_id, 
+#          TeamIdentifier,
+#          estimate_type, 
+#          starts_with("response_"), 
+#          -response_id_S2, 
+#          ends_with("_log")) %>% 
+#   group_by(estimate_type) %>% 
+#   nest()
+# 
+# 
+# mod_data_logged <- raw_mod_data_logged %>% 
+#   mutate(MA_mod = 
+#            map(data, 
+#                ~ ManyEcoEvo::fit_MA_mv(effects_analysis = .x, 
+#                                        Z_colname = mean_log, 
+#                                        VZ_colname = se_log, 
+#                                        estimate_type = "yi")))
+# 
+# 
+# plot_data_logged <- mod_data_logged %>% 
+#   mutate(tidy_mod = map(.x = MA_mod,
+#                         ~broom::tidy(.x,
+#                                      conf.int = TRUE, 
+#                                      include_studies = TRUE) %>% 
+#                           rename(study_id = term))) %>% 
+#   select(tidy_mod) %>% 
+#   unnest(cols = c(tidy_mod)) 
+# 
+# MA_yi_summary_stats <- # ALL ON logged RESPONSE SCALE for EUC, standardized response values for BT
+#   plot_data_logged %>% 
+#   mutate(response_scale = map2(estimate, std.error, log_back, 100)) %>% 
+#   select(estimate_type, study_id, type, response_scale) %>% 
+#   unnest(response_scale) %>% 
+#   rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% 
+#   nest(tidy_mod = -estimate_type) %>% 
+#   mutate(dataset = "eucalyptus") %>% 
+#   bind_rows(., {
+#     ManyEcoEvo_yi_results %>% 
+#       ungroup() %>% 
+#       filter(exclusion_set == "complete", dataset == "blue tit") %>% 
+#       select(dataset, estimate_type, MA_mod, effects_analysis, -exclusion_set) %>% 
+#       group_by(estimate_type, dataset) %>% 
+#       transmute(tidy_mod = map(.x = MA_mod,
+#                                ~broom::tidy(.x,
+#                                             conf.int = TRUE, 
+#                                             include_studies = TRUE) %>% 
+#                                  rename(study_id = term)))
+#   }) %>% 
+#   mutate(MA_mean = map(tidy_mod, filter, type == "summary")) %>% 
+#   hoist(MA_mean, 
+#         mean = "estimate", 
+#         MA_conf.low = "conf.low", 
+#         MA_conf.high = "conf.high") %>% 
+#   mutate(max_min_est = map(tidy_mod, 
+#                            ~ filter(.x, type == "study") %>%
+#                              summarise(max_est = max(estimate),
+#                                        min_est = min(estimate)))) %>% 
+#   mutate(max_min_CI = map(tidy_mod, 
+#                           ~ filter(.x, type == "study") %>%
+#                             summarise(max_upper_CI = max(conf.high),
+#                                       min_lower_CI = min(conf.low)))) %>% 
+#   unnest_wider(col = c(max_min_est, max_min_CI)) %>% 
+#   ungroup %>% 
+#   rows_update({plot_data_logged %>% #hells yes to this gem of a function!
+#       mutate(dataset = "eucalyptus") %>% 
+#       filter(type != "summary") %>% 
+#       nest(tidy_mod = c(-estimate_type, -dataset))}, 
+#       by = c("dataset", "estimate_type")) %>% 
+#   mutate(no_effect = 
+#            map_int(tidy_mod, 
+#                    ~ filter(.x, 
+#                             estimate >0 & conf.low <= 0 | estimate <0 & conf.high >= 0, 
+#                             type == "study") %>% 
+#                      nrow() ),
+#          pos_sign = 
+#            map_int(tidy_mod, 
+#                    ~ filter(.x, estimate >0, conf.low > 0, 
+#                             type == "study") %>% 
+#                      nrow()),
+#          neg_sign = 
+#            map_int(tidy_mod, 
+#                    ~ filter(.x, estimate < 0, conf.high < 0, 
+#                             type == "study") %>% 
+#                      nrow()),
+#          total_effects = 
+#            map_int(tidy_mod,
+#                    ~ filter(.x, 
+#                             type == "study") %>% 
+#                      nrow()
+#            )) %>% 
+#   select(-tidy_mod, -MA_mean) %>% 
+#   rename(MA_mean = mean)
 ```
 
 As with the effect size $Z_r$, we observed substantial variability in the size of out-of-sample predictions derived from the analysts' models.
@@ -2995,15 +3066,6 @@ The meta-analytic mean predictions for these three scenarios were similar; `r fi
 #| echo: false
 #| fig-keep: last
 
-yi_plot_data_bt <- 
-  ManyEcoEvo_yi_results %>% 
-  filter(exclusion_set == "complete", dataset == "blue tit") %>% 
-  select(MA_mod, -exclusion_set) %>% 
-  group_by(estimate_type, dataset) %>% 
-  transmute(tidy_mod = map(MA_mod, 
-                           ~ broom::tidy(.x, conf.int = TRUE, include_studies = TRUE) %>% 
-                             rename(id_col = term))) %>% 
-  unnest(tidy_mod) 
 plot_forest_yi <- function(data, intercept = TRUE, MA_mean = TRUE){
   if (MA_mean == FALSE){
     data <- filter(data, study_id != "overall")
@@ -3059,6 +3121,15 @@ plot_forest_yi <- function(data, intercept = TRUE, MA_mean = TRUE){
   
   print(p)
 }
+
+ManyEcoEvo_yi_viz %>% 
+  filter(dataset == "blue tit") %>% 
+  group_by(estimate_type, dataset) %>% 
+  select(estimate_type, dataset, tidy_mod_summary) %>% 
+  unnest(cols = tidy_mod_summary) %>% 
+  rename(study_id = term) %>% 
+  ungroup %>% 
+  plot_forest_yi(., intercept = TRUE, MA_mean = TRUE) +
   theme(axis.text.y = element_blank())
 ```
 
@@ -3070,13 +3141,13 @@ plot_forest_yi <- function(data, intercept = TRUE, MA_mean = TRUE){
 #| message: false
 #| fig-keep: last
 
-plot_data_logged %>% 
-  mutate(response_scale = map2(estimate, std.error, log_back, 1000)) %>% 
-  select(estimate_type, study_id, type, response_scale) %>% 
-  unnest(response_scale) %>% 
-  rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% 
-#  filter(estimate <1000) %>% 
-  plot_forest_2(MA_mean = T,y_zoom = c(0,40)) +
+eucalyptus_yi_plot_data %>% 
+  group_by(estimate_type, dataset) %>%
+  select(estimate_type, dataset, tidy_mod_summary) %>% 
+  unnest(cols = tidy_mod_summary) %>% 
+  rename(study_id = term) %>% 
+  ungroup %>% 
+  plot_forest_2(MA_mean = T,y_zoom = c(0,150)) +
    theme(axis.text.y = element_blank())
 ```