From 217dc48d1049f42cdc9b7b2f5b8f680e57f28021 Mon Sep 17 00:00:00 2001 From: egouldo Date: Mon, 2 Sep 2024 12:43:33 +1000 Subject: [PATCH] - bug!: #42 #63 update eucalyptus yi results / viz extraction now that logged model fitting is in `ManyEcoEvo::` --- index.qmd | 327 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 199 insertions(+), 128 deletions(-) diff --git a/index.qmd b/index.qmd index 9168eee..f8aa52e 100644 --- a/index.qmd +++ b/index.qmd @@ -2858,123 +2858,194 @@ plot_forest_2 <- function(data, intercept = TRUE, MA_mean = TRUE, y_zoom = numer print(p) } -back_transformed_predictions <- - ManyEcoEvo_yi %>% - dplyr::mutate(data = - purrr::map(data, - ~ dplyr::filter(.x, - stringr::str_detect(response_variable_type, "constructed", negate = TRUE)))) %>% - prepare_response_variables_yi(estimate_type = "yi", - param_table = ManyEcoEvo:::analysis_data_param_tables) %>% - generate_yi_subsets() - - -raw_mod_data_logged <- - back_transformed_predictions %>% - filter(dataset == "eucalyptus") %>% - group_by(estimate_type) %>% - select(estimate_type, data) %>% - unnest(data) %>% - rename(study_id = id_col) %>% - hoist(params, param_mean = list("value", 1), param_sd = list("value", 2)) %>% - rowwise() %>% - mutate(exclusion_threshold = param_mean + 3*param_sd) %>% - filter(fit < exclusion_threshold) %>% - mutate(log_vals = map2(fit, se.fit, log_transform, 1000)) %>% - unnest(log_vals) %>% - select(study_id, - TeamIdentifier, - estimate_type, - starts_with("response_"), - -response_id_S2, - ends_with("_log")) %>% - group_by(estimate_type) %>% - nest() - +# ---- new code ---- + +MA_yi_summary_stats <- + bind_rows( + {ManyEcoEvo_yi_viz %>% + filter(dataset == "eucalyptus", model_name == "MA_mod") %>% + unnest(cols = tidy_mod_summary) %>% + mutate(response_scale = list(log_back(estimate, std.error, 1000)), + .by = c(dataset, estimate_type, term, type), + .keep = "used") %>% + select(-estimate, -std.error) %>% + unnest_wider(response_scale) %>% + rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% + nest(tidy_mod_summary = c(-dataset, -estimate_type))}, + {ManyEcoEvo_yi_viz %>% + filter(dataset == "blue tit", + model_name == "MA_mod") %>% + select(estimate_type, dataset, tidy_mod_summary)} + ) %>% + mutate(MA_mean = map(tidy_mod_summary, filter, type == "summary")) %>% + hoist(MA_mean, + mean = "estimate", + MA_conf.low = "conf.low", + MA_conf.high = "conf.high") %>% + mutate(max_min_est = + map(tidy_mod_summary, ~ filter(.x, type == "study") %>% + summarise(max_est = max(estimate), + min_est = min(estimate))), + max_min_CI = + map(tidy_mod_summary, ~ filter(.x, type == "study") %>% + summarise(max_upper_CI = max(conf.high), + min_lower_CI = min(conf.low)))) %>% + unnest_wider(col = c(max_min_est, max_min_CI)) %>% + select(-MA_mean) %>% + rename(MA_mean = mean) -mod_data_logged <- raw_mod_data_logged %>% - mutate(MA_mod = - map(data, - ~ ManyEcoEvo::fit_MA_mv(effects_analysis = .x, - Z_colname = mean_log, - VZ_colname = std.error_log, - estimate_type = "yi"))) - - -plot_data_logged <- mod_data_logged %>% - mutate(tidy_mod = map(.x = MA_mod, - ~broom::tidy(.x, - conf.int = TRUE, - include_studies = TRUE) %>% - rename(study_id = term))) %>% - select(tidy_mod) %>% - unnest(cols = c(tidy_mod)) - - -MA_yi_summary_stats <- # ALL ON logged RESPONSE SCALE for EUC, standardized response values for BT - plot_data_logged %>% - mutate(response_scale = map2(estimate, std.error, log_back, 100)) %>% - select(estimate_type, study_id, type, response_scale) %>% - unnest(response_scale) %>% - rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% - nest(tidy_mod = -estimate_type) %>% - mutate(dataset = "eucalyptus") %>% - bind_rows({ - ManyEcoEvo_yi_results %>% - ungroup() %>% - filter(exclusion_set == "complete", dataset == "blue tit") %>% - select(dataset, estimate_type, MA_mod, effects_analysis, -exclusion_set) %>% - group_by(estimate_type, dataset) %>% - transmute(tidy_mod = map(.x = MA_mod, - ~broom::tidy(.x, - conf.int = TRUE, - include_studies = TRUE) %>% - rename(study_id = term))) - }) %>% - mutate(MA_mean = map(tidy_mod, filter, type == "summary")) %>% - hoist(MA_mean, - mean = "estimate", - MA_conf.low = "conf.low", - MA_conf.high = "conf.high") %>% - mutate(max_min_est = map(tidy_mod, - ~ filter(.x, type == "study") %>% - summarise(max_est = max(estimate), - min_est = min(estimate)))) %>% - mutate(max_min_CI = map(tidy_mod, - ~ filter(.x, type == "study") %>% - summarise(max_upper_CI = max(conf.high), - min_lower_CI = min(conf.low)))) %>% - unnest_wider(col = c(max_min_est, max_min_CI)) %>% - ungroup %>% - rows_update({plot_data_logged %>% #hells yes to this gem of a function! - mutate(dataset = "eucalyptus") %>% - filter(type != "summary") %>% - nest(tidy_mod = c(-estimate_type, -dataset))}, - by = c("dataset", "estimate_type")) %>% +eucalyptus_yi_plot_data <- MA_yi_summary_stats %>% #extract euc data for plotting (on count scale, not log scale) + filter(dataset == "eucalyptus") %>% + select(dataset, estimate_type, tidy_mod_summary) + +MA_yi_summary_stats <- + MA_yi_summary_stats %>% + left_join(., {MA_yi_summary_stats %>% + select(dataset, estimate_type, tidy_mod_summary) %>% + filter(dataset == "blue tit") %>% mutate(no_effect = - map_int(tidy_mod, - ~ filter(.x, - estimate >0 & conf.low <= 0 | estimate <0 & conf.high >= 0, - type == "study") %>% - nrow() ), - pos_sign = - map_int(tidy_mod, - ~ filter(.x, estimate >0, conf.low > 0, - type == "study") %>% - nrow()), - neg_sign = - map_int(tidy_mod, - ~ filter(.x, estimate < 0, conf.high < 0, - type == "study") %>% - nrow()), + map_int(tidy_mod_summary, + ~ filter(.x, + estimate >0 & conf.low <= 0 | estimate <0 & conf.high >= 0, + type == "study") %>% + nrow() ), + pos_sign = + map_int(tidy_mod_summary, + ~ filter(.x, estimate >0, conf.low > 0, + type == "study") %>% + nrow()), + neg_sign = + map_int(tidy_mod_summary, + ~ filter(.x, estimate < 0, conf.high < 0, + type == "study") %>% + nrow()), total_effects = - map_int(tidy_mod, + map_int(tidy_mod_summary, ~ filter(.x, type == "study") %>% - nrow() - )) %>% - select(-tidy_mod, -MA_mean) %>% - rename(MA_mean = mean) + nrow() + ), + .by = c("dataset", "estimate_type"), + .keep = "none")}, + by = join_by(dataset, estimate_type)) %>% + select(-tidy_mod_summary) + + +# --- old code ---- + +# +# back_transformed_predictions <- +# ManyEcoEvo_yi %>% +# prepare_response_variables_yi(estimate_type = "yi", +# param_table = ManyEcoEvo:::analysis_data_param_tables) %>% +# generate_yi_subsets() +# +# +# raw_mod_data_logged <- +# back_transformed_predictions %>% +# filter(dataset == "eucalyptus") %>% +# group_by(estimate_type) %>% +# select(estimate_type, data) %>% +# unnest(data) %>% +# rename(study_id = id_col) %>% +# hoist(params, param_mean = list("value", 1), param_sd = list("value", 2)) %>% +# rowwise() %>% +# mutate(exclusion_threshold = param_mean + 3*param_sd) %>% +# filter(fit < exclusion_threshold) %>% +# mutate(log_vals = map2(fit, se.fit, log_transform, 1000)) %>% +# unnest(log_vals) %>% +# select(study_id, +# TeamIdentifier, +# estimate_type, +# starts_with("response_"), +# -response_id_S2, +# ends_with("_log")) %>% +# group_by(estimate_type) %>% +# nest() +# +# +# mod_data_logged <- raw_mod_data_logged %>% +# mutate(MA_mod = +# map(data, +# ~ ManyEcoEvo::fit_MA_mv(effects_analysis = .x, +# Z_colname = mean_log, +# VZ_colname = se_log, +# estimate_type = "yi"))) +# +# +# plot_data_logged <- mod_data_logged %>% +# mutate(tidy_mod = map(.x = MA_mod, +# ~broom::tidy(.x, +# conf.int = TRUE, +# include_studies = TRUE) %>% +# rename(study_id = term))) %>% +# select(tidy_mod) %>% +# unnest(cols = c(tidy_mod)) +# +# MA_yi_summary_stats <- # ALL ON logged RESPONSE SCALE for EUC, standardized response values for BT +# plot_data_logged %>% +# mutate(response_scale = map2(estimate, std.error, log_back, 100)) %>% +# select(estimate_type, study_id, type, response_scale) %>% +# unnest(response_scale) %>% +# rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% +# nest(tidy_mod = -estimate_type) %>% +# mutate(dataset = "eucalyptus") %>% +# bind_rows(., { +# ManyEcoEvo_yi_results %>% +# ungroup() %>% +# filter(exclusion_set == "complete", dataset == "blue tit") %>% +# select(dataset, estimate_type, MA_mod, effects_analysis, -exclusion_set) %>% +# group_by(estimate_type, dataset) %>% +# transmute(tidy_mod = map(.x = MA_mod, +# ~broom::tidy(.x, +# conf.int = TRUE, +# include_studies = TRUE) %>% +# rename(study_id = term))) +# }) %>% +# mutate(MA_mean = map(tidy_mod, filter, type == "summary")) %>% +# hoist(MA_mean, +# mean = "estimate", +# MA_conf.low = "conf.low", +# MA_conf.high = "conf.high") %>% +# mutate(max_min_est = map(tidy_mod, +# ~ filter(.x, type == "study") %>% +# summarise(max_est = max(estimate), +# min_est = min(estimate)))) %>% +# mutate(max_min_CI = map(tidy_mod, +# ~ filter(.x, type == "study") %>% +# summarise(max_upper_CI = max(conf.high), +# min_lower_CI = min(conf.low)))) %>% +# unnest_wider(col = c(max_min_est, max_min_CI)) %>% +# ungroup %>% +# rows_update({plot_data_logged %>% #hells yes to this gem of a function! +# mutate(dataset = "eucalyptus") %>% +# filter(type != "summary") %>% +# nest(tidy_mod = c(-estimate_type, -dataset))}, +# by = c("dataset", "estimate_type")) %>% +# mutate(no_effect = +# map_int(tidy_mod, +# ~ filter(.x, +# estimate >0 & conf.low <= 0 | estimate <0 & conf.high >= 0, +# type == "study") %>% +# nrow() ), +# pos_sign = +# map_int(tidy_mod, +# ~ filter(.x, estimate >0, conf.low > 0, +# type == "study") %>% +# nrow()), +# neg_sign = +# map_int(tidy_mod, +# ~ filter(.x, estimate < 0, conf.high < 0, +# type == "study") %>% +# nrow()), +# total_effects = +# map_int(tidy_mod, +# ~ filter(.x, +# type == "study") %>% +# nrow() +# )) %>% +# select(-tidy_mod, -MA_mean) %>% +# rename(MA_mean = mean) ``` As with the effect size $Z_r$, we observed substantial variability in the size of out-of-sample predictions derived from the analysts' models. @@ -2995,15 +3066,6 @@ The meta-analytic mean predictions for these three scenarios were similar; `r fi #| echo: false #| fig-keep: last -yi_plot_data_bt <- - ManyEcoEvo_yi_results %>% - filter(exclusion_set == "complete", dataset == "blue tit") %>% - select(MA_mod, -exclusion_set) %>% - group_by(estimate_type, dataset) %>% - transmute(tidy_mod = map(MA_mod, - ~ broom::tidy(.x, conf.int = TRUE, include_studies = TRUE) %>% - rename(id_col = term))) %>% - unnest(tidy_mod) plot_forest_yi <- function(data, intercept = TRUE, MA_mean = TRUE){ if (MA_mean == FALSE){ data <- filter(data, study_id != "overall") @@ -3059,6 +3121,15 @@ plot_forest_yi <- function(data, intercept = TRUE, MA_mean = TRUE){ print(p) } + +ManyEcoEvo_yi_viz %>% + filter(dataset == "blue tit") %>% + group_by(estimate_type, dataset) %>% + select(estimate_type, dataset, tidy_mod_summary) %>% + unnest(cols = tidy_mod_summary) %>% + rename(study_id = term) %>% + ungroup %>% + plot_forest_yi(., intercept = TRUE, MA_mean = TRUE) + theme(axis.text.y = element_blank()) ``` @@ -3070,13 +3141,13 @@ plot_forest_yi <- function(data, intercept = TRUE, MA_mean = TRUE){ #| message: false #| fig-keep: last -plot_data_logged %>% - mutate(response_scale = map2(estimate, std.error, log_back, 1000)) %>% - select(estimate_type, study_id, type, response_scale) %>% - unnest(response_scale) %>% - rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>% -# filter(estimate <1000) %>% - plot_forest_2(MA_mean = T,y_zoom = c(0,40)) + +eucalyptus_yi_plot_data %>% + group_by(estimate_type, dataset) %>% + select(estimate_type, dataset, tidy_mod_summary) %>% + unnest(cols = tidy_mod_summary) %>% + rename(study_id = term) %>% + ungroup %>% + plot_forest_2(MA_mean = T,y_zoom = c(0,150)) + theme(axis.text.y = element_blank()) ```