diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index 7feda7f..e3de57e 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -7,7 +7,7 @@ editor_options: ## Introduction -Gross Primary Production (GPP) is the total amount of carbon fixation by plants +Vegetation Gross Primary Production (GPP) is the total amount of carbon fixation by plants through photosynthesis [@badgley_terrestrial_2019]. Quantifying GPP is essential for understanding land-atmosphere carbon exchange [@kohler_global_2018], ecosystem function, and ecosystem responses to climate change diff --git a/scripts/lm_preparation.R b/scripts/lm_preparation.R index 6afd7fa..5d42b68 100644 --- a/scripts/lm_preparation.R +++ b/scripts/lm_preparation.R @@ -33,6 +33,14 @@ ind_mich <- michigan_monthly_500 %>% ind_sites <- bind_rows(ind_bar, ind_bor, ind_mich) +# Function to select variables for residuals plots +select_augmented <- function(data, variable) { + data %>% + select(variable, augmented) %>% + unnest(cols = c(augmented)) %>% + select(variable, gpp_dt_vut_ref, .resid) +} + # Linear model for all sites [A | Diff VIs] all_sites_lm <- ind_sites %>% select(-kndvi_mean) %>% @@ -45,6 +53,9 @@ all_sites_lm <- ind_sites %>% augmented = map(fit, augment) ) +all_sites_augmented_monthly <- select_augmented(all_sites_lm, "index") %>% + mutate(site = "All") + all_fit <- all_sites_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -77,6 +88,9 @@ evi_lm <- ind_sites %>% augmented = map(fit, augment) ) +evi_augmented_monthly <- select_augmented(evi_lm, "site") %>% + mutate(index = "EVI") + evi_fit <- evi_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -100,6 +114,9 @@ ndvi_lm <- ind_sites %>% augmented = map(fit, augment) ) +ndvi_augmented_monthly <- select_augmented(ndvi_lm, "site") %>% + mutate(index = "NDVI") + ndvi_fit <- ndvi_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -123,6 +140,9 @@ nirv_lm <- ind_sites %>% augmented = map(fit, augment) ) +nirv_augmented_monthly <- select_augmented(nirv_lm, "site") %>% + mutate(index = "NIRv") + nirv_fit <- nirv_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -146,6 +166,9 @@ cci_lm <- ind_sites %>% augmented = map(fit, augment) ) +cci_augmented_monthly <- select_augmented(cci_lm, "site") %>% + mutate(index = "CCI") + cci_fit <- cci_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -164,6 +187,10 @@ vis_site_glance_monthly <- bind_rows(evi_glance_monthly, nirv_glance_monthly, cci_glance_monthly) +vis_site_augmented_monthly <- bind_rows(evi_augmented_monthly, + ndvi_augmented_monthly, + nirv_augmented_monthly, + cci_augmented_monthly) # # Linear model for all sites kNDVI # kndvi_lm <- ind_sites %>% @@ -202,6 +229,9 @@ all_vis_lm <- ind_sites %>% augmented = map(fit, augment) ) +all_vis_augmented_monthly <- select_augmented(all_vis_lm, "site") %>% + mutate(index = "All") + all_fit <- all_vis_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -233,6 +263,9 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + metrics <- augment(all_sites_all_indices) %>% select(gpp_dt_vut_ref, .resid) +all_sites_all_vis_augmented_monthly <- metrics %>% + mutate(index = "All", site = "All") + rmse <- sqrt(mean((metrics$.resid)^2)) mae <- mean(abs(metrics$.resid)) @@ -275,6 +308,9 @@ all_sites_lm <- ind_sites %>% augmented = map(fit, augment) ) +all_sites_augmented_weekly <- select_augmented(all_sites_lm, "index") %>% + mutate(site = "All") + all_fit <- all_sites_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -307,6 +343,9 @@ evi_lm <- ind_sites %>% augmented = map(fit, augment) ) +evi_augmented_weekly <- select_augmented(evi_lm, "site") %>% + mutate(index = "EVI") + evi_fit <- evi_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -330,6 +369,9 @@ ndvi_lm <- ind_sites %>% augmented = map(fit, augment) ) +ndvi_augmented_weekly <- select_augmented(ndvi_lm, "site") %>% + mutate(index = "NDVI") + ndvi_fit <- ndvi_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -353,6 +395,9 @@ nirv_lm <- ind_sites %>% augmented = map(fit, augment) ) +nirv_augmented_weekly <- select_augmented(nirv_lm, "site") %>% + mutate(index = "NIRv") + nirv_fit <- nirv_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -376,6 +421,9 @@ cci_lm <- ind_sites %>% augmented = map(fit, augment) ) +cci_augmented_weekly <- select_augmented(cci_lm, "site") %>% + mutate(index = "CCI") + cci_fit <- cci_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -394,6 +442,11 @@ vis_site_glance_weekly <- bind_rows(evi_glance_weekly, nirv_glance_weekly, cci_glance_weekly) +vis_site_augmented_weekly <- bind_rows(evi_augmented_weekly, + ndvi_augmented_weekly, + nirv_augmented_weekly, + cci_augmented_weekly) + # Linear model for all VI's [C | All VIs] all_vis_lm <- ind_sites %>% select(-kndvi_mean) %>% @@ -408,6 +461,9 @@ all_vis_lm <- ind_sites %>% augmented = map(fit, augment) ) +all_vis_augmented_weekly <- select_augmented(all_vis_lm, "site") %>% + mutate(index = "All") + all_fit <- all_vis_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -436,10 +492,12 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + cci_mean, data = ind_sites) # summary(all_sites_all_indices) - metrics <- augment(all_sites_all_indices) %>% select(gpp_dt_vut_ref, .resid) +all_sites_all_vis_augmented_weekly <- metrics %>% + mutate(index = "All", site = "All") + rmse <- sqrt(mean((metrics$.resid)^2)) mae <- mean(abs(metrics$.resid)) @@ -487,6 +545,9 @@ all_fit <- all_sites_lm %>% select(-data, -fit, -glanced) %>% mutate(site = "All") +all_sites_augmented_daily <- select_augmented(all_sites_lm, "index") %>% + mutate(site = "All") + all_sites_glance_daily <- all_sites_lm %>% mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% @@ -514,6 +575,9 @@ evi_lm <- ind_sites %>% augmented = map(fit, augment) ) +evi_augmented_daily <- select_augmented(evi_lm, "site") %>% + mutate(index = "EVI") + evi_fit <- evi_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -537,6 +601,9 @@ ndvi_lm <- ind_sites %>% augmented = map(fit, augment) ) +ndvi_augmented_daily <- select_augmented(ndvi_lm, "site") %>% + mutate(index = "NDVI") + ndvi_fit <- ndvi_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -560,6 +627,9 @@ nirv_lm <- ind_sites %>% augmented = map(fit, augment) ) +nirv_augmented_daily <- select_augmented(nirv_lm, "site") %>% + mutate(index = "NIRv") + nirv_fit <- nirv_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -583,6 +653,9 @@ cci_lm <- ind_sites %>% augmented = map(fit, augment) ) +cci_augmented_daily <- select_augmented(cci_lm, "site") %>% + mutate(index = "CCI") + cci_fit <- cci_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -601,6 +674,11 @@ vis_site_glance_daily <- bind_rows(evi_glance_daily, nirv_glance_daily, cci_glance_daily) +vis_site_augmented_daily <- bind_rows(evi_augmented_daily, + ndvi_augmented_daily, + nirv_augmented_daily, + cci_augmented_daily) + # Linear model for all VI's [C | All VIs] all_vis_lm <- ind_sites %>% select(-kndvi_mean) %>% @@ -615,6 +693,9 @@ all_vis_lm <- ind_sites %>% augmented = map(fit, augment) ) +all_vis_augmented_daily <- select_augmented(all_vis_lm, "site") %>% + mutate(index = "All") + all_fit <- all_vis_lm %>% unnest(tidied) %>% select(-data, -fit, -glanced) %>% @@ -643,10 +724,12 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + cci_mean, data = ind_sites) # summary(all_sites_all_indices) - metrics <- augment(all_sites_all_indices) %>% select(gpp_dt_vut_ref, .resid) +all_sites_all_vis_augmented_daily <- metrics %>% + mutate(index = "All", site = "All") + rmse <- sqrt(mean((metrics$.resid)^2)) mae <- mean(abs(metrics$.resid)) diff --git a/test_plot_residuals.R b/test_plot_residuals.R index 8f538c4..e7fb2f0 100644 --- a/test_plot_residuals.R +++ b/test_plot_residuals.R @@ -6,19 +6,18 @@ all_sites_lm library(cowplot) library(ggridges) -all_sites_lm %>% - # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% - select(index, augmented) %>% - unnest(cols = c(augmented)) %>% - select(index, gpp_dt_vut_ref, .fitted, .resid) %>% - ggplot(aes(x = .resid, fill = index)) + - geom_density(alpha = .7) + - scale_fill_viridis_d() + - scale_y_continuous(expand = c(0, 0)) + - theme_half_open(12) +# all_sites_lm %>% +# # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% +# select(index, augmented) %>% +# unnest(cols = c(augmented)) %>% +# select(index, gpp_dt_vut_ref, .fitted, .resid) %>% +# ggplot(aes(x = .resid, fill = index)) + +# geom_density(alpha = .7) + +# scale_fill_viridis_d() + +# scale_y_continuous(expand = c(0, 0)) + +# theme_half_open(12) - -all_sites_lm %>% +all_sites_lm %>% # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% select(index, augmented) %>% unnest(cols = c(augmented)) %>% @@ -32,9 +31,6 @@ all_sites_lm %>% theme_ridges() + theme(legend.position = "none") -# all_sites_lm %>% -# unnest(augmented) %>% glimpse() - all_sites_lm %>% # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% select(index, augmented) %>% @@ -45,6 +41,131 @@ all_sites_lm %>% scale_color_viridis_d() + theme_ridges() +evi_lm %>% + # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + select(site, augmented) %>% + unnest(cols = c(augmented)) %>% + select(site, gpp_dt_vut_ref, .fitted, .resid) %>% + rename("residuals" = ".resid") %>% + ggplot(aes(x = residuals, y = site, fill = site)) + + geom_density_ridges() + + geom_vline(xintercept = 0, colour = "#FF5500", + linewidth = 0.7, linetype = "dashed") + + scale_fill_viridis_d() + + theme_ridges() + + theme(legend.position = "none") + +# ------------------------------------------------------------------------------ +# Probando la union de todos los datos: +# vis_site_augmented_monthly %>% +# bind_rows(all_sites_augmented_monthly, +# all_sites_all_vis_augmented_monthly, +# all_vis_augmented_monthly) %>% +# mutate(index = case_when( +# index == "evi_mean" ~ "EVI", +# index == "ndvi_mean" ~ "NDVI", +# index == "nirv_mean" ~ "NIRv", +# index == "cci_mean" ~ "CCI", +# .default = index +# )) %>% +# rename("residuals" = ".resid") %>% +# group_by(site, index) %>% +# nest() %>% +# arrange(site) + + + + +# Function to create residuals plot +create_residuals_plot <- function(data, site) { + data %>% + filter(site == {{site}}) %>% + ggplot(aes(x = residuals, y = index, fill = index)) + + geom_density_ridges() + + geom_vline(xintercept = 0, colour = "#FF5500", + linewidth = 0.7, linetype = "dashed") + + scale_fill_viridis_d() + + # scale_x_continuous(limits = c(-30, 20), n.breaks = 10) + + theme_ridges() + + theme(legend.position = "none") +} + +# Vector with sites for looping in the plots +sites <- c("Bartlett", "Borden", "Michigan", "All") + +# Monthly +monthly_residuals_data <- vis_site_augmented_monthly %>% + bind_rows(all_sites_augmented_monthly, + all_sites_all_vis_augmented_monthly, + all_vis_augmented_monthly) %>% + mutate(index = case_when( + index == "evi_mean" ~ "EVI", + index == "ndvi_mean" ~ "NDVI", + index == "nirv_mean" ~ "NIRv", + index == "cci_mean" ~ "CCI", + .default = index + )) %>% + rename("residuals" = ".resid") + +# Loop over all the sites +monthly_residuals_plots <- + map(sites, ~ create_residuals_plot(monthly_residuals_data, .x)) + +# Weekly +weekly_residuals_data <- vis_site_augmented_weekly %>% + bind_rows(all_sites_augmented_weekly, + all_sites_all_vis_augmented_weekly, + all_vis_augmented_weekly) %>% + mutate(index = case_when( + index == "evi_mean" ~ "EVI", + index == "ndvi_mean" ~ "NDVI", + index == "nirv_mean" ~ "NIRv", + index == "cci_mean" ~ "CCI", + .default = index + )) %>% + rename("residuals" = ".resid") + +# Loop over all the sites +weekly_residuals_plots <- + map(sites, ~ create_residuals_plot(weekly_residuals_data, .x)) + +# Daily +daily_residuals_data <- vis_site_augmented_daily %>% + bind_rows(all_sites_augmented_daily, + all_sites_all_vis_augmented_daily, + all_vis_augmented_daily) %>% + mutate(index = case_when( + index == "evi_mean" ~ "EVI", + index == "ndvi_mean" ~ "NDVI", + index == "nirv_mean" ~ "NIRv", + index == "cci_mean" ~ "CCI", + .default = index + )) %>% + rename("residuals" = ".resid") + +# Loop over all the sites +daily_residuals_plots <- + map(sites, ~ create_residuals_plot(daily_residuals_data, .x)) + +plot_grid(monthly_residuals_plots[[1]] + labs(title = "monthly 1"), + monthly_residuals_plots[[2]], + monthly_residuals_plots[[3]], + monthly_residuals_plots[[4]] + labs(title = "monthly 4"), + weekly_residuals_plots[[1]], + weekly_residuals_plots[[2]], + weekly_residuals_plots[[3]], + weekly_residuals_plots[[4]], + daily_residuals_plots[[1]], + daily_residuals_plots[[2]], + daily_residuals_plots[[3]], + daily_residuals_plots[[4]], + nrow = 3, + ncol = 4) + + + + + # Si tengo tiempo, revisar este proceso