diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index d5d0a18..7feda7f 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -656,150 +656,152 @@ The complete metrics results for each of the linear models are in #| warning: false source("scripts/lm_preparation.R") +create_metrics_table <- function(data) { + data %>% + gt() %>% + tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% + tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% + tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% + tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% + tab_spanner(label = "All", columns = ends_with("All")) %>% + cols_label( + .list = list( + "site" = "Site", + "r.squared_EVI" = "R2", + "mae_EVI" = "MAE", + "rmse_EVI" = "RMSE", + "r.squared_NDVI" = "R2", + "mae_NDVI" = "MAE", + "rmse_NDVI" = "RMSE", + "r.squared_NIRv" = "R2", + "mae_NIRv" = "MAE", + "rmse_NIRv" = "RMSE", + "r.squared_CCI" = "R2", + "mae_CCI" = "MAE", + "rmse_CCI" = "RMSE", + "r.squared_All" = "R2", + "mae_All" = "MAE", + "rmse_All" = "RMSE" + ) + ) %>% + cols_align( + align = "center", + columns = 2:16 + ) %>% + fmt_number( + columns = 2:16, + decimals = 2) %>% + tab_options( + row_group.background.color = "#E9E0E1", + row_group.font.weight = "bold" + ) %>% + cols_width(everything() ~ px(50)) +} + # Monthly table -vis_site_glance_montly %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% +vis_site_glance_monthly %>% + select(site, index, r.squared, mae, rmse) %>% bind_rows(all_sites_glance_monthly, all_sites_all_vis_glance_monthly, all_vis_glance_monthly) %>% pivot_wider(names_from = index, - values_from = c(r.squared, adj.r.squared, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% - tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% - tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% - tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% - tab_spanner(label = "All", columns = ends_with("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "r.squared_EVI" = "R2", - "adj.r.squared_EVI" = "adj R2", - "rmse_EVI" = "RMSE", - "r.squared_NDVI" = "R2", - "adj.r.squared_NDVI" = "adj R2", - "rmse_NDVI" = "RMSE", - "r.squared_NIRv" = "R2", - "adj.r.squared_NIRv" = "adj R2", - "rmse_NIRv" = "RMSE", - "r.squared_CCI" = "R2", - "adj.r.squared_CCI" = "adj R2", - "rmse_CCI" = "RMSE", - "r.squared_All" = "R2", - "adj.r.squared_All" = "adj R2", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + values_from = c(r.squared, mae, rmse)) %>% + create_metrics_table() + # Weekly table vis_site_glance_weekly %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% bind_rows(all_sites_glance_weekly, all_sites_all_vis_glance_weekly, all_vis_glance_weekly) %>% pivot_wider(names_from = index, - values_from = c(r.squared, adj.r.squared, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% - tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% - tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% - tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% - tab_spanner(label = "All", columns = ends_with("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "r.squared_EVI" = "R2", - "adj.r.squared_EVI" = "adj R2", - "rmse_EVI" = "RMSE", - "r.squared_NDVI" = "R2", - "adj.r.squared_NDVI" = "adj R2", - "rmse_NDVI" = "RMSE", - "r.squared_NIRv" = "R2", - "adj.r.squared_NIRv" = "adj R2", - "rmse_NIRv" = "RMSE", - "r.squared_CCI" = "R2", - "adj.r.squared_CCI" = "adj R2", - "rmse_CCI" = "RMSE", - "r.squared_All" = "R2", - "adj.r.squared_All" = "adj R2", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + values_from = c(r.squared, mae, rmse)) %>% + create_metrics_table() # Daily table vis_site_glance_daily %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% bind_rows(all_sites_glance_daily, all_sites_all_vis_glance_daily, all_vis_glance_daily) %>% pivot_wider(names_from = index, - values_from = c(r.squared, adj.r.squared, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% - tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% - tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% - tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% - tab_spanner(label = "All", columns = ends_with("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "r.squared_EVI" = "R2", - "adj.r.squared_EVI" = "adj R2", - "rmse_EVI" = "RMSE", - "r.squared_NDVI" = "R2", - "adj.r.squared_NDVI" = "adj R2", - "rmse_NDVI" = "RMSE", - "r.squared_NIRv" = "R2", - "adj.r.squared_NIRv" = "adj R2", - "rmse_NIRv" = "RMSE", - "r.squared_CCI" = "R2", - "adj.r.squared_CCI" = "adj R2", - "rmse_CCI" = "RMSE", - "r.squared_All" = "R2", - "adj.r.squared_All" = "adj R2", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + values_from = c(r.squared, mae, rmse)) %>% + create_metrics_table() ``` \endgroup + +```{r lm_barplot_metrics} +#| label: fig-lm_metrics +#| fig-cap: "Summary of Linear models for GPP estimation using the vegetation indices on a monthly (a), weekly (b), and daily (c) basis." +#| fig-width: 7 +#| fig-height: 9 +#| echo: false +#| message: false +#| warning: false +library(cowplot) + +# Create a function to generate the plot +create_metrics_plot <- function(timescale, y_var, ylim_range) { + get(paste0("vis_site_glance_", timescale)) %>% + select(site, index, {{ y_var }}) %>% + bind_rows(get(paste0("all_sites_glance_", timescale)), + get(paste0("all_sites_all_vis_glance_", timescale)), + get(paste0("all_vis_glance_", timescale))) %>% + ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + + geom_col(position = "dodge") + + coord_cartesian(ylim = ylim_range) + + scale_fill_viridis_d() + + labs(x = "") + + theme_minimal_hgrid(font_size = 12) + + theme(axis.text.x = element_text(angle = 45, h = 1)) +} + +# Response variables are going to be the same: +response_vars <- c("r.squared", "mae", "rmse") + +# Monthly plots +monthly_metrics_plots <- map2(response_vars, + list(c(0.3, 1), c(0.5, 4), c(0.5, 5)), + ~ create_metrics_plot("monthly", .x, .y)) + +# Weekly plots +weekly_metrics_plots <- map2(response_vars, + list(c(0.3, 1), c(0.5, 4), c(0.5, 5)), + ~ create_metrics_plot("weekly", .x, .y)) + +# Daily plots +daily_metrics_plots <- map2(response_vars, + list(c(0.3, 1), c(0.5, 4), c(0.5, 5)), + ~ create_metrics_plot("daily", .x, .y)) + +# Grid the plots as should go in the chapter +lm_metrics_plots <- plot_grid( + monthly_metrics_plots[[1]] + theme(legend.position = "none"), + weekly_metrics_plots[[1]] + theme(legend.position = "none"), + daily_metrics_plots[[1]] + theme(legend.position = "none"), + monthly_metrics_plots[[2]] + theme(legend.position = "none"), + weekly_metrics_plots[[2]] + theme(legend.position = "none"), + daily_metrics_plots[[2]] + theme(legend.position = "none"), + monthly_metrics_plots[[3]] + theme(legend.position = "none"), + weekly_metrics_plots[[3]] + theme(legend.position = "none"), + daily_metrics_plots[[3]] + theme(legend.position = "none"), + nrow = 3, + ncol = 3, + labels = c("A", "B", "C")) + +plot_legend <- get_legend( + monthly_metrics_plots[[1]] + + guides(color = guide_legend(nrow = 3)) +) + +plot_grid(lm_metrics_plots, plot_legend, ncol = 2, rel_widths = c(1, .1)) +rm(plot_legend) +``` + + ### Analysis of GPP-Vegetation Index Relationships Using GAM Models This variance implied that a linear @@ -848,52 +850,57 @@ provide a more comprehensive explanation for the relationship. This adaptabilit #| warning: false source("scripts/gam_preparation.R") -# Daily table -all_sites_gam_daily %>% - bind_rows(vis_sites_gam_daily, - all_sites_all_vis_gam_daily, - all_vis_gam_daily, - ) %>% - pivot_wider(names_from = index, - values_from = c(rsq, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% - tab_spanner(label = "EVI", columns = contains("EVI")) %>% - tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% - tab_spanner(label = "CCI", columns = contains("CCI")) %>% - tab_spanner(label = "All", columns = contains("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "rsq_evi_mean" = "R2", - "mae_evi_mean" = "MAE", - "rmse_evi_mean" = "RMSE", - "rsq_ndvi_mean" = "R2", - "mae_ndvi_mean" = "MAE", - "rmse_ndvi_mean" = "RMSE", - "rsq_nirv_mean" = "R2", - "mae_nirv_mean" = "MAE", - "rmse_nirv_mean" = "RMSE", - "rsq_cci_mean" = "R2", - "mae_cci_mean" = "MAE", - "rmse_cci_mean" = "RMSE", - "rsq_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" +create_metrics_table <- function(data) { + data %>% + pivot_wider(names_from = index, + values_from = c(rsq, mae, rmse)) %>% + gt() %>% + tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% + tab_spanner(label = "EVI", columns = contains("EVI")) %>% + tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% + tab_spanner(label = "CCI", columns = contains("CCI")) %>% + tab_spanner(label = "All", columns = contains("All")) %>% + cols_label( + .list = list( + "site" = "Site", + "rsq_evi_mean" = "R2", + "mae_evi_mean" = "MAE", + "rmse_evi_mean" = "RMSE", + "rsq_ndvi_mean" = "R2", + "mae_ndvi_mean" = "MAE", + "rmse_ndvi_mean" = "RMSE", + "rsq_nirv_mean" = "R2", + "mae_nirv_mean" = "MAE", + "rmse_nirv_mean" = "RMSE", + "rsq_cci_mean" = "R2", + "mae_cci_mean" = "MAE", + "rmse_cci_mean" = "RMSE", + "rsq_All" = "R2", + "mae_All" = "MAE", + "rmse_All" = "RMSE" + ) + ) %>% + cols_align( + align = "center", + columns = 2:16 + ) %>% + fmt_number( + columns = 2:16, + decimals = 2) %>% + tab_options( + row_group.background.color = "#E9E0E1", + row_group.font.weight = "bold" + ) %>% + cols_width(everything() ~ px(50)) +} + +# Monthly table +all_sites_gam_monthly %>% + bind_rows(vis_sites_gam_monthly, + all_sites_all_vis_gam_monthly, + all_vis_gam_monthly, ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() # Weekly table all_sites_gam_weekly %>% @@ -901,96 +908,89 @@ all_sites_gam_weekly %>% all_sites_all_vis_gam_weekly, all_vis_gam_weekly, ) %>% - pivot_wider(names_from = index, - values_from = c(rsq, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% - tab_spanner(label = "EVI", columns = contains("EVI")) %>% - tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% - tab_spanner(label = "CCI", columns = contains("CCI")) %>% - tab_spanner(label = "All", columns = contains("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "rsq_evi_mean" = "R2", - "mae_evi_mean" = "MAE", - "rmse_evi_mean" = "RMSE", - "rsq_ndvi_mean" = "R2", - "mae_ndvi_mean" = "MAE", - "rmse_ndvi_mean" = "RMSE", - "rsq_nirv_mean" = "R2", - "mae_nirv_mean" = "MAE", - "rmse_nirv_mean" = "RMSE", - "rsq_cci_mean" = "R2", - "mae_cci_mean" = "MAE", - "rmse_cci_mean" = "RMSE", - "rsq_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() -# Monthly table -all_sites_gam_monthly %>% - bind_rows(vis_sites_gam_monthly, - all_sites_all_vis_gam_monthly, - all_vis_gam_monthly, - ) %>% - pivot_wider(names_from = index, - values_from = c(rsq, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% - tab_spanner(label = "EVI", columns = contains("EVI")) %>% - tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% - tab_spanner(label = "CCI", columns = contains("CCI")) %>% - tab_spanner(label = "All", columns = contains("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "rsq_evi_mean" = "R2", - "mae_evi_mean" = "MAE", - "rmse_evi_mean" = "RMSE", - "rsq_ndvi_mean" = "R2", - "mae_ndvi_mean" = "MAE", - "rmse_ndvi_mean" = "RMSE", - "rsq_nirv_mean" = "R2", - "mae_nirv_mean" = "MAE", - "rmse_nirv_mean" = "RMSE", - "rsq_cci_mean" = "R2", - "mae_cci_mean" = "MAE", - "rmse_cci_mean" = "RMSE", - "rsq_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" +# Daily table +all_sites_gam_daily %>% + bind_rows(vis_sites_gam_daily, + all_sites_all_vis_gam_daily, + all_vis_gam_daily, ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() ``` \endgroup +```{r gam_barplot_metrics} +#| label: fig-gam_metrics +#| fig-cap: "Summary of GAM models for GPP estimation using the vegetation indices. Column A represents the metrics for the monthly models, B the weekly, and C the daily metrics." +#| fig-width: 7 +#| fig-height: 9 +#| echo: false +#| message: false +#| warning: false + +# Create a function to generate the plot +create_metrics_plot <- function(timescale, y_var, ylim_range) { + get(paste0("all_sites_gam_", timescale)) %>% + # select(site, index, {{ y_var }}) %>% + bind_rows(get(paste0("vis_sites_gam_", timescale)), + get(paste0("all_sites_all_vis_gam_", timescale)), + get(paste0("all_vis_gam_", timescale))) %>% + ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + + geom_col(position = "dodge") + + coord_cartesian(ylim = ylim_range) + + scale_fill_viridis_d() + + labs(x = "") + + theme_minimal_hgrid(font_size = 12) + + theme(axis.text.x = element_text(angle = 45, h = 1)) +} + +# Response variables are going to be the same: +response_vars <- c("rsq", "mae", "rmse") + +# Monthly plots +monthly_metrics_plots <- map2(response_vars, + list(c(0.3, 1), c(0.5, 4), c(0.8, 5)), + ~ create_metrics_plot("monthly", .x, .y)) + +# Weekly plots +weekly_metrics_plots <- map2(response_vars, + list(c(0.3, 1), c(0.5, 4), c(0.8, 5)), + ~ create_metrics_plot("weekly", .x, .y)) + +# Daily plots +daily_metrics_plots <- map2(response_vars, + list(c(0.3, 1), c(0.5, 4), c(0.8, 5)), + ~ create_metrics_plot("daily", .x, .y)) + +# Grid the plots as should go in the chapter +gam_metrics_plots <- plot_grid( + monthly_metrics_plots[[1]] + theme(legend.position = "none"), + weekly_metrics_plots[[1]] + theme(legend.position = "none"), + daily_metrics_plots[[1]] + theme(legend.position = "none"), + monthly_metrics_plots[[2]] + theme(legend.position = "none"), + weekly_metrics_plots[[2]] + theme(legend.position = "none"), + daily_metrics_plots[[2]] + theme(legend.position = "none"), + monthly_metrics_plots[[3]] + theme(legend.position = "none"), + weekly_metrics_plots[[3]] + theme(legend.position = "none"), + daily_metrics_plots[[3]] + theme(legend.position = "none"), + nrow = 3, + ncol = 3, + labels = c("A", "B", "C")) + +plot_legend <- get_legend( + monthly_metrics_plots[[1]] + + guides(color = guide_legend(nrow = 3)) +) + +plot_grid(gam_metrics_plots, plot_legend, ncol = 2, rel_widths = c(1, .1)) +rm(plot_legend) +``` + + + + + \newpage ## Discussion diff --git a/scripts/gam_preparation.R b/scripts/gam_preparation.R index a42896a..99470c5 100644 --- a/scripts/gam_preparation.R +++ b/scripts/gam_preparation.R @@ -17,7 +17,6 @@ library(tidymodels) library(broom) library(usemodels) library(vip) -library(h2o) library(mgcv) source("scripts/models_data_preparation.R") @@ -141,7 +140,7 @@ mod_fun <- function(df) { s(evi_mean) + s(nirv_mean) + s(cci_mean), - data = daily_gam, + data = df, method = 'REML') } @@ -252,7 +251,7 @@ mod_fun <- function(df) { s(evi_mean) + s(nirv_mean) + s(cci_mean), - data = weekly_gam, + data = df, method = 'REML') } @@ -363,7 +362,7 @@ mod_fun <- function(df) { s(evi_mean) + s(nirv_mean) + s(cci_mean), - data = weekly_gam, + data = df, method = 'REML') } @@ -371,6 +370,8 @@ group_site <- monthly_gam %>% nest(data = c(-site)) models <- group_site %>% + # Michigan & Bartlett have few data points. + filter(site == "Borden") %>% mutate(model = map(data, mod_fun), index = "All") @@ -379,7 +380,18 @@ all_vis_gam_monthly <- models %>% rsq = map_dbl(model, rsq_fun), rmse = map_dbl(model, rmse_fun), mae = map_dbl(model, mae_fun)) %>% - arrange(desc(rsq)) + arrange(desc(rsq)) %>% + # Insert NA values for Bartlett & Michigan which does not have + # enough obs. for this kind of model + bind_rows( + tibble( + index = rep("All", 2), + site = c("Bartlett", "Michigan"), + rsq = rep(NA, 2), + rmse = rep(NA, 2), + mae = rep(NA, 2) + ) + ) # GAM model for all sites and all indices (covariates) [H | All VIs + site] single_vis_monthly <- gam(gpp_dt_vut_ref ~ ndvi_mean + diff --git a/scripts/lm_preparation.R b/scripts/lm_preparation.R index ebf0703..6afd7fa 100644 --- a/scripts/lm_preparation.R +++ b/scripts/lm_preparation.R @@ -10,6 +10,7 @@ library(dplyr) library(broom) library(tidymodels) +library(purrr) source("scripts/models_data_preparation.R") @@ -50,12 +51,13 @@ all_fit <- all_sites_lm %>% mutate(site = "All") all_sites_glance_monthly <- all_sites_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(site = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -81,7 +83,8 @@ evi_fit <- evi_lm %>% mutate(index = "EVI") evi_glance_monthly <- evi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -103,7 +106,8 @@ ndvi_fit <- ndvi_lm %>% mutate(index = "NDVI") ndvi_glance_monthly <- ndvi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -125,7 +129,8 @@ nirv_fit <- nirv_lm %>% mutate(index = "NIRv") nirv_glance_monthly <- nirv_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -147,16 +152,17 @@ cci_fit <- cci_lm %>% mutate(index = "CCI") cci_glance_monthly <- cci_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "CCI") -vis_site_glance_montly <- bind_rows(evi_glance_monthly, - ndvi_glance_monthly, - nirv_glance_monthly, - cci_glance_monthly) +vis_site_glance_monthly <- bind_rows(evi_glance_monthly, + ndvi_glance_monthly, + nirv_glance_monthly, + cci_glance_monthly) # # Linear model for all sites kNDVI @@ -202,12 +208,13 @@ all_fit <- all_vis_lm %>% mutate(site = "All") all_vis_glance_monthly <- all_vis_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -224,16 +231,17 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + # summary(all_sites_all_indices) metrics <- augment(all_sites_all_indices) %>% - select(gpp_dt_vut_ref, .resid) %>% - mutate(rmse = (sqrt(mean((.resid)^2)))) + select(gpp_dt_vut_ref, .resid) rmse <- sqrt(mean((metrics$.resid)^2)) +mae <- mean(abs(metrics$.resid)) all_sites_all_vis_glance_monthly <- glance(all_sites_all_indices) %>% mutate(rmse = rmse, + mae = mae, site = "All", index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) + select(site, index, r.squared, mae, rmse) # WEEKLY LMS ------------------------------------------------------------------ # Prepare data with all sites @@ -273,12 +281,13 @@ all_fit <- all_sites_lm %>% mutate(site = "All") all_sites_glance_weekly <- all_sites_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(site = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -304,7 +313,8 @@ evi_fit <- evi_lm %>% mutate(index = "EVI") evi_glance_weekly <- evi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -326,7 +336,8 @@ ndvi_fit <- ndvi_lm %>% mutate(index = "NDVI") ndvi_glance_weekly <- ndvi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -348,7 +359,8 @@ nirv_fit <- nirv_lm %>% mutate(index = "NIRv") nirv_glance_weekly <- nirv_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -370,16 +382,17 @@ cci_fit <- cci_lm %>% mutate(index = "CCI") cci_glance_weekly <- cci_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "CCI") vis_site_glance_weekly <- bind_rows(evi_glance_weekly, - ndvi_glance_weekly, - nirv_glance_weekly, - cci_glance_weekly) + ndvi_glance_weekly, + nirv_glance_weekly, + cci_glance_weekly) # Linear model for all VI's [C | All VIs] all_vis_lm <- ind_sites %>% @@ -401,12 +414,13 @@ all_fit <- all_vis_lm %>% mutate(site = "All") all_vis_glance_weekly <- all_vis_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -424,16 +438,17 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + # summary(all_sites_all_indices) metrics <- augment(all_sites_all_indices) %>% - select(gpp_dt_vut_ref, .resid) %>% - mutate(rmse = (sqrt(mean((.resid)^2)))) + select(gpp_dt_vut_ref, .resid) rmse <- sqrt(mean((metrics$.resid)^2)) +mae <- mean(abs(metrics$.resid)) all_sites_all_vis_glance_weekly <- glance(all_sites_all_indices) %>% mutate(rmse = rmse, + mae = mae, site = "All", index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) + select(site, index, r.squared, mae, rmse) # DAILY LMS ------------------------------------------------------------------ @@ -473,12 +488,13 @@ all_fit <- all_sites_lm %>% mutate(site = "All") all_sites_glance_daily <- all_sites_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(site = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -504,7 +520,8 @@ evi_fit <- evi_lm %>% mutate(index = "EVI") evi_glance_daily <- evi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -526,7 +543,8 @@ ndvi_fit <- ndvi_lm %>% mutate(index = "NDVI") ndvi_glance_daily <- ndvi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -548,7 +566,8 @@ nirv_fit <- nirv_lm %>% mutate(index = "NIRv") nirv_glance_daily <- nirv_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -570,16 +589,17 @@ cci_fit <- cci_lm %>% mutate(index = "CCI") cci_glance_daily <- cci_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "CCI") vis_site_glance_daily <- bind_rows(evi_glance_daily, - ndvi_glance_daily, - nirv_glance_daily, - cci_glance_daily) + ndvi_glance_daily, + nirv_glance_daily, + cci_glance_daily) # Linear model for all VI's [C | All VIs] all_vis_lm <- ind_sites %>% @@ -601,12 +621,13 @@ all_fit <- all_vis_lm %>% mutate(site = "All") all_vis_glance_daily <- all_vis_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -624,14 +645,15 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + # summary(all_sites_all_indices) metrics <- augment(all_sites_all_indices) %>% - select(gpp_dt_vut_ref, .resid) %>% - mutate(rmse = (sqrt(mean((.resid)^2)))) + select(gpp_dt_vut_ref, .resid) rmse <- sqrt(mean((metrics$.resid)^2)) +mae <- mean(abs(metrics$.resid)) all_sites_all_vis_glance_daily <- glance(all_sites_all_indices) %>% mutate(rmse = rmse, + mae = mae, site = "All", index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) + select(site, index, r.squared, mae, rmse) diff --git a/test_plot_residuals.R b/test_plot_residuals.R new file mode 100644 index 0000000..8f538c4 --- /dev/null +++ b/test_plot_residuals.R @@ -0,0 +1,83 @@ +# File to check if we can plot the residuals from each of the models + +# Object for residuals distribution: ------------------------------------------- +all_sites_lm + +library(cowplot) +library(ggridges) + +all_sites_lm %>% + # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + select(index, augmented) %>% + unnest(cols = c(augmented)) %>% + select(index, gpp_dt_vut_ref, .fitted, .resid) %>% + ggplot(aes(x = .resid, fill = index)) + + geom_density(alpha = .7) + + scale_fill_viridis_d() + + scale_y_continuous(expand = c(0, 0)) + + theme_half_open(12) + + +all_sites_lm %>% + # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + select(index, augmented) %>% + unnest(cols = c(augmented)) %>% + select(index, gpp_dt_vut_ref, .fitted, .resid) %>% + rename("residuals" = ".resid") %>% + ggplot(aes(x = residuals, y = index, fill = index)) + + geom_density_ridges() + + geom_vline(xintercept = 0, colour = "#FF5500", + linewidth = 0.7, linetype = "dashed") + + scale_fill_viridis_d() + + theme_ridges() + + theme(legend.position = "none") + +# all_sites_lm %>% +# unnest(augmented) %>% glimpse() + +all_sites_lm %>% + # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + select(index, augmented) %>% + unnest(cols = c(augmented)) %>% + ggplot(aes(x = .fitted, y = gpp_dt_vut_ref, colour = index)) + + geom_point(size = 4, alpha = .8) + + geom_abline(lty = 1, color = "#E20D6A", linewidth = 2) + + scale_color_viridis_d() + + theme_ridges() + + + +# Si tengo tiempo, revisar este proceso +# library(tidymodels) +# tidymodels_prefer() +# +# +# all_sites <- +# +# +# +# basic_rec <- +# recipe(Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type + +# Latitude + Longitude, data = ames_train) %>% +# step_log(Gr_Liv_Area, base = 10) %>% +# step_other(Neighborhood, threshold = 0.01) %>% +# step_dummy(all_nominal_predictors()) +# +# interaction_rec <- +# basic_rec %>% +# step_interact( ~ Gr_Liv_Area:starts_with("Bldg_Type_") ) +# +# spline_rec <- +# interaction_rec %>% +# step_ns(Latitude, Longitude, deg_free = 50) +# +# preproc <- +# list(basic = basic_rec, +# interact = interaction_rec, +# splines = spline_rec +# ) +# +# lm_models <- workflow_set(preproc, list(lm = linear_reg()), cross = FALSE) +# lm_models +# +