From d4f95607d346c5de8648ca566cd4a02fab112f4d Mon Sep 17 00:00:00 2001
From: Dario Azzimonti <dario.azzimonti@gmail.com>
Date: Fri, 1 Dec 2023 17:04:18 +0100
Subject: [PATCH] Updated the warnings for weights and sample size, updated
 documentation and tests (issue #1). Updated documentation in reconc_gaussian
 (issue #9).

---
 R/reconc.R                        | 69 ++++++++++++++++++-------------
 R/utils.R                         |  2 +-
 man/reconc_BUIS.Rd                |  6 ++-
 man/reconc_gaussian.Rd            |  4 +-
 tests/testthat/test-examples.R    |  4 +-
 tests/testthat/test-weightswarn.R |  8 ++--
 6 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/R/reconc.R b/R/reconc.R
index 90f730b..3ba2eb6 100644
--- a/R/reconc.R
+++ b/R/reconc.R
@@ -39,39 +39,42 @@
   return(w)
 }
 
-.check_weigths <- function(w, n_eff_min=200, p_n_eff=0.05) {
+.check_weigths <- function(w, n_eff_min=200, p_n_eff=0.01) {
   warning = FALSE
   warning_code = c()
   warning_msg = c()
   
-  # Effective sample size
   n = length(w)
-  n_eff = (sum(w)^2) / sum(w^2)
+  n_eff = n
   
-  # 1. Uniform w
-  if (mean(w) == 1) {
-    warning = TRUE
-    warning_code = c(warning_code, 1) 
-    warning_msg = c(warning_msg, 
-                    paste0("Importance Sampling: all the weights are zeros. This is probably caused by a strong incoherence between bottom and upper base forecasts. An uniform distribution for the weights is used. effective_sample_size= ", round(n_eff,2),"."))
-  }
-  
-  # 2. n_eff < threshold
-  if (n_eff < n_eff_min) {
-    warning = TRUE
-    warning_code = c(warning_code, 2) 
-    warning_msg = c(warning_msg, 
-                    paste0("Importance Sampling: effective_sample_size= ", round(n_eff,2), " (< ", n_eff_min,")."))
-  }
   
-  # 3. n_eff < p*n, e.g. p = 0.05
-  if (n_eff < p_n_eff*n) {
+  # 1. w==0
+  if (all(w==0)) {
     warning = TRUE
-    warning_code = c(warning_code, 3) 
+    warning_code = c(warning_code, 1) 
     warning_msg = c(warning_msg, 
-                    paste0("Importance Sampling: effective_sample_size= ", round(n_eff,2), " (< ", round(p_n_eff * 100, 2),"%)."))
+                    "Importance Sampling: all the weights are zeros. This is probably caused by a strong incoherence between bottom and upper base forecasts.")
+  }else{
+    
+    # Effective sample size
+    n_eff = (sum(w)^2) / sum(w^2)
+    
+    # 2. n_eff < threshold
+    if (n_eff < n_eff_min) {
+      warning = TRUE
+      warning_code = c(warning_code, 2) 
+      warning_msg = c(warning_msg, 
+                      paste0("Importance Sampling: effective_sample_size= ", round(n_eff,2), " (< ", n_eff_min,")."))
+    }
+    
+    # 3. n_eff < p*n, e.g. p = 0.05
+    if (n_eff < p_n_eff*n) {
+      warning = TRUE
+      warning_code = c(warning_code, 3) 
+      warning_msg = c(warning_msg, 
+                      paste0("Importance Sampling: effective_sample_size= ", round(n_eff,2), " (< ", round(p_n_eff * 100, 2),"%)."))
+    }
   }
-  
   res = list(warning = warning,
              warning_code = warning_code,
              warning_msg = warning_msg,
@@ -99,7 +102,7 @@
     w = .distr_pmf(b, u, distr_)   # this never returns NA
   }
   # be sure not to return all 0 weights, return ones instead
-  if (sum(w) == 0) { w = w + 1 }
+  # if (sum(w) == 0) { w = w + 1 }
   return(w)
 }
 
@@ -137,9 +140,11 @@
 #' 
 #' Warnings are triggered from the Importance Sampling step if:
 #' 
-#' * weights are all zeros;
+#' * weights are all zeros, then the upper is ignored during reconciliation;
 #' * the effective sample size is < 200;
-#' * the effective sample size is < 5% of the sample size (`num_samples` if `in_type` is 'params' or the size of the base forecast if if `in_type` is 'samples').
+#' * the effective sample size is < 1% of the sample size (`num_samples` if `in_type` is 'params' or the size of the base forecast if if `in_type` is 'samples').
+#' 
+#' Note that warnings are an indication that the base forecasts might have issues. Please check the base forecasts in case of warnings.
 #'
 #' @param S Summing matrix (n x n_bottom).
 #' @param base_forecasts A list containing the base_forecasts, see details.
@@ -331,6 +336,9 @@ reconc_BUIS <- function(S,
         warning(wmsg)
       }
     }
+    if(check_weights.res$warning & (1 %in% check_weights.res$warning_code)){
+      next
+    }
     B[, b_mask] = .resample(B[, b_mask], weights)
   }
 
@@ -361,7 +369,10 @@ reconc_BUIS <- function(S,
         warning(wmsg)
       }
     }
-    B = .resample(B, weights)
+    if(!(check_weights.res$warning & (1 %in% check_weights.res$warning_code))){
+      B = .resample(B, weights)
+    }
+    
   }
 
   B = t(B)
@@ -391,8 +402,8 @@ reconc_BUIS <- function(S,
 #' The order of the base forecast means and covariance is given by the order of the time series in the summing matrix.
 #' 
 #' The function returns only the reconciled parameters of the bottom variables.
-#' The reconciled parameters for the upper variables or reconciled samples for the entire hierarchy can be obtained from these.
-#' The Examples section shows how.
+#' The reconciled upper parameters and the reconciled samples for the entire hierarchy can be obtained from the reconciled bottom parameters. 
+#' See the example section.
 #'
 #'
 #' @return A list containing the bottom reconciled forecasts. The list has the following named elements:
diff --git a/R/utils.R b/R/utils.R
index 6abcf49..4feeb00 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -151,7 +151,7 @@
     distr_bottom = distr[sh.res$bottom_idxs][[bi]]
     rel_upper_i = sh.res$A[,bi]
     rel_distr_upper = unlist(distr[sh.res$upper_idxs])[rel_upper_i == 1]
-    err_message = "A continuous bottom distribution is child of a discrete one"
+    err_message = "A continuous bottom distribution is child of a discrete one."
     if (distr_bottom == .DISTR_SET2[1]) {              
       if (sum(rel_distr_upper == .DISTR_SET2[2]) | 
           sum(rel_distr_upper == .DISTR_SET[2]) | sum(rel_distr_upper == .DISTR_SET[3])) {
diff --git a/man/reconc_BUIS.Rd b/man/reconc_BUIS.Rd
index 7a1168f..5539108 100644
--- a/man/reconc_BUIS.Rd
+++ b/man/reconc_BUIS.Rd
@@ -73,10 +73,12 @@ The order of the \code{base_forecast} list is given by the order of the time ser
 
 Warnings are triggered from the Importance Sampling step if:
 \itemize{
-\item weights are all zeros;
+\item weights are all zeros, then the upper is ignored during reconciliation;
 \item the effective sample size is < 200;
-\item the effective sample size is < 5\% of the sample size (\code{num_samples} if \code{in_type} is 'params' or the size of the base forecast if if \code{in_type} is 'samples').
+\item the effective sample size is < 1\% of the sample size (\code{num_samples} if \code{in_type} is 'params' or the size of the base forecast if if \code{in_type} is 'samples').
 }
+
+Note that warnings are an indication that the base forecasts might have issues. Please check the base forecasts in case of warnings.
 }
 \examples{
 
diff --git a/man/reconc_gaussian.Rd b/man/reconc_gaussian.Rd
index abc5151..e4f70ee 100644
--- a/man/reconc_gaussian.Rd
+++ b/man/reconc_gaussian.Rd
@@ -27,8 +27,8 @@ Closed form computation of the reconciled forecasts in case of Gaussian base for
 The order of the base forecast means and covariance is given by the order of the time series in the summing matrix.
 
 The function returns only the reconciled parameters of the bottom variables.
-The reconciled parameters for the upper variables or reconciled samples for the entire hierarchy can be obtained from these.
-The Examples section shows how.
+The reconciled upper parameters and the reconciled samples for the entire hierarchy can be obtained from the reconciled bottom parameters.
+See the example section.
 }
 \examples{
 
diff --git a/tests/testthat/test-examples.R b/tests/testthat/test-examples.R
index c57e13a..5ae3894 100644
--- a/tests/testthat/test-examples.R
+++ b/tests/testthat/test-examples.R
@@ -13,7 +13,7 @@ test_that("Monthly, in_type=='params', distr='gaussian'",{
   res.gauss = reconc_gaussian(S, base_forecasts_in[[1]], diag(base_forecasts_in[[2]]^2))
   # Test
   b_mask = rowSums(S) == 1
-  m = mean(rowMeans(res.buis$reconciled_samples)[b_mask] - as.numeric(rbind(res.gauss$upper_reconciled_mean, res.gauss$bottom_reconciled_mean)))
+  m = mean(rowMeans(res.buis$reconciled_samples)[b_mask] - as.numeric(res.gauss$bottom_reconciled_mean))
   expect_equal(abs(m) < 8e-3, TRUE)
 })
 
@@ -32,7 +32,7 @@ test_that("Weekly, in_type=='params', distr='gaussian'",{
   res.gauss = reconc_gaussian(S, base_forecasts_in[[1]], diag(base_forecasts_in[[2]]^2))
   # Test
   b_mask = rowSums(S) == 1
-  m = mean(rowMeans(res.buis$reconciled_samples)[b_mask] - as.numeric(rbind(res.gauss$upper_reconciled_mean, res.gauss$bottom_reconciled_mean)))
+  m = mean(rowMeans(res.buis$reconciled_samples)[b_mask] - as.numeric(res.gauss$bottom_reconciled_mean))
   expect_equal(abs(m) < 2e-2, TRUE)
 })
 
diff --git a/tests/testthat/test-weightswarn.R b/tests/testthat/test-weightswarn.R
index 3dba1ce..1724c77 100644
--- a/tests/testthat/test-weightswarn.R
+++ b/tests/testthat/test-weightswarn.R
@@ -19,7 +19,7 @@ test_that("Test effective sample size", {
   
   # Try the warning message
   # base_forecast = list(u,b1,b2)
-  # a = bayesRecon::reconc_BUIS(S, base_forecast, in_type = "samples", distr = list("continuous","discrete","discrete"), seed=42)
+  # a = reconc_BUIS(S, base_forecast, in_type = "samples", distr = list("continuous","discrete","discrete"), seed=42)
   
   # -----------
   n = 199
@@ -34,7 +34,7 @@ test_that("Test effective sample size", {
   
   check_w = .check_weigths(w, n_eff_min=200)
   expect_equal(check_w$warning, TRUE)
-  expect_equal(check_w$warning_code, c(1,2))
+  expect_equal(check_w$warning_code, 1)
   expect_equal(check_w$n_eff, n)
   
   # Try the warning message
@@ -45,14 +45,14 @@ test_that("Test effective sample size", {
   n = 2000
   b1 = rpois(n=n, lambda = 3)
   b2 = rpois(n=n, lambda = 4)
-  u = rnorm(n=n, mean = 15, sd = 1)
+  u = rnorm(n=n, mean = 18, sd = 1)
   B = cbind(b1,b2)
   c = matrix(S[1,])
   b = (B %*% c)
   
   w = .compute_weights(b, u, "samples", "continuous")
   
-  check_w = .check_weigths(w, n_eff_min=200, p_n_eff=0.05)
+  check_w = .check_weigths(w, n_eff_min=200, p_n_eff=0.01)
   expect_equal(check_w$warning, TRUE)
   expect_equal(check_w$warning_code, c(2,3))
   expect_equal(check_w$n_eff < 200, TRUE)