From 6cba7edfebd29bbdd2c956a6f9ea587f6bc3b485 Mon Sep 17 00:00:00 2001
From: Dirk Schumacher <mail@dirk-schumacher.net>
Date: Sat, 12 Oct 2024 12:28:50 +0200
Subject: [PATCH] Fix bug in prevalence computation with NAs in group

Fixed a bug during prevalence computation when a value used as a group
(like sex) contains `NA` and at the same time all values in that group are
`NA` as well. Previously this resulted in a hard stop of the computation,
now the code gracefully handles it.
---
 NEWS.md                          |  7 +++++++
 R/prevalence.R                   |  8 ++++++--
 tests/testthat/test-prevalence.R | 12 ++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 3f5d55b..3bb93bd 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,12 @@
 # anthro (development version)
 
+## Bugfix
+
+* Fixed a bug during prevalence computation when a value used as a group
+  (like sex) contains `NA` and at the same time all values in that group are
+  `NA` as well. Previously this resulted in a hard stop of the computation,
+  now the code gracefully handles it.
+
 # anthro 1.0.1
 
 * Fixed the package documentation to adhere to CRAN guidelines.
diff --git a/R/prevalence.R b/R/prevalence.R
index fa3ae54..3b60427 100644
--- a/R/prevalence.R
+++ b/R/prevalence.R
@@ -773,7 +773,7 @@ compute_prevalence_estimates_for_column <- function(survey_design, indicator_nam
 
   res <- if (all_na) {
     data.frame(
-      Group = as.character(unique(survey_design$variables[[subset_col_name]])),
+      Group = unique_groups(survey_design$variables[[subset_col_name]]),
       r = NA_real_,
       se = NA_real_,
       ll = NA_real_,
@@ -829,7 +829,7 @@ compute_prevalence_zscore_summaries <- function(survey_design,
   all_na <- all(is.na(survey_design$variables[[zscore_col_name]]))
   res <- if (all_na) {
     data.frame(
-      Group = as.character(unique(survey_design$variables[[subset_col_name]])),
+      Group = unique_groups(survey_design$variables[[subset_col_name]]),
       r = NA_real_,
       se = NA_real_,
       ll = NA_real_,
@@ -895,3 +895,7 @@ compute_prevalence_zscore_summaries <- function(survey_design,
   colnames(res) <- c("Group", value_col_names)
   res
 }
+
+unique_groups <- function(values) {
+  as.character(unique(values[!is.na(values)]))
+}
diff --git a/tests/testthat/test-prevalence.R b/tests/testthat/test-prevalence.R
index 6ecbef5..c5a1d2b 100644
--- a/tests/testthat/test-prevalence.R
+++ b/tests/testthat/test-prevalence.R
@@ -400,3 +400,15 @@ test_that("var can be computed even if some levels have just one observation", {
   expect_true(is.na(res[res$Group == "Sex: Female", "HA_stdev"]))
   expect_false(is.na(res[res$Group == "Sex: Male", "HA_stdev"]))
 })
+
+test_that("sex = NA with all values NA does not stop computation", {
+  expect_silent(
+    anthro_prevalence(
+      sex = c(1, 1, 1, 1, 1, NA),
+      age = 30,
+      is_age_in_month = TRUE,
+      weight = 500,
+      lenhei = 700
+    )
+  )
+})