From 6a0eccb0d374888c26473674cfaee867a061f57a Mon Sep 17 00:00:00 2001
From: etiennebacher
Date: Mon, 7 Oct 2024 22:21:08 +0200
Subject: [PATCH] Do not error when bootstrapping CIs if sample is too sparse

---
 DESCRIPTION                                 |  2 +-
 NEWS.md                                     |  7 +++++++
 R/describe_distribution.R                   | 25 ++++++++++++++++++++-----
 tests/testthat/test-describe_distribution.R | 12 +++++++++++
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f3e7599af..8dd92fb7e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.13.0
+Version: 0.13.0.1
 Authors@R: c(
     person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531")),
diff --git a/NEWS.md b/NEWS.md
index f2ade5883..68d290dbe 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,10 @@
+# datawizard (development)
+
+BUG FIXES
+
+* `describe_distribution()` no longer errors if sample was too sparse to compute
+  CIs. Instead, it warns the user and returns `NA` (#550).
+
 # datawizard 0.13.0
 
 BREAKING CHANGES
diff --git a/R/describe_distribution.R b/R/describe_distribution.R
index 41f2a8b83..b35672799 100644
--- a/R/describe_distribution.R
+++ b/R/describe_distribution.R
@@ -186,11 +186,26 @@ describe_distribution.numeric <- function(x,
   # Confidence Intervals
   if (!is.null(ci)) {
     insight::check_if_installed("boot")
-    results <- boot::boot(
-      data = x,
-      statistic = .boot_distribution,
-      R = iterations,
-      centrality = centrality
+    results <- tryCatch(
+      boot::boot(
+        data = x,
+        statistic = .boot_distribution,
+        R = iterations,
+        centrality = centrality
+      ),
+      error = function(e) {
+        # Only the known "too sparse" failure is downgraded to a warning; the
+        # two NAs stand in for the bootstrap replicates read below as `$t`.
+        if (identical(conditionMessage(e), "sample is too sparse to find TD")) {
+          insight::format_warning(
+            "When bootstrapping CIs, sample was too sparse to find TD. Returning NA for CIs."
+          )
+          return(list(t = c(NA_real_, NA_real_)))
+        }
+        # Re-signal anything else unchanged: falling through would return NULL,
+        # drop the original error, and leave `results$t` empty.
+        stop(e)
+      }
     )
     out_ci <- bayestestR::ci(results$t, ci = ci, verbose = FALSE)
     out <- cbind(out, data.frame(CI_low = out_ci$CI_low[1], CI_high = out_ci$CI_high[1]))
diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R
index 83d2abb33..eb2db30ea 100644
--- a/tests/testthat/test-describe_distribution.R
+++ b/tests/testthat/test-describe_distribution.R
@@ -286,3 +286,15 @@ test_that("describe_distribution formatting", {
   x <- describe_distribution(iris$Sepal.Width, quartiles = TRUE)
   expect_snapshot(format(x))
 })
+
+# other -----------------------------------
+
+test_that("return NA in CI if sample is too sparse", {
+  set.seed(123456)
+  expect_warning(
+    res <- describe_distribution(mtcars[mtcars$cyl=="6",], wt, centrality = "map", ci = 0.95),
+    "When bootstrapping CIs, sample was too sparse to find TD"
+  )
+  expect_identical(res$CI_low, NA)
+  expect_identical(res$CI_high, NA)
+})