diff --git a/R/data_summary.R b/R/data_summary.R
index af1ec65a4..5e3627c54 100644
--- a/R/data_summary.R
+++ b/R/data_summary.R
@@ -64,6 +64,18 @@ data_summary.data.frame <- function(x, ..., by = NULL) {
     out <- data.frame(summarise)
     colnames(out) <- vapply(summarise, names, character(1))
   } else {
+    # sanity check - is "by" a character string?
+    if (!is.character(by)) {
+      insight::format_error("Argument `by` must be a character string, indicating the name of a variable in the data.")
+    }
+    # is "by" in the data?
+    if (!all(by %in% colnames(x))) {
+      by_not_found <- by[!by %in% colnames(x)]
+      insight::format_error(
+        paste0("Variable \"", by_not_found, "\" not found in the data."),
+        .misspelled_string(colnames(x), by_not_found, "Possibly misspelled?")
+      )
+    }
     # split data
     splitted_data <- split(x, x[by])
     out <- lapply(splitted_data, function(s) {
diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R
new file mode 100644
index 000000000..5d48ee84d
--- /dev/null
+++ b/tests/testthat/test-data_summary.R
@@ -0,0 +1,138 @@
+test_that("data_summary, single row summary", {
+  data(iris)
+  out <- data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+  expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4)
+  expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4)
+})
+
+
+test_that("data_summary, single row summary, string expression", {
+  data(iris)
+  out <- data_summary(iris, "MW = mean(Sepal.Width)", "SD = sd(Sepal.Width)")
+  expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4)
+  expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4)
+})
+
+
+test_that("data_summary, summary for groups", {
+  data(iris)
+  out <- data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = "Species")
+  expect_equal(
+    out$MW,
+    aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width,
+    tolerance = 1e-4
+  )
+})
+
+
+test_that("data_summary, summary for groups, string expression", {
+  data(iris)
+  out <- data_summary(
+    iris,
+    "MW = mean(Sepal.Width)",
+    "SD = sd(Sepal.Width)",
+    by = "Species"
+  )
+  expect_equal(
+    out$MW,
+    aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width,
+    tolerance = 1e-4
+  )
+})
+
+
+test_that("data_summary, grouped data frames", {
+  data(iris)
+  d <- data_group(iris, "Species")
+  out <- data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+  expect_equal(
+    out$MW,
+    aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  # "by" overrides groups
+  data(mtcars)
+  d <- data_group(mtcars, "gear")
+  out <- data_summary(d, MW = mean(mpg), SD = sd(mpg), by = "am")
+  expect_identical(
+    out$MW,
+    aggregate(mtcars["mpg"], list(mtcars$am), mean)$mpg
+  )
+})
+
+
+test_that("data_summary, summary for multiple groups", {
+  data(mtcars)
+  out <- data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear"))
+  expect_equal(
+    out$MW,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg,
+    tolerance = 1e-4
+  )
+  x <- data_group(mtcars, c("am", "gear"))
+  out <- data_summary(x, MW = mean(mpg), SD = sd(mpg))
+  expect_equal(
+    out$MW,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg,
+    tolerance = 1e-4
+  )
+})
+
+
+test_that("data_summary, errors", {
+  data(iris)
+  data(mtcars)
+  # "by" must be character
+  expect_error(
+    data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = 5),
+    regexp = "Argument `by` must be a character string"
+  )
+  # "by" must be in data
+  expect_error(
+    data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = "Speceis"),
+    regexp = "Variable \"Speceis\" not"
+  )
+  # by for multiple variables
+  expect_error(
+    data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("bam", "gear")),
+    regexp = "Variable \"bam\" not"
+  )
+  # "?" is a regex quantifier, so match the suggestion text literally
+  expect_error(
+    data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("bam", "geas")),
+    regexp = "Did you mean one of \"am\" or \"gear\"?",
+    fixed = TRUE
+  )
+  # not a data frame
+  # "(" and ")" are regex metacharacters, so match the message literally
+  expect_error(
+    data_summary(iris$Sepal.Width, MW = mean(Sepal.Width), SD = sd(Sepal.Width)),
+    regexp = "only works for (grouped) data frames",
+    fixed = TRUE
+  )
+})