From 1545cb210b3ecbeef2360f1eeed5a20bcc95912c Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 15:42:42 +0100 Subject: [PATCH 01/15] Fix lintrs --- R/data_rotate.R | 2 +- R/data_seek.R | 18 +++++++++--------- R/data_separate.R | 40 +++++++++++++++++++--------------------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/R/data_rotate.R b/R/data_rotate.R index f9a3389d3..2bb39a85d 100644 --- a/R/data_rotate.R +++ b/R/data_rotate.R @@ -56,7 +56,7 @@ data_rotate <- function(data, rownames = NULL, colnames = FALSE, verbose = TRUE) # warning after possible removal of columns if (verbose && insight::n_unique(vapply(data, typeof, FUN.VALUE = character(1L))) > 1L) { - insight::format_warning("Your data frame contains mixed types of data. After transposition, all variables will be transformed into characters.") + insight::format_warning("Your data frame contains mixed types of data. After transposition, all variables will be transformed into characters.") # nolint } # rotate data frame by 90 degrees diff --git a/R/data_seek.R b/R/data_seek.R index c0a56ab8f..c09e57489 100644 --- a/R/data_seek.R +++ b/R/data_seek.R @@ -71,7 +71,7 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE) pos <- unlist(lapply(pattern, function(search_pattern) { # search in variable names? if (any(seek %in% c("names", "columns", "column_names", "all"))) { - pos1 <- which(grepl(search_pattern, colnames(data))) + pos1 <- grep(search_pattern, colnames(data)) # find in near distance? if (fuzzy) { pos1 <- c(pos1, .fuzzy_grep(x = colnames(data), pattern = search_pattern)) @@ -80,15 +80,15 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE) # search in variable labels? if (any(seek %in% c("labels", "all"))) { - labels <- insight::compact_character(unlist(lapply(data, attr, which = "label", exact = TRUE))) - if (!is.null(labels) && length(labels)) { - found <- grepl(search_pattern, labels) - pos2 <- match(names(labels)[found], colnames(data)) + var_labels <- insight::compact_character(unlist(lapply(data, attr, which = "label", exact = TRUE))) + if (!is.null(var_labels) && length(var_labels)) { + found <- grepl(search_pattern, var_labels) + pos2 <- match(names(var_labels)[found], colnames(data)) # find in near distanc? if (fuzzy) { - found <- .fuzzy_grep(x = labels, pattern = search_pattern) + found <- .fuzzy_grep(x = var_labels, pattern = search_pattern) if (length(found)) { - pos2 <- c(pos2, match(names(labels)[found], colnames(data))) + pos2 <- c(pos2, match(names(var_labels)[found], colnames(data))) } } } @@ -129,7 +129,7 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE) pos <- unique(pos) # variable labels of matching variables - labels <- vapply( + var_labels <- vapply( colnames(data[pos]), function(i) { l <- attr(data[[i]], "label", exact = TRUE) @@ -145,7 +145,7 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE) out <- data.frame( index = pos, column = colnames(data)[pos], - labels = labels, + labels = var_labels, stringsAsFactors = FALSE ) # no row names diff --git a/R/data_separate.R b/R/data_separate.R index c5e83183d..d2c3362b0 100644 --- a/R/data_separate.R +++ b/R/data_separate.R @@ -254,7 +254,7 @@ data_separate <- function(data, # catch error if (is.null(separated_columns)) { insight::format_error( - "Something went wrong. Probably the number of provided column names did not match number of newly created columns?" + "Something went wrong. Probably the number of provided column names did not match number of newly created columns?" # nolint ) } @@ -338,29 +338,27 @@ data_separate <- function(data, out <- rep(NA_character_, times = n_cols) } else if (n_values > n_cols) { # we have more values than required - drop extra columns - if (extra == "drop_left") { - out <- i[(n_values - n_cols + 1):n_values] - } else if (extra == "drop_right") { - out <- i[1:n_cols] - } else if (extra == "merge_left") { - out <- paste(i[1:(n_values - n_cols + 1)], collapse = " ") - out <- c(out, i[(n_values - n_cols + 2):n_values]) - } else { - out <- i[1:(n_cols - 1)] - out <- c(out, paste(i[n_cols:n_values], collapse = " ")) - } + out <- switch(extra, + drop_left = i[(n_values - n_cols + 1):n_values], + drop_right = i[1:n_cols], + merge_left = { + tmp <- paste(i[1:(n_values - n_cols + 1)], collapse = " ") + c(tmp, i[(n_values - n_cols + 2):n_values]) + }, + { + tmp <- i[1:(n_cols - 1)] + c(tmp, paste(i[n_cols:n_values], collapse = " ")) + } + ) warn_extra <- TRUE } else if (n_values < n_cols) { # we have fewer values than required - fill columns - if (fill == "left") { - out <- c(rep(NA_character_, times = n_cols - n_values), i) - } else if (fill == "right") { - out <- c(i, rep(NA_character_, times = n_cols - n_values)) - } else if (fill == "value_left") { - out <- c(rep(i[1], times = n_cols - n_values), i) - } else { - out <- c(i, rep(i[length(i)], times = n_cols - n_values)) - } + out <- switch(fill, + left = c(rep(NA_character_, times = n_cols - n_values), i), + right = c(i, rep(NA_character_, times = n_cols - n_values)), + value_left = c(rep(i[1], times = n_cols - n_values), i), + c(i, rep(i[length(i)], times = n_cols - n_values)) + ) warn_fill <- TRUE } else { out <- i From 914a504d68a3c8d3121c04fee7cca6a3411e4737 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 15:51:03 +0100 Subject: [PATCH 02/15] lintr --- R/describe_distribution.R | 18 ++++---- tests/testthat/test-describe_distribution.R | 47 +++++++++++---------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/R/describe_distribution.R b/R/describe_distribution.R index 00a8a2475..ee5c3a903 100644 --- a/R/describe_distribution.R +++ b/R/describe_distribution.R @@ -85,15 +85,15 @@ describe_distribution.list <- function(x, # ex: list(mtcars$mpg, mtcars$cyl) -> c("mtcars$mpg", "mtcars$cyl") nm <- vapply(sys.call()[[2]], insight::safe_deparse, FUN.VALUE = character(1L))[-1] - if (!isTRUE(include_factors)) { - x <- x[num_el] + if (isTRUE(include_factors)) { + x <- x[c(num_el, factor_el)] if (length(nm) != 0) { - nm <- nm[num_el] + nm <- nm[c(num_el, factor_el)] } } else { - x <- x[c(num_el, factor_el)] + x <- x[num_el] if (length(nm) != 0) { - nm <- nm[c(num_el, factor_el)] + nm <- nm[num_el] } } @@ -123,12 +123,12 @@ describe_distribution.list <- function(x, })) - if (!is.null(names(x))) { - empty_names <- which(names(x) == "") + if (is.null(names(x))) { + new_names <- nm + } else { + empty_names <- which(!nzchar(names(x), keepNA = TRUE)) new_names <- names(x) new_names[empty_names] <- nm[empty_names] - } else { - new_names <- nm } out$Variable <- new_names diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 5976286ae..a33e2c52a 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -4,7 +4,7 @@ test_that("describe_distribution - numeric: works with basic numeric vector", { skip_if_not_installed("bayestestR") x <- describe_distribution(mtcars$mpg) - expect_equal(dim(x), c(1, 9)) + expect_identical(dim(x), c(1L, 9L)) expect_equal(round(x$Mean), 20) }) @@ -140,14 +140,14 @@ test_that("describe_distribution - list: works with include_factors", { x2 <- describe_distribution(list(mtcars$mpg, factor(mtcars$cyl)), include_factors = TRUE ) - expect_equal(dim(x2), c(2, 10)) - expect_equal(x2$Variable, c("mtcars$mpg", "factor(mtcars$cyl)")) + expect_identical(dim(x2), c(2L, 10L)) + expect_identical(x2$Variable, c("mtcars$mpg", "factor(mtcars$cyl)")) x3 <- describe_distribution(list(mtcars$mpg, foo = factor(mtcars$cyl)), include_factors = TRUE ) - expect_equal(dim(x3), c(2, 10)) - expect_equal(x3$Variable, c("mtcars$mpg", "foo")) + expect_identical(dim(x3), c(2L, 10L)) + expect_identical(x3$Variable, c("mtcars$mpg", "foo")) }) test_that("describe_distribution - list: correctly removes character elements", { @@ -167,10 +167,10 @@ test_that("describe_distribution - list: correctly handles variable names", { named <- describe_distribution(list(foo = mtcars$mpg, foo2 = mtcars$cyl)) mix <- describe_distribution(list(foo = mtcars$mpg, mtcars$cyl)) - expect_equal(stored$Variable, c("Var_1", "Var_2")) - expect_equal(unnamed$Variable, c("mtcars$mpg", "mtcars$cyl")) - expect_equal(named$Variable, c("foo", "foo2")) - expect_equal(mix$Variable, c("foo", "mtcars$cyl")) + expect_identical(stored$Variable, c("Var_1", "Var_2")) + expect_identical(unnamed$Variable, c("mtcars$mpg", "mtcars$cyl")) + expect_identical(named$Variable, c("foo", "foo2")) + expect_identical(mix$Variable, c("foo", "mtcars$cyl")) }) test_that("describe_distribution - list: correctly handles missing values", { @@ -182,8 +182,8 @@ test_that("describe_distribution - list: correctly handles missing values", { test[1] <- NA test2[1] <- NA with_missing <- describe_distribution(list(test, test2)) - expect_equal(unique(with_missing$n), 31) - expect_equal(unique(with_missing$n_Missing), 1) + expect_identical(unique(with_missing$n), 31) + expect_identical(unique(with_missing$n_Missing), 1) expect_false(unique(with_missing$Mean == no_missing$Mean)) }) @@ -191,7 +191,7 @@ test_that("describe_distribution - list: works with quartiles", { skip_if_not_installed("bayestestR") x <- describe_distribution(list(mtcars$mpg, mtcars$cyl), quartiles = TRUE) - expect_equal(dim(x), c(2, 12)) + expect_identical(dim(x), c(2L, 12L)) expect_true("Q1" %in% names(x)) expect_true("Q3" %in% names(x)) }) @@ -200,7 +200,7 @@ test_that("describe_distribution - list: works with range", { skip_if_not_installed("bayestestR") x <- describe_distribution(list(mtcars$mpg, mtcars$cyl), range = FALSE) - expect_equal(dim(x), c(2, 8)) + expect_identical(dim(x), c(2L, 8L)) expect_false("min" %in% names(x)) expect_false("max" %in% names(x)) }) @@ -215,7 +215,7 @@ test_that("describe_distribution - select", { data(iris) out <- describe_distribution(iris, select = starts_with("Petal")) - expect_equal(out$Variable, c("Petal.Length", "Petal.Width")) + expect_identical(out$Variable, c("Petal.Length", "Petal.Width")) expect_equal(out$Mean, c(3.758000, 1.199333), tolerance = 1e-3) expect_null(describe_distribution(iris, select = "Species")) @@ -235,12 +235,12 @@ test_that("describe_distribution - grouped df", { x <- data_group(iris, Species) out <- describe_distribution(x, select = starts_with("Petal")) - expect_equal(out$.group, c( + expect_identical(out$.group, c( "Species=setosa", "Species=setosa", "Species=versicolor", "Species=versicolor", "Species=virginica", "Species=virginica" )) - expect_equal(out$Variable, c( + expect_identical(out$Variable, c( "Petal.Length", "Petal.Width", "Petal.Length", "Petal.Width", "Petal.Length", "Petal.Width" @@ -255,18 +255,18 @@ test_that("distribution_mode works as expected", { skip_if_not_installed("bayestestR") # atomic vector - expect_equal(distribution_mode(c(1, 2, 3, 3, 4, 5)), 3) - expect_equal(distribution_mode(c(1, 2, 3, 3, 4, 4, 5)), 3) - expect_equal(distribution_mode(c(1.5, 2.3, 3.7, 3.7, 4.0, 5)), 3.7) + expect_identical(distribution_mode(c(1, 2, 3, 3, 4, 5)), 3) + expect_identical(distribution_mode(c(1, 2, 3, 3, 4, 4, 5)), 3) + expect_identical(distribution_mode(c(1.5, 2.3, 3.7, 3.7, 4.0, 5)), 3.7) # list - expect_equal(distribution_mode(list(1, 2, 3, 3, 4, 5)), list(3)) + expect_identical(distribution_mode(list(1, 2, 3, 3, 4, 5)), list(3)) # scalar - expect_equal(distribution_mode("a"), "a") + expect_identical(distribution_mode("a"), "a") # empty - expect_null(distribution_mode(c())) + expect_null(distribution_mode(NULL)) }) # select helpers ------------------------------ @@ -275,6 +275,7 @@ test_that("describe_distribution regex", { expect_equal( describe_distribution(mtcars, select = "pg", regex = TRUE), - describe_distribution(mtcars, select = "mpg") + describe_distribution(mtcars, select = "mpg"), + ignore_attr = TRUE ) }) From 5ce56de7c2cfee54cebad8d58d23bbd363da0600 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 15:51:31 +0100 Subject: [PATCH 03/15] lintr --- R/labels_to_levels.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/labels_to_levels.R b/R/labels_to_levels.R index ed63e0583..c1ff97a16 100644 --- a/R/labels_to_levels.R +++ b/R/labels_to_levels.R @@ -79,7 +79,7 @@ labels_to_levels.data.frame <- function(x, # create the new variables and updates "select", so new variables are processed if (!isFALSE(append)) { # process arguments - args <- .process_append( + arguments <- .process_append( x, select, append, @@ -89,8 +89,8 @@ labels_to_levels.data.frame <- function(x, keep_character = FALSE ) # update processed arguments - x <- args$x - select <- args$select + x <- arguments$x + select <- arguments$select } x[select] <- lapply( From bccd20d9fd798907efd3a41d8100be105071a9d5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 18:39:46 +0100 Subject: [PATCH 04/15] fix test --- tests/testthat/test-describe_distribution.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index a33e2c52a..5871708b2 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -182,8 +182,8 @@ test_that("describe_distribution - list: correctly handles missing values", { test[1] <- NA test2[1] <- NA with_missing <- describe_distribution(list(test, test2)) - expect_identical(unique(with_missing$n), 31) - expect_identical(unique(with_missing$n_Missing), 1) + expect_identical(unique(with_missing$n), 31L) + expect_identical(unique(with_missing$n_Missing), 1L) expect_false(unique(with_missing$Mean == no_missing$Mean)) }) From a57c33b4e8e5ddf949bc0c0917ca42183f6963c3 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 18:41:57 +0100 Subject: [PATCH 05/15] lintr --- tests/testthat/test-describe_distribution.R | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 5871708b2..1ba224061 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -15,8 +15,8 @@ test_that("describe_distribution - numeric: correctly handles missing values", { test <- mtcars$mpg test[1] <- NA with_missing <- describe_distribution(test) - expect_equal(with_missing$n, 31) - expect_equal(with_missing$n_Missing, 1) + expect_identical(with_missing$n, 31L) + expect_identical(with_missing$n_Missing, 1L) expect_false(with_missing$Mean == no_missing$Mean) }) @@ -24,7 +24,7 @@ test_that("describe_distribution - numeric: works with quartiles", { skip_if_not_installed("bayestestR") x <- describe_distribution(mtcars$mpg, quartiles = TRUE) - expect_equal(dim(x), c(1, 11)) + expect_identical(dim(x), c(1L, 11L)) expect_true("Q1" %in% names(x)) expect_true("Q3" %in% names(x)) }) @@ -33,7 +33,7 @@ test_that("describe_distribution - numeric: works with range", { skip_if_not_installed("bayestestR") x <- describe_distribution(mtcars$mpg, range = FALSE) - expect_equal(dim(x), c(1, 7)) + expect_identical(dim(x), c(1L, 7L)) expect_false("min" %in% names(x)) expect_false("max" %in% names(x)) }) @@ -53,7 +53,7 @@ test_that("describe_distribution - data frame: works with basic data frame", { skip_if_not_installed("bayestestR") x <- describe_distribution(mtcars) - expect_equal(dim(x), c(11, 10)) + expect_identical(dim(x), c(11L, 10L)) expect_equal(round(x[1, "Mean"]), 20) }) @@ -64,8 +64,8 @@ test_that("describe_distribution - data frame: correctly handles missing values" test <- mtcars test[1, ] <- NA with_missing <- describe_distribution(test) - expect_equal(unique(with_missing$n), 31) - expect_equal(unique(with_missing$n_Missing), 1) + expect_identical(unique(with_missing$n), 31L) + expect_identical(unique(with_missing$n_Missing), 1L) expect_false(unique(with_missing$Mean == no_missing$Mean)) }) @@ -73,7 +73,7 @@ test_that("describe_distribution - data frame: works with quartiles", { skip_if_not_installed("bayestestR") x <- describe_distribution(mtcars, quartiles = TRUE) - expect_equal(dim(x), c(11, 12)) + expect_identical(dim(x), c(11L, 12L)) expect_true("Q1" %in% names(x)) expect_true("Q3" %in% names(x)) }) @@ -82,7 +82,7 @@ test_that("describe_distribution - data frame: works with range", { skip_if_not_installed("bayestestR") x <- describe_distribution(mtcars, range = FALSE) - expect_equal(dim(x), c(11, 8)) + expect_identical(dim(x), c(11L, 8L)) expect_false("min" %in% names(x)) expect_false("max" %in% names(x)) }) @@ -120,13 +120,13 @@ test_that("describe_distribution - list: works with basic list", { named <- describe_distribution(list(foo = mtcars$mpg, foo2 = mtcars$cyl)) mix <- describe_distribution(list(foo = mtcars$mpg, mtcars$cyl)) - expect_equal(dim(stored), c(2, 10)) + expect_identical(dim(stored), c(2L, 10L)) expect_equal(round(stored$Mean), c(20, 6)) - expect_equal(dim(unnamed), c(2, 10)) + expect_identical(dim(unnamed), c(2L, 10L)) expect_equal(round(unnamed$Mean), c(20, 6)) - expect_equal(dim(named), c(2, 10)) + expect_identical(dim(named), c(2L, 10L)) expect_equal(round(named$Mean), c(20, 6)) - expect_equal(dim(mix), c(2, 10)) + expect_identical(dim(mix), c(2L, 10L)) expect_equal(round(mix$Mean), c(20, 6)) }) From 9bc14f3f4184f1ac9dcab932792c18dae4214305 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 20:40:37 +0100 Subject: [PATCH 06/15] lintr --- tests/testthat/test-describe_distribution.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 1ba224061..7ec32a7f2 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -5,7 +5,7 @@ test_that("describe_distribution - numeric: works with basic numeric vector", { x <- describe_distribution(mtcars$mpg) expect_identical(dim(x), c(1L, 9L)) - expect_equal(round(x$Mean), 20) + expect_identical(round(x$Mean), 20) }) test_that("describe_distribution - numeric: correctly handles missing values", { @@ -54,7 +54,7 @@ test_that("describe_distribution - data frame: works with basic data frame", { x <- describe_distribution(mtcars) expect_identical(dim(x), c(11L, 10L)) - expect_equal(round(x[1, "Mean"]), 20) + expect_identical(round(x[1, "Mean"]), 20) }) test_that("describe_distribution - data frame: correctly handles missing values", { @@ -121,13 +121,13 @@ test_that("describe_distribution - list: works with basic list", { mix <- describe_distribution(list(foo = mtcars$mpg, mtcars$cyl)) expect_identical(dim(stored), c(2L, 10L)) - expect_equal(round(stored$Mean), c(20, 6)) + expect_identical(round(stored$Mean), c(20, 6)) expect_identical(dim(unnamed), c(2L, 10L)) - expect_equal(round(unnamed$Mean), c(20, 6)) + expect_identical(round(unnamed$Mean), c(20, 6)) expect_identical(dim(named), c(2L, 10L)) - expect_equal(round(named$Mean), c(20, 6)) + expect_identical(round(named$Mean), c(20, 6)) expect_identical(dim(mix), c(2L, 10L)) - expect_equal(round(mix$Mean), c(20, 6)) + expect_identical(round(mix$Mean), c(20, 6)) }) test_that("describe_distribution - list: works with include_factors", { From d0332dbae5c57f5867a2965d97c58a272990fe27 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 20:48:58 +0100 Subject: [PATCH 07/15] test coverage --- tests/testthat/test-labels_to_levels.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test-labels_to_levels.R b/tests/testthat/test-labels_to_levels.R index 866154c8f..87a0418b9 100644 --- a/tests/testthat/test-labels_to_levels.R +++ b/tests/testthat/test-labels_to_levels.R @@ -28,6 +28,18 @@ test_that("labels_to_levels, factor, error on no labels", { expect_error(labels_to_levels(iris), regex = "Could not change factor") }) +test_that("labels_to_levels, data frame, append", { + data(efc) + out <- labels_to_levels(efc, append = "_ll") + expect_named(out, c("c12hour", "e16sex", "e42dep", "c172code", "neg_c_7", "e42dep_ll")) +}) + +test_that("labels_to_levels, data frame, append", { + data(iris) + d <- as.data.frame(lapply(iris, as.factor)) + expect_identical(labels_to_levels(d), d) +}) + test_that("labels_to_levels, factor, data frame", { data(efc) out <- labels_to_levels(efc) From 509d46ad869dd6091f07c726357c91b6aef547f3 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 20:53:29 +0100 Subject: [PATCH 08/15] test coverage --- tests/testthat/_snaps/describe_distribution.md | 9 +++++++++ tests/testthat/test-describe_distribution.R | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/describe_distribution.md b/tests/testthat/_snaps/describe_distribution.md index 5c582859d..0261ab9b1 100644 --- a/tests/testthat/_snaps/describe_distribution.md +++ b/tests/testthat/_snaps/describe_distribution.md @@ -16,3 +16,12 @@ ----------------------------------------------------------- | | [VC, OJ] | 0 | -2.07 | 60 | 0 +# describe_distribution formatting + + Code + format(x) + Output + Mean | SD | IQR | Range | Quartiles | Skewness | Kurtosis | n | n_Missing + -------------------------------------------------------------------------------------- + 3.06 | 0.44 | 0.52 | [2.00, 4.40] | 2.80, 3.30 | 0.32 | 0.23 | 150 | 0 + diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 7ec32a7f2..0196bcf54 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -250,7 +250,6 @@ test_that("describe_distribution - grouped df", { # distribution_mode -------------------------- - test_that("distribution_mode works as expected", { skip_if_not_installed("bayestestR") @@ -279,3 +278,10 @@ test_that("describe_distribution regex", { ignore_attr = TRUE ) }) + +# formatting ------------------------------ +test_that("describe_distribution formatting", { + data(iris) + x <- describe_distribution(iris$Sepal.Width, quartiles = TRUE) + expect_snapshot(format(x)) +}) From 815c5821965028bb99dbf185a48540275b775a7d Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 20:54:47 +0100 Subject: [PATCH 09/15] test coverage --- tests/testthat/_snaps/describe_distribution.md | 9 +++++++++ tests/testthat/test-describe_distribution.R | 2 ++ 2 files changed, 11 insertions(+) diff --git a/tests/testthat/_snaps/describe_distribution.md b/tests/testthat/_snaps/describe_distribution.md index 0261ab9b1..ee08e552c 100644 --- a/tests/testthat/_snaps/describe_distribution.md +++ b/tests/testthat/_snaps/describe_distribution.md @@ -25,3 +25,12 @@ -------------------------------------------------------------------------------------- 3.06 | 0.44 | 0.52 | [2.00, 4.40] | 2.80, 3.30 | 0.32 | 0.23 | 150 | 0 +--- + + Code + format(x) + Output + Mean | SD | IQR | 95% CI | Range | Quartiles | Skewness | Kurtosis | n | n_Missing + ----------------------------------------------------------------------------------------------------- + 3.06 | 0.44 | 0.52 | [2.99, 3.12] | [2.00, 4.40] | 2.80, 3.30 | 0.32 | 0.23 | 150 | 0 + diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 0196bcf54..9b8b92fd3 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -284,4 +284,6 @@ test_that("describe_distribution formatting", { data(iris) x <- describe_distribution(iris$Sepal.Width, quartiles = TRUE) expect_snapshot(format(x)) + x <- describe_distribution(iris$Sepal.Width, ci = 0.95, quartiles = TRUE) + expect_snapshot(format(x)) }) From f554db5eeac12660b2c61694e1879504ff1e6116 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 19 Dec 2023 10:10:17 +0100 Subject: [PATCH 10/15] update test --- tests/testthat/_snaps/describe_distribution.md | 9 --------- tests/testthat/test-describe_distribution.R | 2 -- 2 files changed, 11 deletions(-) diff --git a/tests/testthat/_snaps/describe_distribution.md b/tests/testthat/_snaps/describe_distribution.md index ee08e552c..0261ab9b1 100644 --- a/tests/testthat/_snaps/describe_distribution.md +++ b/tests/testthat/_snaps/describe_distribution.md @@ -25,12 +25,3 @@ -------------------------------------------------------------------------------------- 3.06 | 0.44 | 0.52 | [2.00, 4.40] | 2.80, 3.30 | 0.32 | 0.23 | 150 | 0 ---- - - Code - format(x) - Output - Mean | SD | IQR | 95% CI | Range | Quartiles | Skewness | Kurtosis | n | n_Missing - ----------------------------------------------------------------------------------------------------- - 3.06 | 0.44 | 0.52 | [2.99, 3.12] | [2.00, 4.40] | 2.80, 3.30 | 0.32 | 0.23 | 150 | 0 - diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 9b8b92fd3..0196bcf54 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -284,6 +284,4 @@ test_that("describe_distribution formatting", { data(iris) x <- describe_distribution(iris$Sepal.Width, quartiles = TRUE) expect_snapshot(format(x)) - x <- describe_distribution(iris$Sepal.Width, ci = 0.95, quartiles = TRUE) - expect_snapshot(format(x)) }) From 2babedbc28630a38c0973694eb54abfc2f37c671 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 19 Dec 2023 12:40:20 +0100 Subject: [PATCH 11/15] fix test --- tests/testthat/test-describe_distribution.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 0196bcf54..83d2abb33 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -281,6 +281,7 @@ test_that("describe_distribution regex", { # formatting ------------------------------ test_that("describe_distribution formatting", { + skip_if_not_installed("bayestestR") data(iris) x <- describe_distribution(iris$Sepal.Width, quartiles = TRUE) expect_snapshot(format(x)) From 47b001d512e74e896028b50463db1213b2f6035c Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 19 Dec 2023 13:01:12 +0100 Subject: [PATCH 12/15] lintr --- R/data_seek.R | 2 +- R/data_separate.R | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/R/data_seek.R b/R/data_seek.R index c09e57489..17463878d 100644 --- a/R/data_seek.R +++ b/R/data_seek.R @@ -63,7 +63,7 @@ data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE) # check valid args seek <- intersect(seek, c("names", "labels", "values", "levels", "column_names", "columns", "all")) if (is.null(seek) || !length(seek)) { - insight::format_error("`seek` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".") + insight::format_error("`seek` must be one of \"names\", \"labels\", \"values\", a combination of these options, or \"all\".") # nolint } pos1 <- pos2 <- pos3 <- NULL diff --git a/R/data_separate.R b/R/data_separate.R index d2c3362b0..53243fb33 100644 --- a/R/data_separate.R +++ b/R/data_separate.R @@ -264,14 +264,12 @@ data_separate <- function(data, # if no column names provided, use standard names if (is.null(new_columns[[sep_column]])) { new_column_names <- paste0(sep_column, "_", seq_along(out)) - } else { + } else if (make_unique_colnames) { # if we have multiple columns that were separated, we avoid duplicated # column names of created variables by appending name of original column - if (make_unique_colnames) { - new_column_names <- paste0(sep_column, "_", new_columns[[sep_column]]) - } else { - new_column_names <- new_columns[[sep_column]] - } + new_column_names <- paste0(sep_column, "_", new_columns[[sep_column]]) + } else { + new_column_names <- new_columns[[sep_column]] } colnames(out) <- new_column_names From f7b1750f4b50891ac5dc3bd41116d061dc29eb00 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 19 Dec 2023 13:04:46 +0100 Subject: [PATCH 13/15] add reminder for myself --- R/to_numeric.R | 6 ++++++ man/to_numeric.Rd | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/R/to_numeric.R b/R/to_numeric.R index 8bfcac6bc..c0b382fd1 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -17,6 +17,12 @@ #' @inheritParams find_columns #' @inheritParams categorize #' +#' @note By default, `to_numeric()` converts factors into "binary" dummies, i.e. +#' each factor level is converted into a separate column filled with a binary +#' 0-1 value. If only one column is required, use `dummy_factors = FALSE`. If +#' you want to preserve the original factor levels (in case these represent +#' numeric values), use `preserve_levels = TRUE`. +#' #' @section Selection of variables - `select` argument: #' For most functions that have a `select` argument the complete input data #' frame is returned, even when `select` only selects a range of variables. diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd index 540f4f65a..1868df008 100644 --- a/man/to_numeric.Rd +++ b/man/to_numeric.Rd @@ -106,6 +106,13 @@ Convert data to numeric by converting characters to factors and factors to either numeric levels or dummy variables. The "counterpart" to convert variables into factors is \code{to_factor()}. } +\note{ +By default, \code{to_numeric()} converts factors into "binary" dummies, i.e. +each factor level is converted into a separate column filled with a binary +0-1 value. If only one column is required, use \code{dummy_factors = FALSE}. If +you want to preserve the original factor levels (in case these represent +numeric values), use \code{preserve_levels = TRUE}. +} \section{Selection of variables - \code{select} argument}{ For most functions that have a \code{select} argument the complete input data From c0d63f0a6a8fe47f6d2f73903ccf35184c5262f8 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 19 Dec 2023 13:07:16 +0100 Subject: [PATCH 14/15] add example --- R/to_numeric.R | 2 ++ man/to_numeric.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/to_numeric.R b/R/to_numeric.R index c0b382fd1..1dcab4371 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -40,6 +40,8 @@ #' x <- as.factor(mtcars$gear) #' to_numeric(x, dummy_factors = FALSE) #' to_numeric(x, dummy_factors = FALSE, preserve_levels = TRUE) +#' # same as: +#' coerce_to_numeric(x) #' #' @return A data frame of numeric variables. #' diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd index 1868df008..7c78b1ba6 100644 --- a/man/to_numeric.Rd +++ b/man/to_numeric.Rd @@ -132,5 +132,7 @@ to_numeric(head(ToothGrowth), dummy_factors = FALSE) x <- as.factor(mtcars$gear) to_numeric(x, dummy_factors = FALSE) to_numeric(x, dummy_factors = FALSE, preserve_levels = TRUE) +# same as: +coerce_to_numeric(x) } From 3111cc661134669da9f68ede56622202e0475856 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 19 Dec 2023 14:45:16 +0100 Subject: [PATCH 15/15] lintrs --- R/describe_distribution.R | 2 +- R/to_numeric.R | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/R/describe_distribution.R b/R/describe_distribution.R index ee5c3a903..37850299a 100644 --- a/R/describe_distribution.R +++ b/R/describe_distribution.R @@ -82,7 +82,7 @@ describe_distribution.list <- function(x, num_el <- which(vapply(x, is.numeric, FUN.VALUE = logical(1L))) # get elements names as is - # ex: list(mtcars$mpg, mtcars$cyl) -> c("mtcars$mpg", "mtcars$cyl") + # ex: `list(mtcars$mpg, mtcars$cyl) -> c("mtcars$mpg", "mtcars$cyl")` nm <- vapply(sys.call()[[2]], insight::safe_deparse, FUN.VALUE = character(1L))[-1] if (isTRUE(include_factors)) { diff --git a/R/to_numeric.R b/R/to_numeric.R index 1dcab4371..c43956399 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -219,15 +219,13 @@ to_numeric.factor <- function(x, # if the first observation was missing, add NA row and bind data frame if (i == 1 && na_values[i] == 1) { out <- rbind(NA, out) - } else { + } else if (na_values[i] == rows_x) { # if the last observation was NA, add NA row to data frame - if (na_values[i] == rows_x) { - out <- rbind(out, NA) - } else { - # else, pick rows from beginning to current NA value, add NA, - # and rbind the remaining rows - out <- rbind(out[1:(na_values[i] - 1), ], NA, out[na_values[i]:nrow(out), ]) - } + out <- rbind(out, NA) + } else { + # else, pick rows from beginning to current NA value, add NA, + # and rbind the remaining rows + out <- rbind(out[1:(na_values[i] - 1), ], NA, out[na_values[i]:nrow(out), ]) } } rownames(out) <- NULL