Skip to content

Commit

Permalink
Fix to_numeric() with inversed factor levels (#469)
Browse files Browse the repository at this point in the history
* Fix invers levels for to_numeric()

* news

* lintr

* wordlist

* lintr

* lintr
  • Loading branch information
strengejacke authored Nov 27, 2023
1 parent 2852033 commit ec161a2
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 25 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.9.0.4
Version: 0.9.0.5
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# datawizard 0.9.0.9000 (development version)
# datawizard 0.9.0.x (development version)

CHANGES

Expand All @@ -7,6 +7,11 @@ CHANGES

* `to_factor()` and `to_numeric()` now support class `haven_labelled`.

BUG FIXES

* `to_numeric()` now correctly deals with inversed factor levels when
`preserve_levels = TRUE`.

# datawizard 0.9.0

NEW FUNCTIONS
Expand Down
24 changes: 12 additions & 12 deletions R/data_tabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ data_tabulate.grouped_df <- function(x,
for (i in seq_along(grps)) {
rows <- grps[[i]]
# save information about grouping factors
if (!is.null(group_variables)) {
group_variable <- group_variables[i, , drop = FALSE]
} else {
if (is.null(group_variables)) {
group_variable <- NULL
} else {
group_variable <- group_variables[i, , drop = FALSE]
}
out <- c(out, data_tabulate(
data_filter(x, rows),
Expand Down Expand Up @@ -226,7 +226,7 @@ format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) {
# format data frame
ftab <- insight::format_table(x, ...)
ftab[] <- lapply(ftab, function(i) {
i[i == ""] <- ifelse(identical(format, "text"), "<NA>", "(NA)")
i[i == ""] <- ifelse(identical(format, "text"), "<NA>", "(NA)") # nolint
i
})
ftab$N <- gsub("\\.00$", "", ftab$N)
Expand Down Expand Up @@ -347,10 +347,10 @@ print.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
}
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)
i <- format(i, format = "text", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- ""
i[nrow(i) + 1, ] <- ""
i
})
Expand All @@ -375,9 +375,9 @@ print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
print_html(x[[1]], big_mark = big_mark, ...)
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)
i <- format(i, format = "html", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
i
})

Expand All @@ -401,10 +401,10 @@ print_md.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
print_md(x[[1]], big_mark = big_mark, ...)
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)
i <- format(i, format = "markdown", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- ""
i[nrow(i) + 1, ] <- ""
i
})
Expand Down
19 changes: 13 additions & 6 deletions R/to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ to_numeric.data.frame <- function(x,
return(x)
}

attr <- attributes(x)
df_attr <- attributes(x)

# evaluate arguments
select <- .select_nse(select,
Expand All @@ -91,16 +91,16 @@ to_numeric.data.frame <- function(x,
# drop numerics, when append is not FALSE
select <- colnames(x[select])[!vapply(x[select], is.numeric, FUN.VALUE = logical(1L))]
# process arguments
args <- .process_append(
fun_args <- .process_append(
x,
select,
append,
append_suffix = "_n",
keep_factors = TRUE
)
# update processed arguments
x <- args$x
select <- args$select
x <- fun_args$x
select <- fun_args$select
}

out <- sapply(
Expand Down Expand Up @@ -129,7 +129,7 @@ to_numeric.data.frame <- function(x,
}

# due to the special handling of dummy factors, we need to take care
# of appending the data here again. usually, "args$x" includes the appended
# of appending the data here again. usually, "fun_args$x" includes the appended
# data, which does not work here...

if (!isFALSE(append)) {
Expand All @@ -141,7 +141,7 @@ to_numeric.data.frame <- function(x,
}

# add back custom attributes
out <- .replace_attrs(out, attr)
out <- .replace_attrs(out, df_attr)
out
}

Expand Down Expand Up @@ -226,6 +226,13 @@ to_numeric.factor <- function(x,
}
names(out) <- levels(x)
} else if (preserve_levels) {
if (is.unsorted(levels(x))) {
x_inverse <- rep(NA_real_, length(x))
for (i in 1:nlevels(x)) {
x_inverse[x == levels(x)[i]] <- as.numeric(levels(x)[nlevels(x) - i + 1])
}
x <- factor(x_inverse)
}
out <- .set_back_labels(as.numeric(as.character(x)), x)
} else {
out <- .set_back_labels(as.numeric(x), x)
Expand Down
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ ggplot's
https
interpretability
interpretable
inversed
joss
labelled
labelling
Expand Down
19 changes: 14 additions & 5 deletions tests/testthat/test-data_to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,13 @@ test_that("convert factor to numeric", {
expect_snapshot(to_numeric(f))
})


test_that("convert factor to numeric", {
expect_identical(to_numeric(c("abc", "xyz")), c(1, 2))
expect_identical(to_numeric(c("123", "789")), c(123, 789))
expect_identical(to_numeric(c("1L", "2e-3")), c(1, 0.002))
expect_identical(to_numeric(c("1L", "2e-3", "ABC")), c(1, 2, 3))
})


test_that("convert factor to numeric, dummy factors", {
expect_identical(
to_numeric(c("abc", "xyz"), dummy_factors = TRUE),
Expand All @@ -66,7 +64,6 @@ test_that("convert factor to numeric, dummy factors", {
)
})


test_that("convert factor to numeric, append", {
data(efc)
expect_identical(
Expand Down Expand Up @@ -94,13 +91,11 @@ test_that("convert factor to numeric, append", {
)
})


test_that("convert factor to numeric, all numeric", {
data(mtcars)
expect_identical(to_numeric(mtcars), mtcars)
})


test_that("convert factor to numeric, dummy factors, with NA", {
x1 <- factor(rep(c("a", "b"), 3))
x2 <- factor(c("a", NA_character_, "a", "b", "a", "b"))
Expand Down Expand Up @@ -153,6 +148,20 @@ test_that("convert factor to numeric, dummy factors, with NA", {
expect_identical(nrow(to_numeric(x7, dummy_factors = TRUE)), length(x7))
})

test_that("to_numeric, inverse factor levels", {
f <- c(0, 0, 1, 1, 1, 0)
x1 <- factor(f, levels = c(0, 1))
x2 <- factor(f, levels = c(1, 0))
out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = FALSE)
expect_identical(out, c(1, 1, 2, 2, 2, 1))
out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = FALSE)
expect_identical(out, c(2, 2, 1, 1, 1, 2))
out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = TRUE)
expect_identical(out, c(0, 0, 1, 1, 1, 0))
out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = TRUE)
expect_identical(out, c(1, 1, 0, 0, 0, 1))
})

# select helpers ------------------------------
test_that("to_numeric regex", {
expect_identical(
Expand Down

0 comments on commit ec161a2

Please sign in to comment.