Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to_numeric() with inversed factor levels #469

Merged
merged 6 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.9.0.4
Version: 0.9.0.5
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# datawizard 0.9.0.9000 (development version)
# datawizard 0.9.0.x (development version)

CHANGES

Expand All @@ -7,6 +7,11 @@ CHANGES

* `to_factor()` and `to_numeric()` now support class `haven_labelled`.

BUG FIXES

* `to_numeric()` now correctly deals with inversed factor levels when
`preserve_levels = TRUE`.

# datawizard 0.9.0

NEW FUNCTIONS
Expand Down
16 changes: 8 additions & 8 deletions R/data_tabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
for (i in seq_along(grps)) {
rows <- grps[[i]]
# save information about grouping factors
if (!is.null(group_variables)) {

Check warning on line 182 in R/data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_tabulate.R,line=182,col=9,[if_not_else_linter] In a simple if/else statement, prefer `if (A) x else y` to the less-readable `if (!A) y else x`.
group_variable <- group_variables[i, , drop = FALSE]
} else {
group_variable <- NULL
Expand Down Expand Up @@ -226,7 +226,7 @@
# format data frame
ftab <- insight::format_table(x, ...)
ftab[] <- lapply(ftab, function(i) {
i[i == ""] <- ifelse(identical(format, "text"), "<NA>", "(NA)")

Check warning on line 229 in R/data_tabulate.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/data_tabulate.R,line=229,col=7,[nzchar_linter] Instead of comparing strings to "", use nzchar(). Note that if x is a factor, you'll have use as.character() to replicate an implicit conversion that happens in x == "". Whenever missing data is possible, please take care to use nzchar(., keepNA = TRUE); nzchar(NA) is TRUE by default.
i
})
ftab$N <- gsub("\\.00$", "", ftab$N)
Expand Down Expand Up @@ -347,10 +347,10 @@
}
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)
i <- format(i, format = "text", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- ""
i[nrow(i) + 1, ] <- ""
i
})
Expand All @@ -375,9 +375,9 @@
print_html(x[[1]], big_mark = big_mark, ...)
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)
i <- format(i, format = "html", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
i
})

Expand All @@ -401,10 +401,10 @@
print_md(x[[1]], big_mark = big_mark, ...)
} else {
x <- lapply(x, function(i) {
attr <- attributes(i)
i_attr <- attributes(i)
i <- format(i, format = "markdown", big_mark = big_mark, ...)
i$Variable[attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- ""
i$Variable[i_attr$duplicate_varnames] <- ""
if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- ""
i[nrow(i) + 1, ] <- ""
i
})
Expand Down
11 changes: 9 additions & 2 deletions R/to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
return(x)
}

attr <- attributes(x)
df_attr <- attributes(x)

# evaluate arguments
select <- .select_nse(select,
Expand All @@ -91,7 +91,7 @@
# drop numerics, when append is not FALSE
select <- colnames(x[select])[!vapply(x[select], is.numeric, FUN.VALUE = logical(1L))]
# process arguments
args <- .process_append(

Check warning on line 94 in R/to_numeric.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/to_numeric.R,line=94,col=5,[object_overwrite_linter] 'args' is an exported object from package 'base'. Avoid re-using such symbols.
x,
select,
append,
Expand Down Expand Up @@ -141,7 +141,7 @@
}

# add back custom attributes
out <- .replace_attrs(out, attr)
out <- .replace_attrs(out, df_attr)
out
}

Expand Down Expand Up @@ -226,6 +226,13 @@
}
names(out) <- levels(x)
} else if (preserve_levels) {
if (is.unsorted(levels(x))) {
x_inverse <- rep(NA_real_, length(x))
for (i in 1:nlevels(x)) {
x_inverse[x == levels(x)[i]] <- as.numeric(levels(x)[nlevels(x) - i + 1])
}
x <- factor(x_inverse)
}
out <- .set_back_labels(as.numeric(as.character(x)), x)
} else {
out <- .set_back_labels(as.numeric(x), x)
Expand Down
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ ggplot's
https
interpretability
interpretable
inversed
joss
labelled
labelling
Expand Down
19 changes: 14 additions & 5 deletions tests/testthat/test-data_to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,13 @@ test_that("convert factor to numeric", {
expect_snapshot(to_numeric(f))
})


test_that("convert factor to numeric", {
expect_identical(to_numeric(c("abc", "xyz")), c(1, 2))
expect_identical(to_numeric(c("123", "789")), c(123, 789))
expect_identical(to_numeric(c("1L", "2e-3")), c(1, 0.002))
expect_identical(to_numeric(c("1L", "2e-3", "ABC")), c(1, 2, 3))
})


test_that("convert factor to numeric, dummy factors", {
expect_identical(
to_numeric(c("abc", "xyz"), dummy_factors = TRUE),
Expand All @@ -66,7 +64,6 @@ test_that("convert factor to numeric, dummy factors", {
)
})


test_that("convert factor to numeric, append", {
data(efc)
expect_identical(
Expand Down Expand Up @@ -94,13 +91,11 @@ test_that("convert factor to numeric, append", {
)
})


test_that("convert factor to numeric, all numeric", {
data(mtcars)
expect_identical(to_numeric(mtcars), mtcars)
})


test_that("convert factor to numeric, dummy factors, with NA", {
x1 <- factor(rep(c("a", "b"), 3))
x2 <- factor(c("a", NA_character_, "a", "b", "a", "b"))
Expand Down Expand Up @@ -153,6 +148,20 @@ test_that("convert factor to numeric, dummy factors, with NA", {
expect_identical(nrow(to_numeric(x7, dummy_factors = TRUE)), length(x7))
})

test_that("to_numeric, inverse factor levels", {
f <- c(0, 0, 1, 1, 1, 0)
x1 <- factor(f, levels = c(0, 1))
x2 <- factor(f, levels = c(1, 0))
out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = FALSE)
expect_identical(out, c(1, 1, 2, 2, 2, 1))
out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = FALSE)
expect_identical(out, c(2, 2, 1, 1, 1, 2))
out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = TRUE)
expect_identical(out, c(0, 0, 1, 1, 1, 0))
out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = TRUE)
expect_identical(out, c(1, 1, 0, 0, 0, 1))
})

# select helpers ------------------------------
test_that("to_numeric regex", {
expect_identical(
Expand Down
Loading