Skip to content

Commit

Permalink
Need more fixing (#457)
Browse files Browse the repository at this point in the history
* fix labels_to_levels

* fix

* lintr

* lintr

* add comments

* still not working for all edge cases

* fix

* namespace

* fix

* desc

* styler
  • Loading branch information
strengejacke authored Sep 8, 2023
1 parent fbd4430 commit bcbc115
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 16 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.8.0.11
Version: 0.8.0.12
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
26 changes: 11 additions & 15 deletions R/utils_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,17 @@
"Not all factor levels had a matching value label. Non-matching levels were preserved."
)
}
if (length(value_labels) == length(levels_in_labs)) {
# when length of value_labels and levels_in_labs is identical, we can simply
# replace the levels with the value labels. This makes sure than levels or
# value labels, which are not sorted or not sequentially numbered, match.
# Example:
# x <- c(5, 5, 1, 3, 1, 7)
# attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)
# to_factor(x, labels_to_levels = TRUE)
levels(x)[levels_in_labs] <- names(value_labels)
} else {
# else, we need to select only those value labels that have a matching level
# (in labs_in_levels). This is required when not all values that have labels
# appear in the data.
levels(x)[levels_in_labs] <- names(value_labels[labs_in_levels])
}
# we need to find out which levels have no labelled value
missing_levels <- levels(x)[!levels(x) %in% value_labels]

# and we need to remove those value labels that don't have a matching level
value_labels <- value_labels[value_labels %in% levels(x)]

# for levels that have no label, we just keep the original factor level
value_labels <- c(value_labels, stats::setNames(missing_levels, missing_levels))

# now we can add back levels
levels(x) <- names(value_labels)[order(as.numeric(value_labels))]
attr(x, "labels") <- NULL

x
Expand Down
73 changes: 73 additions & 0 deletions tests/testthat/test-labels_to_levels.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,77 @@ test_that("labels_to_levels, factor, with random value numbers (no sequential or
out <- to_factor(x, labels_to_levels = TRUE)
expect_identical(as.character(out), c("a", "a", "d", "c", "d", "b"))
expect_identical(levels(out), c("d", "c", "b", "a"))

x <- c(5, 5, 1, 3, 1, 7)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)
out <- to_factor(x, labels_to_levels = TRUE)
expect_identical(
out,
structure(
c(3L, 3L, 1L, 2L, 1L, 4L),
levels = c("yes", "maybe", "don't know", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "maybe", "yes", "no")
)

x <- c(5, 5, 1, 3, 1, 7, 4)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)
expect_message(
{
out <- to_factor(x, labels_to_levels = TRUE)
},
regex = "Not all factor levels"
)
expect_identical(
out,
structure(
c(4L, 4L, 1L, 2L, 1L, 5L, 3L),
levels = c("yes", "maybe", "4", "don't know", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "maybe", "yes", "no", "4")
)

x <- c(5, 5, 1, 3, 1, 7)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5)
expect_message({
out <- to_factor(x, labels_to_levels = TRUE)
})
expect_identical(
out,
structure(
c(3L, 3L, 1L, 2L, 1L, 4L),
levels = c("yes", "3", "don't know", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "3", "yes", "no")
)

x <- c(5, 5, 1, 3, 1, 7, 6)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5)
expect_message({
out <- to_factor(x, labels_to_levels = TRUE)
})
expect_identical(
out,
structure(
c(3L, 3L, 1L, 2L, 1L, 5L, 4L),
levels = c("yes", "3", "don't know", "6", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "3", "yes", "no", "6")
)
})

0 comments on commit bcbc115

Please sign in to comment.