Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Need more fixing #457

Merged
merged 12 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.8.0.11
Version: 0.8.0.12
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
26 changes: 11 additions & 15 deletions R/utils_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,17 @@
"Not all factor levels had a matching value label. Non-matching levels were preserved."
)
}
if (length(value_labels) == length(levels_in_labs)) {
# when length of value_labels and levels_in_labs is identical, we can simply
# replace the levels with the value labels. This makes sure than levels or
# value labels, which are not sorted or not sequentially numbered, match.
# Example:
# x <- c(5, 5, 1, 3, 1, 7)
# attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)
# to_factor(x, labels_to_levels = TRUE)
levels(x)[levels_in_labs] <- names(value_labels)
} else {
# else, we need to select only those value labels that have a matching level
# (in labs_in_levels). This is required when not all values that have labels
# appear in the data.
levels(x)[levels_in_labs] <- names(value_labels[labs_in_levels])
}
# we need to find out which levels have no labelled value
missing_levels <- levels(x)[!levels(x) %in% value_labels]

# and we need to remove those value labels that don't have a matching level
value_labels <- value_labels[value_labels %in% levels(x)]

# for levels that have no label, we just keep the original factor level
value_labels <- c(value_labels, stats::setNames(missing_levels, missing_levels))

# now we can add back levels
levels(x) <- names(value_labels)[order(as.numeric(value_labels))]
attr(x, "labels") <- NULL

x
Expand Down
73 changes: 73 additions & 0 deletions tests/testthat/test-labels_to_levels.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,77 @@ test_that("labels_to_levels, factor, with random value numbers (no sequential or
out <- to_factor(x, labels_to_levels = TRUE)
expect_identical(as.character(out), c("a", "a", "d", "c", "d", "b"))
expect_identical(levels(out), c("d", "c", "b", "a"))

x <- c(5, 5, 1, 3, 1, 7)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)
out <- to_factor(x, labels_to_levels = TRUE)
expect_identical(
out,
structure(
c(3L, 3L, 1L, 2L, 1L, 4L),
levels = c("yes", "maybe", "don't know", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "maybe", "yes", "no")
)

x <- c(5, 5, 1, 3, 1, 7, 4)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)
expect_message(
{
out <- to_factor(x, labels_to_levels = TRUE)
},
regex = "Not all factor levels"
)
expect_identical(
out,
structure(
c(4L, 4L, 1L, 2L, 1L, 5L, 3L),
levels = c("yes", "maybe", "4", "don't know", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "maybe", "yes", "no", "4")
)

x <- c(5, 5, 1, 3, 1, 7)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5)
expect_message({
out <- to_factor(x, labels_to_levels = TRUE)
})
expect_identical(
out,
structure(
c(3L, 3L, 1L, 2L, 1L, 4L),
levels = c("yes", "3", "don't know", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "3", "yes", "no")
)

x <- c(5, 5, 1, 3, 1, 7, 6)
attr(x, "labels") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5)
expect_message({
out <- to_factor(x, labels_to_levels = TRUE)
})
expect_identical(
out,
structure(
c(3L, 3L, 1L, 2L, 1L, 5L, 4L),
levels = c("yes", "3", "don't know", "6", "no"),
class = "factor"
)
)
expect_identical(
as.character(out),
c("don't know", "don't know", "yes", "3", "yes", "no", "6")
)
})
Loading