diff --git a/R/utils_labels.R b/R/utils_labels.R index 09ed94692..60199d878 100644 --- a/R/utils_labels.R +++ b/R/utils_labels.R @@ -46,17 +46,17 @@ "Not all factor levels had a matching value label. Non-matching levels were preserved." ) } - # to match factor levels with value labels, we need to swicth names and elements - value_levels <- stats::setNames(names(value_labels), as.character(value_labels)) + # we need to find out which levels have no labelled value + missing_levels <- levels(x)[!levels(x) %in% value_labels] - # find out if we have any non-matching levels - non_match <- is.na(value_levels[levels(x)]) + # and we need to remove those value labels that don't have a matching level + value_labels <- value_labels[value_labels %in% levels(x)] - # if we have non-matching levels, we need to add them to the value labels - value_levels[non_match] <- stats::setNames(levels(x)[non_match], levels(x)[non_match]) + # for levels that have no label, we just keep the original factor level + value_labels <- c(value_labels, setNames(missing_levels, missing_levels)) # now we can add back levels - levels(x) <- value_levels[order(names(value_levels))] + levels(x) <- names(value_labels)[order(value_labels)] attr(x, "labels") <- NULL x diff --git a/tests/testthat/test-labels_to_levels.R b/tests/testthat/test-labels_to_levels.R index 55105acfe..c43a1c011 100644 --- a/tests/testthat/test-labels_to_levels.R +++ b/tests/testthat/test-labels_to_levels.R @@ -59,4 +59,77 @@ test_that("labels_to_levels, factor, with random value numbers (no sequential or out <- to_factor(x, labels_to_levels = TRUE) expect_identical(as.character(out), c("a", "a", "d", "c", "d", "b")) expect_identical(levels(out), c("d", "c", "b", "a")) + + x <- c(5, 5, 1, 3, 1, 7) + attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5) + out <- to_factor(x, labels_to_levels = TRUE) + expect_identical( + out, + structure( + c(3L, 3L, 1L, 2L, 1L, 4L), + levels = c("yes", "maybe", "don't know", "no"), + class = "factor" + ) + ) + expect_identical( + as.character(out), + c("don't know", "don't know", "yes", "maybe", "yes", "no") + ) + + x <- c(5, 5, 1, 3, 1, 7, 4) + attr(x, "labels") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5) + expect_message( + { + out <- to_factor(x, labels_to_levels = TRUE) + }, + regex = "Not all factor levels" + ) + expect_identical( + out, + structure( + c(4L, 4L, 1L, 2L, 1L, 5L, 3L), + levels = c("yes", "maybe", "4", "don't know", "no"), + class = "factor" + ) + ) + expect_identical( + as.character(out), + c("don't know", "don't know", "yes", "maybe", "yes", "no", "4") + ) + + x <- c(5, 5, 1, 3, 1, 7) + attr(x, "labels") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5) + expect_message({ + out <- to_factor(x, labels_to_levels = TRUE) + }) + expect_identical( + out, + structure( + c(3L, 3L, 1L, 2L, 1L, 4L), + levels = c("yes", "3", "don't know", "no"), + class = "factor" + ) + ) + expect_identical( + as.character(out), + c("don't know", "don't know", "yes", "3", "yes", "no") + ) + + x <- c(5, 5, 1, 3, 1, 7, 6) + attr(x, "labels") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5) + expect_message({ + out <- to_factor(x, labels_to_levels = TRUE) + }) + expect_identical( + out, + structure( + c(3L, 3L, 1L, 2L, 1L, 5L, 4L), + levels = c("yes", "3", "don't know", "6", "no"), + class = "factor" + ) + ) + expect_identical( + as.character(out), + c("don't know", "don't know", "yes", "3", "yes", "no", "6") + ) })