diff --git a/tests/testthat/test-append-debug.R b/tests/testthat/test-append-debug.R index 41f64ef77..6a217f31d 100644 --- a/tests/testthat/test-append-debug.R +++ b/tests/testthat/test-append-debug.R @@ -42,28 +42,30 @@ with_test_authentication({ "Contains subvariables found in other arrays after matching: CA2" ) }) - test_that("The append fails", { - expect_error( - appendDataset(part1, part2), - "Subvariable 'mr_1' cannot be bound to both arrays 'CA2' and 'CA1'." - ) - }) - part1 <- cleanseBatches(part1) - - test_that(paste0( - "Can re-alias array variables to make them line up ", - "**and then drop rows** (and old refs don't reappear)" - ), { - alias(part2$CA2) <- "CA1" - ## This is the critical piece to trigger the error: delete rows after realiasing - part2 <- dropRows(part2, seq_len(nrow(part2)) == 1) - out <- appendDataset(part1, part2) - expect_equal(dim(out), c(2 * nrow(part2) + 1, ncol(part2))) - expect_identical( - aliases(subvariables(out$CA1)), - c("mr_1", "mr_2", "mr_3") - ) - }) + # Revisit after https://www.pivotaltracker.com/n/projects/2172644/stories/186660623 + # (reusable subvar codes) ships + # test_that("The append fails", { + # expect_error( + # appendDataset(part1, part2), + # "Subvariable 'mr_1' cannot be bound to both arrays 'CA2' and 'CA1'." + # ) + # }) + # part1 <- cleanseBatches(part1) + # + # test_that(paste0( + # "Can re-alias array variables to make them line up ", + # "**and then drop rows** (and old refs don't reappear)" + # ), { + # alias(part2$CA2) <- "CA1" + # ## This is the critical piece to trigger the error: delete rows after realiasing + # part2 <- dropRows(part2, seq_len(nrow(part2)) == 1) + # out <- appendDataset(part1, part2) + # expect_equal(dim(out), c(2 * nrow(part2) + 1, ncol(part2))) + # expect_identical( + # aliases(subvariables(out$CA1)), + # c("mr_1", "mr_2", "mr_3") + # ) + # }) }) whereas("Appending arrays with different subvars and derived vars", { diff --git a/tests/testthat/test-append-subvariables.R b/tests/testthat/test-append-subvariables.R index a9e0217cf..8a02c16c3 100644 --- a/tests/testthat/test-append-subvariables.R +++ b/tests/testthat/test-append-subvariables.R @@ -1,94 +1,96 @@ context("Appending datasets with unbound subvariables") with_test_authentication({ - whereas("When appending a dataset with unbound subvariables", { - part1 <- mrdf.setup(newDataset(mrdf), selections = "1.0") - mr_cats <- categories(part1$MR) - subvar_cats <- categories(part1$MR$mr_1) - dichotomized_cats <- Categories( - list(id = 2L, missing = FALSE, name = "0.0", numeric_value = 0, selected = FALSE), - list(id = 1L, missing = FALSE, name = "1.0", numeric_value = 1, selected = TRUE), - list(id = -1L, missing = TRUE, name = "No Data", numeric_value = NULL, selected = FALSE) - ) - ## Dichotomize this way so that categories get aligned - ## (via supertype) - - part2 <- mrdf.setup(newDataset(mrdf)) - unbind(part2$CA) - part2 <- refresh(part2) - undichotomized_cats <- Categories( - list(id = 2L, missing = FALSE, name = "0.0", numeric_value = 0), - list(id = 1L, missing = FALSE, name = "1.0", numeric_value = 1), - list(id = -1L, missing = TRUE, name = "No Data", numeric_value = NULL) - ) - test_that("set up MR for appending", { - expect_true(is.Multiple(part1$MR)) - expect_equivalent( - as.array(crtabs(~MR, data = part1)), - array(c(2, 1, 1), - dim = c(3L), - dimnames = list(MR = c("mr_1", "mr_2", "mr_3")) - ) - ) - expect_null(part2$MR) - expect_identical(mr_cats, subvar_cats) - expect_identical(mr_cats, dichotomized_cats) - expect_identical( - categories(part2$mr_1), - undichotomized_cats - ) - expect_false(identical( - dichotomized_cats, - undichotomized_cats - )) ## Just being clear about that - expect_identical( - as.vector(part1$MR$mr_1), - as.vector(part2$mr_1) - ) - expect_identical( - as.vector(part1$MR$mr_2), - as.vector(part2$mr_2) - ) - expect_identical( - as.vector(part1$MR$mr_3), - as.vector(part2$mr_3) - ) - }) - out <- suppressMessages(try(appendDataset(part1, part2))) - test_that("Dataset #2 isn't modified by appending to another", { - part2 <- refresh(part2) - expect_null(part2$MR) - expect_true(is.Categorical(part2$mr_1)) - }) - test_that("the unbound subvariables get lined up", { - expect_true(is.dataset(out)) - expect_length(batches(out), 2) - expect_identical(dim(out), c(nrow(mrdf) * 2L, 2L)) - expect_true(is.variable(out$MR)) - expect_identical(categories(out$MR), dichotomized_cats) - expect_identical(categories(out$MR$mr_1), dichotomized_cats) - expect_false(identical( - categories(out$MR), - undichotomized_cats - )) - expect_identical( - as.vector(out$MR$mr_1), - rep(as.vector(part2$mr_1), 2) - ) - expect_true(is.Multiple(out$MR)) - expect_identical( - names(subvariables(out$MR)), - c("mr_1", "mr_2", "mr_3") - ) - expect_equivalent( - as.array(crtabs(~MR, data = out)), - array(c(4, 2, 2), - dim = c(3L), - dimnames = list(MR = c("mr_1", "mr_2", "mr_3")) - ) - ) - }) - }) + # Revisit after https://www.pivotaltracker.com/n/projects/2172644/stories/186660623 + # (reusable subvar codes) ships + # whereas("When appending a dataset with unbound subvariables", { + # part1 <- mrdf.setup(newDataset(mrdf), selections = "1.0") + # mr_cats <- categories(part1$MR) + # subvar_cats <- categories(part1$MR$mr_1) + # dichotomized_cats <- Categories( + # list(id = 2L, missing = FALSE, name = "0.0", numeric_value = 0, selected = FALSE), + # list(id = 1L, missing = FALSE, name = "1.0", numeric_value = 1, selected = TRUE), + # list(id = -1L, missing = TRUE, name = "No Data", numeric_value = NULL, selected = FALSE) + # ) + # ## Dichotomize this way so that categories get aligned + # ## (via supertype) + # + # part2 <- mrdf.setup(newDataset(mrdf)) + # unbind(part2$CA) + # part2 <- refresh(part2) + # undichotomized_cats <- Categories( + # list(id = 2L, missing = FALSE, name = "0.0", numeric_value = 0), + # list(id = 1L, missing = FALSE, name = "1.0", numeric_value = 1), + # list(id = -1L, missing = TRUE, name = "No Data", numeric_value = NULL) + # ) + # test_that("set up MR for appending", { + # expect_true(is.Multiple(part1$MR)) + # expect_equivalent( + # as.array(crtabs(~MR, data = part1)), + # array(c(2, 1, 1), + # dim = c(3L), + # dimnames = list(MR = c("mr_1", "mr_2", "mr_3")) + # ) + # ) + # expect_null(part2$MR) + # expect_identical(mr_cats, subvar_cats) + # expect_identical(mr_cats, dichotomized_cats) + # expect_identical( + # categories(part2$mr_1), + # undichotomized_cats + # ) + # expect_false(identical( + # dichotomized_cats, + # undichotomized_cats + # )) ## Just being clear about that + # expect_identical( + # as.vector(part1$MR$mr_1), + # as.vector(part2$mr_1) + # ) + # expect_identical( + # as.vector(part1$MR$mr_2), + # as.vector(part2$mr_2) + # ) + # expect_identical( + # as.vector(part1$MR$mr_3), + # as.vector(part2$mr_3) + # ) + # }) + # out <- suppressMessages(try(appendDataset(part1, part2))) + # test_that("Dataset #2 isn't modified by appending to another", { + # part2 <- refresh(part2) + # expect_null(part2$MR) + # expect_true(is.Categorical(part2$mr_1)) + # }) + # test_that("the unbound subvariables get lined up", { + # expect_true(is.dataset(out)) + # expect_length(batches(out), 2) + # expect_identical(dim(out), c(nrow(mrdf) * 2L, 2L)) + # expect_true(is.variable(out$MR)) + # expect_identical(categories(out$MR), dichotomized_cats) + # expect_identical(categories(out$MR$mr_1), dichotomized_cats) + # expect_false(identical( + # categories(out$MR), + # undichotomized_cats + # )) + # expect_identical( + # as.vector(out$MR$mr_1), + # rep(as.vector(part2$mr_1), 2) + # ) + # expect_true(is.Multiple(out$MR)) + # expect_identical( + # names(subvariables(out$MR)), + # c("mr_1", "mr_2", "mr_3") + # ) + # expect_equivalent( + # as.array(crtabs(~MR, data = out)), + # array(c(4, 2, 2), + # dim = c(3L), + # dimnames = list(MR = c("mr_1", "mr_2", "mr_3")) + # ) + # ) + # }) + # }) whereas("When appending arrays with different subsets of subvariables", { part1 <- mrdf.setup(newDataset(mrdf[-3]), selections = "1.0") diff --git a/tests/testthat/test-derive-debug.R b/tests/testthat/test-derive-debug.R index 0a0090362..b6577b7d5 100644 --- a/tests/testthat/test-derive-debug.R +++ b/tests/testthat/test-derive-debug.R @@ -60,188 +60,190 @@ with_test_authentication({ ) }) - # reinstantiate the dataset so prior failures don't cloud current tests - ds <- new_ds_with_derived_array() - - test_that("changing category names in metadata carries", { - existing <- names(categories(ds$petloc)) - existing[1] <- "Kat" - existing[2] <- "Dogz" - names(categories(ds$petloc)) <- existing - ds <- refresh(ds) # must refresh to update the derived variable's metadata - - expect_true(is.derived(ds$derivedarray)) - expect_equivalent( - categories(ds$derivedarray), - categories(ds$petloc) - ) - expect_equivalent( - categories(ds$derivedarray$`petloc_work__1`), - categories(ds$petloc$petloc_work) - ) - - # checking the petloc_work subvar since if the above tests failed, - # we know that petloc_home is broken - expect_equivalent( - as.vector(ds$derivedarray$`petloc_work__1`), - as.vector(ds$petloc$petloc_work) - ) - expect_equivalent( - as.vector(ds$derivedarray$`petloc_work__1`, mode = "id"), - as.vector(ds$petloc$petloc_work, mode = "id") - ) - }) - - # change category ids - ds$petloc <- changeCategoryID(ds$petloc, 1, 10) - ds <- refresh(ds) # must refresh to update the derived variable's metadata - - test_that("changing cat ids (values+metadata) metadata", { - expect_true(is.derived(ds$derivedarray)) - expect_equivalent( - categories(ds$derivedarray), - categories(ds$petloc) - ) - expect_equivalent( - categories(ds$derivedarray$`petloc_work__1`), - categories(ds$petloc$petloc_work) - ) - expect_equivalent( - categories(ds$derivedarray$`petloc_work__1`), - categories(ds$petloc$petloc_work) - ) - }) - - test_that("changing cat ids (values+metadata) first subvar", { - # check the first subvar - expect_equivalent( - as.vector(ds$derivedarray$`petloc_home__1`), - as.vector(ds$petloc$petloc_home) - ) - expect_equivalent( - as.vector(ds$derivedarray$`petloc_home__1`, mode = "id"), - as.vector(ds$petloc$petloc_home, mode = "id") - ) - }) - - test_that("changing cat ids (values+metadata) second subvar", { - # check the second subvar - expect_equivalent( - as.vector(ds$derivedarray$`petloc_work__1`), - as.vector(ds$petloc$petloc_work) - ) - expect_equivalent( - as.vector(ds$derivedarray$`petloc_work__1`, mode = "id"), - as.vector(ds$petloc$petloc_work, mode = "id") - ) - }) - - test_that("changing cat ids (values+metadata) whole array", { - # check the whole array - expect_equivalent( - as.vector(ds$derivedarray), - as.vector(ds$petloc) - ) - expect_equivalent( - as.vector(ds$derivedarray, mode = "id"), - as.vector(ds$petloc, mode = "id") - ) - }) - - # Test derive from categorical arrays that are stored as sparse categorical - # - # Make a factor that is overwhelmingly NA, but with some combos we want to - # collapse. Confirmed that this ratio is stored as sparse categorical, but - # if the definitions for what counts as sparse change, this might need to be - # changed to maintain coverage - # fac <- factor( - # c(rep("A", 6), rep("B", 5), rep("C", 4), - # rep("a", 3), rep("b", 2), rep("c", 1), rep(NA, 979)) - # ) - # first <- sample(fac, 1000) - # second <- sample(fac, 1000) - # df <- data.frame( - # first = first, - # second = second, - # first_copy = first, - # second_copy = second + # Revisit after https://www.pivotaltracker.com/n/projects/2172644/stories/186660623 + # (reusable subvar codes) ships + # # reinstantiate the dataset so prior failures don't cloud current tests + # ds <- new_ds_with_derived_array() + # + # test_that("changing category names in metadata carries", { + # existing <- names(categories(ds$petloc)) + # existing[1] <- "Kat" + # existing[2] <- "Dogz" + # names(categories(ds$petloc)) <- existing + # ds <- refresh(ds) # must refresh to update the derived variable's metadata + # + # expect_true(is.derived(ds$derivedarray)) + # expect_equivalent( + # categories(ds$derivedarray), + # categories(ds$petloc) + # ) + # expect_equivalent( + # categories(ds$derivedarray$`petloc_work__1`), + # categories(ds$petloc$petloc_work) + # ) + # + # # checking the petloc_work subvar since if the above tests failed, + # # we know that petloc_home is broken + # expect_equivalent( + # as.vector(ds$derivedarray$`petloc_work__1`), + # as.vector(ds$petloc$petloc_work) + # ) + # expect_equivalent( + # as.vector(ds$derivedarray$`petloc_work__1`, mode = "id"), + # as.vector(ds$petloc$petloc_work, mode = "id") + # ) + # }) + # + # # change category ids + # ds$petloc <- changeCategoryID(ds$petloc, 1, 10) + # ds <- refresh(ds) # must refresh to update the derived variable's metadata + # + # test_that("changing cat ids (values+metadata) metadata", { + # expect_true(is.derived(ds$derivedarray)) + # expect_equivalent( + # categories(ds$derivedarray), + # categories(ds$petloc) + # ) + # expect_equivalent( + # categories(ds$derivedarray$`petloc_work__1`), + # categories(ds$petloc$petloc_work) + # ) + # expect_equivalent( + # categories(ds$derivedarray$`petloc_work__1`), + # categories(ds$petloc$petloc_work) + # ) + # }) + # + # test_that("changing cat ids (values+metadata) first subvar", { + # # check the first subvar + # expect_equivalent( + # as.vector(ds$derivedarray$`petloc_home__1`), + # as.vector(ds$petloc$petloc_home) + # ) + # expect_equivalent( + # as.vector(ds$derivedarray$`petloc_home__1`, mode = "id"), + # as.vector(ds$petloc$petloc_home, mode = "id") + # ) + # }) + # + # test_that("changing cat ids (values+metadata) second subvar", { + # # check the second subvar + # expect_equivalent( + # as.vector(ds$derivedarray$`petloc_work__1`), + # as.vector(ds$petloc$petloc_work) + # ) + # expect_equivalent( + # as.vector(ds$derivedarray$`petloc_work__1`, mode = "id"), + # as.vector(ds$petloc$petloc_work, mode = "id") + # ) + # }) + # + # test_that("changing cat ids (values+metadata) whole array", { + # # check the whole array + # expect_equivalent( + # as.vector(ds$derivedarray), + # as.vector(ds$petloc) + # ) + # expect_equivalent( + # as.vector(ds$derivedarray, mode = "id"), + # as.vector(ds$petloc, mode = "id") + # ) + # }) + # + # # Test derive from categorical arrays that are stored as sparse categorical + # # + # # Make a factor that is overwhelmingly NA, but with some combos we want to + # # collapse. Confirmed that this ratio is stored as sparse categorical, but + # # if the definitions for what counts as sparse change, this might need to be + # # changed to maintain coverage + # # fac <- factor( + # # c(rep("A", 6), rep("B", 5), rep("C", 4), + # # rep("a", 3), rep("b", 2), rep("c", 1), rep(NA, 979)) + # # ) + # # first <- sample(fac, 1000) + # # second <- sample(fac, 1000) + # # df <- data.frame( + # # first = first, + # # second = second, + # # first_copy = first, + # # second_copy = second + # # ) + # # # need to change categories to IDs, and then remove NAs + # # write.csv(df, "mocks/dataset-fixtures/sparse_ca.csv", row.names = FALSE) + # + # # we need to create with metadata to ensure that the categorical array is + # # stored as sparse categorical (if we use bind, then we have to figure out + # # how to trigger a cleanup which is not exposed to the API) + # ds <- createWithMetadataAndFile( + # fromJSON( + # datasetFixturePath("sparse_ca.json"), + # simplifyVector = FALSE + # ), + # test_path(datasetFixturePath("sparse_ca.csv")) # ) - # # need to change categories to IDs, and then remove NAs - # write.csv(df, "mocks/dataset-fixtures/sparse_ca.csv", row.names = FALSE) - - # we need to create with metadata to ensure that the categorical array is - # stored as sparse categorical (if we use bind, then we have to figure out - # how to trigger a cleanup which is not exposed to the API) - ds <- createWithMetadataAndFile( - fromJSON( - datasetFixturePath("sparse_ca.json"), - simplifyVector = FALSE - ), - test_path(datasetFixturePath("sparse_ca.csv")) - ) - - test_that("combine on categorical array stored as sparse returns correct values", { - # the first categorical array is the same as the copies - first_copy_vals <- as.vector(ds$first_copy) - second_copy_vals <- as.vector(ds$second_copy) - expect_equal(as.vector(ds$cat_array$first), first_copy_vals) - expect_equal(as.vector(ds$cat_array$second), second_copy_vals) - - # make our combined variable - ds$ca_combined <- combine( - ds$cat_array, - combinations = list( - list( - name = "A", - categories = c("A", "a") - ), - list( - name = "B", - categories = c("B", "b") - ), - list( - name = "C", - categories = c("C", "c") - ) - ) - ) - - # combine the values on the vector to compare with the combined variable - levels(first_copy_vals) <- c("A", "B", "C", "A", "B", "C") - levels(second_copy_vals) <- c("A", "B", "C", "A", "B", "C") - - expect_equal(as.vector(ds$ca_combined$`first__1`), first_copy_vals) - expect_equal(as.vector(ds$ca_combined$`second__1`), second_copy_vals) - - # and this might be clearer in a cube of the first subvar and the - # first_copy, this test is testing the same thing as above, with a cube - # - # we expect: - # first_copy - # first__1 A B C a b c - # A 6 0 0 3 0 0 - # B 0 5 0 0 2 0 - # C 0 0 4 0 0 1 - # - # we get: - # first_copy - # first__1 A B C a b c - # A 6 5 4 3 0 0 - # B 0 0 0 0 2 0 - # C 0 0 0 0 0 1 - dims <- list( - `first__1` = c("A", "B", "C"), - first_copy = c("A", "B", "C", "a", "b", "c") - ) - - expect_equivalent( - as.array(crtabs(~ ca_combined[["first__1"]] + first_copy, ds)), - cubify( - 6, 0, 0, 3, 0, 0, - 0, 5, 0, 0, 2, 0, - 0, 0, 4, 0, 0, 1, - dims = dims - ) - ) - }) + # + # test_that("combine on categorical array stored as sparse returns correct values", { + # # the first categorical array is the same as the copies + # first_copy_vals <- as.vector(ds$first_copy) + # second_copy_vals <- as.vector(ds$second_copy) + # expect_equal(as.vector(ds$cat_array$first), first_copy_vals) + # expect_equal(as.vector(ds$cat_array$second), second_copy_vals) + # + # # make our combined variable + # ds$ca_combined <- combine( + # ds$cat_array, + # combinations = list( + # list( + # name = "A", + # categories = c("A", "a") + # ), + # list( + # name = "B", + # categories = c("B", "b") + # ), + # list( + # name = "C", + # categories = c("C", "c") + # ) + # ) + # ) + # + # # combine the values on the vector to compare with the combined variable + # levels(first_copy_vals) <- c("A", "B", "C", "A", "B", "C") + # levels(second_copy_vals) <- c("A", "B", "C", "A", "B", "C") + # + # expect_equal(as.vector(ds$ca_combined$`first__1`), first_copy_vals) + # expect_equal(as.vector(ds$ca_combined$`second__1`), second_copy_vals) + # + # # and this might be clearer in a cube of the first subvar and the + # # first_copy, this test is testing the same thing as above, with a cube + # # + # # we expect: + # # first_copy + # # first__1 A B C a b c + # # A 6 0 0 3 0 0 + # # B 0 5 0 0 2 0 + # # C 0 0 4 0 0 1 + # # + # # we get: + # # first_copy + # # first__1 A B C a b c + # # A 6 5 4 3 0 0 + # # B 0 0 0 0 2 0 + # # C 0 0 0 0 0 1 + # dims <- list( + # `first__1` = c("A", "B", "C"), + # first_copy = c("A", "B", "C", "a", "b", "c") + # ) + # + # expect_equivalent( + # as.array(crtabs(~ ca_combined[["first__1"]] + first_copy, ds)), + # cubify( + # 6, 0, 0, 3, 0, 0, + # 0, 5, 0, 0, 2, 0, + # 0, 0, 4, 0, 0, 1, + # dims = dims + # ) + # ) + # }) })