diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index cb01cbd3..fd021c07 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -64,6 +64,14 @@ jobs: reticulate::py_install(c("anndata", "scanpy", "dummy-anndata"), pip = TRUE) shell: Rscript {0} + - name: Install h5diff + run: | + if [ "$RUNNER_OS" == "Linux" ]; then + sudo apt-get install hdf5-tools + fi + shell: bash + + - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true diff --git a/DESCRIPTION b/DESCRIPTION index fb6a1862..e4d31613 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -83,7 +83,8 @@ Suggests: testthat (>= 3.0.0), vctrs, withr, - yaml + yaml, + processx VignetteBuilder: knitr Config/Needs/website: pkgdown, tibble, knitr, rprojroot, stringr, readr, diff --git a/R/generate_matrix.R b/R/generate_matrix.R index 592e2e36..078b11d0 100644 --- a/R/generate_matrix.R +++ b/R/generate_matrix.R @@ -1,73 +1,82 @@ # nolint start +generate_numeric_matrix <- function(n_obs, n_vars, NAs = FALSE) { + # byrow = TRUE to mimic the way a matrix gets filled in Python + m <- matrix(seq(0.5, n_obs * n_vars), nrow = n_obs, ncol = n_vars, byrow = TRUE) + if (NAs) { + m[1, 1] <- NA_real_ + } + m +} + +generate_integer_matrix <- function(n_obs, n_vars, NAs = FALSE) { + # byrow = TRUE to mimic the way a matrix gets filled in Python + m <- matrix(seq(0L, n_obs * n_vars - 1), nrow = n_obs, ncol = n_vars, byrow = TRUE) + if (NAs) { + m[1, 1] <- NA_integer_ + } + m +} + matrix_generators <- list( numeric_matrix = function(n_obs, n_vars) { - matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) + generate_numeric_matrix(n_obs, n_vars) }, numeric_dense = function(n_obs, n_vars) { - m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) + m <- generate_numeric_matrix(n_obs, n_vars) as(m, "denseMatrix") }, numeric_csparse = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m <- generate_numeric_matrix(n_obs, n_vars) as(m, "CsparseMatrix") }, numeric_rsparse = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m <- generate_numeric_matrix(n_obs, n_vars) as(m, "RsparseMatrix") }, numeric_matrix_with_nas = function(n_obs, n_vars) { - m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ - m + generate_numeric_matrix(n_obs, n_vars, NAs = TRUE) }, numeric_dense_with_nas = function(n_obs, n_vars) { - m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ + m <- generate_numeric_matrix(n_obs, n_vars, NAs = TRUE) as(m, "denseMatrix") }, numeric_csparse_with_nas = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ + m <- generate_numeric_matrix(n_obs, n_vars, NAs = TRUE) as(m, "CsparseMatrix") }, numeric_rsparse_with_nas = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ + m <- generate_numeric_matrix(n_obs, n_vars, NAs = TRUE) as(m, "RsparseMatrix") }, integer_matrix = function(n_obs, n_vars) { - matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) + generate_integer_matrix(n_obs, n_vars) }, integer_dense = function(n_obs, n_vars) { - m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) + m <- generate_integer_matrix(n_obs, n_vars) as(m, "denseMatrix") }, integer_csparse = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m <- generate_integer_matrix(n_obs, n_vars) as(m, "CsparseMatrix") }, integer_rsparse = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m <- generate_integer_matrix(n_obs, n_vars) as(m, "RsparseMatrix") }, integer_matrix_with_nas = function(n_obs, n_vars) { - m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ + m <- generate_integer_matrix(n_obs, n_vars, NAs = TRUE) m }, integer_dense_with_nas = function(n_obs, n_vars) { - m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ + m <- generate_integer_matrix(n_obs, n_vars, NAs = TRUE) as(m, "denseMatrix") }, integer_csparse_with_nas = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ + m <- generate_integer_matrix(n_obs, n_vars, NAs = TRUE) as(m, "CsparseMatrix") }, integer_rsparse_with_nas = function(n_obs, n_vars) { - m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) - m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ + m <- generate_integer_matrix(n_obs, n_vars, NAs = TRUE) as(m, "RsparseMatrix") } ) diff --git a/R/generate_vector.R b/R/generate_vector.R index f10b6f8d..978005d2 100644 --- a/R/generate_vector.R +++ b/R/generate_vector.R @@ -1,28 +1,28 @@ vector_generators <- list( - character = function(n) paste0("value", seq_len(n)), - integer = function(n) seq_len(n), - factor = function(n) factor(paste0("value", seq_len(n))), - factor_ordered = function(n) factor(paste0("value", seq_len(n)), ordered = TRUE), + character = function(n) paste0("value_", seq(from = 0, to = n - 1)), + integer = function(n) seq(from = 0, to = n - 1), + factor = function(n) factor(rep(c("Value1", "Value2"), length.out = n)), + factor_ordered = function(n) factor(rep(c("Value1", "Value2"), length.out = n), ordered = TRUE), logical = function(n) sample(c(TRUE, FALSE), n, replace = TRUE), - numeric = function(n) runif(n), + numeric = function(n) seq(from = 0.5, to = n), character_with_nas = function(n) { x <- paste0("value", seq_len(n)) x[seq(1, n, by = 2)] <- NA_character_ x }, integer_with_nas = function(n) { - x <- seq_len(n) - x[seq(1, n, by = 2)] <- NA_integer_ + x <- seq(from = 0, to = n - 1) + x[1] <- NA_integer_ x }, factor_with_nas = function(n) { - x <- factor(paste0("value", seq_len(n))) - x[seq(1, n, by = 2)] <- NA_character_ + x <- factor(rep(c("Value1", "Value2"), length.out = n)) + x[1] <- NA_character_ x }, factor_ordered_with_nas = function(n) { - x <- factor(paste0("value", seq_len(n)), ordered = TRUE) - x[seq(1, n, by = 2)] <- NA_character_ + x <- factor(rep(c("Value1", "Value2"), length.out = n), ordered = TRUE) + x[1] <- NA_character_ x }, logical_with_nas = function(n) { diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml index 0d7c6e8a..74640f24 100644 --- a/inst/known_issues.yaml +++ b/inst/known_issues.yaml @@ -72,3 +72,145 @@ known_issues: proposed_solution: The input checking function for obsm and varm should allow the object to be a vector of the correct length instead of only a matrix or a data frame. to_investigate: True to_fix: True + - backend: HDF5AnnData + slot: + - X + dtype: + - float_csparse + - float_csparse_nas + process: [h5diff] + error_message: | + Warning: different storage datatype + has file datatype H5T_STD_I64LE + has file datatype H5T_STD_I32LE + attribute: > and > + description: hdf5py writes the attribute as a H5T_STD_I64LE, hdf5r writes it as H5T_STD_I32LE. + proposed_solution: We should investigate if we can specify the type with which an attribute should be written. + to_investigate: True + to_fix: True + - backend: HDF5AnnData + slot: + - X + - obsm + - varm + - obsp + - varp + dtype: + - float_csparse + - float_csparse_nas + - float_rsparse + - float_rsparse_nas + process: [h5diff] + error_message: | + dataset: and + Not comparable: has rank 1, dimensions [200], max dimensions [18446744073709551615] + and has rank 1, dimensions [108], max dimensions [108] + 0 differences found + dataset: and + Not comparable: has rank 1, dimensions [200], max dimensions [18446744073709551615] + and has rank 1, dimensions [108], max dimensions [108] + 0 differences found + dataset: and + Warning: different maximum dimensions + has max dimensions [18446744073709551615] + has max dimensions [21] + description: hdf5py has max dimensions as 2^64 - 1, the max val for an unsigned int. hdf5r has it as the actual value + proposed_solution: We should investigate if something goes wrong with h5py, but I think hdf5 provides the expected behaviour. + to_investigate: True + to_fix: False + - backend: HDF5AnnData + slot: + - obs + - var + dtype: + - integer_with_nas + process: [h5diff] + error_message: | + dataset: and + Warning: different storage datatype + Not comparable: has sign H5T_SGN_2 and has sign H5T_SGN_NONE + 0 differences found + description: hdf5py writes a nullable integer array with type H5T_SGN_2, hdf5r writes with type H5T_SGN_NONE + proposed_solution: We should investigate if we can specify the type with which an attribute should be written. + to_investigate: True + to_fix: True + - backend: HDF5AnnData + slot: + - obs + - var + dtype: + - nullable_integer_array + process: [h5diff] + error_message: | + dataset: and + Warning: different storage datatype + has file datatype H5T_STD_I64LE + has file datatype H5T_STD_I32LE + size: [20] [20] + position values values difference + ------------------------------------------------------------ + [ 0 ] 0 1 1 + 1 differences found + description: hdf5py writes a nullable integer array with type H5T_STD_I64LE, hdf5r writes with type H5T_STD_I32LE + proposed_solution: We should investigate if we can specify the type with which an attribute should be written. + to_investigate: True + to_fix: True + - backend: HDF5AnnData + slot: + - obs + - var + dtype: + - nullable_integer_array + process: [h5diff] + error_message: | + dataset: and + Warning: different storage datatype + has file datatype H5T_STD_I64LE + has file datatype H5T_STD_I32LE + size: [20] [20] + position values values difference + ------------------------------------------------------------ + [ 0 ] 0 1 1 + 1 differences found + description: On position 0, hdf5py writes a 0 in the values array, hdf5r writes a 1. + proposed_solution: We should investigate why this difference happens. + to_investigate: True + to_fix: True + - backend: HDF5AnnData + slot: + - X + - obsm + - varm + - layers + - obsp + - varp + dtype: + - numeric_dense + - numeric_dense_with_nas + - integer_dense + process: [h5diff] + error_message: | + Error in `H5File.open(filename, mode, file_create_pl, file_access_pl)`: HDF5-API Errors: + error #000: ../../../src/H5F.c in H5Fcreate(): line 349: unable to create file + class: HDF5 + major: File accessibility + minor: Unable to open file + + error #001: ../../../src/H5Fint.c in H5F_open(): line 1725: unable to open file + class: HDF5 + major: File accessibility + minor: Unable to open file + + error #002: ../../../src/H5FD.c in H5FD_open(): line 722: open failed + class: HDF5 + major: Virtual File Layer + minor: Unable to initialize object + + error #003: ../../../src/H5FDsec2.c in H5FD__sec2_open(): line 351: unable to open file: name = '/tmp/RtmpN29Fmn/anndata_r2_integer_matrixbe0976b43e39b.h5ad', errno = 17, error message = 'File exists', flags = 15, o_flags = c2 + class: HDF5 + major: File accessibility + minor: Unable to open file + description: Issue is related to [issue \#198](https://github.com/scverse/anndataR/issues/198). + proposed_solution: Fix writing dgeMatrix objects. + to_investigate: True + to_fix: True diff --git a/tests/testthat/helper-py-R-equivalences.R b/tests/testthat/helper-py-R-equivalences.R new file mode 100644 index 00000000..d7986070 --- /dev/null +++ b/tests/testthat/helper-py-R-equivalences.R @@ -0,0 +1,65 @@ +matrix_equivalences <- list( + c("float_matrix", "numeric_matrix"), + c("float_matrix", "numeric_dense"), + c("float_matrix_nas", "numeric_matrix_with_nas"), + c("float_matrix_nas", "numeric_dense_with_nas"), + c("integer_matrix", "integer_matrix"), + c("integer_matrix", "integer_dense"), + c("float_csparse", "numeric_csparse"), + c("float_csparse_nas", "numeric_csparse_with_nas"), + c("float_rsparse", "numeric_rsparse"), + c("float_rsparse_nas", "numeric_rsparse_with_nas") +) + +# python, R +vector_equivalences <- list( + c("categorical", "factor"), + c("categorical_ordered", "factor_ordered"), + c("categorical_missing_values", "factor_with_nas"), + c("categorical_ordered_missing_values", "factor_ordered_with_nas"), + c("string_array", "character"), + c("dense_array", "numeric"), + c("integer_array", "integer"), + c("boolean_array", "logical"), + c("nullable_integer_array", "integer_with_nas"), + c("nullable_boolean_array", "logical_with_nas") +) + +all_equivalences <- c(matrix_equivalences, vector_equivalences) + +check_arg <- function(args, name, falseval) { + if (name %in% names(args)) { + args[[name]][[1]] + } else { + falseval + } +} + +r_generate_dataset <- function(n_obs, n_vars, write = FALSE, ...) { + args <- list(...) + + data <- generate_dataset(n_obs, n_vars, + x_type = check_arg(args, "x_type", "numeric_matrix"), + layer_types = check_arg(args, "layer_types", character()), + obs_types = ifelse("obs_types" %in% names(args), args$obs_types, "integer"), + var_types = ifelse("var_types" %in% names(args), args$var_types, "integer"), + obsm_types = check_arg(args, "obsm_types", character()), + varm_types = check_arg(args, "varm_types", character()), + obsp_types = check_arg(args, "obsp_types", character()), + varp_types = check_arg(args, "varp_types", character()), + uns_types = check_arg(args, "uns_types", character()), + format = "AnnData") + if (write) { + r_write_dataset(data) + } + + data +} + +r_write_dataset <- function(dataset, file = NULL) { + if (is.null(file)) { + file <- tempfile(pattern = "hdf5_write_R_", fileext = ".h5ad") + } + write_h5ad(dataset, file) + file +} diff --git a/tests/testthat/helper-skip_if_no_h5diff.R b/tests/testthat/helper-skip_if_no_h5diff.R new file mode 100644 index 00000000..c9fd995a --- /dev/null +++ b/tests/testthat/helper-skip_if_no_h5diff.R @@ -0,0 +1,7 @@ +# helper function to skip tests if h5diff is not available +skip_if_no_h5diff <- function() { + testthat::skip_if_not({ + s <- system2(command = "which", args = "h5diff", stdout = TRUE, stderr = TRUE) + is.null(attr(s, "status")) + }, message = "h5diff not available for testing") +} diff --git a/tests/testthat/test-roundtrip-X.R b/tests/testthat/test-roundtrip-X.R index 8aeb420b..b42750eb 100644 --- a/tests/testthat/test-roundtrip-X.R +++ b/tests/testthat/test-roundtrip-X.R @@ -33,6 +33,7 @@ for (name in test_names) { # create a couple of paths file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + file_r2 <- withr::local_file(tempfile(paste0("anndata_r2_", name), fileext = ".h5ad")) # write to file adata_py$write_h5ad(file_py) @@ -101,4 +102,36 @@ for (name in test_names) { adata_py$X ) }) + + skip_if_no_h5diff() + # Get all R datatypes that are equivalent to the python datatype (name) + res <- Filter(function(x) x[[1]] == name, matrix_equivalences) + r_datatypes <- sapply(res, function(x) x[[2]]) + + for (r_name in r_datatypes) { + test_msg <- paste0("Comparing a python generated .h5ad with X '", name, + "' with an R generated .h5ad '", r_name, "' works") + test_that(test_msg, { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("X"), + dtype = c(name, r_name), + process = c("h5diff"), + known_issues = known_issues + ) + skip_if(!is.null(msg), message = msg) + + # generate an R h5ad + adata_r <- r_generate_dataset(10L, 20L, x_type = list(r_name)) + write_h5ad(adata_r, file_r2) + + # run h5diff + res <- processx::run("h5diff", + c("-v", file_py, file_r2, "/X"), + error_on_status = FALSE) + + expect_equal(res$status, 0, info = res$stdout) + + }) + } } diff --git a/tests/testthat/test-roundtrip-layers.R b/tests/testthat/test-roundtrip-layers.R index 9ba0aa25..4b8b311c 100644 --- a/tests/testthat/test-roundtrip-layers.R +++ b/tests/testthat/test-roundtrip-layers.R @@ -33,6 +33,7 @@ for (name in test_names) { # create a couple of paths file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + file_r2 <- withr::local_file(tempfile(paste0("anndata_r2_", name), fileext = ".h5ad")) # write to file adata_py$write_h5ad(file_py) @@ -105,4 +106,37 @@ for (name in test_names) { py_get_item(adata_py$layers, name) ) }) + + skip_if_no_h5diff() + # Get all R datatypes that are equivalent to the python datatype (name) + res <- Filter(function(x) x[[1]] == name, matrix_equivalences) + r_datatypes <- sapply(res, function(x) x[[2]]) + + for (r_name in r_datatypes) { + test_msg <- paste0("Comparing a python generated .h5ad with layer '", name, + "' with an R generated .h5ad '", r_name, "' works") + test_that(test_msg, { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("X"), + dtype = c(name, r_name), + process = c("h5diff"), + known_issues = known_issues + ) + + skip_if(!is.null(msg), message = msg) + + # generate an R h5ad + adata_r <- r_generate_dataset(10L, 20L, layer_types = list(r_name)) + write_h5ad(adata_r, file_r2) + + # run h5diff + res <- processx::run("h5diff", + c("-v", file_py, file_r2, paste0("/layers/", name), paste0("/layers/", r_name)), + error_on_status = FALSE) + + expect_equal(res$status, 0, info = res$stdout) + + }) + } } diff --git a/tests/testthat/test-roundtrip-obsmvarm.R b/tests/testthat/test-roundtrip-obsmvarm.R index 372a72b0..1051f618 100644 --- a/tests/testthat/test-roundtrip-obsmvarm.R +++ b/tests/testthat/test-roundtrip-obsmvarm.R @@ -44,6 +44,7 @@ for (name in test_names) { # create a couple of paths file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + file_r2 <- withr::local_file(tempfile(paste0("anndata_r2_", name), fileext = ".h5ad")) # write to file adata_py$write_h5ad(file_py) @@ -129,4 +130,39 @@ for (name in test_names) { py_get_item(adata_py$varm, name) ) }) + + skip_if_no_h5diff() + # Get all R datatypes that are equivalent to the python datatype (name) + res <- Filter(function(x) x[[1]] == name, all_equivalences) + r_datatypes <- sapply(res, function(x) x[[2]]) + + for (r_name in r_datatypes){ + test_msg <- paste0("Comparing a python generated .h5ad with obsm and varm '", name, + "' with an R generated .h5ad '", r_name, "' works") + test_that(test_msg, { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsm", "varm"), + dtype = c(name, r_name), + process = c("h5diff"), + known_issues = known_issues + ) + + skip_if(!is.null(msg), message = msg) + # generate an R h5ad + adata_r <- r_generate_dataset(10L, 20L, obsm_types = list(r_name), varm_types = list(r_name)) + write_h5ad(adata_r, file_r2) + + # run h5diff + res_obsm <- processx::run("h5diff", + c("-v", file_py, file_r2, paste0("/obsm/", name), paste0("/obsm/", r_name)), + error_on_status = FALSE) + expect_equal(res_obsm$status, 0, info = res_obsm$stdout) + + res_varm <- processx::run("h5diff", + c("-v", file_py, file_r2, paste0("/varm/", name), paste0("/varm/", r_name)), + error_on_status = FALSE) + expect_equal(res_varm$status, 0, info = res_varm$stdout) + }) + } } diff --git a/tests/testthat/test-roundtrip-obspvarp.R b/tests/testthat/test-roundtrip-obspvarp.R index 25b3e9ba..801aa3ef 100644 --- a/tests/testthat/test-roundtrip-obspvarp.R +++ b/tests/testthat/test-roundtrip-obspvarp.R @@ -33,6 +33,7 @@ for (name in test_names) { # create a couple of paths file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + file_r2 <- withr::local_file(tempfile(paste0("anndata_r2_", name), fileext = ".h5ad")) # write to file adata_py$write_h5ad(file_py) @@ -118,4 +119,41 @@ for (name in test_names) { py_get_item(adata_py$varp, name) ) }) + + skip_if_no_h5diff() + # Get all R datatypes that are equivalent to the python datatype (name) + res <- Filter(function(x) x[[1]] == name, matrix_equivalences) + r_datatypes <- sapply(res, function(x) x[[2]]) + + + for (r_name in r_datatypes){ + test_msg <- paste0("Comparing a python generated .h5ad with obsp and varp '", name, + "' with an R generated .h5ad '", r_name, "' works") + test_that(test_msg, { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obsp", "varp"), + dtype = c(name, r_name), + process = c("h5diff"), + known_issues = known_issues + ) + skip_if(!is.null(msg), message = msg) + # generate an R h5ad + adata_r <- r_generate_dataset(10L, 20L, obsp_types = list(r_name), varp_types = list(r_name)) + write_h5ad(adata_r, file_r2) + + # run h5diff + res_obsp <- processx::run("h5diff", + c("-v", file_py, file_r2, paste0("/obsp/", name), paste0("/obsp/", r_name)), + error_on_status = FALSE) + expect_equal(res_obsp$status, 0, info = res_obsp$stdout) + + res_varp <- processx::run("h5diff", + c("-v", file_py, file_r2, paste0("/varp/", name), paste0("/varp/", r_name)), + error_on_status = FALSE) + expect_equal(res_varp$status, 0, info = res_varp$stdout) + + }) + } + } diff --git a/tests/testthat/test-roundtrip-obsvar.R b/tests/testthat/test-roundtrip-obsvar.R index b38e3ec4..89eafc43 100644 --- a/tests/testthat/test-roundtrip-obsvar.R +++ b/tests/testthat/test-roundtrip-obsvar.R @@ -33,6 +33,7 @@ for (name in test_names) { # create a couple of paths file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad")) file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad")) + file_r2 <- withr::local_file(tempfile(paste0("anndata_r2_", name), fileext = ".h5ad")) # write to file adata_py$write_h5ad(file_py) @@ -112,4 +113,38 @@ for (name in test_names) { expect_equal_py(adata_py2$obs, adata_py$obs) expect_equal_py(adata_py2$var, adata_py$var) }) + + skip_if_no_h5diff() + # Get all R datatypes that are equivalent to the python datatype (name) + res <- Filter(function(x) x[[1]] == name, vector_equivalences) + r_datatypes <- sapply(res, function(x) x[[2]]) + + for (r_name in r_datatypes){ + test_msg <- paste0("Comparing a python generated .h5ad with obs and var '", name, + "' with an R generated .h5ad '", r_name, "' works") + test_that(test_msg, { + msg <- message_if_known( + backend = "HDF5AnnData", + slot = c("obs", "var"), + dtype = c(name, r_name), + process = c("h5diff"), + known_issues = known_issues + ) + skip_if(!is.null(msg), message = msg) + # generate an R h5ad + adata_r <- r_generate_dataset(10L, 20L, obs_types = list(r_name), var_types = list(r_name)) + write_h5ad(adata_r, file_r2) + + # run h5diff + res_obs <- processx::run("h5diff", + c("-v", file_py, file_r2, paste0("/obs/", name), paste0("/obs/", r_name)), + error_on_status = FALSE) + expect_equal(res_obs$status, 0, info = res_obs$stdout) + + res_var <- processx::run("h5diff", + c("-v", file_py, file_r2, paste0("/var/", name), paste0("/var/", r_name)), + error_on_status = FALSE) + expect_equal(res_var$status, 0, info = res_var$stdout) + }) + } }