diff --git a/.Rbuildignore b/.Rbuildignore index 391e79c..e5f6055 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,3 +7,4 @@ ^\.github ^\.lintr$ ^NEWS\.md$ +^inst/validation/results/\.gitempty$ diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..d2d542e --- /dev/null +++ b/.lintr @@ -0,0 +1,6 @@ +linters: linters_with_defaults( + line_length_linter(120), + object_usage_linter = NULL, + indentation_linter = NULL, + trailing_whitespace_linter = NULL + ) diff --git a/DESCRIPTION b/DESCRIPTION index c6478af..a12e282 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dv.loader Type: Package Title: Data loading module -Version: 1.1.1 +Version: 2.0.0 Authors@R: c( person( "Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")), person( given = "Ming", family = "Yang", role = c("aut", "cre"), email = "ming.yang.ext@boehringer-ingelheim.com"), diff --git a/NEWS.md b/NEWS.md index 5ccdf08..a5c5124 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# dv.loader 2.0.0 + +- GitHub release with QC report +- Update package documentation + # dv.loader 1.1.1 - General package maintenance. diff --git a/R/dvloader.R b/R/dvloader.R index b9d0895..a36a7b9 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -18,12 +18,12 @@ get_nfs_path <- function() { get_cre_path <- get_nfs_path #' Loads data into memory based on study directory and one or more file_names. -#' @param sub_dir Study directory, which will be appended to its internal base_path. -#' If left as NULL, it will use the working directory as the sub_dir. +#' @param sub_dir A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. +#' If the argument is left as NULL, the function will load data from the working directory `getwd()`. #' @param file_names Study file or file_names name(s) - can be a vector of strings. #' This is the only required argument. #' @param use_wd for "use working directory" - a flag used when importing local files -#' not on NFS - default value is False +#' not on NFS - default value is FALSE #' @param prefer_sas if set to TRUE, imports sas7bdat files first before looking for #' RDS files (the opposite of default behavior) #' @return a list of dataframes diff --git a/README.md b/README.md index bac7a34..1199129 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,24 @@ -# Data Loading Module - -[![Checks 🧩](https://github.com/Boehringer-Ingelheim/dv.loader/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/Boehringer-Ingelheim/dv.loader/actions/workflows/ci.yml) +# Data Loading + +The {dv.loader} package provides a simple interface for loading data from a network file storage folder or +locally. It is designed to be used with `.RDS` and `.sas7bdat` file formats. +The package provides a simple function, `load_data()`, which loads R and SAS data files into memory. +Loading data from SQL databases is not yet supported. The function returns a list named by the file names passed, +and containing data frames, along with metadata for that table. By default, the function will look for files in a +sub-directory `sub_dir` of the base path defined by a environment variable "RXD_DATA". You can check if the base path +is set by running `Sys.getenv("RXD_DATA")`. A single file or multiple files can be loaded at once. +To make the loading process faster for large datasets, it is suggested that '.sas7bdat' files are converted to +'.RDS' files. The function will prefer '.RDS' files over '.sas7bdat' files by default. ## Installation ```r -install.packages("devtools") # if you have not installed "devtools" package -devtools::install_github("Boehringer-Ingelheim/dv.loader") +if (!require("remotes")) install.packages("remotes") +remotes::install_github("Boehringer-Ingelheim/dv.loader") ``` ## Basic usage -**Note**: `dv.loader` is only designed to be used with `.RDS` and `.sas7bdat` file formats. - ```r # getting data from a network file storage folder dv.loader::load_data(sub_dir = "subdir1/subdir2", file_names = c("adsl", "adae")) @@ -20,9 +26,5 @@ dv.loader::load_data(sub_dir = "subdir1/subdir2", file_names = c("adsl", "adae") ```r # getting data locally (e.g., if you have file `./data/adsl.RDS`) -dv.loader::load_data(sub_dir = "data", file_names = c("adsl"), use_wd = T) +dv.loader::load_data(sub_dir = "data", file_names = c("adsl"), use_wd = TRUE) ``` - -## Contact - -If you have edits or suggestions, please open a PR. diff --git a/_pkgdown.yml b/_pkgdown.yml index d201be0..3cc3a6f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,10 +1,16 @@ ---- -url: https://boehringer-ingelheim.github.io/dv.loader - template: bootstrap: 5 navbar: - right: - - icon: fa-github - href: https://github.com/boehringer-ingelheim/dv.loader + type: inverse + structure: + left: [intro, reference, articles, tutorials, news, qc] + components: + qc: + text: Quality Control + href: articles/qc.html +home: + title: dv.loader + links: + - text: Browse source code + href: https://github.com/Boehringer-Ingelheim/dv.loader diff --git a/dv.loader.Rproj b/dv.loader.Rproj index 33e599b..eaa6b81 100644 --- a/dv.loader.Rproj +++ b/dv.loader.Rproj @@ -15,4 +15,4 @@ LaTeX: pdfLaTeX BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace \ No newline at end of file +PackageRoxygenize: rd,collate,namespace diff --git a/inst/validation/results/.gitempty b/inst/validation/results/.gitempty new file mode 100644 index 0000000..e69de29 diff --git a/inst/validation/run_validation.R b/inst/validation/run_validation.R new file mode 100644 index 0000000..66396c0 --- /dev/null +++ b/inst/validation/run_validation.R @@ -0,0 +1,44 @@ +pkg_name <- read.dcf("DESCRIPTION")[, "Package"] +pkg_version <- read.dcf("DESCRIPTION")[, "Version"] +test_results <- tibble::as_tibble(devtools::test()) + +local({ + # This is evaluated inside a local because, otherwise, all the variables created in the chunks of the rendered + # document leak into the environment + + validation_root <- "./inst/validation" + validation_report_rmd <- file.path(validation_root, "val_report.Rmd") + validation_report_html <- "val_report.html" + validation_results <- file.path(validation_root, "results") + val_param_rds <- file.path(validation_results, "val_param.rds") + + stopifnot(dir.exists(validation_root)) + stopifnot(file.exists(validation_report_rmd)) + + stopifnot(dir.exists(validation_results)) + unlink(list.files(validation_results)) + + saveRDS( + list( + package = pkg_name, + tests = test_results, + version = pkg_version + ), + val_param_rds + ) + + rmarkdown::render( + input = validation_report_rmd, + params = list( + package = pkg_name, + tests = test_results, + version = pkg_version + ), + output_dir = validation_results, + output_file = validation_report_html + ) + + # We use one of the leaked variables, created inside the validation report to asses if the validation is + # succesful or not + VALIDATION_PASSED +}) diff --git a/inst/validation/specs.R b/inst/validation/specs.R new file mode 100644 index 0000000..982747d --- /dev/null +++ b/inst/validation/specs.R @@ -0,0 +1,13 @@ +# Use a list to declare the specs + +specs_list <- list + +specs <- specs_list( + "default_dir" = "loads data from the working directory by default", + "file_names" = "returns an error if file_names is missing", + "file_type" = "returns an error if the file type is not supported", + "file_extensions" = "checks for valid file extensions", + "data_integrity" = "ensures data is loaded correctly", + "meta_data" = "reads metadata from the items of file.info()", + "prefer_sas" = "loads a SAS or RDS file based on the prefer_sas flag" +) diff --git a/inst/validation/utils-validation.R b/inst/validation/utils-validation.R new file mode 100644 index 0000000..eeb9356 --- /dev/null +++ b/inst/validation/utils-validation.R @@ -0,0 +1,155 @@ +#' Setting up the validation + +if (!exists("package_name")) stop("package name must be in the environment when this script is sourced") + +#' How to link tests and specs + +if (FALSE) { + test_that( + vdoc[["add_spec"]]("my test description", specs$a_spec), + { + expect_true(TRUE) + } + ) +} +#' The specs variable on the call references the one declared in specs.R + +#' 3. For those tests covering more than one spec. +#' NOTE: It must be c() and not list() +#' + +if (FALSE) { + test_that( + vdoc[["add_spec"]]("my test_description", c(specs$my$hier$spec, vdoc_specs$my$hier$other_spec)), + { + expect_true(TRUE) + } + ) +} + +#' Considerations: +#' - parse_spec uses deparse(substitute()). These spec_ids are later used to check if all requirements +#' are covered or not, therefore those calls cannot by substituted for: + +if (FALSE) { + my_spec <- specs$my$hier$spec + test_that(vdoc[["add_spec"]]("my test_description", my_spec), { + ... + }) + + test_that(vdoc[["add_spec"]]("my test_description", specs[["my"]][["hier"]][["spec"]]), { + ... + }) +} + +# In this case the substitute captures my_spec and cannot be used later. +# If you want to do this you must use the spec_id parameter where you pass a +# character vector with the ids. +# Notice that the ids in character form do no longer have the specs particle +# at the beginning, only the pathing of the spec is needed. + +if (FALSE) { + my_spec <- specs$my$hier$spec + test_that(vdoc$parse_spec(my_spec, "my test_description", spec_id = c("my$hier$spec")), { + ... + }) +} + +# Validation code +# nolint start cyclocomp_linter +local({ + specs <- source( + system.file("validation", "specs.R", package = package_name, mustWork = TRUE), + local = TRUE + )[["value"]] + recursive_ids <- function(x, parent = character(0)) { + if (!is.list(x)) { + return(parent) + } + unlist(mapply(recursive_ids, + x, + paste(parent, names(x), + sep = if (identical(parent, character(0))) "" else "$" + ), + SIMPLIFY = FALSE, USE.NAMES = FALSE + )) + } + + recursive_ids <- function(x, parent = character(0)) { + if (!is.list(x)) { + return(parent) + } + unlist(mapply(recursive_ids, x, + paste(parent, names(x), + sep = if (identical(parent, character(0))) "" else "$" + ), + SIMPLIFY = FALSE, USE.NAMES = FALSE + )) + } + + + spec_id_list <- recursive_ids(specs) + + list( + specs = specs, + spec_id_list = spec_id_list, + add_spec = function(desc, spec, spec_id) { + if (missing(spec_id)) { + if (!is.character(spec) || length(spec) == 0) stop("spec must be a non-empty character vector") + s_spec <- substitute(spec) + if (s_spec[[1]] == "c") { + spec_id <- sapply(s_spec[2:length(s_spec)], identity) + } else { + spec_id <- list(s_spec) # Otherwise the posterior vapply iterates over the expression + } + + spec_id_chr <- vapply(spec_id, function(x) { + sub("^[^$]*\\$", "", deparse(x)) + }, FUN.VALUE = character(1)) + + if (!all(spec_id_chr %in% spec_id_list)) { + stop("At least one spec is not declared in the spec list") + } # This should be covered by pack of constants but just in case + } else { + spec_id_chr <- spec_id + } + paste0(desc, "__spec_ids{", paste0(spec_id_chr, collapse = ";"), "}") + }, + get_spec = function(test, specs) { + spec_ids <- utils::strcapture( + pattern = "__spec_ids\\{(.*)\\}", + x = test, + proto = list(spec = character()) + )[["spec"]] + + spec_ids <- strsplit(spec_ids, split = ";") + + specs_and_id <- list() + + for (idx in seq_along(spec_ids)){ + ids <- spec_ids[[idx]] + if (all(!is.na(ids))) { + this_specs <- list() + for (sub_idx in seq_along(ids)) { + id <- ids[[sub_idx]] + this_specs[[sub_idx]] <- eval(str2expression(paste0("specs$", id))) + } + specs_and_id[[idx]] <- list( + spec_id = ids, + spec = this_specs + ) + } else { + specs_and_id[[idx]] <- list( + spec_id = NULL, + spec = NULL + ) + } + } + specs_and_id + } + + + ) +}) + +# nolint end cyclocomp_linter diff --git a/inst/validation/val_report.Rmd b/inst/validation/val_report.Rmd new file mode 100644 index 0000000..26a97e9 --- /dev/null +++ b/inst/validation/val_report.Rmd @@ -0,0 +1,17 @@ +--- +title: "Quality Control" +output: + html_document: + toc: true + toc_depth: 2 + code_folding: hide +toc-title: "----\nIndex" + +params: + package: NULL + tests: NULL + version: NULL +--- + +```{r, child = "val_report_child.Rmd"} +``` diff --git a/inst/validation/val_report_child.Rmd b/inst/validation/val_report_child.Rmd new file mode 100644 index 0000000..bf7f1bc --- /dev/null +++ b/inst/validation/val_report_child.Rmd @@ -0,0 +1,209 @@ + + + +```{r setup, message = FALSE} +# Import vdoc functions ---- +vdoc <- local({ + # ########## + # package_name is used # INSIDE # the sourced file below + # ########## + package_name <- params[["package"]] + utils_file_path <- system.file("validation", "utils-validation.R", package = package_name, mustWork = TRUE) + source(utils_file_path, local = TRUE)[["value"]] +}) + +# Set required packages ---- +suppressPackageStartupMessages(stopifnot(requireNamespace("DT"))) +suppressPackageStartupMessages(stopifnot(requireNamespace("devtools"))) + +# Parse tests ---- + +tests <- as.data.frame(params[["tests"]]) +tests[["validation_data"]] <- vdoc[["get_spec"]](tests[["test"]], vdoc[["specs"]]) +tests[["spec_id"]] <- sapply(tests[["validation_data"]], function(x) x[["spec_id"]]) +tests[["spec"]] <- sapply(tests[["validation_data"]], function(x) x[["spec"]]) +tests[["spec_id_paste"]] <- vapply(tests[["spec_id"]], function(x) paste(x, collapse = "\n"), FUN.VALUE = character(1)) +tests[["spec_paste"]] <- vapply(tests[["spec"]], function(x) paste(x, collapse = "\n"), FUN.VALUE = character(1)) +tests[["desc"]] <- paste0("(#", seq_len(nrow(tests)), "): ", tests[["test"]]) +tests[["with_spec"]] <- vapply(tests[["spec_id"]], Negate(is.null), FUN.VALUE = logical(1)) + +spec_tests <- tests[tests[["with_spec"]], ] +no_spec_tests <- tests[!tests[["with_spec"]], ] + +declared_spec <- vdoc[["spec_id_list"]] +tested_spec <- unique(unlist(tests[["spec_id"]])) +uncovered_spec <- declared_spec[!declared_spec %in% tested_spec] +undeclared_spec <- tested_spec[!tested_spec %in% declared_spec] + +spec_tests[["are_declared"]] <- sapply(spec_tests[["spec_id"]], function(x) all(x %in% declared_spec)) + +# Count tests in the different categories ---- +mask_failed <- !!spec_tests[["failed"]] | spec_tests[["error"]] +mask_skipped <- !!spec_tests[["skipped"]] +mask_declared <- spec_tests[["are_declared"]] +n_pass_dec <- sum(!mask_failed & !mask_skipped & mask_declared) +n_fail_dec <- sum(mask_failed & mask_declared) +n_skip_dec <- sum(mask_skipped & mask_declared) +n_uncov <- length(uncovered_spec) +n_undec <- sum(!mask_declared) + +render_spec_table <- function(t) { + t <- t[trac_matrix_col] + colnames(t) <- names(trac_matrix_col) + t <- t[order(t[["Spec ID"]]), ] + DT::datatable(t, options = list(dom = "ltp"), filter = list(position = "top")) +} + +data_frame_by_row <- function(colnames, data) { + n <- length(data) + n_cols <- length(colnames) + stopifnot(n %% n_cols == 0) + columns <- vector("list", length = n_cols) + for (i in 1:n_cols) columns[[i]] <- unlist(data[seq(i, n, n_cols)]) + do.call(data.frame, setNames(columns, colnames)) +} + +# Select columns to be included in the tables ---- +trac_matrix_col <- c("Spec ID" = "spec_id_paste", "Spec" = "spec_paste", "Test Desc" = "desc", "File" = "file") + +# Check that validation passes and set title ---- +VALIDATION_PASSED <- n_fail_dec == 0 && n_uncov == 0 && n_undec == 0 && n_uncov == 0 # nolint + +result_symbol <- if (VALIDATION_PASSED) "\U02705" else "\U274C" +title <- paste(result_symbol, params[["package"]], params[["version"]]) +``` + +## `r title` +Date: `r format(Sys.time(), "%Y-%b-%d %H:%M:%S")` + +The following document generates a report for R packages, to satisfy the criteria of a "Released" status under the **Non-GxP** project. The QC report contains the following information: + +- **Specifications (specs):** These can be attached to every test that the user adds. +- **Traceability matrix:** Contains test cases with passed, failed, or skipped expectations. +- **Uncovered or undeclared specs** +- **Session Info and System Configuration** + +::: {.infobox .warning} +Please be advised that the QC report generated for this module does not imply validation according to any other GxP criteria. +The QC report only satisfies our internally developed quality checks for non-GxP criteria. +For clinical reporting purposes, it is essential to note that any outputs generated using this module must be checked and verified within a validated system that adheres to the appropriate GxP guidelines. +::: + +---- +# Traceability matrix + +In this traceability matrix only those tests that point to an specification are included. + +Test cases can contain several expectations a test is considered: + + - **passed** if all expectations in the test pass. + + - **failed** if at least one expectation in the test fails. + + - **skipped** if at least one expectation in the test is skipped. + +A test can be both **failed** and **skipped**. + +## Summary + +```{r summary} +data_frame_by_row( + colnames = c("Spec Exists", "Test", "Count", "color"), + data = list( + "Yes", "Pass", n_pass_dec, "white", + "Yes", "Failed", n_fail_dec, if (n_fail_dec > 0) "red" else "green", + "Yes", "Skipped", n_skip_dec, if (n_skip_dec > 0) "red" else "green", + "Yes", "No Test", n_uncov, if (n_uncov > 0) "red" else "green", + "No", "NA", n_undec, if (n_undec > 0) "red" else "green" + ) +) |> + DT::datatable( + rownames = FALSE, + options = list(columnDefs = list(list(visible = FALSE, targets = c(3))), dom = "tp"), + filter = list(position = "top") + ) |> + DT::formatStyle( + c("Count"), + valueColumns = "color", + backgroundColor = DT::JS("value") + ) +``` + +## Passed tests + +```{r passed_test} +render_spec_table(spec_tests[!mask_failed & !mask_skipped & mask_declared, ]) +``` + +## Failed tests + +```{r failed_test} +render_spec_table(spec_tests[mask_failed & mask_declared, ]) +``` + +## Skipped tests + +```{r skipped_test} +render_spec_table(spec_tests[mask_skipped & mask_declared, ]) +``` + +## Uncovered specifications + +```{r uncovered_spec, echo=FALSE} +data.frame("Uncovered Specifications" = uncovered_spec) |> + DT::datatable( + options = list(dom = "ltp"), + filter = list(position = "top") + ) +``` + +## Undeclared specifications + +This should always be empty, as non existant specs are controlled during test execution. + +```{r undeclared_spec, echo=FALSE, results = "asis"} +render_spec_table(spec_tests[!mask_declared, ]) +``` + +# Session Info and System Configuration + +```{r system_conf} +devtools::session_info() +``` + +# List of specifications +```{r spec_list} +j <- vapply( + vdoc[["spec_id_list"]], + function(x) { + eval( + str2expression( + paste0("vdoc[[\"specs\"]]$", x) + ) + ) + }, + FUN.VALUE = character(1) +) |> + gsub("\n", "
", x = _, fixed = TRUE) + +data.frame(spec_id = names(j), spec = j) |> + DT::datatable( + rownames = FALSE, + options = list( + dom = "ltp" + ), + filter = list(position = "top"), + escape = FALSE + ) +``` diff --git a/man/load_data.Rd b/man/load_data.Rd index c869828..5367158 100644 --- a/man/load_data.Rd +++ b/man/load_data.Rd @@ -7,14 +7,14 @@ load_data(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) } \arguments{ -\item{sub_dir}{Study directory, which will be appended to its internal base_path. -If left as NULL, it will use the working directory as the sub_dir.} +\item{sub_dir}{A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. +If the argument is left as NULL, the function will load data from the working directory `getwd()`.} \item{file_names}{Study file or file_names name(s) - can be a vector of strings. This is the only required argument.} \item{use_wd}{for "use working directory" - a flag used when importing local files -not on NFS - default value is False} +not on NFS - default value is FALSE} \item{prefer_sas}{if set to TRUE, imports sas7bdat files first before looking for RDS files (the opposite of default behavior)} diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 23a38b4..1dab7a6 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,4 +1,14 @@ -# testthat setup file +# validation (S) +vdoc <- local({ + # ########## + # package_name is used # INSIDE # the sourced file below + # ########## + package_name <- read.dcf("../../DESCRIPTION")[, "Package"] + utils_file_path <- system.file("validation", "utils-validation.R", package = package_name, mustWork = TRUE) + source(utils_file_path, local = TRUE)[["value"]] +}) +specs <- vdoc[["specs"]] +# validation (F) Sys.setenv("RXD_DATA" = find.package(package = "dv.loader")) local_test_path <- "inst/extdata" diff --git a/tests/testthat/tests.R b/tests/testthat/tests.R index 3e13c8d..d5e5747 100644 --- a/tests/testthat/tests.R +++ b/tests/testthat/tests.R @@ -1,154 +1,207 @@ -test_that("defaults to using the working directory when sub_dir arg is NULL", { - expect_error( - load_data( - file_names = local_file_names - ) - ) -}) - -test_that("throws an error if you don't provide 'file_names'", { - expect_error( - load_data(file_names = NULL) - ) -}) - -test_that("throws an error if you provide a non CRE file type", { - expect_error( - load_data( - sub_dir = local_test_path, - file_names = "bad_file_type" +test_that( + "defaults to using the working directory when sub_dir arg is NULL" %>% + vdoc[["add_spec"]](specs$default_dir), + { + expect_error( + load_data(file_names = local_file_names) + ) + } +) + +test_that( + "throws an error if you don't provide 'file_names'" %>% + vdoc[["add_spec"]](specs$file_names), + { + expect_error( + load_data(file_names = NULL) + ) + } +) + +test_that( + "throws an error if you provide a file type which is not supported" %>% + vdoc[["add_spec"]](specs$file_type), + { + expect_error( + load_data( + sub_dir = local_test_path, + file_names = "bad_file_type" + ) ) - ) -}) + } +) + +test_that( + "does not throw an error if provided valid extensions" %>% + vdoc[["add_spec"]](specs$file_extensions), + { + expect_error( + load_data( + sub_dir = local_test_path, + file_names = "dummyads1.RDS", + use_wd = TRUE + ), + NA + ) + } +) + -test_that("does not throw an error if provided valid extensions", { - expect_error( - load_data( +test_that( + "can mix file_names with valid extensions" %>% + vdoc[["add_spec"]](specs$file_extensions), + { + actual <- load_data( sub_dir = local_test_path, - file_names = "dummyads1.RDS", + file_names = c("dummyads1.RDS", "dummyads1.sas7bdat"), use_wd = TRUE - ), - NA - ) -}) - - -test_that("can mix file_names with valid extensions", { - actual <- load_data( - sub_dir = local_test_path, - file_names = c("dummyads1.RDS", "dummyads1.sas7bdat"), - use_wd = TRUE - ) - actual <- c( - tools::file_ext(attributes(actual[[1]])$meta$path), - tools::file_ext(attributes(actual[[2]])$meta$path) - ) - expected <- c("RDS", "sas7bdat") - expect_equal(actual, expected) -}) - -test_that("can mix file_names with and without valid extensions", { - expect_error( - load_data( + ) + actual <- c( + tools::file_ext(attributes(actual[[1]])$meta$path), + tools::file_ext(attributes(actual[[2]])$meta$path) + ) + expected <- c("RDS", "sas7bdat") + expect_equal(actual, expected) + } +) + +test_that( + "can mix file_names with and without valid extensions" %>% + vdoc[["add_spec"]](specs$file_extensions), + { + expect_error( + load_data( + sub_dir = local_test_path, + file_names = c("dummyads1", "dummyads2.RDS"), + use_wd = TRUE + ), + NA + ) + } +) + +test_that( + "does not throw an error if you provide valid extensions" %>% + vdoc[["add_spec"]](specs$file_extensions), + { + expect_error( + load_data( + sub_dir = local_test_path, + file_names = c("bad_file_type.txt"), + use_wd = TRUE + ) + ) + expect_error( + load_data( + sub_dir = local_test_path, + file_names = c("bad_file_type.myrds"), + use_wd = TRUE + ) + ) + } +) + +test_that( + "maintains integrity of data from producing system to consuming system" %>% + vdoc[["add_spec"]](specs$data_integrity), + { + actual <- load_data( sub_dir = local_test_path, - file_names = c("dummyads1", "dummyads2.RDS"), - use_wd = TRUE - ), - NA - ) -}) - -test_that("does not throw an error if you provide valid extensions", { - expect_error( - load_data( + file_names = local_file_names[2], + use_wd = TRUE, + prefer_sas = TRUE + ) + + actual <- actual[[1]] + + attr(actual, "meta") <- NULL + attr(actual, "label") <- "dummyads2" + + expected <- haven::read_sas(file.path(local_test_path, local_test_files[2])) + attr(expected, "label") <- "dummyads2" + + expect_equal( + actual, + expected + ) + } +) + +test_that( + "has correct metadata" %>% + vdoc[["add_spec"]](specs$meta_data), + { + actual <- load_data( sub_dir = local_test_path, - file_names = c("bad_file_type.txt"), + file_names = local_file_names[2], use_wd = TRUE ) - ) - expect_error( - load_data( + actual_meta <- attr(actual[[1]], "meta") + expect_equal( + c( + "size", "isdir", "mode", + "mtime", "ctime", "atime", + "path", "file_name" + ), + names(actual_meta) + ) + } +) + +test_that( + "loads an RDS file when prefer_sas is FALSE (default) and both SAS and RDS files exist" %>% + vdoc[["add_spec"]](specs$prefer_sas), + { + actual <- load_data( sub_dir = local_test_path, - file_names = c("bad_file_type.myrds"), + file_names = local_file_names[2], use_wd = TRUE ) - ) -}) - -test_that("maintains integrity of data from producing system to consuming system", { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE, prefer_sas = TRUE - ) - - actual <- actual[[1]] - - attr(actual, "meta") <- NULL - attr(actual, "label") <- "dummyads2" - - expected <- haven::read_sas(file.path(local_test_path, local_test_files[2])) - attr(expected, "label") <- "dummyads2" - - expect_equal( - actual, - expected - ) -}) - -test_that("has correct metadata", { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE - ) - actual_meta <- attr(actual[[1]], "meta") - expect_equal( - c( - "size", "isdir", "mode", - "mtime", "ctime", "atime", - "path", "file_name" - ), - names(actual_meta) - ) -}) - -test_that("loads an RDS file when prefer_sas is FALSE (default) and both SAS and RDS files exist", { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".RDS$", actual, ignore.case = FALSE), TRUE) -}) - -test_that("loads a SAS file when prefer_sas is FALSE (default) and an RDS file doesn't exist", { - actual <- load_data( - sub_dir = file.path(local_test_path, "just_sas"), - file_names = local_file_names[2], - use_wd = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".sas7bdat$", actual, ignore.case = TRUE), TRUE) -}) - -test_that("loads a SAS file when prefer_sas is TRUE and both SAS and RDS files exist", { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE, prefer_sas = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".sas7bdat$", actual), TRUE) -}) - -test_that("loads an RDS file when prefer_sas is TRUE and a SAS file doesn't exist", { - actual <- load_data( - sub_dir = file.path(local_test_path, "just_rds"), - file_names = local_file_names[2], - use_wd = TRUE, prefer_sas = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".RDS$", actual), TRUE) -}) + actual <- attr(actual[[1]], "meta")[["path"]] + expect_equal(grepl(".RDS$", actual, ignore.case = FALSE), TRUE) + } +) + +test_that( + "loads a SAS file when prefer_sas is FALSE (default) and an RDS file doesn't exist" %>% + vdoc[["add_spec"]](specs$prefer_sas), + { + actual <- load_data( + sub_dir = file.path(local_test_path, "just_sas"), + file_names = local_file_names[2], + use_wd = TRUE + ) + actual <- attr(actual[[1]], "meta")[["path"]] + expect_equal(grepl(".sas7bdat$", actual, ignore.case = TRUE), TRUE) + } +) + +test_that( + "loads a SAS file when prefer_sas is TRUE and both SAS and RDS files exist" %>% + vdoc[["add_spec"]](specs$prefer_sas), + { + actual <- load_data( + sub_dir = local_test_path, + file_names = local_file_names[2], + use_wd = TRUE, + prefer_sas = TRUE + ) + actual <- attr(actual[[1]], "meta")[["path"]] + expect_equal(grepl(".sas7bdat$", actual), TRUE) + } +) + +test_that( + "loads an RDS file when prefer_sas is TRUE and a SAS file doesn't exist" %>% + vdoc[["add_spec"]](specs$prefer_sas), + { + actual <- load_data( + sub_dir = file.path(local_test_path, "just_rds"), + file_names = local_file_names[2], + use_wd = TRUE, + prefer_sas = TRUE + ) + actual <- attr(actual[[1]], "meta")[["path"]] + expect_equal(grepl(".RDS$", actual), TRUE) + } +) diff --git a/vignettes/qc.Rmd b/vignettes/qc.Rmd new file mode 100644 index 0000000..87bb735 --- /dev/null +++ b/vignettes/qc.Rmd @@ -0,0 +1,32 @@ +--- +title: "Quality Control" +output: + rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Quality Control} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + echo = FALSE +) +``` + +```{r, params, echo = FALSE, include = FALSE} +val_param_rds <- "../inst/validation/results/val_param.rds" +val_param_rds_exists <- file.exists(val_param_rds) +if (file.exists(val_param_rds)) params <- readRDS(val_param_rds) +``` + +```{r, results = "asis", echo = FALSE} +if (val_param_rds_exists) { + res <- knitr::knit_child("../inst/validation/val_report_child.Rmd", quiet = TRUE, envir = environment()) + cat(res, sep = "\n") +} else { + "No quality control results found" +} +```