Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rc/3.0.0 to main #6

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
.Rhistory
.RData
.Ruserdata
*.sqlite
*.sqlite-journal
*.rds
.Rprofile

.vscode

docs
pkgdown
8 changes: 4 additions & 4 deletions .lintr
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
linters: linters_with_defaults(
line_length_linter(120),
object_usage_linter = NULL,
indentation_linter = NULL,
trailing_whitespace_linter = NULL
line_length_linter(120),
object_usage_linter = NULL,
indentation_linter = NULL,
trailing_whitespace_linter = NULL
)
31 changes: 19 additions & 12 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
Package: dv.loader
Type: Package
Title: Data loading module
Version: 2.0.0
Title: Data Loader for DaVinci Modular Applications
Version: 3.0.0
Authors@R: c(
person( "Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")),
person( given = "Ming", family = "Yang", role = c("aut", "cre"), email = "[email protected]"),
person( given = "Steven", family = "Brooks", role = "aut", email = "[email protected]"),
person( given = "Sorin", family = "Voicu", role = "aut", email = "[email protected]")
person("Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")),
person("Ming", "Yang", email = "[email protected]", role = c("aut", "cre")),
person("Steven", "Brooks", email = "[email protected]", role = "aut"),
person("Sorin", "Voicu", email = "[email protected]", role = "aut")
)
Description: This is a module for loading .RDS / .sas7bdat data files from a network file storage environment. It also allows loading data locally.
Description: dv.loader offers a streamlined method for importing multiple data files in R,
tailored for seamless integration with DaVinci modular applications.
License: Apache License (>= 2)
Encoding: UTF-8
LazyData: true
Depends: R (>= 3.5.0)
Imports: haven
Depends: R (>= 4.0.0)
Imports:
checkmate (>= 2.3.1),
haven (>= 2.5.4),
lifecycle (>= 1.0.4)
Suggests:
testthat,
knitr,
rmarkdown
knitr (>= 1.45),
pharmaverseadam (>= 0.2.0),
pharmaversesdtm (>= 0.2.0),
rmarkdown (>= 2.25),
testthat (>= 3.2.1)
RoxygenNote: 7.3.0
VignetteBuilder: knitr
Config/testthat/edition: 3
6 changes: 4 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(get_cre_path)
export(get_nfs_path)
export(load_data)
export(load_rds)
export(load_sas)
export(load_xpt)
importFrom(lifecycle,deprecate_warn)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# dv.loader 3.0.0

- Introduced a new set of functions `load_rds()`, `load_sas()`, and `load_xpt()`.
- Deprecated the function `load_data()`.

# dv.loader 2.0.0

- GitHub release with QC report
Expand Down
61 changes: 0 additions & 61 deletions R/dvloader.R

This file was deleted.

146 changes: 146 additions & 0 deletions R/load_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#' Loads data into memory based on study directory and one or more file_names.
#' @param sub_dir A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`.
#' If the argument is left as NULL, the function will load data from the working directory `getwd()`.
#' @param file_names Study file or file_names name(s) - can be a vector of strings.
#' This is the only required argument.
#' @param use_wd for "use working directory" - a flag used when importing local files
#' not on NFS - default value is FALSE
#' @param prefer_sas if set to TRUE, imports sas7bdat files first before looking for
#' RDS files (the opposite of default behavior)
#' @return a list of dataframes
#' @export
#' @examples
#' \dontrun{
#' test_data_path <- "../inst/extdata/"
#' data_list <- load_data(
#' sub_dir = test_data_path,
#' file_names = "dummyads2",
#' use_wd = TRUE
#' )
#' }
#' @export
#' @importFrom lifecycle deprecate_warn
load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) {
lifecycle::deprecate_warn("3.0.0", "load_data()", "read_data()")

if (is.null(file_names)) {
stop("Usage: load_data: file_names: Must supply at least one file name")
}

study_path <- "" # will be built using args

if (is.null(sub_dir)) {
study_path <- getwd()
} else {
if (use_wd) {
study_path <- file.path(getwd(), sub_dir)
} else {
study_path <- file.path(get_cre_path(), sub_dir)
}
}

# create the output
data_list <- create_data_list(study_path, file_names, prefer_sas) # nolint

return(data_list)
}

#' gets the NFS base path from an env var
#' It assumes there is an env var
#' called RXD_DATA which holds the path suffix.
#' @return the NFS base path
get_nfs_path <- function() {
base_path <- Sys.getenv("RXD_DATA")
# check that RXD_DATA is set
if (base_path == "") {
stop("Usage: get_nfs_path: RXD_DATA must be set")
}
return(base_path)
}

#' gets the NFS base path from an env var
#' alias for get_nfs_path to maintain backwards compatibility
get_cre_path <- get_nfs_path


#' For each file name provided, reads in the first matching file and its meta data/attributes.
#' Preference is given to RDS because its faster
#' @param file_path the folder where the files are
#' @param file_names CDISC names for the files
#' @param prefer_sas if TRUE, imports .sas7bdat files first instead of .RDS files
#' @return returns a list of dataframes with metadata as an attribute on each dataframe
create_data_list <- function(file_path, file_names, prefer_sas) {
data_list <- lapply(file_names, function(x) {
extensions <- c("", ".rds", ".sas7bdat")
if (prefer_sas) {
extensions <- c("", ".sas7bdat", ".rds")
}

file_name_to_load <- NULL

candidates <- list.files(file_path)
uppercase_candidates <- Map(toupper, candidates)

for (ext in extensions) {
# Case insensitive file name match
uppercase_file_name <- toupper(paste0(x, ext))

match_count <- sum(uppercase_candidates == uppercase_file_name)
if (match_count > 1) {
stop(paste("create_data_list(): More than one case-insensitive file name match for", file_path, x))
}

index <- match(uppercase_file_name, uppercase_candidates)
if (!is.na(index)) {
file_name_to_load <- candidates[[index]]
break
}
}

if (is.null(file_name_to_load)) {
stop(paste("create_data_list(): No RDS or SAS files found for", file_path, x))
}

output <- read_file(file_path, file_name_to_load)

return(output)
})

names(data_list) <- file_names

return(data_list)
}


#' Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path
#' @param file_path a path to a file
#' @param file_name name of a file
#' @return a data object with an extra attribute of metadata
read_file <- function(file_path, file_name) {
ext <- tools::file_ext(file_name)

if (!(toupper(ext) %in% c("RDS", "SAS7BDAT"))) {
stop("Usage error: read_file: file_name: file must either be RDS or SAS7BDAT.")
}

is_rds <- toupper(ext) == "RDS"

file <- file.path(file_path, file_name)
file_name <- tools::file_path_sans_ext(file_name)

# grab file info
meta <- file.info(file)[1L:6L]
meta[["path"]] <- row.names(meta)
meta[["file_name"]] <- file_name
meta <- data.frame(meta, stringsAsFactors = FALSE)
row.names(meta) <- NULL

if (is_rds) {
out <- readRDS(file)
} else {
out <- haven::read_sas(file)
}
attr(out, "meta") <- meta

return(out)
}
47 changes: 47 additions & 0 deletions R/load_rds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' Load RDS files
#'
#' This function loads RDS files via readRDS() and returns a list of data frames.
#'
#' @param files A character vector of file paths to RDS files.
#' @return A list of data frames, each containing the data from an RDS file.
#' @examples
#' # Create temporary directory and files
#' temp_dir <- tempdir()
#' adsl_rds_file <- file.path(temp_dir, "adsl.rds")
#' adae_rds_file <- file.path(temp_dir, "adae.rds")
#'
#' # Write example data to RDS files
#' saveRDS(pharmaverseadam::adsl, adsl_rds_file)
#' saveRDS(pharmaverseadam::adae, adae_rds_file)
#'
#' # Load RDS files
#' rds_data_list <- load_rds(c(adsl_rds_file, adae_rds_file))
#'
#' # Clean up
#' unlink(c(adsl_rds_file, adae_rds_file))
#' @export
load_rds <- function(files) {
# Check if files is a character vector
checkmate::assert_character(files)

# Read each file and add metadata
data_list <- lapply(files, function(file) {
# Check if file exists
checkmate::assert_file_exists(file)
# Check if file is an RDS file
check_file_ext(file, extension = "rds")

# Read RDS file
data <- readRDS(file)

# Get file info and add to data as an attribute
attr(data, "meta") <- file_info(file)

return(data)
})

# Set names of data_list to the file names
names(data_list) <- basename(files)

return(data_list)
}
46 changes: 46 additions & 0 deletions R/load_sas.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Load SAS files
#'
#' This function loads SAS files via haven::read_sas() and returns a list of data frames.
#'
#' @param files A character vector of file paths to SAS files.
#' @return A list of data frames, each containing the data from a SAS file.
#' @examples
#' # Create temporary directory and files
#' temp_dir <- tempdir()
#' adsl_sas_file <- file.path(temp_dir, "adsl.sas7bdat")
#' adae_sas_file <- file.path(temp_dir, "adae.sas7bdat")
#'
#' # Write example data to SAS files
#' haven::write_sas(pharmaverseadam::adsl, adsl_sas_file)
#' haven::write_sas(pharmaverseadam::adae, adae_sas_file)
#'
#' # Load SAS files
#' sas_data_list <- load_sas(c(adsl_sas_file, adae_sas_file))
#'
#' # Clean up
#' unlink(c(adsl_sas_file, adae_sas_file))
#' @export
load_sas <- function(files) {
# Check if files is a character vector
checkmate::assert_character(files)

# Read each file and add metadata
data_list <- lapply(files, function(file) {
# Check if file exists
checkmate::assert_file_exists(file)
# Check if file is a SAS file
check_file_ext(file, extension = "sas7bdat")

# Read SAS file
data <- haven::read_sas(file)

# Get file info and add to data as an attribute
attr(data, "meta") <- file_info(file)

return(data)
})

# Set names of data_list to the file names
names(data_list) <- basename(files)
return(data_list)
}
Loading
Loading