Skip to content

Commit

Permalink
Reuse create_data_list() and keep legacy code
Browse files Browse the repository at this point in the history
  • Loading branch information
mingstat committed Nov 19, 2024
1 parent a14c500 commit 350f4ed
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 116 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(get_cre_path)
export(get_file_paths)
export(get_nfs_path)
export(load_data)
export(load_data_files)
10 changes: 6 additions & 4 deletions R/dvloader.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,16 @@ load_data <- function(

dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir)

file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas)

if (isTRUE(print_file_paths)) {
cat("Loading data from", dir_path, "\n")
cat("Loading data file(s):", basename(file_paths), "\n")
cat("Loading data file(s):", file_names, "\n")
}

data_list <- load_data_files(file_paths)
data_list <- create_data_list(
dir_path = dir_path,
file_names = file_names,
prefer_sas = prefer_sas
)

names(data_list) <- file_names

Expand Down
116 changes: 45 additions & 71 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,87 +1,61 @@
#' Get File Paths
#' Create a List of Data Frames with Metadata
#'
#' This function constructs file paths for given file names, handling both RDS and SAS7BDAT files.
#' It can prioritize SAS files over RDS files based on the `prefer_sas` parameter.
#' For each file name provided, this function reads the first matching file and its metadata/attributes.
#' By default, RDS files are preferred over SAS files for faster loading.
#' The function performs case-insensitive matching of file names.
#'
#' @param dir_path [character(1)] The directory path where the files are located.
#' @param file_names [character(1+)] A vector of file names to process.
#' @param prefer_sas [logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE.
#' @param dir_path [character(1)] Directory path where the files are located
#' @param file_names [character(1+)] Vector of file names
#' @param prefer_sas [logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files
#'
#' @return [character] A vector of normalized file paths.
#'
#' @examples
#' \dontrun{
#' temp_dir <- tempdir()
#'
#' file_names <- c("adsl", "adae")
#'
#' file.create(file.path(temp_dir, paste0(file_names, ".rds")))
#' file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat")))
#'
#' list.files(temp_dir)
#'
#' get_file_paths(dir_path = temp_dir, file_names = file_names)
#' get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE)
#'
#' unlink(temp_dir, recursive = TRUE)
#' }
#'
#' @export
get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) {
#' @return [list] A named list of data frames, where each name is the basename of the corresponding file path.
create_data_list <- function(dir_path, file_names, prefer_sas = FALSE) {
checkmate::assert_character(dir_path, len = 1)
checkmate::assert_character(file_names, min.len = 1)
checkmate::assert_logical(prefer_sas, len = 1)
checkmate::assert_directory_exists(dir_path)

data_list <- lapply(file_names, function(x) {
extensions <- c("", ".rds", ".sas7bdat")
if (prefer_sas) {
extensions <- c("", ".sas7bdat", ".rds")
}

file_paths <- lapply(file_names, function(file_name) {
file_path <- file.path(dir_path, file_name)
file_ext <- tools::file_ext(file_name)

if (file_ext == "") {
candidates <- basename(list.files(dir_path))

rds_match <- grep(
pattern = paste0("^", file_name, "\\.rds$"),
x = candidates,
ignore.case = TRUE,
value = TRUE
)

sas_match <- grep(
pattern = paste0("^", file_name, "\\.sas7bdat$"),
x = candidates,
ignore.case = TRUE,
value = TRUE
)

if (isTRUE(prefer_sas)) {
if (length(sas_match) > 0) {
return(file.path(dir_path, sas_match[1]))
} else if (length(rds_match) > 0) {
return(file.path(dir_path, rds_match[1]))
} else {
stop(dir_path, " does not contain SAS or RDS file: ", file_name)
}
} else if (isFALSE(prefer_sas)) {
if (length(rds_match) > 0) {
return(file.path(dir_path, rds_match[1]))
} else if (length(sas_match) > 0) {
return(file.path(dir_path, sas_match[1]))
} else {
stop(dir_path, " does not contain RDS or SAS file: ", file_name)
}
file_name_to_load <- NULL

candidates <- list.files(dir_path)
uppercase_candidates <- Map(toupper, candidates)

for (ext in extensions) {
# Case insensitive file name match
uppercase_file_name <- toupper(paste0(x, ext))

match_count <- sum(uppercase_candidates == uppercase_file_name)
if (match_count > 1) {
stop(paste("create_data_list(): More than one case-insensitive file name match for", dir_path, x))
}
} else {
if (file.exists(file_path)) {
return(file_path)
} else {
stop(dir_path, " does not contain: ", file_name)

index <- match(uppercase_file_name, uppercase_candidates)
if (!is.na(index)) {
file_name_to_load <- candidates[[index]]
break
}
}

if (is.null(file_name_to_load)) {
stop(paste("create_data_list(): No RDS or SAS files found for", dir_path, x))
}

# Load a single data file and get the first element of the list
output <- load_data_files(file.path(dir_path, file_name_to_load))[[1]]

return(output)
})

return(normalizePath(unlist(file_paths)))
}
names(data_list) <- file_names

return(data_list)
}


#' Load Data Files
Expand Down
23 changes: 23 additions & 0 deletions man/create_data_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 0 additions & 40 deletions man/get_file_paths.Rd

This file was deleted.

0 comments on commit 350f4ed

Please sign in to comment.