From 350f4ed234cf344a55d2d8215b36bd6c35549fe8 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 19 Nov 2024 15:22:08 +0800 Subject: [PATCH] Reuse create_data_list() and keep legacy code --- NAMESPACE | 1 - R/dvloader.R | 10 ++-- R/utils.R | 116 ++++++++++++++++------------------------ man/create_data_list.Rd | 23 ++++++++ man/get_file_paths.Rd | 40 -------------- 5 files changed, 74 insertions(+), 116 deletions(-) create mode 100644 man/create_data_list.Rd delete mode 100644 man/get_file_paths.Rd diff --git a/NAMESPACE b/NAMESPACE index be207da..2dcc8cd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand export(get_cre_path) -export(get_file_paths) export(get_nfs_path) export(load_data) export(load_data_files) diff --git a/R/dvloader.R b/R/dvloader.R index c032317..9a4dac3 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -76,14 +76,16 @@ load_data <- function( dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) - file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) - if (isTRUE(print_file_paths)) { cat("Loading data from", dir_path, "\n") - cat("Loading data file(s):", basename(file_paths), "\n") + cat("Loading data file(s):", file_names, "\n") } - data_list <- load_data_files(file_paths) + data_list <- create_data_list( + dir_path = dir_path, + file_names = file_names, + prefer_sas = prefer_sas + ) names(data_list) <- file_names diff --git a/R/utils.R b/R/utils.R index a856cb7..33915ac 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,87 +1,61 @@ -#' Get File Paths +#' Create a List of Data Frames with Metadata #' -#' This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. -#' It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. +#' For each file name provided, this function reads the first matching file and its metadata/attributes. +#' By default, RDS files are preferred over SAS files for faster loading. +#' The function performs case-insensitive matching of file names. #' -#' @param dir_path [character(1)] The directory path where the files are located. -#' @param file_names [character(1+)] A vector of file names to process. -#' @param prefer_sas [logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE. +#' @param dir_path [character(1)] Directory path where the files are located +#' @param file_names [character(1+)] Vector of file names +#' @param prefer_sas [logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files #' -#' @return [character] A vector of normalized file paths. -#' -#' @examples -#' \dontrun{ -#' temp_dir <- tempdir() -#' -#' file_names <- c("adsl", "adae") -#' -#' file.create(file.path(temp_dir, paste0(file_names, ".rds"))) -#' file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) -#' -#' list.files(temp_dir) -#' -#' get_file_paths(dir_path = temp_dir, file_names = file_names) -#' get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) -#' -#' unlink(temp_dir, recursive = TRUE) -#' } -#' -#' @export -get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { +#' @return [list] A named list of data frames, where each name is the basename of the corresponding file path. +create_data_list <- function(dir_path, file_names, prefer_sas = FALSE) { checkmate::assert_character(dir_path, len = 1) checkmate::assert_character(file_names, min.len = 1) checkmate::assert_logical(prefer_sas, len = 1) + checkmate::assert_directory_exists(dir_path) + + data_list <- lapply(file_names, function(x) { + extensions <- c("", ".rds", ".sas7bdat") + if (prefer_sas) { + extensions <- c("", ".sas7bdat", ".rds") + } - file_paths <- lapply(file_names, function(file_name) { - file_path <- file.path(dir_path, file_name) - file_ext <- tools::file_ext(file_name) - - if (file_ext == "") { - candidates <- basename(list.files(dir_path)) - - rds_match <- grep( - pattern = paste0("^", file_name, "\\.rds$"), - x = candidates, - ignore.case = TRUE, - value = TRUE - ) - - sas_match <- grep( - pattern = paste0("^", file_name, "\\.sas7bdat$"), - x = candidates, - ignore.case = TRUE, - value = TRUE - ) - - if (isTRUE(prefer_sas)) { - if (length(sas_match) > 0) { - return(file.path(dir_path, sas_match[1])) - } else if (length(rds_match) > 0) { - return(file.path(dir_path, rds_match[1])) - } else { - stop(dir_path, " does not contain SAS or RDS file: ", file_name) - } - } else if (isFALSE(prefer_sas)) { - if (length(rds_match) > 0) { - return(file.path(dir_path, rds_match[1])) - } else if (length(sas_match) > 0) { - return(file.path(dir_path, sas_match[1])) - } else { - stop(dir_path, " does not contain RDS or SAS file: ", file_name) - } + file_name_to_load <- NULL + + candidates <- list.files(dir_path) + uppercase_candidates <- Map(toupper, candidates) + + for (ext in extensions) { + # Case insensitive file name match + uppercase_file_name <- toupper(paste0(x, ext)) + + match_count <- sum(uppercase_candidates == uppercase_file_name) + if (match_count > 1) { + stop(paste("create_data_list(): More than one case-insensitive file name match for", dir_path, x)) } - } else { - if (file.exists(file_path)) { - return(file_path) - } else { - stop(dir_path, " does not contain: ", file_name) + + index <- match(uppercase_file_name, uppercase_candidates) + if (!is.na(index)) { + file_name_to_load <- candidates[[index]] + break } } + + if (is.null(file_name_to_load)) { + stop(paste("create_data_list(): No RDS or SAS files found for", dir_path, x)) + } + + # Load a single data file and get the first element of the list + output <- load_data_files(file.path(dir_path, file_name_to_load))[[1]] + + return(output) }) - return(normalizePath(unlist(file_paths))) -} + names(data_list) <- file_names + return(data_list) +} #' Load Data Files diff --git a/man/create_data_list.Rd b/man/create_data_list.Rd new file mode 100644 index 0000000..f1d405e --- /dev/null +++ b/man/create_data_list.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{create_data_list} +\alias{create_data_list} +\title{Create a List of Data Frames with Metadata} +\usage{ +create_data_list(dir_path, file_names, prefer_sas = FALSE) +} +\arguments{ +\item{dir_path}{[character(1)] Directory path where the files are located} + +\item{file_names}{[character(1+)] Vector of file names} + +\item{prefer_sas}{[logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files} +} +\value{ +[list] A named list of data frames, where each name is the basename of the corresponding file path. +} +\description{ +For each file name provided, this function reads the first matching file and its metadata/attributes. +By default, RDS files are preferred over SAS files for faster loading. +The function performs case-insensitive matching of file names. +} diff --git a/man/get_file_paths.Rd b/man/get_file_paths.Rd deleted file mode 100644 index 57751eb..0000000 --- a/man/get_file_paths.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{get_file_paths} -\alias{get_file_paths} -\title{Get File Paths} -\usage{ -get_file_paths(dir_path, file_names, prefer_sas = FALSE) -} -\arguments{ -\item{dir_path}{[character(1)] The directory path where the files are located.} - -\item{file_names}{[character(1+)] A vector of file names to process.} - -\item{prefer_sas}{[logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE.} -} -\value{ -[character] A vector of normalized file paths. -} -\description{ -This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. -It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. -} -\examples{ -\dontrun{ -temp_dir <- tempdir() - -file_names <- c("adsl", "adae") - -file.create(file.path(temp_dir, paste0(file_names, ".rds"))) -file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) - -list.files(temp_dir) - -get_file_paths(dir_path = temp_dir, file_names = file_names) -get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) - -unlink(temp_dir, recursive = TRUE) -} - -}