From 05e39de14c42676b98a903475639f917d030244c Mon Sep 17 00:00:00 2001 From: Andrew Bruce Date: Tue, 19 Nov 2024 04:21:25 -0800 Subject: [PATCH] added codecov --- .Rbuildignore | 1 + .github/workflows/test-coverage.yaml | 61 ++++++ DESCRIPTION | 1 + R/nlm_api.R | 6 +- R/utils.R | 2 +- README.Rmd | 2 +- README.md | 3 +- codecov.yml | 14 ++ data-raw/download_icd10.R | 306 +++++++++++++++++++++++++++ data-raw/extending.R | 80 +++++++ man/mount_board.Rd | 2 +- 11 files changed, 469 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/test-coverage.yaml create mode 100644 codecov.yml create mode 100644 data-raw/download_icd10.R create mode 100644 data-raw/extending.R diff --git a/.Rbuildignore b/.Rbuildignore index ebaa3e7..39ab0dc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ ^data-raw$ ^CODE_OF_CONDUCT\.md$ ^man-roxygen$ +^codecov\.yml$ diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..e050312 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,61 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index be25b28..f90b357 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,7 @@ Remotes: Config/roxyglobals/filename: roxyglobals-generated.R Config/roxyglobals/unique: TRUE Config/testthat/edition: 3 +Config/testthat/parallel: true Encoding: UTF-8 Roxygen: list(markdown = TRUE, roclets = c("collate", "namespace", "rd", "roxyglobals::global_roclet")) diff --git a/R/nlm_api.R b/R/nlm_api.R index bc1ecaa..9b9ff9e 100644 --- a/R/nlm_api.R +++ b/R/nlm_api.R @@ -66,11 +66,7 @@ icd10api <- function(icd_code = NULL, stopifnot( "Both `icd_code` and `term` cannot be NULL" = all( - !is.null( - c(icd_code, term) - ) - ) - ) + !is.null(c(icd_code, term)))) args <- stringr::str_c( c(code = icd_code, diff --git a/R/utils.R b/R/utils.R index 1f06db3..62fc0f6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,6 +1,6 @@ #' Mount [pins][pins::pins-package] board #' -#' @param `` string; whether source is `"local"` or `"remote"` +#' @param source `` string; whether source is `"local"` or `"remote"` #' #' @returns `` if `source = "local"` or `` #' if `source = "remote"` diff --git a/README.Rmd b/README.Rmd index 2487cef..be63ce0 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,10 +18,10 @@ knitr::opts_chunk$set( > Tidy ICD-10-CM Interface -[![Codecov](https://codecov.io/gh/andrewallenbruce/pathologie/branch/main/graph/badge.svg)](https://codecov.io/gh/andrewallenbruce/pathologie) [![CodeFactor](https://www.codefactor.io/repository/github/andrewallenbruce/pathologie/badge)](https://www.codefactor.io/repository/github/andrewallenbruce/pathologie) [![Code size](https://img.shields.io/github/languages/code-size/andrewallenbruce/pathologie.svg)](https://github.com/andrewallenbruce/pathologie) [![Last commit](https://img.shields.io/github/last-commit/andrewallenbruce/pathologie.svg)](https://github.com/andrewallenbruce/pathologie/commits/main) +[![Codecov test coverage](https://codecov.io/gh/andrewallenbruce/pathologie/graph/badge.svg)](https://app.codecov.io/gh/andrewallenbruce/pathologie) ## :package: Installation diff --git a/README.md b/README.md index 23c2b57..4a89f07 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,13 @@ -[![Codecov](https://codecov.io/gh/andrewallenbruce/pathologie/branch/main/graph/badge.svg)](https://codecov.io/gh/andrewallenbruce/pathologie) [![CodeFactor](https://www.codefactor.io/repository/github/andrewallenbruce/pathologie/badge)](https://www.codefactor.io/repository/github/andrewallenbruce/pathologie) [![Code size](https://img.shields.io/github/languages/code-size/andrewallenbruce/pathologie.svg)](https://github.com/andrewallenbruce/pathologie) [![Last commit](https://img.shields.io/github/last-commit/andrewallenbruce/pathologie.svg)](https://github.com/andrewallenbruce/pathologie/commits/main) +[![Codecov test +coverage](https://codecov.io/gh/andrewallenbruce/pathologie/graph/badge.svg)](https://app.codecov.io/gh/andrewallenbruce/pathologie) ## :package: Installation diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..04c5585 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,14 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true diff --git a/data-raw/download_icd10.R b/data-raw/download_icd10.R new file mode 100644 index 0000000..709037c --- /dev/null +++ b/data-raw/download_icd10.R @@ -0,0 +1,306 @@ +#' ICD Code Web Scraper +#' +#' Function to web scrape ICD discharge diagnosis code sets from the CDC FTP server +#' (for ICD-10) or CMS website (for ICD-9). If pulling ICD-10 codes, by default the +#' function will search for the most recent year's code set publication by NCHS. +#' Users can specify earlier publication years back to 2019 if needed. The ICD-9 +#' option will only web scrape the most recent, final ICD-9 code set +#' publication (2014) from the CMS website. This function will return an error +#' message if the FTP server or CMS website is unresponsive or if a timeout +#' of 60 seconds is reached. The result is a dataframe with 3 fields: code, +#' description, and set (ICD version concatenated with year). Codes are +#' standardized to upper case with punctuation and extra leading/tailing white +#' space removed to enable successful joining. +#' +#' @param icd_version A character value of either "icd10", "ICD10", "icd9", or +#' "ICD9" to specify ICD version +#' @param year A numeric integer indicating the year of desired ICD-10 code set. +#' Defaults to \code{NULL} to pull the most recent year's publication. +#' @param quiet logical. If \code{TRUE}, suppress status messages (if any), +#' and the progress bar. +#' +#' @return A dataframe +#' +#' @references +#' \itemize{ +#' \item \href{https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/}{CDC NCHS FTP Server Location for Published ICD-10 Code Sets} +#' \item \href{https://www.cms.gov/Medicare/Coding/ICD9ProviderDiagnosticCodes/codes}{CMS Website for Published ICD-9 Code Sets} +#' } +#' +#' @examples +#' +#' # Example 1 +#' icd9_2014 <- webscrape_icd(icd_version = "ICD9") +#' head(icd9_2014) +#' +#' # Example 2 +#' icd10_2024 <- webscrape_icd(icd_version = "ICD10", year = 2024) +#' head(icd10_2024) +#' +#' # Example 3 +#' icd10_2023 <- webscrape_icd(icd_version = "ICD10", year = 2023) +#' head(icd10_2023) +#' +#' @export +#' + +webscrape_icd <- function(icd_version = "ICD10", year = NULL, quiet = FALSE) { + + icd_version <- toupper(icd_version) + + if (!grepl("ICD10|ICD9", icd_version)) { + cli::cli_abort('ICD version argument {.var icd_version} must be {.var "ICD9"} or {.var "ICD10"}') + } + + if (icd_version == "ICD9" & !is.null(year)) { + cli::cli_abort("Argument {.var year} only applies for ICD10") + } + + if (icd_version == "ICD10" & !is.null(year)) { + if (year <= 2018) { + cli::cli_abort("ICD-10 code sets prior to 2019 are not supported") + } + } + + if (icd_version == "ICD10" & !is.null(year)) { + if (year > as.numeric(format(Sys.Date(), "%Y")) + 1) { + cli::cli_abort("Argument {.var year} cannot be greater than the upcoming year.") + } + } + + if (icd_version == "ICD10") { + ftp_url <- "https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/" + + root_folders <- readLines(ftp_url, warn = FALSE)[3] + + current_year <- if (!is.null(year)) { + year == as.numeric(format(Sys.Date(), "%Y")) + } else { + TRUE + } + + if (is.null(year) | current_year) { + path <- stringr::str_split(root_folders, "
")[[1]] |> + stringr::str_extract_all("pub/Health_Statistics/NCHS/Publications/ICD10CM/\\d{4}/") |> + purrr::compact() |> + purrr::list_c() |> + tibble::enframe() |> + dplyr::mutate(year = as.numeric(stringr::str_extract(value, "\\d{4}"))) |> + dplyr::filter(year == max(year)) |> + dplyr::pull(value) |> + as.character() + + recent_year <- stringr::str_extract(path, "\\d{4}") + file_year <- recent_year + + path_files <- readLines(paste0("https://ftp.cdc.gov/", path), warn = FALSE)[3] + + file_list <- unlist( + stringr::str_extract_all( + path_files, pattern = paste0( + path, "[a-zA-Z\\d-_ %20]+\\w+(?:\\.(?:xlsx|pdf|zip|txt))?" + ) + ) + ) + + file_match <- grepl( + "code_descriptions|code%20descriptions|codesdescriptions|icd10cm_codes_\\d{4}", + gsub("[ -]", "_", tolower(file_list)) + ) + + if (!all(file_match)) { + cli::cli_abort( + paste( + "The", + recent_year, + "code description file is not yet available. Please try a previous year." + ) + ) + } else { + file <- paste0("https://ftp.cdc.gov/", file_list[which(file_match)]) + file_ext <- tools::file_ext(file) + + file_idx <- which(file_ext == "zip") + + file_zip <- file[file_idx] + + temp_dir <- tempdir() + temp_file <- tempfile(tmpdir = temp_dir, fileext = ".zip") + + download_exit_status <- try( + download.file(file_zip, temp_file, quiet = FALSE) + ) + + if (all(class(download_exit_status) == "try-error")) { + cli::cli_abort("Error in {.fn webscrape_icd}: ICD-10 webscrape failed. + FTP server is currently unresponsive.") + } + + file_name <- unzip(temp_file, list = TRUE) |> + dplyr::filter(Name == paste0("Code Descriptions/icd10cm-codes-", file_year, ".txt") | Name == paste0("icd10cm-codes-", file_year, ".txt")) |> dplyr::pull(Name) + + unzip(temp_file, files = file_name, exdir = temp_dir, overwrite = TRUE) + + file_path <- file.path(temp_dir, file_name) + + icd_dictionary <- data.table::fread(file_path, sep = "\t", header = FALSE) |> + data.table::setnames(old = "V1", new = "code_combo") |> + as.data.frame() |> + dplyr::mutate( + code_combo = stringr::str_replace_all(code_combo, "\\s{2,4}", "_"), + code_combo = stringr::str_replace_all(code_combo, "(?<=^[[:alnum:]]{7})\\s{1}", "_") + ) |> + tidyr::separate(code_combo, c("code", "description"), sep = "_") |> + dplyr::mutate( + code = stringr::str_squish(code), + description = stringr::str_squish(description), + set = paste("ICD-10", file_year) + ) + } + + } else if (as.numeric(year) == 2023) { + file_year <- year + + path <- str_split(root_folders, "
") %>% + magrittr::extract2(1) %>% + str_extract_all("pub/Health_Statistics/NCHS/Publications/ICD10CM/\\d{4}/") %>% + compact() %>% + tibble::enframe() %>% + mutate(year = as.numeric(str_extract(value, "\\d{4}"))) %>% + filter(year == 2023) %>% + pull(value) %>% + as.character() + + path_files <- readLines(paste0("https://ftp.cdc.gov/", path), warn = FALSE)[3] + + file_list <- unlist( + str_extract_all( + path_files, pattern = paste0( + path, "[a-zA-Z\\d-_ %20]+\\w+(?:\\.(?:xlsx|pdf|zip|txt))?" + ) + ) + ) + + file_match <- grepl( + "icd10_order_codefiles2023", + gsub("[ -]", "_", tolower(file_list)) + ) + + if (all(file_match == FALSE)) { + cli::cli_abort( + paste( + "The", + recent_year, + "code description file is not yet available. Please try a previous year." + ) + ) + } else { + file <- paste0("https://ftp.cdc.gov/", file_list[which(file_match)]) + file_ext <- tools::file_ext(file) + + file_idx <- which(file_ext == "zip") + + file_zip <- file[file_idx] + + temp_dir <- tempdir() + temp_file <- tempfile(tmpdir = temp_dir, fileext = ".zip") + + download_exit_status <- try( + download.file(file_zip, temp_file, quiet = quiet) + ) + + if (all(class(download_exit_status) == "try-error")) { + cli::cli_abort("Error in {.fn webscrape_icd}: ICD-10 webscrape failed. + FTP server is currently unresponsive.") + } + + file_name <- unzip(temp_file, list = TRUE) %>% + filter( + Name == paste0("Code Descriptions/icd10cm-codes-", file_year, ".txt") | Name == paste0("icd10cm-codes-", file_year, ".txt") + ) %>% + pull(Name) + + unzip(temp_file, files = file_name, exdir = temp_dir, overwrite = TRUE) + + file_path <- file.path(temp_dir, file_name) + + icd_dictionary <- fread(file_path, sep = "\t", header = FALSE) %>% + setnames(old = "V1", new = "code_combo") %>% + as.data.frame() %>% + mutate( + code_combo = str_replace_all(code_combo, "\\s{2,4}", "_"), + code_combo = str_replace_all(code_combo, "(?<=^[[:alnum:]]{7})\\s{1}", "_") + ) %>% + separate(code_combo, c("code", "description"), sep = "_") %>% + mutate( + code = str_squish(code), + description = str_squish(description), + set = paste("ICD-10", file_year) + ) + } + + } else { + file_year <- year + path <- paste0("pub/Health_Statistics/NCHS/Publications/ICD10CM/", file_year, "/") + file <- paste0("https://ftp.cdc.gov/", path, "icd10cm_codes_", file_year, ".txt") + + icd_dictionary <- fread(file, sep = "\t", header = FALSE) %>% + setnames(old = "V1", new = "code_combo") %>% + as.data.frame() %>% + mutate( + code_combo = str_replace_all(code_combo, "\\s{2,4}", "_"), + code_combo = str_replace_all(code_combo, "(?<=^[[:alnum:]]{7})\\s{1}", "_") + ) %>% + separate(code_combo, c("code", "description"), sep = "_") %>% + mutate( + code = str_squish(code), + description = str_squish(description), + set = paste("ICD-10", file_year) + ) + } + + return(icd_dictionary) + } else { + base_url <- "https://www.cms.gov/Medicare/Coding/ICD9ProviderDiagnosticCodes" + icd_file <- "Downloads/ICD-9-CM-v32-master-descriptions.zip" + cms_url <- file.path(base_url, icd_file) + + temp_dir <- tempdir() + temp_file <- tempfile(tmpdir = temp_dir, fileext = ".zip") + + download_exit_status <- try( + download.file(cms_url, temp_file, quiet = quiet) + ) + + if (all(class(download_exit_status) == "try-error")) { + cli::cli_abort("Error in {.fn webscrape_icd}: ICD-9 webscrape failed. + CMS website is currently unresponsive.") + } + + file_name <- unzip(temp_file, list = TRUE) %>% + filter(Name == "CMS32_DESC_LONG_DX.txt") %>% + pull(Name) + + unzip(temp_file, files = file_name, exdir = temp_dir, overwrite = TRUE) + + file_path <- file.path(temp_dir, file_name) + + file_year <- 2014 + + icd_dictionary <- fread(file_path, sep = "\t", header = FALSE) %>% + setnames(old = "V1", new = "code_combo") %>% + as.data.frame() %>% + mutate( + code_combo = str_replace_all(code_combo, "\\s{2,4}", "_"), + code_combo = str_replace_all(code_combo, "(?<=^[[:alnum:]]{4,7})\\s{1}", "_") + ) %>% + separate(code_combo, c("code", "description"), sep = "_") %>% + mutate( + code = str_squish(code), + description = str_squish(description), + set = paste("ICD-9", file_year) + ) + + return(icd_dictionary) + } +} diff --git a/data-raw/extending.R b/data-raw/extending.R new file mode 100644 index 0000000..53d894a --- /dev/null +++ b/data-raw/extending.R @@ -0,0 +1,80 @@ +appendixA <- appendix_A() |> + dplyr::rename( + drg_abb = ms, + drg_description = description + ) + +drg_expand <- appendix_B() |> + dplyr::count(drg, sort = TRUE) |> + dplyr::select(drg) |> + dplyr::filter(stringr::str_detect(drg, "-")) |> + tidyr::separate_wider_delim( + drg, + delim = "-", + names = c("start", "end"), + cols_remove = FALSE + ) |> + dplyr::mutate(start = as.integer(start), + end = as.integer(end)) |> + dplyr::rowwise() |> + dplyr::mutate(seq = list(c(start, end)), + full = list(tidyr::full_seq(seq, 1))) |> + dplyr::select(drg, full) |> + tidyr::unnest(full) |> + dplyr::mutate(full = stringr::str_pad(as.character(full), width = 3, pad = "0")) + +appendixB <- appendix_B() |> + dplyr::left_join(drg_expand, + by = c("drg" = "drg"), + relationship = "many-to-many") |> + dplyr::select( + icd_code, + mdc, + drg_range = drg, + drg = full + ) + + +app_ab <- appendixA |> + dplyr::left_join(appendixB, + by = dplyr::join_by(drg, mdc)) |> + dplyr::select( + icd_code, + mdc, + drg, + drg_abb, + drg_description, + drg_range + ) + +appendix_C()$apx_c |> + dplyr::summarise(max_length = max(nchar(code), na.rm = TRUE)) + +append_C <- appendix_C() + +apx_c <- append_C$apx_c |> + dplyr::mutate(code = remove_dot(code), + code = add_dot(code)) |> + dplyr::select( + icd_code = code, + cc_mcc = level, + pdx_group = pdx) + +pdx <- append_C$pdx |> + dplyr::mutate(code = remove_dot(code), + code = add_dot(code)) |> + dplyr::select( + pdx_icd = code, + pdx_group = pdx_collection + ) |> + tidyr::nest(pdx_icd = pdx_icd) + +app_c <- dplyr::left_join(apx_c, pdx, by = dplyr::join_by(pdx_group)) + + +app_abc <- app_ab |> + dplyr::filter(!is.na(icd_code)) |> + dplyr::left_join(app_c) + +app_abc |> + dplyr::filter(pdx_group == "0008") diff --git a/man/mount_board.Rd b/man/mount_board.Rd index 6949954..73402e9 100644 --- a/man/mount_board.Rd +++ b/man/mount_board.Rd @@ -7,7 +7,7 @@ mount_board(source = c("local", "remote")) } \arguments{ -\item{``}{string; whether source is \code{"local"} or \code{"remote"}} +\item{source}{\verb{} string; whether source is \code{"local"} or \code{"remote"}} } \value{ \verb{} if \code{source = "local"} or \verb{}