diff --git a/R/anlz_fibmatrix.R b/R/anlz_fibmatrix.R index f5ebf03b..91a7bc7f 100644 --- a/R/anlz_fibmatrix.R +++ b/R/anlz_fibmatrix.R @@ -3,7 +3,7 @@ #' Analyze Fecal Indicator Bacteria categories over time by station #' #' @param fibdata input data frame as returned by \code{\link{read_importfib}} or \code{\link{read_importentero}} -#' @param yrrng numeric vector indicating min, max years to include, defaults to range of years in data +#' @param yrrng numeric vector indicating min, max years to include, defaults to range of years in data, see details #' @param stas optional vector of stations to include, see details #' @param indic character for choice of fecal indicator. Allowable options are \code{fcolif} for fecal coliform, or \code{ecocci} for Enterococcus. A numeric column in the data frame must have this name. #' @param threshold optional numeric for threshold against which to calculate exceedances for the indicator bacteria of choice. If not provided, defaults to 400 for \code{fcolif} and 130 for \code{ecocci}. @@ -19,6 +19,8 @@ #' #' @details This function is used to create output for plotting a matrix stoplight graphic for FIB categories by station and year. Each station and year combination is categorized based on the likelihood of fecal indicator bacteria concentrations exceeding some threshold in a given year. For fecal coliform, the default threshold is 400 CFU / 100 mL in a given year (using Fecal Coliform, \code{fcolif} in \code{fibdata}). For Enterococcus, the default threshold is 130 CFU / 100 mL. The proportions are categorized as A, B, C, D, or E (Microbial Water Quality Assessment or MWQA categories) with corresponding colors, where the breakpoints for each category are <10\%, 10-30\%, 30-50\%, 50-75\%, and >75\% (right-closed). By default, the results for each year are based on a right-centered window that uses the previous two years and the current year to calculate probabilities using the monthly samples (\code{lagyr = 3}). See \code{\link{show_fibmatrix}} for additional details. #' +#' \code{yrrng} can be specified several ways. If \code{yrrng = NULL}, the year range of the data for the selected changes is chosen. User-defined values for the minimum and maximum years can also be used, or only a minimum or maximum can be specified, e.g., \code{yrrng = c(2000, 2010)} or \code{yrrng = c(2000, NA)}. In the latter case, the maximum year will be defined by the data. +#' #' @export #' #' @importFrom dplyr "%>%" @@ -81,18 +83,43 @@ anlz_fibmatrix <- function(fibdata, dplyr::filter(wetdry == subset_wetdry) } - - # get year range from data if not provided - if(is.null(yrrng)) - yrrng <- c(min(fibdata$yr, na.rm = T), max(fibdata$yr, na.rm = T)) - # if dealing with epchc data, make a simple 'station' column if(exists("epchc_station", fibdata)){fibdata$station <- fibdata$epchc_station} + # all stations if stas is NULL + if(is.null(stas)) + stas <- fibdata %>% + dplyr::pull(station) %>% + unique() %>% + sort() + + # check stations + chk <- stas %in% fibdata$station + if(any(!chk)) + stop('Station(s) not found in fibdata: ', paste(stas[!chk], collapse = ', ')) # make a generic column for the indicator fibdata$indic <- fibdata[[which(names(fibdata) == indic)]] + # get year range from data if not provided + if(any(is.na(yrrng)) | is.null(yrrng)){ + valyrs <- fibdata %>% + dplyr::filter(station %in% stas) %>% + dplyr::filter(!is.na(indic) | indic < 0) %>% + dplyr::pull(yr) %>% + range(na.rm = T) + + if(is.null(yrrng)) + yrrng <- valyrs + + if(is.na(yrrng[1])) + yrrng[1] <- valyrs[1] + + if(is.na(yrrng[2])) + yrrng[2] <- valyrs[2] + + } + # if threshold wasn't assigned, assign one based on the indicator of choice if(is.null(threshold)){ thrsh <- switch(indic, @@ -104,6 +131,7 @@ anlz_fibmatrix <- function(fibdata, # valid stations with sufficient data for lagyr stasval <- fibdata %>% + dplyr::filter(station %in% stas) %>% dplyr::filter(yr >= (yrrng[1] - (lagyr - 1)) & yr <= yrrng[2]) %>% dplyr::filter(!is.na(indic) | indic < 0) %>% dplyr::summarise( @@ -114,15 +142,6 @@ anlz_fibmatrix <- function(fibdata, dplyr::pull(station) %>% unique() - # all valid stations if stas is NULL - if(is.null(stas)) - stas <- stasval - - # check stations - chk <- stas %in% fibdata$station - if(any(!chk)) - stop('Station(s) not found in fibdata: ', paste(stas[!chk], collapse = ', ')) - chk <- !stas %in% stasval # check if some stations valid for lagyr diff --git a/man/anlz_fibmatrix.Rd b/man/anlz_fibmatrix.Rd index 4814cfc4..6e264a83 100644 --- a/man/anlz_fibmatrix.Rd +++ b/man/anlz_fibmatrix.Rd @@ -20,7 +20,7 @@ anlz_fibmatrix( \arguments{ \item{fibdata}{input data frame as returned by \code{\link{read_importfib}} or \code{\link{read_importentero}}} -\item{yrrng}{numeric vector indicating min, max years to include, defaults to range of years in data} +\item{yrrng}{numeric vector indicating min, max years to include, defaults to range of years in data, see details} \item{stas}{optional vector of stations to include, see details} @@ -46,6 +46,8 @@ Analyze Fecal Indicator Bacteria categories over time by station } \details{ This function is used to create output for plotting a matrix stoplight graphic for FIB categories by station and year. Each station and year combination is categorized based on the likelihood of fecal indicator bacteria concentrations exceeding some threshold in a given year. For fecal coliform, the default threshold is 400 CFU / 100 mL in a given year (using Fecal Coliform, \code{fcolif} in \code{fibdata}). For Enterococcus, the default threshold is 130 CFU / 100 mL. The proportions are categorized as A, B, C, D, or E (Microbial Water Quality Assessment or MWQA categories) with corresponding colors, where the breakpoints for each category are <10\%, 10-30\%, 30-50\%, 50-75\%, and >75\% (right-closed). By default, the results for each year are based on a right-centered window that uses the previous two years and the current year to calculate probabilities using the monthly samples (\code{lagyr = 3}). See \code{\link{show_fibmatrix}} for additional details. + +\code{yrrng} can be specified several ways. If \code{yrrng = NULL}, the year range of the data for the selected changes is chosen. User-defined values for the minimum and maximum years can also be used, or only a minimum or maximum can be specified, e.g., \code{yrrng = c(2000, 2010)} or \code{yrrng = c(2000, NA)}. In the latter case, the maximum year will be defined by the data. } \examples{ anlz_fibmatrix(fibdata, indic = 'fcolif') diff --git a/man/show_fibmatrix.Rd b/man/show_fibmatrix.Rd index 1b85980b..a760b4d3 100644 --- a/man/show_fibmatrix.Rd +++ b/man/show_fibmatrix.Rd @@ -30,7 +30,7 @@ show_fibmatrix( \arguments{ \item{fibdata}{input data frame as returned by \code{\link{read_importfib}} or \code{\link{read_importentero}}} -\item{yrrng}{numeric vector indicating min, max years to include, defaults to range of years in data} +\item{yrrng}{numeric vector indicating min, max years to include, defaults to range of years in data, see details} \item{stas}{optional vector of stations to include, see details} diff --git a/tests/testthat/test-anlz_fibmatrix.R b/tests/testthat/test-anlz_fibmatrix.R index 45b1e727..0ef71bf1 100644 --- a/tests/testthat/test-anlz_fibmatrix.R +++ b/tests/testthat/test-anlz_fibmatrix.R @@ -1,5 +1,5 @@ test_that("Checking anlz_fibmatrix tibble class", { - result <- anlz_fibmatrix(fibdata, indic = 'fcolif') + result <- expect_warning(anlz_fibmatrix(fibdata, indic = 'fcolif')) expect_is(result, 'tbl') }) @@ -19,7 +19,7 @@ test_that("Checking anlz_fibmatrix station error all insufficient data", { }) # Example data -fibdata <- data.frame( +fibdatatst <- data.frame( yr = rep(2000:2005, each = 3), epchc_station = rep(letters[1:3], times = 6), fcolif = runif(18, 0, 500), @@ -27,32 +27,32 @@ fibdata <- data.frame( ) test_that("anlz_fibmatrix returns correct structure", { - result <- anlz_fibmatrix(fibdata, indic = 'fcolif') + result <- anlz_fibmatrix(fibdatatst, indic = 'fcolif') expect_s3_class(result, "tbl_df") expect_true(all(c("yr", "station", "gmean", "cat") %in% names(result))) }) test_that("anlz_fibmatrix handles default parameters", { - result <- anlz_fibmatrix(fibdata, indic = 'fcolif') + result <- anlz_fibmatrix(fibdatatst, indic = 'fcolif') expect_true(nrow(result) > 0) expect_true(all(result$yr >= 2000 & result$yr <= 2005)) }) test_that("anlz_fibmatrix respects custom year range and stations", { - result <- anlz_fibmatrix(fibdata, indic = 'fcolif', yrrng = c(2001, 2004), stas = c("a", "b")) + result <- anlz_fibmatrix(fibdatatst, indic = 'fcolif', yrrng = c(2001, 2004), stas = c("a", "b")) expect_true(all(result$yr >= 2001 & result$yr <= 2004)) expect_true(all(result$station %in% c("a", "b"))) }) test_that("anlz_fibmatrix works with different indicators", { - result_fcolif <- anlz_fibmatrix(fibdata, indic = "fcolif") - result_ecocci <- anlz_fibmatrix(fibdata, indic = "ecocci") + result_fcolif <- anlz_fibmatrix(fibdatatst, indic = "fcolif") + result_ecocci <- anlz_fibmatrix(fibdatatst, indic = "ecocci") expect_true(nrow(result_fcolif) > 0) expect_true(nrow(result_ecocci) > 0) }) test_that("anlz_fibmatrix respects custom thresholds", { - result <- anlz_fibmatrix(fibdata, indic = 'fcolif', threshold = 200) + result <- anlz_fibmatrix(fibdatatst, indic = 'fcolif', threshold = 200) expect_true(nrow(result) > 0) })