Skip to content

Commit

Permalink
108 make output messages optional and reanimate caching
Browse files Browse the repository at this point in the history
* adding progress bar for loading

* trying to get caching/output option to work

* passing output through

* reviving caching

* fixing counting

* roxygen update

* adding quiet option

* checking cached file

* documenting caching functionality

* getting message working

* removing cutting edge arrow

* reverting change back to cran, too soon

* nope arrow github not working yet
  • Loading branch information
wcornwell authored Apr 23, 2024
1 parent a82ea31 commit 3cf5388
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 18 deletions.
2 changes: 2 additions & 0 deletions R/APCalign-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ utils::globalVariables(
"taxon_ID",
"taxon_ID_aligned",
"taxon_rank",
"txtProgressBar",
"setTxtProgressBar",
"taxonomic_status",
"taxonomic_status_aligned",
"taxonomic_status_genus",
Expand Down
30 changes: 20 additions & 10 deletions R/align_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,16 @@ align_taxa <- function(original_name,
.default = readr::col_character()
)
)

# TODO: check taxa_ raw has correct columns
correct_names <- c("original_name", "aligned_name", "accepted_name", "suggested_name",
"genus", "family", "taxon_rank", "taxonomic_dataset", "taxonomic_status",
"taxonomic_status_aligned", "aligned_reason", "update_reason",
"subclass", "taxon_distribution", "scientific_name", "taxon_ID",
"taxon_ID_genus", "scientific_name_ID", "canonical_name", "row_number",
"number_of_collapsed_taxa", "checked", "known")
if(!identical(names(taxa_raw), correct_names)) {
stop("Your output file already exists and it's not in the right format.
Please check that the file you are passing in to the output option.")
}
}
else {
taxa_raw <-
Expand Down Expand Up @@ -149,29 +157,29 @@ align_taxa <- function(original_name,
# take unique values so each name only processed once
dplyr::filter(!duplicated(original_name))

if (all(taxa$tocheck$checked)) {
if (all(taxa$tocheck$checked)|all(is.na(taxa$tocheck$checked))) {
message(" - all taxa are already checked, yay!")
return(invisible(taxa$tocheck))
}

# move all checked taxa to "checked"
taxa <- redistribute(taxa)

if (!is.null(output) && file.exists(output) && !all(taxa$tocheck$checked)) {
# check unknown taxa
message(
" -> ",
crayon::blue(sum(taxa$tocheck$known, na.rm = T)),
crayon::blue(sum(!is.na(taxa$checked$accepted_name), na.rm = T)),
" names already matched; ",
crayon::blue(sum(
taxa$tocheck$checked &
!taxa$tocheck$known,
is.na(taxa$checked$accepted_name),
na.rm = T
)),
" names checked but without a match; ",
crayon::blue(sum(!taxa$tocheck$checked)),
" names checked but without a species-level match; ",
crayon::blue(sum(!is.na(taxa$tocheck$original_name))),
" taxa yet to be checked"
)

}
# do the actual matching
taxa <-
match_taxa(taxa, resources, fuzzy_abs_dist, fuzzy_rel_dist, fuzzy_matches, imprecise_fuzzy_matches, APNI_matches, identifier) %>%
Expand Down Expand Up @@ -199,8 +207,10 @@ align_taxa <- function(original_name,
## save outputs to file, useful for caching results
if (!is.null(output)) {
dir.create(dirname(output), FALSE, TRUE)
taxa$checked<-TRUE
taxa$known<-!is.na(taxa$aligned_name)
readr::write_csv(taxa, output)
message(" - output saved in file: ", output)
#message(" - output saved in file: ", output)
}

return(taxa)
Expand Down
5 changes: 3 additions & 2 deletions R/create_taxonomic_update_lookup.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#' @param APNI_matches Name matches to the APNI (Australian Plant Names Index) are turned off as a default.
#' @param imprecise_fuzzy_matches Imprecise fuzzy matches are turned on as a default.
#' @param identifier A dataset, location or other identifier, which defaults to NA.
#' @param output file path to save the intermediate output to
#' @param output File path to save the output to. If this file already exists, the function will check whether its contents are a subset of the species passed in and, if so, try to add to the file rather than starting again. This can be useful for large and growing projects.
#' @return A lookup table containing the accepted and suggested names for each original name input, and additional taxonomic information such as taxon rank, taxonomic status, taxon IDs and genera.
#' - original_name: the original plant name.
#' - aligned_name: the input plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function.
Expand Down Expand Up @@ -66,7 +66,8 @@ create_taxonomic_update_lookup <- function(taxa,
align_taxa(taxa, resources = resources,
APNI_matches = APNI_matches,
identifier = identifier,
imprecise_fuzzy_matches = imprecise_fuzzy_matches)
imprecise_fuzzy_matches = imprecise_fuzzy_matches,
output=output)

updated_data <-
update_taxonomy(aligned_data,
Expand Down
25 changes: 21 additions & 4 deletions R/load_taxonomic_resources.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#' @param version The version number of the dataset to use. Defaults to the default version.
#'
#' @param reload A logical indicating whether to reload the dataset from the data source. Defaults to FALSE.
#'
#' @param quiet A logical indicating whether to suppress the loading status messages and progress updates printed to screen. Defaults to FALSE (status is printed).
#'
#' @return The taxonomic resources data loaded into the global environment.
#' @export
Expand All @@ -23,20 +25,29 @@
load_taxonomic_resources <-
function(stable_or_current_data = "stable",
version = default_version(),
reload = FALSE) {
message("Loading resources...", appendLF = FALSE)
on.exit(message("...done"))
reload = FALSE,
quiet = FALSE) {



taxonomic_resources <- dataset_access_function(
version = version,
path = tools::R_user_dir("APCalign"),
type = stable_or_current_data
)



total_steps <- 3 # Define how many steps you expect in the function
pb <- utils::txtProgressBar(min = 0, max = total_steps, style = 2)
if(!quiet){
message("Loading resources into memory...")
utils::setTxtProgressBar(pb, 1)
}
if(is.null(taxonomic_resources)) {
return(NULL)
}


# Give list names
names(taxonomic_resources) <- c("APC", "APNI")

Expand Down Expand Up @@ -167,6 +178,8 @@ load_taxonomic_resources <-
dplyr::filter(taxonomic_status != "accepted") %>%
dplyr::mutate(taxonomic_dataset = "APC")


if(!quiet) utils::setTxtProgressBar(pb, 2)
# Repeated from above - binomials, trinomials etc
taxonomic_resources[["APNI names"]] <-
taxonomic_resources$APNI %>%
Expand Down Expand Up @@ -236,6 +249,7 @@ load_taxonomic_resources <-
dplyr::mutate(taxonomic_dataset = "APC") %>%
dplyr::distinct(canonical_name, .keep_all = TRUE)

if(!quiet) utils::setTxtProgressBar(pb, 3)
taxonomic_resources[["genera_APNI"]] <-
taxonomic_resources$APNI %>%
dplyr::select(
Expand Down Expand Up @@ -269,6 +283,8 @@ load_taxonomic_resources <-
taxonomic_resources$APC %>%
dplyr::filter(taxon_rank %in% c("family"), taxonomic_status == "accepted")

close(pb)
if(!quiet) message("...done")
return(taxonomic_resources)
}

Expand Down Expand Up @@ -460,6 +476,7 @@ dataset_get <- function(version = default_version(),
path_to_apni <- file.path(path, paste0("apni", version, ".parquet"))

APC <- if (!file.exists(path_to_apc)) {
message("Downloading...")
download_and_read_parquet(apc.url, path_to_apc)
} else {
arrow::read_parquet(path_to_apc)
Expand Down
2 changes: 2 additions & 0 deletions R/update_taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ update_taxonomy <- function(aligned_data,
taxa_out <- taxa_out %>% dplyr::arrange(row_number)

if (!is.null(output)) {
taxa_out$checked<-TRUE
taxa_out$known<-!is.na(taxa_out$accepted_name)
readr::write_csv(taxa_out, output)
message(" - output saved in file: ", output)
}
Expand Down
2 changes: 1 addition & 1 deletion man/create_taxonomic_update_lookup.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/load_taxonomic_resources.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3cf5388

Please sign in to comment.