From af28fcff37ef007fe9b32171c0596d5e28807f33 Mon Sep 17 00:00:00 2001 From: Martin Morgan Date: Sat, 14 Sep 2024 15:38:21 -0400 Subject: [PATCH 01/13] route all calls to `httr::GET()` to `api_get()` - reduces code duplication, facilitates changing GET --- R/SWORD.R | 10 ++++------ R/SWORD_dataset.R | 10 ++++------ R/dataset_versions.R | 5 ++--- R/dataverse_metadata.R | 5 ++--- R/dataverse_search.R | 5 ++--- R/get_dataset.R | 16 ++++++---------- R/get_dataverse.R | 10 ++++------ R/get_facets.R | 5 ++--- R/get_file_by_id.R | 14 ++++---------- R/get_file_metadata.R | 5 +---- R/native_role_groups.R | 10 ++++------ R/native_roles.R | 16 +++++----------- R/native_roles_assignments.R | 5 ++--- R/native_user.R | 5 ++--- R/utils.R | 20 +++++++++++++++----- 15 files changed, 59 insertions(+), 82 deletions(-) diff --git a/R/SWORD.R b/R/SWORD.R index c8f4a2e..acdb793 100644 --- a/R/SWORD.R +++ b/R/SWORD.R @@ -17,9 +17,8 @@ #' @export service_document <- function(key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { u <- paste0(api_url(server, prefix = "dvn/api/"), "data-deposit/v1.1/swordv2/service-document") - r <- httr::GET(u, httr::authenticate(key, ""), ...) - httr::stop_for_status(r) - x <- xml2::as_list(xml2::read_xml(httr::content(r, "text"))) + r <- api_get(u, httr::authenticate(key, ""), ...) + x <- xml2::as_list(xml2::read_xml(r)) w <- x$workspace out <- list() if ("title" %in% names(w)) { @@ -74,11 +73,10 @@ list_datasets <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = dataverse <- get_dataverse(dataverse, key = key, server = server, ...)$alias } u <- paste0(api_url(server, prefix = "dvn/api/"), "data-deposit/v1.1/swordv2/collection/dataverse/", dataverse) - r <- httr::GET(u, httr::authenticate(key, ""), ...) - httr::stop_for_status(r) + r <- api_get(u, httr::authenticate(key, ""), ..., as = "raw") # clean up response structure - x <- xml2::as_list(xml2::read_xml(r$content)) + x <- xml2::as_list(xml2::read_xml(r)) feed <- x[["feed"]] out <- list(title = feed[["title"]][[1L]], generator = feed[["generator"]], diff --git a/R/SWORD_dataset.R b/R/SWORD_dataset.R index c5209fc..dcd859f 100644 --- a/R/SWORD_dataset.R +++ b/R/SWORD_dataset.R @@ -205,9 +205,8 @@ dataset_atom <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sy u <- paste0(api_url(server, prefix="dvn/api/"), "data-deposit/v1.1/swordv2/edit/study/", dataset) } - r <- httr::GET(u, httr::authenticate(key, ""), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- parse_atom(rawToChar(r$content)) + r <- api_get(u, httr::authenticate(key, ""), ..., as = "raw") + out <- parse_atom(rawToChar(r)) out } @@ -229,7 +228,6 @@ dataset_statement <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server dataset <- prepend_doi(dataset) u <- paste0(api_url(server, prefix="dvn/api/"), "data-deposit/v1.1/swordv2/statement/study/", dataset) } - r <- httr::GET(u, httr::authenticate(key, ""), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - parse_dataset_statement(rawToChar(r$content)) + r <- api_get(u, httr::authenticate(key, ""), ..., as = "raw") + parse_dataset_statement(rawToChar(r)) } diff --git a/R/dataset_versions.R b/R/dataset_versions.R index 17b59c8..31b82c3 100644 --- a/R/dataset_versions.R +++ b/R/dataset_versions.R @@ -20,9 +20,8 @@ dataset_versions <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { dataset <- dataset_id(dataset, key = key, server = server, ...) u <- paste0(api_url(server), "datasets/", dataset, "/versions") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- httr::content(r, encoding = "UTF-8")$data + r <- api_get(u, ..., key = key, as = NULL) + out <- r$data lapply(out, function(x) { x <- `class<-`(x, "dataverse_dataset_version") x$files <- lapply(x$files, `class<-`, "dataverse_file") diff --git a/R/dataverse_metadata.R b/R/dataverse_metadata.R index bbf54be..6cd63a6 100644 --- a/R/dataverse_metadata.R +++ b/R/dataverse_metadata.R @@ -18,9 +18,8 @@ dataverse_metadata <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { dataverse <- dataverse_id(dataverse, key = key, server = server, ...) u <- paste0(api_url(server), "dataverses/", dataverse, "/metadatablocks") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data + r <- api_get(u, ..., key = key) + jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data } #' @title Set Dataverse metadata diff --git a/R/dataverse_search.R b/R/dataverse_search.R index 8f72d27..4d70439 100644 --- a/R/dataverse_search.R +++ b/R/dataverse_search.R @@ -103,9 +103,8 @@ function(..., u <- paste0(api_url(server), "search") # execute request - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8")) + r <- api_get(u, query = query, key = key) + out <- jsonlite::fromJSON(r) if (isTRUE(verbose)) { n_total <- ngettext(out$data$total_count, "result", "results") message(sprintf(paste0("%s of %s ", n_total, " retrieved"), out$data$count_in_response, out$data$total_count)) diff --git a/R/get_dataset.R b/R/get_dataset.R index 61e5351..ba24907 100644 --- a/R/get_dataset.R +++ b/R/get_dataset.R @@ -53,9 +53,8 @@ get_dataset <- function( } else { u <- paste0(api_url(server), "datasets/", dataset) } - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - parse_dataset(httr::content(r, as = "text", encoding = "UTF-8")) + r <- api_get(u, ..., key = key) + parse_dataset(r) } #' @rdname get_dataset @@ -79,10 +78,8 @@ dataset_metadata <- function( u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata") } - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- httr::content(r, as = "text", encoding = "UTF-8") - jsonlite::fromJSON(out)[["data"]] + r <- api_get(u, ..., key = key) + jsonlite::fromJSON(r)[["data"]] } #' @rdname get_dataset @@ -96,8 +93,7 @@ dataset_files <- function( ) { dataset <- dataset_id(dataset, key = key, server = server, ...) u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/files") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data + r <- api_get(u, ..., key = key) + out <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data structure(lapply(out, `class<-`, "dataverse_file")) } diff --git a/R/get_dataverse.R b/R/get_dataverse.R index ff51744..d0e7c4c 100644 --- a/R/get_dataverse.R +++ b/R/get_dataverse.R @@ -31,9 +31,8 @@ get_dataverse <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = dataverse <- dataverse_id(dataverse, key = key, server = server, ...) } u <- paste0(api_url(server), "dataverses/", dataverse) - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8")) + r <- api_get(u, ..., key = key) + out <- jsonlite::fromJSON(r) structure(out$data, class = "dataverse") } @@ -42,9 +41,8 @@ get_dataverse <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = dataverse_contents <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { dataverse <- dataverse_id(dataverse, key = key, server = server, ...) u <- paste0(api_url(server), "dataverses/", dataverse, "/contents") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE) + r <- api_get(u, ..., key = key) + out <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE) structure(lapply(out$data, function(x) { `class<-`(x, if (x$type == "dataset") "dataverse_dataset" else "dataverse") })) diff --git a/R/get_facets.R b/R/get_facets.R index eca29b2..b10d75a 100644 --- a/R/get_facets.R +++ b/R/get_facets.R @@ -20,7 +20,6 @@ get_facets <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { dataverse <- dataverse_id(dataverse, key = key, server = server, ...) u <- paste0(api_url(server), "dataverses/", dataverse, "/facets") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))$data + r <- api_get(u, ..., key = key) + jsonlite::fromJSON(r)$data } diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index 3c24c85..cf00c4d 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -98,16 +98,10 @@ get_file_by_id <- function( if (return_url) { return(httr::modify_url(u, query = query)) } - if (isFALSE(progress)) - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, ...) - - if (isTRUE(progress)) - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, httr::progress(type = "down"), ...) - - - - httr::stop_for_status(r, task = httr::content(r)$message) - httr::content(r, as = "raw") + # add a progress bar; 'NULL' if progress is not TRUE. 'NULL' arguments + # are not seen by httr::GET() + progress_bar <- if (isTRUE(progress)) httr::progress(type = "down") + api_get(u, query = query, progress_bar, ..., key = key, as = "raw") } #' @rdname files diff --git a/R/get_file_metadata.R b/R/get_file_metadata.R index 013d430..72bd6c1 100644 --- a/R/get_file_metadata.R +++ b/R/get_file_metadata.R @@ -48,8 +48,5 @@ get_file_metadata <- u <- paste0(api_url(server), "access/datafile/", file, "/metadata/", format) } - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- httr::content(r, as = "text", encoding = "UTF-8") - return(out) + api_get(u, ..., key = key) } diff --git a/R/native_role_groups.R b/R/native_role_groups.R index c1b0169..465cf20 100644 --- a/R/native_role_groups.R +++ b/R/native_role_groups.R @@ -79,9 +79,8 @@ update_group <- function(group, name, description, dataverse, key = Sys.getenv(" list_groups <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { dataverse <- dataverse_id(dataverse, key = key, server = server, ...) u <- paste0(api_url(server), "dataverses/", dataverse, "/groups") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - j <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data + r <- api_get(u, ..., key = key) + j <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data lapply(j, function(x) { x$dataverse <- dataverse class(x) <- "dataverse_group" @@ -99,9 +98,8 @@ get_group <- function(group, dataverse, key = Sys.getenv("DATAVERSE_KEY"), serve dataverse <- dataverse_id(dataverse, key = key, server = server, ...) u <- paste0(api_url(server), "dataverses/", dataverse, "/groups/", group) } - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - j <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))$data + r <- api_get(u, ..., key = key) + j <- jsonlite::fromJSON(r)$data j$dataverse <- dataverse structure(j, class = "dataverse_group") } diff --git a/R/native_roles.R b/R/native_roles.R index a4a895c..b1d5a11 100644 --- a/R/native_roles.R +++ b/R/native_roles.R @@ -21,9 +21,8 @@ # @export get_role <- function(role, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { u <- paste0(api_url(server), "roles/", role) - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - j <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))$data + r <- api_get(u, ..., key) + j <- jsonlite::fromJSON(r)$data j } @@ -43,17 +42,12 @@ list_roles <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sy if (!missing(dataverse)) { dataverse <- dataverse_id(dataverse, key = key, server = server, ...) u <- paste0(api_url(server), "dataverses/", dataverse, "/roles") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- jsonlite::fromJSON(httr::content(r, "text", encoding = "UTF-8"))$data - structure(lapply(out, `class<-`, "dataverse_role")) } else { u <- paste0(api_url(server), "admin/roles") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))$data - structure(lapply(out, `class<-`, "dataverse_role")) } + r <- api_get(u, ..., key = key) + out <- jsonlite::fromJSON(r)$data + structure(lapply(out, `class<-`, "dataverse_role")) } # @rdname roles diff --git a/R/native_roles_assignments.R b/R/native_roles_assignments.R index cfc6d87..99e042e 100644 --- a/R/native_roles_assignments.R +++ b/R/native_roles_assignments.R @@ -19,9 +19,8 @@ get_assignments <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { dataverse <- dataverse_id(dataverse, key = key, server = server, ...) u <- paste0(api_url(server), "dataverses/", dataverse, "/assignments") - r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...) - httr::stop_for_status(r, task = httr::content(r)$message) - out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data + r <- api_get(u, ..., key = key) + out <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data lapply(out, function(x) { x$dataverse <- dataverse class(x) <- "dataverse_role_assignment" diff --git a/R/native_user.R b/R/native_user.R index 59f160e..bbb5399 100644 --- a/R/native_user.R +++ b/R/native_user.R @@ -39,8 +39,7 @@ create_user <- function(password, key = Sys.getenv("DATAVERSE_KEY"), server = Sy #' @export get_user_key <- function(user, password, server = Sys.getenv("DATAVERSE_SERVER"), ...) { u <- paste0(api_url(server), "builtin-users/", user, "/api-token?password=", password) - r <- httr::GET(u, ...) - httr::stop_for_status(r, task = httr::content(r)$message) - j <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8")) + r <- api_get(u, ...) + j <- jsonlite::fromJSON(r) j$data$message } diff --git a/R/utils.R b/R/utils.R index 27e613a..e411e60 100644 --- a/R/utils.R +++ b/R/utils.R @@ -26,11 +26,12 @@ dataset_id.default <- function(x, ...) { dataset_id.character <- function(x, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { x <- prepend_doi(x) u <- paste0(api_url(server), "datasets/:persistentId?persistentId=", x) - r <- tryCatch(httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...), - error = function(e) { - stop("Could not retrieve Dataset ID from persistent identifier!") - }) - jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))[["data"]][["id"]] + r <- tryCatch({ + api_get(u, ..., key = key) + }, error = function(e) { + stop("Could not retrieve Dataset ID from persistent identifier!") + }) + jsonlite::fromJSON(r)[["data"]][["id"]] } #' @export dataset_id.dataverse_dataset <- function(x, ...) { @@ -204,6 +205,15 @@ api_url <- function(server = Sys.getenv("DATAVERSE_SERVER"), prefix = "api/") { return(paste0("https://", domain, "/", prefix)) } +## common httr::GET() uses +api_get <- function(url, ..., key = NULL, as = "text") { + if (!is.null(key)) + key <- httr::add_headers("X-Dataverse-key", key) + r <- httr::GET(url, ..., key) + httr::stop_for_status(r, task = httr::content(r)$message) + httr::content(r, as = as, encoding = "UTF-8") +} + # parse dataset response into list/dataframe parse_dataset <- function(out) { out <- jsonlite::fromJSON(out)$data From 197e48bdcbdf1b56c7798566a0d69ec9e093f4f0 Mon Sep 17 00:00:00 2001 From: Martin Morgan Date: Sat, 14 Sep 2024 17:37:59 -0400 Subject: [PATCH 02/13] implement disk caching of API calls using memoise::memoise() - caching enabled by default, for 30 days - add `use_cache = FALSE` to most dataverse functions to temporarily disable use - set environment variable DATAVERSE_USE_CACHE to FALSE to disable across function calls --- DESCRIPTION | 3 +++ NAMESPACE | 4 ++++ R/onload.R | 29 +++++++++++++++++++++++++---- R/utils.R | 30 ++++++++++++++++++++++++------ man-roxygen/dots.R | 9 ++++++--- man/URLs.Rd | 8 +++++--- man/add_dataset_file.Rd | 8 +++++--- man/add_file.Rd | 8 +++++--- man/create_dataset.Rd | 8 +++++--- man/create_dataverse.Rd | 8 +++++--- man/dataset_atom.Rd | 8 +++++--- man/dataset_versions.Rd | 8 +++++--- man/dataverse_metadata.Rd | 8 +++++--- man/delete_dataset.Rd | 8 +++++--- man/delete_dataverse.Rd | 8 +++++--- man/delete_file.Rd | 8 +++++--- man/delete_sword_dataset.Rd | 8 +++++--- man/files.Rd | 8 +++++--- man/get_dataset.Rd | 8 +++++--- man/get_dataverse.Rd | 8 +++++--- man/get_facets.Rd | 8 +++++--- man/get_file_metadata.Rd | 8 +++++--- man/get_user_key.Rd | 8 +++++--- man/initiate_sword_dataset.Rd | 8 +++++--- man/list_datasets.Rd | 8 +++++--- man/publish_dataset.Rd | 8 +++++--- man/publish_dataverse.Rd | 8 +++++--- man/publish_sword_dataset.Rd | 8 +++++--- man/service_document.Rd | 8 +++++--- man/set_dataverse_metadata.Rd | 8 +++++--- tests/testthat.R | 7 +++++-- 31 files changed, 192 insertions(+), 90 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b130c0e..5282a3c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,10 +41,13 @@ Authors@R: Imports: checkmate, httr, + memoise, + cachem, jsonlite, readr, stats, utils, + tools, xml2 Suggests: covr, diff --git a/NAMESPACE b/NAMESPACE index 32d841a..a24a77f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,3 +63,7 @@ export(service_document) export(set_dataverse_metadata) export(update_dataset) export(update_dataset_file) +importFrom(cachem,cache_disk) +importFrom(checkmate,assert_character) +importFrom(checkmate,assert_logical) +importFrom(memoise,memoise) diff --git a/R/onload.R b/R/onload.R index 48de84a..e0fbc38 100644 --- a/R/onload.R +++ b/R/onload.R @@ -1,6 +1,27 @@ +#' @importFrom cachem cache_disk +#' +#' @importFrom memoise memoise .onLoad <- function(libname, pkgname) { - # a <- Sys.getenv("DATAVERSE_SERVER") - # if(a == "") { - # Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") - # } + # a <- Sys.getenv("DATAVERSE_SERVER") + # if(a == "") { + # Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") + # } + + ## implement API disk cache via 'memoise' + cache_directory <- file.path( + tools::R_user_dir(pkgname, "cache"), + "api_cache" + ) + get <- api_get_impl + if (!dir.exists(cache_directory)) { + status <- dir.create(cache_directory, recursive = TRUE) + if (!status) + warning("'dataverse' failed to create API cache") + } + if (dir.exists(cache_directory)) { + # disk cache with max age 30 days + cache <- cache_disk(cache_directory, max_age = 60 * 60 * 24 * 30) + get <- memoise(get, cache = cache) + } + api_get_memoized <<- get } diff --git a/R/utils.R b/R/utils.R index e411e60..dc62a13 100644 --- a/R/utils.R +++ b/R/utils.R @@ -206,14 +206,32 @@ api_url <- function(server = Sys.getenv("DATAVERSE_SERVER"), prefix = "api/") { } ## common httr::GET() uses -api_get <- function(url, ..., key = NULL, as = "text") { - if (!is.null(key)) - key <- httr::add_headers("X-Dataverse-key", key) - r <- httr::GET(url, ..., key) - httr::stop_for_status(r, task = httr::content(r)$message) - httr::content(r, as = as, encoding = "UTF-8") +#' @importFrom checkmate assert_character assert_logical +api_get <- function(url, ..., key = NULL, as = "text", use_cache = as.logical(Sys.getenv("DATAVERSE_USE_CACHE", TRUE))) { + assert_character(url, any.missing = FALSE, len = 1L, null.ok = TRUE) + assert_character(key, any.missing = FALSE, len = 1L, null.ok = TRUE) + assert_character(as, any.missing = FALSE, len = 1L, null.ok = TRUE) + assert_logical(use_cache, any.missing = FALSE, len = 1L) + if (use_cache) { + get <- api_get_memoized + } else { + get <- api_get_impl + } + get(url, ..., key = key, as = as) } +## cache implemented via memoization; memoized function defined in +## .onLoad() +api_get_impl <- function(url, ..., key = NULL, as = "text") { + if (!is.null(key)) + key <- httr::add_headers("X-Dataverse-key", key) + r <- httr::GET(url, ..., key) + httr::stop_for_status(r, task = httr::content(r)$message) + httr::content(r, as = as, encoding = "UTF-8") +} + +api_get_memoized <- NULL + # parse dataset response into list/dataframe parse_dataset <- function(out) { out <- jsonlite::fromJSON(out)$data diff --git a/man-roxygen/dots.R b/man-roxygen/dots.R index cf74d0d..38907cc 100644 --- a/man-roxygen/dots.R +++ b/man-roxygen/dots.R @@ -1,3 +1,6 @@ -#' @param ... Additional arguments passed to an HTTP request function, such as -#' \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -#' \code{\link[httr]{DELETE}}. +#' @param ... Additional arguments passed to an HTTP request function, +#' such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +#' \code{\link[httr]{DELETE}}. By default, HTTP requests use +#' values cached from previous identical calls. Use +#' \code{use_cache=FALSE} (or `Sys.setenv(DATAVERSE_USE_CACHE = +#' FALSE)` if cached API calls are not desired. diff --git a/man/URLs.Rd b/man/URLs.Rd index aaa67d1..947f80f 100644 --- a/man/URLs.Rd +++ b/man/URLs.Rd @@ -85,9 +85,11 @@ available, download the original version instead of the ingested? If there was no ingested version, is set to NA. Note in \verb{get_dataframe_*}, \code{original} is set to FALSE by default. Either can be changed.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} \item{filename}{Filename of the dataset, with file extension as shown in Dataverse (for example, if nlsw88.dta was the original but is displayed as the ingested diff --git a/man/add_dataset_file.Rd b/man/add_dataset_file.Rd index 8e9fa05..0d9d16a 100644 --- a/man/add_dataset_file.Rd +++ b/man/add_dataset_file.Rd @@ -47,9 +47,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} \item{id}{An integer specifying a file identifier; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an diff --git a/man/add_file.Rd b/man/add_file.Rd index 64e5670..fdee650 100644 --- a/man/add_file.Rd +++ b/man/add_file.Rd @@ -30,9 +30,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ An object of class \dQuote{dataset_atom}. diff --git a/man/create_dataset.Rd b/man/create_dataset.Rd index 51510ab..8fb1525 100644 --- a/man/create_dataset.Rd +++ b/man/create_dataset.Rd @@ -39,9 +39,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} \item{dataset}{A character specifying a persistent identification ID for a dataset, for example \code{"10.70122/FK2/HXJVJU"}. Alternatively, an object of class diff --git a/man/create_dataverse.Rd b/man/create_dataverse.Rd index 7b59cd9..83bb08f 100644 --- a/man/create_dataverse.Rd +++ b/man/create_dataverse.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. diff --git a/man/dataset_atom.Rd b/man/dataset_atom.Rd index 605994a..5e26711 100644 --- a/man/dataset_atom.Rd +++ b/man/dataset_atom.Rd @@ -35,9 +35,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. For \code{dataset_atom}, an object of class \dQuote{dataset_atom}. diff --git a/man/dataset_versions.Rd b/man/dataset_versions.Rd index 5cea74d..99731b5 100644 --- a/man/dataset_versions.Rd +++ b/man/dataset_versions.Rd @@ -29,9 +29,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list of class \dQuote{dataverse_dataset_version}. diff --git a/man/dataverse_metadata.Rd b/man/dataverse_metadata.Rd index c6ae364..4656e0a 100644 --- a/man/dataverse_metadata.Rd +++ b/man/dataverse_metadata.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list diff --git a/man/delete_dataset.Rd b/man/delete_dataset.Rd index c5ae98f..f3b8d5d 100644 --- a/man/delete_dataset.Rd +++ b/man/delete_dataset.Rd @@ -29,9 +29,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A logical. diff --git a/man/delete_dataverse.Rd b/man/delete_dataverse.Rd index 132c693..aefc0ca 100644 --- a/man/delete_dataverse.Rd +++ b/man/delete_dataverse.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A logical. diff --git a/man/delete_file.Rd b/man/delete_file.Rd index 839bfcd..bc2ddcc 100644 --- a/man/delete_file.Rd +++ b/man/delete_file.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ If successful, a logical \code{TRUE}, else possibly some information. diff --git a/man/delete_sword_dataset.Rd b/man/delete_sword_dataset.Rd index da5eee2..e9d0c66 100644 --- a/man/delete_sword_dataset.Rd +++ b/man/delete_sword_dataset.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ If successful, a logical \code{TRUE}, else possibly some information. diff --git a/man/files.Rd b/man/files.Rd index 13741aa..a72e861 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -99,9 +99,11 @@ available, download the original version instead of the ingested? If there was no ingested version, is set to NA. Note in \verb{get_dataframe_*}, \code{original} is set to FALSE by default. Either can be changed.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} \item{filename}{Filename of the dataset, with file extension as shown in Dataverse (for example, if nlsw88.dta was the original but is displayed as the ingested diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index cab1c56..8c74c1d 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -51,9 +51,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} \item{block}{A character string specifying a metadata block to retrieve. By default this is \dQuote{citation}. Other values may be available, depending diff --git a/man/get_dataverse.Rd b/man/get_dataverse.Rd index d984700..485c73d 100644 --- a/man/get_dataverse.Rd +++ b/man/get_dataverse.Rd @@ -38,9 +38,11 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{check}{A logical indicating whether to check that the value of \code{dataverse} is actually a numeric} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list of class \dQuote{dataverse}. diff --git a/man/get_facets.Rd b/man/get_facets.Rd index 54aadcb..546a8f2 100644 --- a/man/get_facets.Rd +++ b/man/get_facets.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. diff --git a/man/get_file_metadata.Rd b/man/get_file_metadata.Rd index 4ec313d..283dce0 100644 --- a/man/get_file_metadata.Rd +++ b/man/get_file_metadata.Rd @@ -40,9 +40,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A character vector containing a DDI diff --git a/man/get_user_key.Rd b/man/get_user_key.Rd index aac73cc..81d26fc 100644 --- a/man/get_user_key.Rd +++ b/man/get_user_key.Rd @@ -13,9 +13,11 @@ get_user_key(user, password, server = Sys.getenv("DATAVERSE_SERVER"), ...) \item{server}{The Dataverse instance. See \code{get_file}.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. diff --git a/man/initiate_sword_dataset.Rd b/man/initiate_sword_dataset.Rd index 13893f0..19c62eb 100644 --- a/man/initiate_sword_dataset.Rd +++ b/man/initiate_sword_dataset.Rd @@ -30,9 +30,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ An object of class \dQuote{dataset_atom}. diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index f8f6c08..0882a89 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. diff --git a/man/publish_dataset.Rd b/man/publish_dataset.Rd index 2b7f27b..9644a98 100644 --- a/man/publish_dataset.Rd +++ b/man/publish_dataset.Rd @@ -32,9 +32,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. diff --git a/man/publish_dataverse.Rd b/man/publish_dataverse.Rd index fb229ae..ac69533 100644 --- a/man/publish_dataverse.Rd +++ b/man/publish_dataverse.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. diff --git a/man/publish_sword_dataset.Rd b/man/publish_sword_dataset.Rd index e53e03b..e9c0b81 100644 --- a/man/publish_sword_dataset.Rd +++ b/man/publish_sword_dataset.Rd @@ -27,9 +27,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list. diff --git a/man/service_document.Rd b/man/service_document.Rd index 3cfac1e..e753edd 100644 --- a/man/service_document.Rd +++ b/man/service_document.Rd @@ -24,9 +24,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list of class \dQuote{sword_service_document}, possibly with one or more \dQuote{sword_collection} entries. The latter are SWORD representations of a Dataverse. These can be passed to other SWORD API functions, e.g., for creating a new dataset. diff --git a/man/set_dataverse_metadata.Rd b/man/set_dataverse_metadata.Rd index 6a25f3c..de8620f 100644 --- a/man/set_dataverse_metadata.Rd +++ b/man/set_dataverse_metadata.Rd @@ -33,9 +33,11 @@ be set as a default via an environment variable. To set a default, run or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} -\item{...}{Additional arguments passed to an HTTP request function, such as -\code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}.} +\item{...}{Additional arguments passed to an HTTP request function, +such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or +\code{\link[httr]{DELETE}}. By default, HTTP requests use +values cached from previous identical calls. Use +\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} } \value{ A list diff --git a/tests/testthat.R b/tests/testthat.R index a5ae096..c520ba2 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -14,8 +14,11 @@ if (!requireNamespace("yaml", quietly = TRUE)) { config <- yaml::read_yaml(system.file("constants.yml", package = "dataverse")) # config <- yaml::read_yaml("inst/constants.yml") - Sys.setenv("DATAVERSE_SERVER" = config$server) - Sys.setenv("DATAVERSE_KEY" = config$api_token) + Sys.setenv( + DATAVERSE_SERVER = config$server, + DATAVERSE_KEY = config$api_token, + DATAVERSE_USE_CACHE = FALSE + ) # To better identify the source of problems, check if the token is expired. # This check *should* be unnecessary on CRAN, since not CRAN tests should From f0e46a8692953009dccf884615c7370cf5ff674d Mon Sep 17 00:00:00 2001 From: Martin Morgan Date: Sun, 15 Sep 2024 11:37:21 -0400 Subject: [PATCH 03/13] update DESCRIPTION and NEWS files --- DESCRIPTION | 7 ++++++- NEWS.md | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5282a3c..7eb58e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,12 @@ Authors@R: family = "Gruber", role = c("ctb"), email = "JohannesB.Gruber@gmail.com", - comment = c(ORCID = "0000-0001-9177-1772"))) + comment = c(ORCID = "0000-0001-9177-1772")), + person(given = "Martin", + family = "Morgan", + role = "ctb", + email = "mtmorgan.xyz@gmail.com" + comment = c(ORCID = "0000-0002-5874-8148"))) Imports: checkmate, httr, diff --git a/NEWS.md b/NEWS.md index 50b86d9..0e4cdfe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ # CHANGES in dataverse 0.3.14 +* Implement a simple cache for API (including file download) calls, improving performance and reliability (#112, #135) * Improve recommendation for rdata loading (#107, #127) * `get_file_by_*()` can now return the download URL to be used in external functions or programs, useful for large files (#128, implemented in #129 @JBGruber and @kuriwaki) * Removes remote resource from vignette and move them to ghactions (#131) From d23cbd3bb138c8a4b05f752e24fa1315681e5e7a Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Tue, 17 Sep 2024 21:39:14 -0400 Subject: [PATCH 04/13] Add missing comma in DESCRIPTION author attribute --- DESCRIPTION | 2 +- man/dataverse.Rd | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7eb58e2..a023c5b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,7 @@ Authors@R: person(given = "Martin", family = "Morgan", role = "ctb", - email = "mtmorgan.xyz@gmail.com" + email = "mtmorgan.xyz@gmail.com", comment = c(ORCID = "0000-0002-5874-8148"))) Imports: checkmate, diff --git a/man/dataverse.Rd b/man/dataverse.Rd index a35185c..915e33a 100644 --- a/man/dataverse.Rd +++ b/man/dataverse.Rd @@ -60,6 +60,7 @@ Other contributors: \item Jan Kanis [contributor] \item Edward Jee [contributor] \item Johannes Gruber \email{JohannesB.Gruber@gmail.com} (\href{https://orcid.org/0000-0001-9177-1772}{ORCID}) [contributor] + \item Martin Morgan \email{mtmorgan.xyz@gmail.com} (\href{https://orcid.org/0000-0002-5874-8148}{ORCID}) [contributor] } } From af5a490808615f23ec77725cf871f4223e4cca0d Mon Sep 17 00:00:00 2001 From: Martin Morgan Date: Tue, 15 Oct 2024 09:59:59 -0400 Subject: [PATCH 05/13] implement cache management - 'disk', 'session' or 'none' options, see ?use_cache - cache_dataset() to select 'disk' for versioned datasets, 'none' for ':latest' etc - cache_path(), cache_info(), cache_reset() for management by user - assert_use_cache() for argument validation --- NAMESPACE | 9 +++- R/cache.R | 93 ++++++++++++++++++++++++++++++++++++ man/cache.Rd | 56 ++++++++++++++++++++++ tests/testthat/tests-cache.R | 25 ++++++++++ 4 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 R/cache.R create mode 100644 man/cache.Rd create mode 100644 tests/testthat/tests-cache.R diff --git a/NAMESPACE b/NAMESPACE index a24a77f..13af94f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,10 @@ S3method(print,get_file) S3method(print,sword_service_document) export(add_dataset_file) export(add_file) +export(cache_dataset) +export(cache_info) +export(cache_path) +export(cache_reset) export(create_dataset) export(create_dataverse) export(dataset_atom) @@ -64,6 +68,7 @@ export(set_dataverse_metadata) export(update_dataset) export(update_dataset_file) importFrom(cachem,cache_disk) -importFrom(checkmate,assert_character) -importFrom(checkmate,assert_logical) +importFrom(checkmate,assert_string) +importFrom(checkmate,test_string) importFrom(memoise,memoise) +importFrom(tools,R_user_dir) diff --git a/R/cache.R b/R/cache.R new file mode 100644 index 0000000..c26949d --- /dev/null +++ b/R/cache.R @@ -0,0 +1,93 @@ +#' @rdname cache +#' @aliases use_cache +#' @title Utilities for cache management +#' @description The dataverse package uses disk and session caches to improve network performance. Use of the cache is described on this page. +#' @details +#' Use of the cache is determined by the value of the `use_cache =` argument to dataset and other API calls, or by the environment variable `DATAVERSE_USE_CACHE`. Possible values are +#' +#' - `"none"`: do not use the cache. This is the default for datasets that are versioned with `":draft"`, `":latest"`, and `":latest-published"`. +#' - `"session"`: cache API requests for the duration of the *R* session. This is the default for API calls that do not involve file or dataset retrieval. +#' - `"disk": use a permanent disk cache. This is the default for files and explicitly versioned datasets. +#' +#' @template version +#' @details +#' `cache_dataset()` determines whether a dataset or file should be cached based on the version specification. +#' @return +#' `cache_dataset()` returns `"disk"` if the dataset version is to be cached to disk, `"none"` otherwise. +#' @importFrom checkmate assert_string +#' @examples +#' cache_dataset(":latest") # "none" +#' cache_dataset("1.2") # "disk" +#' @export +cache_dataset <- function(version) { + assert_string(version) + if (version %in% c(":draft", ":latest", ":latest-published")) { + "none" + } else { + "disk" + } +} + +#' @rdname cache +#' @details +#' `cache_path()` finds or creates the location (directory) on the file system containing the cache. +#' +#' @return +#' `cache_path()` returns the file path to the directory containing the cache. +#' +#' @examples +#' cache_path() +#' +#' @importFrom tools R_user_dir +#' @export +cache_path <- function() { + cache_path <- file.path(R_user_dir("dataverse", "cache"), "api_cache") + if (!dir.exists(cache_path)) { + status <- dir.create(cache_path, recursive = TRUE) + if (!status) + warning("'dataverse' failed to create a 'disk' cache") + } + + cache_path +} + +#' @rdname cache +#' @details +#' `cache_info()` queries the cache for information about the name, size, and other attributes of files in the cache. The file name is a 'hash' of the function used to retrieve the file; it is not useful for identifying specific files. +#' @return +#' `cache_info()` returns a data.frame containing names and sizes of files in the cache. +#' @examples +#' cache_info() +#' @export +cache_info <- function() { + cache_path <- cache_path() + if (dir.exists(cache_path)) { + files <- dir(cache_path(), full.names = TRUE) + info <- file.info(files) + rownames(info) <- basename(files) + info + } +} + +#' @rdname cache +#' @details +#' `cache_reset()` clears all downloaded files from the disk cache. +#' @returns +#' `cache_reset()` returns the path to the (now empty) cache, invisibly) +#' @export +cache_reset <- function() { + cache_path <- cache_path() + if (dir.exists(cache_path)) + cache_disk(cache_path)$reset() + invisible(cache_path) +} + +## utility to check valid values of `use_cache =` +#' @importFrom checkmate test_string +assert_use_cache <- function(use_cache) { + test <- + test_string(use_cache) && + use_cache %in% c("disk", "session", "none") + if (!test) + stop("argument 'use_cache' is not correct, see ?use_cache") +} diff --git a/man/cache.Rd b/man/cache.Rd new file mode 100644 index 0000000..ca12883 --- /dev/null +++ b/man/cache.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cache.R +\name{cache_dataset} +\alias{cache_dataset} +\alias{use_cache} +\alias{cache_path} +\alias{cache_info} +\alias{cache_reset} +\title{Utilities for cache management} +\usage{ +cache_dataset(version) + +cache_path() + +cache_info() + +cache_reset() +} +\arguments{ +\item{version}{A character string specifying a version of the dataset. This can be one of \dQuote{:draft} (the current draft), \dQuote{:latest} (the latest draft, if it exists, or the latest published version), \dQuote{:latest-published} (the latest published version, ignoring any draft), or \dQuote{x.y} (where \samp{x} is a major version and \samp{y} is a minor version; the \samp{.y} can be omitted to obtain a major version). In lieu of this, a dataset's version-specific identification number can be used for the \code{dataset} argument.} +} +\value{ +\code{cache_dataset()} returns \code{"disk"} if the dataset version is to be cached to disk, \code{"none"} otherwise. + +\code{cache_path()} returns the file path to the directory containing the cache. + +\code{cache_info()} returns a data.frame containing names and sizes of files in the cache. + +\code{cache_reset()} returns the path to the (now empty) cache, invisibly) +} +\description{ +The dataverse package uses disk and session caches to improve network performance. Use of the cache is described on this page. +} +\details{ +Use of the cache is determined by the value of the \verb{use_cache =} argument to dataset and other API calls, or by the environment variable \code{DATAVERSE_USE_CACHE}. Possible values are +\itemize{ +\item \code{"none"}: do not use the cache. This is the default for datasets that are versioned with \code{":draft"}, \code{":latest"}, and \code{":latest-published"}. +\item \code{"session"}: cache API requests for the duration of the \emph{R} session. This is the default for API calls that do not involve file or dataset retrieval. +\item `"disk": use a permanent disk cache. This is the default for files and explicitly versioned datasets. +} + +\code{cache_dataset()} determines whether a dataset or file should be cached based on the version specification. + +\code{cache_path()} finds or creates the location (directory) on the file system containing the cache. + +\code{cache_info()} queries the cache for information about the name, size, and other attributes of files in the cache. The file name is a 'hash' of the function used to retrieve the file; it is not useful for identifying specific files. + +\code{cache_reset()} clears all downloaded files from the disk cache. +} +\examples{ +cache_dataset(":latest") # "none" +cache_dataset("1.2") # "disk" +cache_path() + +cache_info() +} diff --git a/tests/testthat/tests-cache.R b/tests/testthat/tests-cache.R new file mode 100644 index 0000000..fd4e357 --- /dev/null +++ b/tests/testthat/tests-cache.R @@ -0,0 +1,25 @@ +test_that("cache management works", { + expect_true(dir.exists(cache_path())) + + expect_identical(cache_dataset(":latest"), "none") + expect_identical(cache_dataset("1.2"), "disk") + + info <- cache_info() + expect_s3_class(info, "data.frame") + expect_setequal( + names(info), + c("size", "isdir", "mode", "mtime", "ctime", "atime", "uid", + "gid", "uname", "grname") + ) +}) + +test_that("'api_get' validates use_cache", { + testthat::skip_on_cran() + + expect_error(get_url_by_name( + filename = "nlsw88.tab", + dataset = "10.70122/FK2/PPIAXE", + server = "demo.dataverse.org", + use_cache = "BAD VALUE" + ), ".*argument 'use_cache' is not correct, see \\?use_cache") +}) From d8ae3bad9e30126ce951b0f1832435f21c8a38cb Mon Sep 17 00:00:00 2001 From: Martin Morgan Date: Tue, 15 Oct 2024 10:02:02 -0400 Subject: [PATCH 06/13] update get_dataset() etc with explicit use_cache() - additional documentation updates --- R/get_dataset.R | 22 ++++++++++++--------- R/onload.R | 33 ++++++++++++------------------- R/utils.R | 37 +++++++++++++++++++++-------------- man-roxygen/dots.R | 7 +++---- man/URLs.Rd | 6 +++--- man/add_dataset_file.Rd | 6 +++--- man/add_file.Rd | 6 +++--- man/create_dataset.Rd | 6 +++--- man/create_dataverse.Rd | 6 +++--- man/dataset_atom.Rd | 6 +++--- man/dataset_versions.Rd | 6 +++--- man/dataverse_metadata.Rd | 6 +++--- man/delete_dataset.Rd | 6 +++--- man/delete_dataverse.Rd | 6 +++--- man/delete_file.Rd | 6 +++--- man/delete_sword_dataset.Rd | 6 +++--- man/files.Rd | 6 +++--- man/get_dataset.Rd | 17 ++++++++++------ man/get_dataverse.Rd | 6 +++--- man/get_facets.Rd | 6 +++--- man/get_file_metadata.Rd | 6 +++--- man/get_user_key.Rd | 6 +++--- man/initiate_sword_dataset.Rd | 6 +++--- man/list_datasets.Rd | 6 +++--- man/publish_dataset.Rd | 6 +++--- man/publish_dataverse.Rd | 6 +++--- man/publish_sword_dataset.Rd | 6 +++--- man/service_document.Rd | 6 +++--- man/set_dataverse_metadata.Rd | 6 +++--- tests/testthat.R | 2 +- 30 files changed, 135 insertions(+), 127 deletions(-) diff --git a/R/get_dataset.R b/R/get_dataset.R index ba24907..d7a80fb 100644 --- a/R/get_dataset.R +++ b/R/get_dataset.R @@ -20,6 +20,7 @@ #' @template version #' @template envvars #' @template dots +#' @param use_cache one of `"disk"`, `"session"`, or `"none"`, describing how datasets are cached to reduce network traffic. See \code{\link{cache_dataset}} for details. #' @return A list of class \dQuote{dataverse_dataset} or a list of a form dependent #' on the specific metadata block retrieved. \code{dataset_files} returns a list of #' objects of class \dQuote{dataverse_file}. @@ -45,15 +46,16 @@ get_dataset <- function( version = ":latest", key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), - ... + ..., + use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version)) ) { - dataset <- dataset_id(dataset, key = key, server = server, ...) + dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache) if (!is.null(version)) { u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version) } else { u <- paste0(api_url(server), "datasets/", dataset) } - r <- api_get(u, ..., key = key) + r <- api_get(u, ..., key = key, use_cache = use_cache) parse_dataset(r) } @@ -69,16 +71,17 @@ dataset_metadata <- function( block = "citation", key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), - ... + ..., + use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version)) ) { - dataset <- dataset_id(dataset, key = key, server = server, ...) + dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache) if (!is.null(block)) { u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata/", block) } else { u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata") } - r <- api_get(u, ..., key = key) + r <- api_get(u, ..., key = key, use_cache = use_cache) jsonlite::fromJSON(r)[["data"]] } @@ -89,11 +92,12 @@ dataset_files <- function( version = ":latest", key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), - ... + ..., + use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version)) ) { - dataset <- dataset_id(dataset, key = key, server = server, ...) + dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache) u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/files") - r <- api_get(u, ..., key = key) + r <- api_get(u, ..., key = key, use_cache = use_cache) out <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data structure(lapply(out, `class<-`, "dataverse_file")) } diff --git a/R/onload.R b/R/onload.R index e0fbc38..a129f9e 100644 --- a/R/onload.R +++ b/R/onload.R @@ -2,26 +2,19 @@ #' #' @importFrom memoise memoise .onLoad <- function(libname, pkgname) { - # a <- Sys.getenv("DATAVERSE_SERVER") - # if(a == "") { - # Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") - # } + ## + ## 'memoise' httr::GET calls + ## - ## implement API disk cache via 'memoise' - cache_directory <- file.path( - tools::R_user_dir(pkgname, "cache"), - "api_cache" - ) - get <- api_get_impl - if (!dir.exists(cache_directory)) { - status <- dir.create(cache_directory, recursive = TRUE) - if (!status) - warning("'dataverse' failed to create API cache") - } - if (dir.exists(cache_directory)) { - # disk cache with max age 30 days - cache <- cache_disk(cache_directory, max_age = 60 * 60 * 24 * 30) - get <- memoise(get, cache = cache) + ## API session cache + api_get_session_cache <<- memoise(api_get_impl) + + ## API disk cache + cache_path <- cache_path() + if (dir.exists(cache_path)) { + # disk cache, no age or size limits + cache <- cache_disk(cache_path) + get_disk <- memoise(api_get_impl, cache = cache) } - api_get_memoized <<- get + api_get_disk_cache <<- get_disk } diff --git a/R/utils.R b/R/utils.R index dc62a13..4b05544 100644 --- a/R/utils.R +++ b/R/utils.R @@ -27,9 +27,12 @@ dataset_id.character <- function(x, key = Sys.getenv("DATAVERSE_KEY"), server = x <- prepend_doi(x) u <- paste0(api_url(server), "datasets/:persistentId?persistentId=", x) r <- tryCatch({ - api_get(u, ..., key = key) + api_get(u, ..., key = key) }, error = function(e) { - stop("Could not retrieve Dataset ID from persistent identifier!") + stop( + "Could not retrieve Dataset ID from persistent identifier! ", + conditionMessage(e) + ) }) jsonlite::fromJSON(r)[["data"]][["id"]] } @@ -206,21 +209,23 @@ api_url <- function(server = Sys.getenv("DATAVERSE_SERVER"), prefix = "api/") { } ## common httr::GET() uses -#' @importFrom checkmate assert_character assert_logical -api_get <- function(url, ..., key = NULL, as = "text", use_cache = as.logical(Sys.getenv("DATAVERSE_USE_CACHE", TRUE))) { - assert_character(url, any.missing = FALSE, len = 1L, null.ok = TRUE) - assert_character(key, any.missing = FALSE, len = 1L, null.ok = TRUE) - assert_character(as, any.missing = FALSE, len = 1L, null.ok = TRUE) - assert_logical(use_cache, any.missing = FALSE, len = 1L) - if (use_cache) { - get <- api_get_memoized - } else { - get <- api_get_impl - } +#' @importFrom checkmate assert_string +api_get <- function(url, ..., key = NULL, as = "text", use_cache = Sys.getenv("DATAVERSE_USE_CACHE", "session")) { + assert_string(url) + assert_string(key, null.ok = TRUE) + assert_string(as, null.ok = TRUE) + assert_use_cache(use_cache) + get <- switch( + use_cache, + "none" = api_get_impl, + "session" = api_get_session_cache, + "disk" = api_get_disk_cache, + stop("unknown value for 'use_cache'") + ) get(url, ..., key = key, as = as) } -## cache implemented via memoization; memoized function defined in +## cache implemented via memoization; memoized functions defined in ## .onLoad() api_get_impl <- function(url, ..., key = NULL, as = "text") { if (!is.null(key)) @@ -230,7 +235,9 @@ api_get_impl <- function(url, ..., key = NULL, as = "text") { httr::content(r, as = as, encoding = "UTF-8") } -api_get_memoized <- NULL +api_get_session_cache <- NULL # per-session memoisatoin + +api_get_disk_cache <- NULL # 'permanent' memoisation # parse dataset response into list/dataframe parse_dataset <- function(out) { diff --git a/man-roxygen/dots.R b/man-roxygen/dots.R index 38907cc..89f575e 100644 --- a/man-roxygen/dots.R +++ b/man-roxygen/dots.R @@ -1,6 +1,5 @@ #' @param ... Additional arguments passed to an HTTP request function, #' such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -#' \code{\link[httr]{DELETE}}. By default, HTTP requests use -#' values cached from previous identical calls. Use -#' \code{use_cache=FALSE} (or `Sys.setenv(DATAVERSE_USE_CACHE = -#' FALSE)` if cached API calls are not desired. +#' \code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +#' on how the *R* dataverse package uses disk and session caches to +#' improve network performance. diff --git a/man/URLs.Rd b/man/URLs.Rd index 947f80f..7462ba1 100644 --- a/man/URLs.Rd +++ b/man/URLs.Rd @@ -87,9 +87,9 @@ no ingested version, is set to NA. Note in \verb{get_dataframe_*}, \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} \item{filename}{Filename of the dataset, with file extension as shown in Dataverse (for example, if nlsw88.dta was the original but is displayed as the ingested diff --git a/man/add_dataset_file.Rd b/man/add_dataset_file.Rd index 0d9d16a..f1fbbd2 100644 --- a/man/add_dataset_file.Rd +++ b/man/add_dataset_file.Rd @@ -49,9 +49,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} \item{id}{An integer specifying a file identifier; or, if \code{doi} is specified, a character string specifying a file name within the DOI-identified dataset; or an diff --git a/man/add_file.Rd b/man/add_file.Rd index fdee650..a2f30a9 100644 --- a/man/add_file.Rd +++ b/man/add_file.Rd @@ -32,9 +32,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ An object of class \dQuote{dataset_atom}. diff --git a/man/create_dataset.Rd b/man/create_dataset.Rd index 8fb1525..3e5f0d3 100644 --- a/man/create_dataset.Rd +++ b/man/create_dataset.Rd @@ -41,9 +41,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} \item{dataset}{A character specifying a persistent identification ID for a dataset, for example \code{"10.70122/FK2/HXJVJU"}. Alternatively, an object of class diff --git a/man/create_dataverse.Rd b/man/create_dataverse.Rd index 83bb08f..da7ff59 100644 --- a/man/create_dataverse.Rd +++ b/man/create_dataverse.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. diff --git a/man/dataset_atom.Rd b/man/dataset_atom.Rd index 5e26711..4584a06 100644 --- a/man/dataset_atom.Rd +++ b/man/dataset_atom.Rd @@ -37,9 +37,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. For \code{dataset_atom}, an object of class \dQuote{dataset_atom}. diff --git a/man/dataset_versions.Rd b/man/dataset_versions.Rd index 99731b5..e51cb3e 100644 --- a/man/dataset_versions.Rd +++ b/man/dataset_versions.Rd @@ -31,9 +31,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list of class \dQuote{dataverse_dataset_version}. diff --git a/man/dataverse_metadata.Rd b/man/dataverse_metadata.Rd index 4656e0a..b39c0c1 100644 --- a/man/dataverse_metadata.Rd +++ b/man/dataverse_metadata.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list diff --git a/man/delete_dataset.Rd b/man/delete_dataset.Rd index f3b8d5d..62e0e78 100644 --- a/man/delete_dataset.Rd +++ b/man/delete_dataset.Rd @@ -31,9 +31,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A logical. diff --git a/man/delete_dataverse.Rd b/man/delete_dataverse.Rd index aefc0ca..2e0803e 100644 --- a/man/delete_dataverse.Rd +++ b/man/delete_dataverse.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A logical. diff --git a/man/delete_file.Rd b/man/delete_file.Rd index bc2ddcc..ab5c5bf 100644 --- a/man/delete_file.Rd +++ b/man/delete_file.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ If successful, a logical \code{TRUE}, else possibly some information. diff --git a/man/delete_sword_dataset.Rd b/man/delete_sword_dataset.Rd index e9d0c66..f77326c 100644 --- a/man/delete_sword_dataset.Rd +++ b/man/delete_sword_dataset.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ If successful, a logical \code{TRUE}, else possibly some information. diff --git a/man/files.Rd b/man/files.Rd index a72e861..fcbabe3 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -101,9 +101,9 @@ no ingested version, is set to NA. Note in \verb{get_dataframe_*}, \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} \item{filename}{Filename of the dataset, with file extension as shown in Dataverse (for example, if nlsw88.dta was the original but is displayed as the ingested diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 8c74c1d..16630d9 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -11,7 +11,8 @@ get_dataset( version = ":latest", key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), - ... + ..., + use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version)) ) dataset_metadata( @@ -20,7 +21,8 @@ dataset_metadata( block = "citation", key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), - ... + ..., + use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version)) ) dataset_files( @@ -28,7 +30,8 @@ dataset_files( version = ":latest", key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), - ... + ..., + use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version)) ) } \arguments{ @@ -53,9 +56,11 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} + +\item{use_cache}{one of \code{"disk"}, \code{"session"}, or \code{"none"}, describing how datasets are cached to reduce network traffic. See \code{\link{cache_dataset}} for details.} \item{block}{A character string specifying a metadata block to retrieve. By default this is \dQuote{citation}. Other values may be available, depending diff --git a/man/get_dataverse.Rd b/man/get_dataverse.Rd index 485c73d..7925955 100644 --- a/man/get_dataverse.Rd +++ b/man/get_dataverse.Rd @@ -40,9 +40,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list of class \dQuote{dataverse}. diff --git a/man/get_facets.Rd b/man/get_facets.Rd index 546a8f2..6f86052 100644 --- a/man/get_facets.Rd +++ b/man/get_facets.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. diff --git a/man/get_file_metadata.Rd b/man/get_file_metadata.Rd index 283dce0..7be3b2c 100644 --- a/man/get_file_metadata.Rd +++ b/man/get_file_metadata.Rd @@ -42,9 +42,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A character vector containing a DDI diff --git a/man/get_user_key.Rd b/man/get_user_key.Rd index 81d26fc..be2d2b8 100644 --- a/man/get_user_key.Rd +++ b/man/get_user_key.Rd @@ -15,9 +15,9 @@ get_user_key(user, password, server = Sys.getenv("DATAVERSE_SERVER"), ...) \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. diff --git a/man/initiate_sword_dataset.Rd b/man/initiate_sword_dataset.Rd index 19c62eb..170fea8 100644 --- a/man/initiate_sword_dataset.Rd +++ b/man/initiate_sword_dataset.Rd @@ -32,9 +32,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ An object of class \dQuote{dataset_atom}. diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index 0882a89..bc7cb77 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. diff --git a/man/publish_dataset.Rd b/man/publish_dataset.Rd index 9644a98..0ebd463 100644 --- a/man/publish_dataset.Rd +++ b/man/publish_dataset.Rd @@ -34,9 +34,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. diff --git a/man/publish_dataverse.Rd b/man/publish_dataverse.Rd index ac69533..2c611c1 100644 --- a/man/publish_dataverse.Rd +++ b/man/publish_dataverse.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. diff --git a/man/publish_sword_dataset.Rd b/man/publish_sword_dataset.Rd index e9c0b81..0899f68 100644 --- a/man/publish_sword_dataset.Rd +++ b/man/publish_sword_dataset.Rd @@ -29,9 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list. diff --git a/man/service_document.Rd b/man/service_document.Rd index e753edd..1c48bf8 100644 --- a/man/service_document.Rd +++ b/man/service_document.Rd @@ -26,9 +26,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list of class \dQuote{sword_service_document}, possibly with one or more \dQuote{sword_collection} entries. The latter are SWORD representations of a Dataverse. These can be passed to other SWORD API functions, e.g., for creating a new dataset. diff --git a/man/set_dataverse_metadata.Rd b/man/set_dataverse_metadata.Rd index de8620f..4f85106 100644 --- a/man/set_dataverse_metadata.Rd +++ b/man/set_dataverse_metadata.Rd @@ -35,9 +35,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or -\code{\link[httr]{DELETE}}. By default, HTTP requests use -values cached from previous identical calls. Use -\code{use_cache=FALSE} (or \code{Sys.setenv(DATAVERSE_USE_CACHE = FALSE)} if cached API calls are not desired.} +\code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details +on how the \emph{R} dataverse package uses disk and session caches to +improve network performance.} } \value{ A list diff --git a/tests/testthat.R b/tests/testthat.R index c520ba2..ef235f0 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -17,7 +17,7 @@ if (!requireNamespace("yaml", quietly = TRUE)) { Sys.setenv( DATAVERSE_SERVER = config$server, DATAVERSE_KEY = config$api_token, - DATAVERSE_USE_CACHE = FALSE + DATAVERSE_USE_CACHE = "none" ) # To better identify the source of problems, check if the token is expired. From 4e29bbe0062a1d575889b3647fa619853749c4b3 Mon Sep 17 00:00:00 2001 From: Martin Morgan Date: Tue, 15 Oct 2024 10:25:37 -0400 Subject: [PATCH 07/13] update cache_info() to reflect cross-platform file.info output --- tests/testthat/tests-cache.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/testthat/tests-cache.R b/tests/testthat/tests-cache.R index fd4e357..e66303f 100644 --- a/tests/testthat/tests-cache.R +++ b/tests/testthat/tests-cache.R @@ -6,11 +6,10 @@ test_that("cache management works", { info <- cache_info() expect_s3_class(info, "data.frame") - expect_setequal( - names(info), - c("size", "isdir", "mode", "mtime", "ctime", "atime", "uid", - "gid", "uname", "grname") - ) + ## subset of column names common across platforms; see ?file.info + ## 'uname' not available in Windows <= R 4.4.1 + colnames <- c("size", "isdir", "mode", "mtime", "ctime", "atime") + expect_true(all(colnames %in% names(info))) }) test_that("'api_get' validates use_cache", { From e5e32848ae92263bc86c7c6f12952638b96ba205 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 16 Oct 2024 14:48:16 -0400 Subject: [PATCH 08/13] Make version argument a bit clearer --- man-roxygen/version.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/man-roxygen/version.R b/man-roxygen/version.R index 70cfc78..b4fb687 100644 --- a/man-roxygen/version.R +++ b/man-roxygen/version.R @@ -1 +1,7 @@ -#' @param version A character string specifying a version of the dataset. This can be one of \dQuote{:draft} (the current draft), \dQuote{:latest} (the latest draft, if it exists, or the latest published version), \dQuote{:latest-published} (the latest published version, ignoring any draft), or \dQuote{x.y} (where \samp{x} is a major version and \samp{y} is a minor version; the \samp{.y} can be omitted to obtain a major version). In lieu of this, a dataset's version-specific identification number can be used for the \code{dataset} argument. +#' @param version A character string specifying a version of the dataset. +#' This can be of the form `"1.1"` or `"1"` (where in `"x.y"`, x is a major version and y is an optional minor version), +#' `":latest"` (the latest draft, if it exists, or the latest published version), +#' `":latest-published"` (the latest published version, ignoring any draft), or +#' `":draft"` (the current draft), +#' In lieu of this, a dataset's version-specific identification number can be used for the \code{dataset} argument. +#' We recommend using the number format so that the function stores a cache of the data (See \link{cache}). From d8d2be882b9142ce5fcdc528e5d1e43b41af5fb7 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 16 Oct 2024 15:16:39 -0400 Subject: [PATCH 09/13] Rewrite the `version` argument for clarity --- R/get_dataframe.R | 1 + man-roxygen/version.R | 17 ++++++++++------- man/cache.Rd | 11 ++++++++++- man/get_dataframe.Rd | 11 +++++++++++ man/get_dataset.Rd | 11 ++++++++++- 5 files changed, 42 insertions(+), 9 deletions(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index bba2d9b..502bddc 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -28,6 +28,7 @@ #' `.tab` files so if `original = FALSE`, `.f` is set to `readr::read_tsv`. #' #' @inheritDotParams get_file +#' @template version #' #' @return A R object that is returned by the default or user-supplied function #' `.f` argument. For example, if `.f = readr::read_tsv()`, the function will diff --git a/man-roxygen/version.R b/man-roxygen/version.R index b4fb687..35a6cfc 100644 --- a/man-roxygen/version.R +++ b/man-roxygen/version.R @@ -1,7 +1,10 @@ -#' @param version A character string specifying a version of the dataset. -#' This can be of the form `"1.1"` or `"1"` (where in `"x.y"`, x is a major version and y is an optional minor version), -#' `":latest"` (the latest draft, if it exists, or the latest published version), -#' `":latest-published"` (the latest published version, ignoring any draft), or -#' `":draft"` (the current draft), -#' In lieu of this, a dataset's version-specific identification number can be used for the \code{dataset} argument. -#' We recommend using the number format so that the function stores a cache of the data (See \link{cache}). +#' @param version A character specifying a version of the dataset. +#' This can be of the form `"1.1"` or `"1"` (where in `"x.y"`, x is a major +#' version and y is an optional minor version), or +#' `":latest"` (the default, the latest published version). +#' In lieu of this, a dataset's version-specific identification number can be +#' used for the `dataset` argument. We recommend using the number format so that +#' the function stores a cache of the data (See \code{\link{cache_dataset}}). +#' If the user specifies a `key` or `DATAVERSE_KEY` argument, they can access the +#' draft version by `":draft"` (the current draft) or `":latest"` (which will +#' prioritize the draft over the latest published version. diff --git a/man/cache.Rd b/man/cache.Rd index ca12883..7d68404 100644 --- a/man/cache.Rd +++ b/man/cache.Rd @@ -17,7 +17,16 @@ cache_info() cache_reset() } \arguments{ -\item{version}{A character string specifying a version of the dataset. This can be one of \dQuote{:draft} (the current draft), \dQuote{:latest} (the latest draft, if it exists, or the latest published version), \dQuote{:latest-published} (the latest published version, ignoring any draft), or \dQuote{x.y} (where \samp{x} is a major version and \samp{y} is a minor version; the \samp{.y} can be omitted to obtain a major version). In lieu of this, a dataset's version-specific identification number can be used for the \code{dataset} argument.} +\item{version}{A character specifying a version of the dataset. +This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major +version and y is an optional minor version), or +\code{":latest"} (the default, the latest published version). +In lieu of this, a dataset's version-specific identification number can be +used for the \code{dataset} argument. We recommend using the number format so that +the function stores a cache of the data (See \code{\link{cache_dataset}}). +If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the +draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will +prioritize the draft over the latest published version.} } \value{ \code{cache_dataset()} returns \code{"disk"} if the dataset version is to be cached to disk, \code{"none"} otherwise. diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index 354c566..303f7e2 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -73,6 +73,17 @@ Defaults to \code{FALSE}.} \item{filedoi}{A DOI for a single file (not the entire dataset), of the form \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}. Can be a vector for multiple files.} + +\item{version}{A character specifying a version of the dataset. +This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major +version and y is an optional minor version), or +\code{":latest"} (the default, the latest published version). +In lieu of this, a dataset's version-specific identification number can be +used for the \code{dataset} argument. We recommend using the number format so that +the function stores a cache of the data (See \code{\link{cache_dataset}}). +If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the +draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will +prioritize the draft over the latest published version.} } \value{ A R object that is returned by the default or user-supplied function diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 16630d9..64d7c5b 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -39,7 +39,16 @@ dataset_files( for example \code{"10.70122/FK2/HXJVJU"}. Alternatively, an object of class \dQuote{dataverse_dataset} obtained by \code{dataverse_contents()}.} -\item{version}{A character string specifying a version of the dataset. This can be one of \dQuote{:draft} (the current draft), \dQuote{:latest} (the latest draft, if it exists, or the latest published version), \dQuote{:latest-published} (the latest published version, ignoring any draft), or \dQuote{x.y} (where \samp{x} is a major version and \samp{y} is a minor version; the \samp{.y} can be omitted to obtain a major version). In lieu of this, a dataset's version-specific identification number can be used for the \code{dataset} argument.} +\item{version}{A character specifying a version of the dataset. +This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major +version and y is an optional minor version), or +\code{":latest"} (the default, the latest published version). +In lieu of this, a dataset's version-specific identification number can be +used for the \code{dataset} argument. We recommend using the number format so that +the function stores a cache of the data (See \code{\link{cache_dataset}}). +If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the +draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will +prioritize the draft over the latest published version.} \item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. From fb963b0fe9e8cc73f14ffdbdf2af0783f0a9b3cf Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 16 Oct 2024 15:31:37 -0400 Subject: [PATCH 10/13] Add caching in vignette --- vignettes/C-download.Rmd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd index a5ba31d..cb4beb1 100644 --- a/vignettes/C-download.Rmd +++ b/vignettes/C-download.Rmd @@ -94,6 +94,10 @@ head(energy) The dataverse package can also download datasets that are _drafts_ (i.e. versions not released publicly), as long as the user of the dataset provides their appropriate DATAVERSE_KEY. Users may need to modify the metadata of a datafile, such as adding a descriptive label, for the data downloading to work properly in this case. This is because the the file identifier UNF, which the read function relies on, may only appear after metadata has been added. +## Caching large datasets + +As of v0.3.15, datasets are _cached_ on your computer if the user specifies a version of the dataset. The next time the code is run, the function will read from the cache rather than re-downloading from the Dataverse. Version specification can be done, e.g., by setting `version = "3"` for V3, for instance. This is useful to avoid re-downloading the identical dataset every time, especially if they take some time to download. To turn off or view the settings of caching, see `cache_dataset()`. + ## Retrieving Custom Data Formats (RDS, Stata, SPSS) From 313179f74f6dcd32140a08f0c734dd2401a62a51 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 16 Oct 2024 15:31:45 -0400 Subject: [PATCH 11/13] Up version --- DESCRIPTION | 2 +- NEWS.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a023c5b..68d439e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: dataverse -Version: 0.3.14 +Version: 0.3.15 Title: Client for Dataverse 4+ Repositories Authors@R: c(person(given = "Shiro", diff --git a/NEWS.md b/NEWS.md index 0e4cdfe..8473970 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,11 @@ # dataverse +# CHANGES in dataverse 0.3.15 + +* Implement a cache for API calls (including file download) when dataset version is specified. The functions will reload from the cache automatically the second time. (#112, #135, by @mtmorgan) + # CHANGES in dataverse 0.3.14 -* Implement a simple cache for API (including file download) calls, improving performance and reliability (#112, #135) * Improve recommendation for rdata loading (#107, #127) * `get_file_by_*()` can now return the download URL to be used in external functions or programs, useful for large files (#128, implemented in #129 @JBGruber and @kuriwaki) * Removes remote resource from vignette and move them to ghactions (#131) From 9f3010e487933266646c78881c1f6b67302e10cf Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 16 Oct 2024 15:44:51 -0400 Subject: [PATCH 12/13] Further tweaks to version doc and how it is invoked in functions --- R/get_dataframe.R | 1 - R/get_file.R | 2 ++ man-roxygen/version.R | 5 +++-- man/cache.Rd | 7 ++++--- man/files.Rd | 13 +++++++++++++ man/get_dataframe.Rd | 22 +++++++++++----------- man/get_dataset.Rd | 7 ++++--- 7 files changed, 37 insertions(+), 20 deletions(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index 502bddc..bba2d9b 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -28,7 +28,6 @@ #' `.tab` files so if `original = FALSE`, `.f` is set to `readr::read_tsv`. #' #' @inheritDotParams get_file -#' @template version #' #' @return A R object that is returned by the default or user-supplied function #' `.f` argument. For example, if `.f = readr::read_tsv()`, the function will diff --git a/R/get_file.R b/R/get_file.R index cf2fd92..db8925f 100644 --- a/R/get_file.R +++ b/R/get_file.R @@ -34,6 +34,7 @@ #' @template envvars #' @template dots #' @template ds +#' @template version #' #' @return \code{get_file} returns a raw vector (or list of raw vectors, #' if \code{length(file) > 1}), which can be saved locally with the `writeBin` @@ -89,6 +90,7 @@ get_file <- function( key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, + version = ":latest", ... ) { diff --git a/man-roxygen/version.R b/man-roxygen/version.R index 35a6cfc..86e9457 100644 --- a/man-roxygen/version.R +++ b/man-roxygen/version.R @@ -2,9 +2,10 @@ #' This can be of the form `"1.1"` or `"1"` (where in `"x.y"`, x is a major #' version and y is an optional minor version), or #' `":latest"` (the default, the latest published version). -#' In lieu of this, a dataset's version-specific identification number can be -#' used for the `dataset` argument. We recommend using the number format so that +#' We recommend using the number format so that #' the function stores a cache of the data (See \code{\link{cache_dataset}}). #' If the user specifies a `key` or `DATAVERSE_KEY` argument, they can access the #' draft version by `":draft"` (the current draft) or `":latest"` (which will #' prioritize the draft over the latest published version. +#' A dataset's version-specific identification number can be +#' used for the `dataset` argument as well to avoid the `version` argument. diff --git a/man/cache.Rd b/man/cache.Rd index 7d68404..b13dbb6 100644 --- a/man/cache.Rd +++ b/man/cache.Rd @@ -21,12 +21,13 @@ cache_reset() This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major version and y is an optional minor version), or \code{":latest"} (the default, the latest published version). -In lieu of this, a dataset's version-specific identification number can be -used for the \code{dataset} argument. We recommend using the number format so that +We recommend using the number format so that the function stores a cache of the data (See \code{\link{cache_dataset}}). If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will -prioritize the draft over the latest published version.} +prioritize the draft over the latest published version. +A dataset's version-specific identification number can be +used for the \code{dataset} argument as well to avoid the \code{version} argument.} } \value{ \code{cache_dataset()} returns \code{"disk"} if the dataset version is to be cached to disk, \code{"none"} otherwise. diff --git a/man/files.Rd b/man/files.Rd index fcbabe3..704d824 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -16,6 +16,7 @@ get_file( key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), original = TRUE, + version = ":latest", ... ) @@ -99,6 +100,18 @@ available, download the original version instead of the ingested? If there was no ingested version, is set to NA. Note in \verb{get_dataframe_*}, \code{original} is set to FALSE by default. Either can be changed.} +\item{version}{A character specifying a version of the dataset. +This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major +version and y is an optional minor version), or +\code{":latest"} (the default, the latest published version). +We recommend using the number format so that +the function stores a cache of the data (See \code{\link{cache_dataset}}). +If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the +draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will +prioritize the draft over the latest published version. +A dataset's version-specific identification number can be +used for the \code{dataset} argument as well to avoid the \code{version} argument.} + \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or \code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index 303f7e2..81821b4 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -64,6 +64,17 @@ be set as a default via an environment variable. To set a default, run \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")} or add \code{DATAVERSE_SERVER = "dataverse.harvard.edu"} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} + \item{\code{version}}{A character specifying a version of the dataset. +This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major +version and y is an optional minor version), or +\code{":latest"} (the default, the latest published version). +We recommend using the number format so that +the function stores a cache of the data (See \code{\link{cache_dataset}}). +If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the +draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will +prioritize the draft over the latest published version. +A dataset's version-specific identification number can be +used for the \code{dataset} argument as well to avoid the \code{version} argument.} \item{\code{return_url}}{Instead of downloading the file, return the URL for download. Defaults to \code{FALSE}.} }} @@ -73,17 +84,6 @@ Defaults to \code{FALSE}.} \item{filedoi}{A DOI for a single file (not the entire dataset), of the form \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}. Can be a vector for multiple files.} - -\item{version}{A character specifying a version of the dataset. -This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major -version and y is an optional minor version), or -\code{":latest"} (the default, the latest published version). -In lieu of this, a dataset's version-specific identification number can be -used for the \code{dataset} argument. We recommend using the number format so that -the function stores a cache of the data (See \code{\link{cache_dataset}}). -If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the -draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will -prioritize the draft over the latest published version.} } \value{ A R object that is returned by the default or user-supplied function diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 64d7c5b..ff04551 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -43,12 +43,13 @@ for example \code{"10.70122/FK2/HXJVJU"}. Alternatively, an object of class This can be of the form \code{"1.1"} or \code{"1"} (where in \code{"x.y"}, x is a major version and y is an optional minor version), or \code{":latest"} (the default, the latest published version). -In lieu of this, a dataset's version-specific identification number can be -used for the \code{dataset} argument. We recommend using the number format so that +We recommend using the number format so that the function stores a cache of the data (See \code{\link{cache_dataset}}). If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will -prioritize the draft over the latest published version.} +prioritize the draft over the latest published version. +A dataset's version-specific identification number can be +used for the \code{dataset} argument as well to avoid the \code{version} argument.} \item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. From 9a8ecc0a852846d9f067b043932f8383850fc6ad Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 16 Oct 2024 15:55:27 -0400 Subject: [PATCH 13/13] Add doc on how to stop using the cache --- man-roxygen/version.R | 4 ++-- man/cache.Rd | 4 ++-- man/files.Rd | 4 ++-- man/get_dataframe.Rd | 4 ++-- man/get_dataset.Rd | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/man-roxygen/version.R b/man-roxygen/version.R index 86e9457..cb109dc 100644 --- a/man-roxygen/version.R +++ b/man-roxygen/version.R @@ -7,5 +7,5 @@ #' If the user specifies a `key` or `DATAVERSE_KEY` argument, they can access the #' draft version by `":draft"` (the current draft) or `":latest"` (which will #' prioritize the draft over the latest published version. -#' A dataset's version-specific identification number can be -#' used for the `dataset` argument as well to avoid the `version` argument. +#' Finally, set `use_cache = "none"` to not read from the cache and re-download +#' afresh even when `version` is provided. diff --git a/man/cache.Rd b/man/cache.Rd index b13dbb6..de5a18b 100644 --- a/man/cache.Rd +++ b/man/cache.Rd @@ -26,8 +26,8 @@ the function stores a cache of the data (See \code{\link{cache_dataset}}). If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will prioritize the draft over the latest published version. -A dataset's version-specific identification number can be -used for the \code{dataset} argument as well to avoid the \code{version} argument.} +Finally, set \code{use_cache = "none"} to not read from the cache and re-download +afresh even when \code{version} is provided.} } \value{ \code{cache_dataset()} returns \code{"disk"} if the dataset version is to be cached to disk, \code{"none"} otherwise. diff --git a/man/files.Rd b/man/files.Rd index 704d824..ef0f187 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -109,8 +109,8 @@ the function stores a cache of the data (See \code{\link{cache_dataset}}). If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will prioritize the draft over the latest published version. -A dataset's version-specific identification number can be -used for the \code{dataset} argument as well to avoid the \code{version} argument.} +Finally, set \code{use_cache = "none"} to not read from the cache and re-download +afresh even when \code{version} is provided.} \item{...}{Additional arguments passed to an HTTP request function, such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index 81821b4..0266fd7 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -73,8 +73,8 @@ the function stores a cache of the data (See \code{\link{cache_dataset}}). If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will prioritize the draft over the latest published version. -A dataset's version-specific identification number can be -used for the \code{dataset} argument as well to avoid the \code{version} argument.} +Finally, set \code{use_cache = "none"} to not read from the cache and re-download +afresh even when \code{version} is provided.} \item{\code{return_url}}{Instead of downloading the file, return the URL for download. Defaults to \code{FALSE}.} }} diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index ff04551..d5c35b6 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -48,8 +48,8 @@ the function stores a cache of the data (See \code{\link{cache_dataset}}). If the user specifies a \code{key} or \code{DATAVERSE_KEY} argument, they can access the draft version by \code{":draft"} (the current draft) or \code{":latest"} (which will prioritize the draft over the latest published version. -A dataset's version-specific identification number can be -used for the \code{dataset} argument as well to avoid the \code{version} argument.} +Finally, set \code{use_cache = "none"} to not read from the cache and re-download +afresh even when \code{version} is provided.} \item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail.