From de947071d3c6d0bdbacdda0b39960c31cf6ef2d6 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Tue, 7 Nov 2023 16:35:42 -0500 Subject: [PATCH 01/48] docs and fixes --- NAMESPACE | 2 -- R/class_aws.R | 4 ---- R/class_cloud.R | 1 - R/class_gcp.R | 4 ---- man/tar_delete.Rd | 6 ++++-- man/tar_destroy.Rd | 6 ++++-- man/tar_prune.Rd | 6 ++++-- 7 files changed, 12 insertions(+), 17 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2a58766cf..9b3f96669 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,9 +95,7 @@ S3method(store_delete_objects,default) S3method(store_delete_objects,tar_aws) S3method(store_delete_objects,tar_gcp) S3method(store_ensure_correct_hash,default) -S3method(store_ensure_correct_hash,tar_aws) S3method(store_ensure_correct_hash,tar_cloud) -S3method(store_ensure_correct_hash,tar_gcp) S3method(store_ensure_correct_hash,tar_store_file) S3method(store_ensure_correct_hash,tar_url) S3method(store_exist_object,default) diff --git a/R/class_aws.R b/R/class_aws.R index f27fddb72..aae074599 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -250,10 +250,6 @@ store_upload_object_aws <- function(store) { invisible() } -#' @export -store_ensure_correct_hash.tar_aws <- function(store, storage, deployment) { -} - #' @export store_has_correct_hash.tar_aws <- function(store) { hash <- store_aws_hash(store) diff --git a/R/class_cloud.R b/R/class_cloud.R index fbd97f2ee..7f13fc9c0 100644 --- a/R/class_cloud.R +++ b/R/class_cloud.R @@ -41,7 +41,6 @@ store_hash_late.tar_cloud <- function(store) { #' @export store_ensure_correct_hash.tar_cloud <- function(store, storage, deployment) { - store_wait_correct_hash(store) } #' @export diff --git a/R/class_gcp.R b/R/class_gcp.R index 10dcaeee4..009c7d734 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -206,10 +206,6 @@ store_upload_object_gcp <- function(store) { invisible() } -#' @export -store_ensure_correct_hash.tar_gcp <- function(store, storage, deployment) { -} - #' @export store_has_correct_hash.tar_gcp <- 
function(store) { hash <- store_gcp_hash(store = store) diff --git a/man/tar_delete.Rd b/man/tar_delete.Rd index a3d1a2c37..7fe694377 100644 --- a/man/tar_delete.Rd +++ b/man/tar_delete.Rd @@ -21,9 +21,11 @@ or \code{tidyselect} helpers like \code{\link[=any_of]{any_of()}} and \code{\lin from the cloud if applicable (e.g. AWS, GCP). If \code{FALSE}, files are not deleted from the cloud.} -\item{batch_size}{Positive integer, number of target objects to delete +\item{batch_size}{Positive integer between 1 and 1000, +number of target objects to delete from the cloud with each HTTP API request. -Currently only supported for AWS.} +Currently only supported for AWS. +Cannot be more than 1000.} \item{verbose}{Logical of length 1, whether to print console messages to show progress when deleting each batch of targets from each diff --git a/man/tar_destroy.Rd b/man/tar_destroy.Rd index ecb393e1c..4a346f4a3 100644 --- a/man/tar_destroy.Rd +++ b/man/tar_destroy.Rd @@ -48,9 +48,11 @@ See \code{\link[=tar_workspace]{tar_workspace()}} for details. data store. }} -\item{batch_size}{Positive integer, number of target objects to delete +\item{batch_size}{Positive integer between 1 and 1000, +number of target objects to delete from the cloud with each HTTP API request. -Currently only supported for AWS.} +Currently only supported for AWS. +Cannot be more than 1000.} \item{verbose}{Logical of length 1, whether to print console messages to show progress when deleting each batch of targets from each diff --git a/man/tar_prune.Rd b/man/tar_prune.Rd index c0a1cdd35..b3086e9d0 100644 --- a/man/tar_prune.Rd +++ b/man/tar_prune.Rd @@ -20,9 +20,11 @@ tar_prune( from the cloud if applicable (e.g. AWS, GCP). If \code{FALSE}, files are not deleted from the cloud.} -\item{batch_size}{Positive integer, number of target objects to delete +\item{batch_size}{Positive integer between 1 and 1000, +number of target objects to delete from the cloud with each HTTP API request. 
-Currently only supported for AWS.} +Currently only supported for AWS. +Cannot be more than 1000.} \item{verbose}{Logical of length 1, whether to print console messages to show progress when deleting each batch of targets from each From 51e34a28f33d9a257eed8b61b4b62eeae5336461 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Thu, 9 Nov 2023 12:37:57 -0500 Subject: [PATCH 02/48] Add utility aws_s3_list_etags() --- R/utils_aws.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/R/utils_aws.R b/R/utils_aws.R index 3f499cd82..7cd4a31d1 100644 --- a/R/utils_aws.R +++ b/R/utils_aws.R @@ -61,6 +61,52 @@ aws_s3_exists <- function( ) } +aws_s3_list_etags <- function( + prefix, + bucket, + page_size = 1000L, + verbose = TRUE, + region = NULL, + endpoint = NULL, + version = NULL, + args = list(), + max_tries = NULL, + seconds_timeout = NULL, + close_connection = NULL, + s3_force_path_style = NULL +) { + client <- aws_s3_client( + endpoint = endpoint, + region = region, + seconds_timeout = seconds_timeout, + close_connection = close_connection, + s3_force_path_style = s3_force_path_style, + max_tries = max_tries + ) + args$Bucket <- bucket + args$Prefix <- prefix + args <- supported_args(fun = client$list_objects_v2, args = args) + if (verbose) { + tar_message_run( + "Listing objects in AWS S3 bucket ", + bucket, + " prefix ", + prefix, + "..." 
+ ) + } + pages <- paws.common::paginate( + Operation = do.call(what = client$list_objects_v2, args = args) + ) + out <- list() + for (page in pages) { + for (object in page$Contents) { + out[[object$Key]] <- eval(parse(text = object$ETag)) + } + } + out +} + aws_s3_download <- function( file, key, From f89d25b346062cf3ea7492b7cef79706d037576f Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Thu, 9 Nov 2023 12:54:55 -0500 Subject: [PATCH 03/48] Test aws_list_etags() --- R/utils_aws.R | 3 ++- tests/aws/test-utils_aws.R | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/R/utils_aws.R b/R/utils_aws.R index 7cd4a31d1..3e4ab47cc 100644 --- a/R/utils_aws.R +++ b/R/utils_aws.R @@ -96,7 +96,8 @@ aws_s3_list_etags <- function( ) } pages <- paws.common::paginate( - Operation = do.call(what = client$list_objects_v2, args = args) + Operation = do.call(what = client$list_objects_v2, args = args), + PageSize = page_size ) out <- list() for (page in pages) { diff --git a/tests/aws/test-utils_aws.R b/tests/aws/test-utils_aws.R index b7c1068aa..efff9b681 100644 --- a/tests/aws/test-utils_aws.R +++ b/tests/aws/test-utils_aws.R @@ -642,3 +642,31 @@ tar_test("graceful error on multipart upload", { class = "tar_condition_file" ) }) + +tar_test("aws_s3_list_etags()", { + bucket <- random_bucket_name() + paws.storage::s3()$create_bucket(Bucket = bucket) + on.exit(aws_s3_delete_bucket(bucket)) + expect_equal( + aws_s3_list_etags(prefix = "/", bucket = bucket), + list() + ) + for (key in c("w", "x", "y", "z")) { + paws.storage::s3()$put_object( + Body = charToRaw(key), + Key = key, + Bucket = bucket + ) + } + out <- aws_s3_list_etags(prefix = "", bucket = bucket) + out2 <- aws_s3_list_etags(prefix = "", bucket = bucket, page_size = 2L) + expect_equal(out, out2) + expect_equal(length(out), 4L) + expect_equal(sort(names(out)), sort(c("w", "x", "y", "z"))) + for (etag in out) { + expect_true(is.character(etag)) + expect_true(!anyNA(etag)) + 
expect_equal(length(etag), 1L) + expect_gt(nchar(etag), 10L) + } +}) From 38d6e3ce929434dcd30fa8177642d033c067ef0c Mon Sep 17 00:00:00 2001 From: wlandau Date: Thu, 9 Nov 2023 16:52:23 -0500 Subject: [PATCH 04/48] Add gcp_gcs_list_md5s() --- R/utils_gcp.R | 52 +++++++++++++++++++++++++++++++------- tests/gcp/test-utils_gcp.R | 37 +++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/R/utils_gcp.R b/R/utils_gcp.R index e78e45605..c66d72d97 100644 --- a/R/utils_gcp.R +++ b/R/utils_gcp.R @@ -10,7 +10,7 @@ gcp_gcs_head <- function( bucket = gcp_gcs_bucket(), version = NULL, verbose = FALSE, - max_tries + max_tries = NULL ) { verbose <- verbose %|||% FALSE old_try_attempts <- getOption("googleAuthR.tryAttempts") @@ -39,7 +39,7 @@ gcp_gcs_exists <- function( bucket = gcp_gcs_bucket(verbose = verbose, max_tries = max_tries), version = NULL, verbose = FALSE, - max_tries + max_tries = NULL ) { !is.null( gcp_gcs_head( @@ -52,7 +52,12 @@ gcp_gcs_exists <- function( ) } -gcp_gcs_bucket <- function(verbose = FALSE, max_tries) { +gcp_gcs_list_md5s <- function( + prefix, + bucket = gcp_gcs_bucket(), + verbose = TRUE, + max_tries = NULL +) { verbose <- verbose %|||% FALSE old_try_attempts <- getOption("googleAuthR.tryAttempts") on.exit(options(googleAuthR.tryAttempts = old_try_attempts), add = TRUE) @@ -62,9 +67,23 @@ gcp_gcs_bucket <- function(verbose = FALSE, max_tries) { options(googleAuthR.tryAttempts = max_tries %|||% 5L) ) gcp_gcs_auth(verbose = verbose, max_tries = max_tries) - if_any(verbose, identity, suppressMessages) ( - googleCloudStorageR::gcs_get_global_bucket() + if (verbose) { + tar_message_run( + "Listing objects in GCS bucket ", + bucket, + " prefix ", + prefix, + "..." 
+ ) + } + results <- googleCloudStorageR::gcs_list_objects( + prefix = prefix, + bucket = bucket, + detail = "full" ) + out <- as.list(results$md5) + names(out) <- results$name + out } gcp_gcs_download <- function( @@ -73,7 +92,7 @@ gcp_gcs_download <- function( bucket = gcp_gcs_bucket(verbose = verbose, max_tries = max_tries), version = NULL, verbose = FALSE, - max_tries + max_tries = NULL ) { verbose <- verbose %|||% FALSE old_try_attempts <- getOption("googleAuthR.tryAttempts") @@ -101,7 +120,7 @@ gcp_gcs_delete <- function( bucket = gcp_gcs_bucket(verbose = verbose, max_tries = max_tries), version = NULL, verbose = FALSE, - max_tries + max_tries = NULL ) { verbose <- verbose %|||% FALSE old_try_attempts <- getOption("googleAuthR.tryAttempts") @@ -136,7 +155,7 @@ gcp_gcs_upload <- function( metadata = list(), predefined_acl = "private", verbose = FALSE, - max_tries + max_tries = NULL ) { verbose <- verbose %|||% FALSE old_try_attempts <- getOption("googleAuthR.tryAttempts") @@ -165,7 +184,7 @@ gcp_gcs_upload <- function( ) } -gcp_gcs_auth <- function(verbose = FALSE, max_tries) { +gcp_gcs_auth <- function(verbose = FALSE, max_tries = NULL) { verbose <- verbose %|||% FALSE if (isTRUE(tar_runtime$gcp_auth)) { return() @@ -188,4 +207,19 @@ gcp_gcs_auth <- function(verbose = FALSE, max_tries) { tar_runtime$gcp_auth <- TRUE invisible() } + +gcp_gcs_bucket <- function(verbose = FALSE, max_tries = NULL) { + verbose <- verbose %|||% FALSE + old_try_attempts <- getOption("googleAuthR.tryAttempts") + on.exit(options(googleAuthR.tryAttempts = old_try_attempts), add = TRUE) + if_any( + is.null(max_tries), + NULL, + options(googleAuthR.tryAttempts = max_tries %|||% 5L) + ) + gcp_gcs_auth(verbose = verbose, max_tries = max_tries) + if_any(verbose, identity, suppressMessages) ( + googleCloudStorageR::gcs_get_global_bucket() + ) +} # nocov end diff --git a/tests/gcp/test-utils_gcp.R b/tests/gcp/test-utils_gcp.R index 7fb9e81e2..c5ed23df5 100644 --- a/tests/gcp/test-utils_gcp.R 
+++ b/tests/gcp/test-utils_gcp.R @@ -325,3 +325,40 @@ tar_test("gcp_gcs_upload: upload twice, get the correct version", { ) expect_equal(readLines(tmp), "second") }) + +tar_test("gcp_gcs_list_md5s()", { + skip_if_no_gcp() + gcp_gcs_auth(max_tries = 5) + bucket <- random_bucket_name() + # needs to be a GCP project the tester auth has access to + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + googleCloudStorageR::gcs_create_bucket( + bucket, + projectId = project, + versioning = TRUE + ) + on.exit(gcp_gcs_delete_bucket(bucket)) + expect_equal( + gcp_gcs_list_md5s(prefix = "/", bucket = bucket), + list() + ) + tmp <- tempfile() + writeLines("a", tmp) + for (key in c("w", "x", "y", "z")) { + gcp_gcs_upload( + file = tmp, + key = key, + bucket = bucket, + max_tries = 5 + ) + } + out <- gcp_gcs_list_md5s(prefix = "", bucket = bucket) + expect_equal(length(out), 4L) + expect_equal(sort(names(out)), sort(c("w", "x", "y", "z"))) + for (etag in out) { + expect_true(is.character(etag)) + expect_true(!anyNA(etag)) + expect_equal(length(etag), 1L) + expect_gt(nchar(etag), 10L) + } +}) From de970fec30a2016d71a204259fcdc390fdb4ab17 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 11:43:31 -0500 Subject: [PATCH 05/48] Switch to ETags for AWS S3 targets --- R/class_aws.R | 4 ++-- R/utils_aws.R | 2 +- tests/aws/test-aws_endpoint.R | 8 ++++---- tests/aws/test-aws_hpc.R | 2 +- tests/aws/test-aws_long.R | 2 +- tests/aws/test-aws_meta.R | 14 +++++++------- tests/gcp/test-gcp_hpc.R | 2 +- tests/gcp/test-gcp_long.R | 2 +- tests/gcp/test-gcp_meta.R | 14 +++++++------- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/R/class_aws.R b/R/class_aws.R index aae074599..b8ab7a1e3 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -224,7 +224,6 @@ store_upload_object_aws <- function(store) { bucket = bucket, region = store_aws_region(store$file$path), endpoint = store_aws_endpoint(store$file$path), - metadata = list("targets-hash" = store$file$hash), part_size = 
aws$part_size, args = aws$args, max_tries = aws$max_tries, @@ -247,6 +246,7 @@ store_upload_object_aws <- function(store) { invert = TRUE ) store$file$path <- c(path, paste0("version=", head$VersionId)) + store$file$hash <- digest_chr64(head$ETag) invisible() } @@ -271,7 +271,7 @@ store_aws_hash <- function(store) { close_connection = aws$close_connection, s3_force_path_style = aws$s3_force_path_style ) - head$Metadata[["targets-hash"]] + digest_chr64(head$ETag) } # nocov end diff --git a/R/utils_aws.R b/R/utils_aws.R index 3e4ab47cc..65fd4ee14 100644 --- a/R/utils_aws.R +++ b/R/utils_aws.R @@ -102,7 +102,7 @@ aws_s3_list_etags <- function( out <- list() for (page in pages) { for (object in page$Contents) { - out[[object$Key]] <- eval(parse(text = object$ETag)) + out[[object$Key]] <- object$ETag } } out diff --git a/tests/aws/test-aws_endpoint.R b/tests/aws/test-aws_endpoint.R index 9dd30b46a..9c9f522f4 100644 --- a/tests/aws/test-aws_endpoint.R +++ b/tests/aws/test-aws_endpoint.R @@ -39,7 +39,7 @@ tar_test("aws qs format data gets stored", { }) expr <- tar_tidy_eval(expr, environment(), TRUE) eval(as.call(list(`tar_script`, expr, ask = FALSE))) - tar_make(callr_function = NULL) + tar_make(callr_function = NULL, reporter = "silent") expect_true( aws_s3_exists( bucket = bucket_name, @@ -97,10 +97,10 @@ tar_test("aws qs format invalidation", { }) expr <- tar_tidy_eval(expr, environment(), TRUE) eval(as.call(list(`tar_script`, expr, ask = FALSE))) - tar_make(callr_function = NULL) + tar_make(callr_function = NULL, reporter = "silent") expect_equal(tar_progress(x)$progress, "built") expect_equal(tar_progress(y)$progress, "built") - tar_make(callr_function = NULL) + tar_make(callr_function = NULL, reporter = "silent") progress <- tar_progress() progress <- progress[progress$progress != "skipped", ] expect_equal(nrow(progress), 0L) @@ -122,7 +122,7 @@ tar_test("aws qs format invalidation", { }) expr <- tar_tidy_eval(expr, environment(), TRUE) 
eval(as.call(list(`tar_script`, expr, ask = FALSE))) - tar_make(callr_function = NULL) + tar_make(callr_function = NULL, reporter = "silent") expect_equal(tar_progress(x)$progress, "built") expect_equal(tar_progress(y)$progress, "built") expect_equal(tar_read(x), "x_value2") diff --git a/tests/aws/test-aws_hpc.R b/tests/aws/test-aws_hpc.R index b4062d19a..3235cbb4b 100644 --- a/tests/aws/test-aws_hpc.R +++ b/tests/aws/test-aws_hpc.R @@ -29,6 +29,6 @@ tar_test("AWS S3 + HPC", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make_future() + tar_make_future(reporter = "silent") expect_equal(tar_read(c), 2L) }) diff --git a/tests/aws/test-aws_long.R b/tests/aws/test-aws_long.R index 781933d1c..690ab871e 100644 --- a/tests/aws/test-aws_long.R +++ b/tests/aws/test-aws_long.R @@ -39,7 +39,7 @@ tar_test("pipeline continuously uploads metadata", { expr <- tar_tidy_eval(expr, environment(), TRUE) eval(as.call(list(`tar_script`, expr, ask = FALSE))) R.utils::withTimeout( - expr = tar_make(seconds_meta_upload = 1), + expr = tar_make(seconds_meta_upload = 1, reporter = "silent"), timeout = 30, onTimeout = "silent" ) diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R index 830cca86e..17e30e067 100644 --- a/tests/aws/test-aws_meta.R +++ b/tests/aws/test-aws_meta.R @@ -29,7 +29,7 @@ tar_test("AWS meta", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -52,7 +52,7 @@ tar_test("AWS meta", { } unlink(path_meta(path_store_default())) expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "skipped")) tar_destroy() for (file in c("meta", "process", "progress", "crew")) { @@ -106,7 +106,7 @@ tar_test("AWS 
tar_meta_delete()", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -179,7 +179,7 @@ tar_test("AWS tar_meta_upload()", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -244,7 +244,7 @@ tar_test("AWS tar_meta_download()", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -297,7 +297,7 @@ tar_test("AWS tar_meta_sync() upload", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -362,7 +362,7 @@ tar_test("AWS tar_meta_sync() download", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { diff --git a/tests/gcp/test-gcp_hpc.R b/tests/gcp/test-gcp_hpc.R index 50fe7fc23..3fa63950d 100644 --- a/tests/gcp/test-gcp_hpc.R +++ b/tests/gcp/test-gcp_hpc.R @@ -31,6 +31,6 @@ tar_test("gcp + HPC", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make_future() + tar_make_future(reporter = "silent") expect_equal(tar_read(c), 2L) }) diff --git 
a/tests/gcp/test-gcp_long.R b/tests/gcp/test-gcp_long.R index 4dd4b76d3..0eb36a5f4 100644 --- a/tests/gcp/test-gcp_long.R +++ b/tests/gcp/test-gcp_long.R @@ -41,7 +41,7 @@ tar_test("pipeline continuously uploads metadata", { expr <- tar_tidy_eval(expr, environment(), TRUE) eval(as.call(list(`tar_script`, expr, ask = FALSE))) R.utils::withTimeout( - expr = tar_make(seconds_meta_upload = 1), + expr = tar_make(seconds_meta_upload = 1, reporter = "silent"), timeout = 30, onTimeout = "silent" ) diff --git a/tests/gcp/test-gcp_meta.R b/tests/gcp/test-gcp_meta.R index e64d8b804..a69e25dcd 100644 --- a/tests/gcp/test-gcp_meta.R +++ b/tests/gcp/test-gcp_meta.R @@ -30,7 +30,7 @@ tar_test("gcp meta", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -53,7 +53,7 @@ tar_test("gcp meta", { } unlink(path_meta(path_store_default())) expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "skipped")) tar_destroy() for (file in c("meta", "process", "progress", "crew")) { @@ -108,7 +108,7 @@ tar_test("gcp tar_meta_delete()", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -182,7 +182,7 @@ tar_test("gcp tar_meta_upload()", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -248,7 +248,7 @@ tar_test("gcp tar_meta_download()", { ) }, env = 
list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -302,7 +302,7 @@ tar_test("gcp tar_meta_sync() upload", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { @@ -368,7 +368,7 @@ tar_test("gcp tar_meta_sync() download", { ) }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) - tar_make() + tar_make(reporter = "silent") expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) for (file in c("meta", "process", "progress", "crew")) { From ab200be7cc97367b9c1154807cdde7da69dbf367 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 11:52:50 -0500 Subject: [PATCH 06/48] news --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 94c7d2187..d27a877d2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ Because of the changes below, upgrading to this version of `targets` will unavoidably invalidate previously built targets in existing pipelines. Your pipeline code should still work, but any targets you ran before will most likely need to rerun after the upgrade. * Use SHA512 during the creation of target-specific pseudo-random number generator seeds (#1139). This change decreases the risk of overlapping/correlated random number generator streams. See the "RNG overlap" section of the `tar_seed_create()` help file for details and justification. Unfortunately, this change will invalidate all currently built targets because the seeds will be different. To avoid rerunning your whole pipeline, set `cue = tar_cue(seed = FALSE)` in `tar_target()`. 
+* For cloud storage: instead of the hash of the local file, use the ETag for AWS S3 targets and the MD5 hash for GCP GCS targets (#1172). Sanitize with `targets:::digest_chr64()` in both cases before storing the result in the metadata. ## Other changes From e889619dd2447f24ee1032bc98262b9edd301ed0 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 10 Nov 2023 12:06:51 -0500 Subject: [PATCH 07/48] Use MD5s for gcp targets --- R/class_gcp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/class_gcp.R b/R/class_gcp.R index 009c7d734..9eeb2f0f1 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -183,7 +183,6 @@ store_upload_object_gcp <- function(store) { file = store$file$stage, key = key, bucket = bucket, - metadata = list("targets-hash" = store$file$hash), predefined_acl = store$resources$gcp$predefined_acl %|||% "private", verbose = store$resources$gcp$verbose %|||% FALSE, max_tries = store$resources$gcp$max_tries %|||% 5L @@ -203,6 +202,7 @@ store_upload_object_gcp <- function(store) { invert = TRUE ) store$file$path <- c(path, paste0("version=", head$generation)) + store$file$hash <- digest_chr64(head$md5) invisible() } @@ -221,7 +221,7 @@ store_gcp_hash <- function(store) { verbose = store$resources$gcp$verbose %|||% FALSE, max_tries = store$resources$gcp$max_tries %|||% 5L ) - head$metadata[["targets-hash"]] + digest_chr64(head$md5) } # nocov end From 63cd3596eaf32c35078ac73b7214a80df61dd8aa Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 13:15:55 -0500 Subject: [PATCH 08/48] More cloud resource fields --- R/class_resources_aws.R | 37 ++++++---- R/class_resources_gcp.R | 23 +++--- R/tar_resources_aws.R | 49 +++++++++---- R/tar_resources_gcp.R | 3 + man/tar_resources_aws.Rd | 40 ++++++++--- man/tar_resources_gcp.Rd | 11 +++ tests/testthat/test-tar_resources_aws.R | 93 +++++++++++++++++++++++++ tests/testthat/test-tar_resources_gcp.R | 39 +++++++++++ 8 files changed, 249 insertions(+), 46 deletions(-) diff --git 
a/R/class_resources_aws.R b/R/class_resources_aws.R index 560a94325..d9149d625 100644 --- a/R/class_resources_aws.R +++ b/R/class_resources_aws.R @@ -2,24 +2,30 @@ resources_aws_init <- function( bucket = NULL, prefix = tar_path_objects_dir_cloud(), region = NULL, - part_size = 5 * (2 ^ 20), endpoint = NULL, + s3_force_path_style = NULL, + version = "latest", + part_size = 5 * (2 ^ 20), + page_size = 1000L, max_tries = NULL, seconds_timeout = NULL, close_connection = NULL, - s3_force_path_style = NULL, + verbose = TRUE, args = list() ) { resources_aws_new( bucket = bucket, prefix = prefix, region = region, - part_size = part_size, endpoint = endpoint, + s3_force_path_style = s3_force_path_style, + version = version, + part_size = part_size, + page_size = page_size, max_tries = max_tries, seconds_timeout = seconds_timeout, close_connection = close_connection, - s3_force_path_style = s3_force_path_style, + verbose = verbose, args = args ) } @@ -28,51 +34,58 @@ resources_aws_new <- function( bucket = NULL, prefix = NULL, region = NULL, - part_size = NULL, endpoint = NULL, + s3_force_path_style = NULL, + version = NULL, + part_size = NULL, + page_size = NULL, max_tries = NULL, seconds_timeout = NULL, close_connection = NULL, - s3_force_path_style = NULL, + verbose = TRUE, args = NULL ) { force(bucket) force(prefix) force(region) - force(part_size) force(endpoint) + force(s3_force_path_style) + force(version) + force(part_size) + force(page_size) force(max_tries) force(seconds_timeout) force(close_connection) - force(s3_force_path_style) + force(verbose) force(args) enclass(environment(), c("tar_resources_aws", "tar_resources")) } #' @export resources_validate.tar_resources_aws <- function(resources) { - for (field in c("bucket", "prefix")) { + for (field in c("bucket", "prefix", "version")) { tar_assert_scalar(resources[[field]]) tar_assert_chr(resources[[field]]) tar_assert_none_na(resources[[field]]) tar_assert_nzchar(resources[[field]]) } - for (field in c("region", 
"endpiont")) { + for (field in c("region", "endpoint")) { tar_assert_scalar(resources[[field]] %|||% "x") tar_assert_chr(resources[[field]] %|||% "x") tar_assert_none_na(resources[[field]] %|||% "x") } - for (field in c("part_size", "max_tries", "seconds_timeout")) { + for (field in c("part_size", "page_size", "max_tries", "seconds_timeout")) { tar_assert_scalar(resources[[field]] %|||% 1L) tar_assert_dbl(resources[[field]] %|||% 1L) tar_assert_none_na(resources[[field]] %|||% 1L) tar_assert_ge(resources[[field]] %|||% 1L, 0L) } - for (field in c("close_connection", "s3_force_path_style")) { + for (field in c("close_connection", "s3_force_path_style", "verbose")) { tar_assert_scalar(resources[[field]] %|||% TRUE) tar_assert_lgl(resources[[field]] %|||% TRUE) tar_assert_none_na(resources[[field]] %|||% TRUE) } + tar_assert_in(resources$version, c("latest", "meta")) resources_aws_validate_args(resources$args) } diff --git a/R/class_resources_gcp.R b/R/class_resources_gcp.R index 9e90ed145..8a3d19a2c 100644 --- a/R/class_resources_gcp.R +++ b/R/class_resources_gcp.R @@ -1,6 +1,7 @@ resources_gcp_init <- function( bucket = NULL, prefix = tar_path_objects_dir_cloud(), + version = "latest", predefined_acl = "private", max_tries = 5L, verbose = FALSE @@ -8,6 +9,7 @@ resources_gcp_init <- function( resources_gcp_new( bucket = bucket, prefix = prefix, + version = version, predefined_acl = predefined_acl, max_tries = max_tries, verbose = verbose @@ -17,12 +19,14 @@ resources_gcp_init <- function( resources_gcp_new <- function( bucket = NULL, prefix = NULL, + version = NULL, predefined_acl = NULL, max_tries = NULL, verbose = NULL ) { force(bucket) force(prefix) + force(version) force(predefined_acl) force(max_tries) force(verbose) @@ -31,22 +35,19 @@ resources_gcp_new <- function( #' @export resources_validate.tar_resources_gcp <- function(resources) { - message <- "GCP resources require a valid bucket name." 
- tar_assert_scalar(resources$bucket, msg = message) - tar_assert_chr(resources$bucket, msg = message) - tar_assert_none_na(resources$bucket, msg = message) - tar_assert_nzchar(resources$bucket, msg = message) - tar_assert_scalar(resources$prefix) - tar_assert_chr(resources$prefix) - tar_assert_nzchar(resources$prefix) - tar_assert_scalar(resources$predefined_acl) - tar_assert_chr(resources$predefined_acl) - tar_assert_nzchar(resources$predefined_acl) + for (field in c("bucket", "prefix", "predefined_acl")) { + message <- paste("GCP resources require a valid", field) + tar_assert_scalar(resources[[field]], msg = message) + tar_assert_chr(resources[[field]], msg = message) + tar_assert_none_na(resources[[field]], msg = message) + tar_assert_nzchar(resources[[field]], msg = message) + } tar_assert_scalar(resources$max_tries %|||% 1L) tar_assert_dbl(resources$max_tries %|||% 1L) tar_assert_none_na(resources$max_tries %|||% 1L) tar_assert_ge(resources$max_tries %|||% 1L, 0L) tar_assert_scalar(resources$verbose) + tar_assert_in(resources$version, c("latest", "meta")) tar_assert_lgl(resources$verbose) } diff --git a/R/tar_resources_aws.R b/R/tar_resources_aws.R index bf8b7cb8f..4cbbee8b0 100644 --- a/R/tar_resources_aws.R +++ b/R/tar_resources_aws.R @@ -23,11 +23,6 @@ #' was added on 2023-08-24: `targets` version 1.2.2.9000.) #' @param region Character of length 1, AWS region containing the S3 bucket. #' Set to `NULL` to use the default region. -#' @param part_size Positive numeric of length 1, number of bytes -#' for each part of a multipart upload. (Except the last part, -#' which is the remainder.) In a multipart upload, each part -#' must be at least 5 MB. The default value of the `part_size` -#' argument is `5 * (2 ^ 20)`. #' @param endpoint Character of length 1, URL endpoint for S3 storage. #' Defaults to the Amazon AWS endpoint if `NULL`. 
Example: #' To use the S3 protocol with Google Cloud Storage, @@ -45,14 +40,33 @@ #' object versioning turned on, `targets` may fail to record object #' versions. Google Cloud Storage in particular has this #' incompatibility. +#' @param s3_force_path_style Logical of length 1, whether to use path-style +#' addressing for S3 requests. +#' @param version Character of length 1: `"latest"` to read the latest +#' version of the target in the bucket (default), or `"meta"` to +#' read the version recorded in the metadata. This affects how `targets` +#' downloads target data and makes sure it is up to date. `"latest"` +#' is sufficient for most cases. Use `"meta"` if you are reverting to +#' a historical copy of the metadata (`_targets/meta/meta`) and wish to use +#' `targets` to use the corresponding old copies of versioned data in a +#' versioned bucket. The `version` argument is only applicable if +#' the bucket has versioning enabled. +#' @param part_size Positive numeric of length 1, number of bytes +#' for each part of a multipart upload. (Except the last part, +#' which is the remainder.) In a multipart upload, each part +#' must be at least 5 MB. The default value of the `part_size` +#' argument is `5 * (2 ^ 20)`. +#' @param page_size Positive integer of length 1, number of items in each +#' page for paginated HTTP requests such as listing objects. #' @param max_tries Positive integer of length 1, maximum number of attempts #' to access a network resource on AWS. #' @param seconds_timeout Positive numeric of length 1, #' number of seconds until an HTTP connection times out. #' @param close_connection Logical of length 1, whether to close HTTP #' connections immediately. -#' @param s3_force_path_style Logical of length 1, whether to use path-style -#' addressing for S3 requests. +#' @param verbose Logical of length 1, whether to print console messages +#' when running computationally expensive operations such as listing +#' objects in a large bucket. #' @param ... 
Named arguments to functions in `paws.storage::s3()` to manage #' S3 storage. The documentation of these specific functions #' is linked from `https://www.paws-r-sdk.com/docs/s3/`. @@ -89,14 +103,17 @@ tar_resources_aws <- function( bucket = targets::tar_option_get("resources")$aws$bucket, prefix = targets::tar_option_get("resources")$aws$prefix, region = targets::tar_option_get("resources")$aws$region, - part_size = targets::tar_option_get("resources")$aws$part_size, endpoint = targets::tar_option_get("resources")$aws$endpoint, - max_tries = targets::tar_option_get("resources")$aws$max_tries, - seconds_timeout = targets::tar_option_get("resources")$aws$seconds_timeout, - close_connection = targets::tar_option_get("resources")$aws$close_connection, s3_force_path_style = targets::tar_option_get( "resources" )$aws$s3_force_path_style, + version = targets::tar_option_get("resources")$aws$version, + part_size = targets::tar_option_get("resources")$aws$part_size, + page_size = targets::tar_option_get("resources")$aws$page_size, + max_tries = targets::tar_option_get("resources")$aws$max_tries, + seconds_timeout = targets::tar_option_get("resources")$aws$seconds_timeout, + close_connection = targets::tar_option_get("resources")$aws$close_connection, + verbose = targets::tar_option_get("resources")$aws$verbose, ... ) { if (is.null(prefix)) { @@ -104,7 +121,10 @@ tar_resources_aws <- function( prefix <- path_store_default() } prefix <- prefix %|||% targets::tar_path_objects_dir_cloud() + version <- version %|||% "latest" part_size <- part_size %|||% (5 * (2 ^ 20)) + page_size <- page_size %|||% 1000L + verbose <- verbose %|||% TRUE args <- list(...) 
default_args <- targets::tar_option_get("resources")$aws$args for (name in names(default_args)) { @@ -114,12 +134,15 @@ tar_resources_aws <- function( bucket = bucket, prefix = prefix, region = region, - part_size = part_size, endpoint = endpoint, + s3_force_path_style = s3_force_path_style, + version = version, + part_size = part_size, + page_size = page_size, max_tries = max_tries, seconds_timeout = seconds_timeout, close_connection = close_connection, - s3_force_path_style = s3_force_path_style, + verbose = verbose, args = args ) resources_validate(out) diff --git a/R/tar_resources_gcp.R b/R/tar_resources_gcp.R index 8dce358bf..8a98fc8e6 100644 --- a/R/tar_resources_gcp.R +++ b/R/tar_resources_gcp.R @@ -36,6 +36,7 @@ tar_resources_gcp <- function( bucket = targets::tar_option_get("resources")$gcp$bucket, prefix = targets::tar_option_get("resources")$gcp$prefix, + version = targets::tar_option_get("resources")$gcp$version, predefined_acl = targets::tar_option_get("resources")$gcp$predefined_acl, max_tries = targets::tar_option_get("resources")$gcp$max_tries, verbose = targets::tar_option_get("resources")$gcp$verbose @@ -44,11 +45,13 @@ tar_resources_gcp <- function( tar_warn_prefix() prefix <- path_store_default() } + version <- version %|||% "latest" predefined_acl <- predefined_acl %|||% "private" verbose <- verbose %|||% FALSE out <- resources_gcp_init( bucket = bucket, prefix = prefix, + version = version, predefined_acl = predefined_acl, max_tries = max_tries, verbose = verbose diff --git a/man/tar_resources_aws.Rd b/man/tar_resources_aws.Rd index 116a4fd0f..239c5a93f 100644 --- a/man/tar_resources_aws.Rd +++ b/man/tar_resources_aws.Rd @@ -8,12 +8,15 @@ tar_resources_aws( bucket = targets::tar_option_get("resources")$aws$bucket, prefix = targets::tar_option_get("resources")$aws$prefix, region = targets::tar_option_get("resources")$aws$region, - part_size = targets::tar_option_get("resources")$aws$part_size, endpoint = 
targets::tar_option_get("resources")$aws$endpoint, + s3_force_path_style = targets::tar_option_get("resources")$aws$s3_force_path_style, + version = targets::tar_option_get("resources")$aws$version, + part_size = targets::tar_option_get("resources")$aws$part_size, + page_size = targets::tar_option_get("resources")$aws$page_size, max_tries = targets::tar_option_get("resources")$aws$max_tries, seconds_timeout = targets::tar_option_get("resources")$aws$seconds_timeout, close_connection = targets::tar_option_get("resources")$aws$close_connection, - s3_force_path_style = targets::tar_option_get("resources")$aws$s3_force_path_style, + verbose = targets::tar_option_get("resources")$aws$verbose, ... ) } @@ -33,12 +36,6 @@ was added on 2023-08-24: \code{targets} version 1.2.2.9000.)} \item{region}{Character of length 1, AWS region containing the S3 bucket. Set to \code{NULL} to use the default region.} -\item{part_size}{Positive numeric of length 1, number of bytes -for each part of a multipart upload. (Except the last part, -which is the remainder.) In a multipart upload, each part -must be at least 5 MB. The default value of the \code{part_size} -argument is \code{5 * (2 ^ 20)}.} - \item{endpoint}{Character of length 1, URL endpoint for S3 storage. Defaults to the Amazon AWS endpoint if \code{NULL}. Example: To use the S3 protocol with Google Cloud Storage, @@ -57,6 +54,28 @@ object versioning turned on, \code{targets} may fail to record object versions. Google Cloud Storage in particular has this incompatibility.} +\item{s3_force_path_style}{Logical of length 1, whether to use path-style +addressing for S3 requests.} + +\item{version}{Character of length 1: \code{"latest"} to read the latest +version of the target in the bucket (default), or \code{"meta"} to +read the version recorded in the metadata. This affects how \code{targets} +downloads target data and makes sure it is up to date. \code{"latest"} +is sufficient for most cases. 
Use \code{"meta"} if you are reverting to +a historical copy of the metadata (\verb{_targets/meta/meta}) and wish to use +\code{targets} to use the corresponding old copies of versioned data in a +versioned bucket. The \code{version} argument is only applicable if +the bucket has versioning enabled.} + +\item{part_size}{Positive numeric of length 1, number of bytes +for each part of a multipart upload. (Except the last part, +which is the remainder.) In a multipart upload, each part +must be at least 5 MB. The default value of the \code{part_size} +argument is \code{5 * (2 ^ 20)}.} + +\item{page_size}{Positive integer of length 1, number of items in each +page for paginated HTTP requests such as listing objects.} + \item{max_tries}{Positive integer of length 1, maximum number of attempts to access a network resource on AWS.} @@ -66,8 +85,9 @@ number of seconds until an HTTP connection times out.} \item{close_connection}{Logical of length 1, whether to close HTTP connections immediately.} -\item{s3_force_path_style}{Logical of length 1, whether to use path-style -addressing for S3 requests.} +\item{verbose}{Logical of length 1, whether to print console messages +when running computationally expensive operations such as listing +objects in a large bucket.} \item{...}{Named arguments to functions in \code{paws.storage::s3()} to manage S3 storage. 
The documentation of these specific functions diff --git a/man/tar_resources_gcp.Rd b/man/tar_resources_gcp.Rd index ad67a2992..bad55f3d7 100644 --- a/man/tar_resources_gcp.Rd +++ b/man/tar_resources_gcp.Rd @@ -8,6 +8,7 @@ Google Cloud Storage (GCS)} tar_resources_gcp( bucket = targets::tar_option_get("resources")$gcp$bucket, prefix = targets::tar_option_get("resources")$gcp$prefix, + version = targets::tar_option_get("resources")$gcp$version, predefined_acl = targets::tar_option_get("resources")$gcp$predefined_acl, max_tries = targets::tar_option_get("resources")$gcp$max_tries, verbose = targets::tar_option_get("resources")$gcp$verbose @@ -26,6 +27,16 @@ In the future, \code{targets} will begin requiring explicitly user-supplied prefixes. (This last note was added on 2023-08-24: \code{targets} version 1.2.2.9000.)} +\item{version}{Character of length 1: \code{"latest"} to read the latest +version of the target in the bucket (default), or \code{"meta"} to +read the version recorded in the metadata. This affects how \code{targets} +downloads target data and makes sure it is up to date. \code{"latest"} +is sufficient for most cases. Use \code{"meta"} if you are reverting to +a historical copy of the metadata (\verb{_targets/meta/meta}) and wish to use +\code{targets} to use the corresponding old copies of versioned data in a +versioned bucket. The \code{version} argument is only applicable if +the bucket has versioning enabled.} + \item{predefined_acl}{Character of length 1, user access to the object. See \code{?googleCloudStorageR::gcs_upload} for possible values. 
Defaults to \code{"private"}.} diff --git a/tests/testthat/test-tar_resources_aws.R b/tests/testthat/test-tar_resources_aws.R index f46e9f3c0..28bc8f822 100644 --- a/tests/testthat/test-tar_resources_aws.R +++ b/tests/testthat/test-tar_resources_aws.R @@ -195,3 +195,96 @@ tar_test("tar_resources_aws() wants a prefix", { class = "tar_condition_deprecate" ) }) + +tar_test("tar_resources_aws() verbose", { + skip_cran() + skip_on_os("windows") + skip_if_not_installed("paws.storage") + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_aws() + expect_true(out$verbose) + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + verbose = FALSE, + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_aws() + expect_false(out$verbose) +}) + +tar_test("tar_resources_aws() page_size", { + skip_cran() + skip_on_os("windows") + skip_if_not_installed("paws.storage") + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_aws() + expect_equal(out$page_size, 1000L) + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + page_size = 3L, + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_aws() + expect_equal(out$page_size, 3L) +}) + +tar_test("tar_resources_aws() version", { + skip_cran() + skip_on_os("windows") + skip_if_not_installed("paws.storage") + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_aws() + expect_equal(out$version, "latest") + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + version = "meta", + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_aws() + expect_equal(out$version, "meta") + expect_error( + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws( + version = "nope", + prefix = "x", 
+ bucket = "x" + ) + ) + ), + class = "tar_condition_validate" + ) +}) diff --git a/tests/testthat/test-tar_resources_gcp.R b/tests/testthat/test-tar_resources_gcp.R index 0f8fa4110..e80984f14 100644 --- a/tests/testthat/test-tar_resources_gcp.R +++ b/tests/testthat/test-tar_resources_gcp.R @@ -74,3 +74,42 @@ tar_test("tar_resources_gcp() wants a prefix", { class = "tar_condition_deprecate" ) }) + +tar_test("tar_resources_gcp() version", { + skip_cran() + skip_on_os("windows") + skip_if_not_installed("googleCloudStorageR") + tar_option_set( + resources = tar_resources( + gcp = tar_resources_gcp( + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_gcp() + expect_equal(out$version, "latest") + tar_option_set( + resources = tar_resources( + gcp = tar_resources_gcp( + version = "meta", + prefix = "x", + bucket = "x" + ) + ) + ) + out <- tar_resources_gcp() + expect_equal(out$version, "meta") + expect_error( + tar_option_set( + resources = tar_resources( + gcp = tar_resources_gcp( + version = "nope", + prefix = "x", + bucket = "x" + ) + ) + ), + class = "tar_condition_validate" + ) +}) From 8bc485112eaaafa3d7e234ddda574eff09a3281c Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 13:18:24 -0500 Subject: [PATCH 09/48] news --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index d27a877d2..d4c566634 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,9 @@ Because of the changes below, upgrading to this version of `targets` will unavoi * For AWS S3 targets, `tar_delete()`, `tar_destroy()`, and `tar_prune()` now use efficient batched calls to `delete_objects()` instead of costly individual calls to `delete_object()` (#1171). * Add a new `verbose` argument to `tar_delete()`, `tar_destroy()`, and `tar_prune()`. * Add a new `batch_size` argument to `tar_delete()`, `tar_destroy()`, and `tar_prune()`. +* Add new arguments `page_size`, `version`, and `verbose` to `tar_resources_aws()` (#1172). 
+* Add new argument `version` to `tar_resources_gcp()` (#1172). + # targets 1.3.2 From cdda2a8df01e269947ca0862288b3bbbd32f88e3 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 13:28:39 -0500 Subject: [PATCH 10/48] store_aws_version_use() --- R/class_aws.R | 9 +++++++++ tests/testthat/test-class_aws.R | 29 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/R/class_aws.R b/R/class_aws.R index b8ab7a1e3..d14e913c4 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -101,6 +101,15 @@ store_aws_version <- function(path) { if_any(length(out) && nzchar(out), out, NULL) } +store_aws_version_use <- function(store, path) { + if_any( + is.null(store$resources$aws$version) || + store$resources$aws$version == "latest", + store_aws_version(path), + NULL + ) +} + store_aws_path_field <- function(path, pattern) { path <- store_aws_split_colon(path) keyvalue_field(x = path, pattern = pattern) diff --git a/tests/testthat/test-class_aws.R b/tests/testthat/test-class_aws.R index 8fad288fb..9ff6c35fc 100644 --- a/tests/testthat/test-class_aws.R +++ b/tests/testthat/test-class_aws.R @@ -66,7 +66,34 @@ tar_test("store_aws_version()", { expect_null(store_aws_version(letters)) }) -tar_test("store_aws_version()", { +tar_test("store_aws_version_use()", { + target <- tar_target(x, 1, repository = "aws") + path <- c("bucket=b", "version=number") + expect_equal(store_aws_version_use(target$store, path), "number") + expect_null(store_aws_version_use(target$store, letters)) + resources <- tar_resources( + aws = tar_resources_aws( + bucket = "x", + prefix = "y", + version = "latest" + ) + ) + target <- tar_target(x, 1, repository = "aws", resources = resources) + expect_equal(store_aws_version_use(target$store, path), "number") + expect_null(store_aws_version_use(target$store, letters)) + resources <- tar_resources( + aws = tar_resources_aws( + bucket = "x", + prefix = "y", + version = "meta" + ) + ) + target <- tar_target(x, 1, repository 
= "aws", resources = resources) + expect_null(store_aws_version_use(target$store, path)) + expect_null(store_aws_version_use(target$store, letters)) +}) + +tar_test("store_aws_version() endpoint", { path <- c( "bucket=b", sprintf("endpoint=%s", base64url::base64_urlencode("answer")) From f18a297c506eec2a8a05787f2a104efa9d7aa2a8 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 13:30:54 -0500 Subject: [PATCH 11/48] store_gcp_version_use() --- R/class_gcp.R | 9 +++++++++ tests/testthat/test-class_gcp.R | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/R/class_gcp.R b/R/class_gcp.R index 9eeb2f0f1..0bc38de19 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -57,6 +57,15 @@ store_gcp_version <- function(path) { if_any(length(out) && nzchar(out), out, NULL) } +store_gcp_version_use <- function(store, path) { + if_any( + is.null(store$resources$gcp$version) || + store$resources$gcp$version == "latest", + store_gcp_version(path), + NULL + ) +} + store_gcp_path_field <- function(path, pattern) { keyvalue_field(x = path, pattern = pattern) } diff --git a/tests/testthat/test-class_gcp.R b/tests/testthat/test-class_gcp.R index 53eb86c2a..8211083ac 100644 --- a/tests/testthat/test-class_gcp.R +++ b/tests/testthat/test-class_gcp.R @@ -32,6 +32,33 @@ tar_test("store_gcp_version()", { expect_null(store_gcp_version(letters)) }) +tar_test("store_gcp_version_use()", { + target <- tar_target(x, 1, repository = "gcp") + path <- c("bucket=b", "version=number") + expect_equal(store_gcp_version_use(target$store, path), "number") + expect_null(store_gcp_version_use(target$store, letters)) + resources <- tar_resources( + gcp = tar_resources_gcp( + bucket = "x", + prefix = "y", + version = "latest" + ) + ) + target <- tar_target(x, 1, repository = "gcp", resources = resources) + expect_equal(store_gcp_version_use(target$store, path), "number") + expect_null(store_gcp_version_use(target$store, letters)) + resources <- tar_resources( + gcp = 
tar_resources_gcp( + bucket = "x", + prefix = "y", + version = "meta" + ) + ) + target <- tar_target(x, 1, repository = "gcp", resources = resources) + expect_null(store_gcp_version_use(target$store, path)) + expect_null(store_gcp_version_use(target$store, letters)) +}) + tar_test("package detection", { skip_cran() target <- tar_target(x, "x_value", format = "feather", repository = "gcp") From c20b4b51e33863dab20d454b0e2a0744a9b4cbef Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 13:51:55 -0500 Subject: [PATCH 12/48] Align on cloud versioning policy --- NAMESPACE | 1 - NEWS.md | 1 - R/class_aws.R | 11 +++-- R/class_aws_file.R | 2 +- R/class_gcp.R | 10 ++-- R/class_gcp_file.R | 2 +- R/tar_delete.R | 2 +- R/tar_destroy.R | 2 +- R/tar_load.R | 1 + R/tar_load_raw.R | 1 + R/tar_prune.R | 1 + R/tar_read.R | 26 +++++++++++ R/tar_read_raw.R | 1 + R/tar_unversion.R | 55 ---------------------- _pkgdown.yml | 1 - man/tar_delete.Rd | 36 ++++++++------- man/tar_destroy.Rd | 36 ++++++++------- man/tar_invalidate.Rd | 3 +- man/tar_load.Rd | 29 ++++++++++++ man/tar_load_raw.Rd | 29 ++++++++++++ man/tar_prune.Rd | 32 ++++++++++++- man/tar_prune_list.Rd | 3 +- man/tar_read.Rd | 29 ++++++++++++ man/tar_read_raw.Rd | 29 ++++++++++++ man/tar_unversion.Rd | 63 ------------------------- tests/testthat/test-tar_unversion.R | 71 ----------------------------- 26 files changed, 234 insertions(+), 243 deletions(-) delete mode 100644 R/tar_unversion.R delete mode 100644 man/tar_unversion.Rd delete mode 100644 tests/testthat/test-tar_unversion.R diff --git a/NAMESPACE b/NAMESPACE index 9b3f96669..eede48c4d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -495,7 +495,6 @@ export(tar_timestamp_raw) export(tar_toggle) export(tar_traceback) export(tar_unscript) -export(tar_unversion) export(tar_validate) export(tar_visnetwork) export(tar_warn_deprecate) diff --git a/NEWS.md b/NEWS.md index d4c566634..8fad9703a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,7 +14,6 @@ Because of the changes 
below, upgrading to this version of `targets` will unavoi * Add function `tar_seed_set()` which sets a seed and sets all the RNG algorithms to their defaults in the R installation of the user. Each target now uses `tar_seed_set()` function to set its seed before running its R command (#1139). * Deprecate `tar_seed()` in favor of the new `tar_seed_get()` function. * Migrate to the changes in `clustermq` 0.9.0 (@mschubert). -* Add a new `tar_unversion()` function to remove version IDs from the metadata of cloud targets. This makes it easier to remove all versions of target data using functions `tar_destroy()` and `tar_delete()`. * For AWS S3 targets, `tar_delete()`, `tar_destroy()`, and `tar_prune()` now use efficient batched calls to `delete_objects()` instead of costly individual calls to `delete_object()` (#1171). * Add a new `verbose` argument to `tar_delete()`, `tar_destroy()`, and `tar_prune()`. * Add a new `batch_size` argument to `tar_delete()`, `tar_destroy()`, and `tar_prune()`. 
diff --git a/R/class_aws.R b/R/class_aws.R index d14e913c4..a398b4e19 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -147,7 +147,7 @@ store_read_object.tar_aws <- function(store) { file = scratch, region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version(path), + version = store_aws_version_use(store, path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, @@ -166,7 +166,7 @@ store_exist_object.tar_aws <- function(store, name = NULL) { bucket = store_aws_bucket(path), region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version(path), + version = store_aws_version_use(store, path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, @@ -187,7 +187,10 @@ store_delete_objects.tar_aws <- function(store, meta, batch_size, verbose) { endpoint <- store_aws_endpoint(example_path) objects <- map( subset$path, - ~list(Key = store_aws_key(.x), VersionId = store_aws_version(.x)) + ~list( + Key = store_aws_key(.x), + VersionId = store_aws_version_use(store, .x) + ) ) message <- paste( "could not delete one or more objects from AWS bucket %s.", @@ -273,7 +276,7 @@ store_aws_hash <- function(store) { bucket = store_aws_bucket(path), region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version(path), + version = store_aws_version_use(store, path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, diff --git a/R/class_aws_file.R b/R/class_aws_file.R index 2626b79d7..2f85373c1 100644 --- a/R/class_aws_file.R +++ b/R/class_aws_file.R @@ -66,7 +66,7 @@ store_read_object.tar_aws_file <- function(store) { file = scratch, region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version(path), + version = store_aws_version_use(store, path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, diff --git 
a/R/class_gcp.R b/R/class_gcp.R index 0bc38de19..fe9eb8f43 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -88,7 +88,7 @@ store_read_object.tar_gcp <- function(store) { key = key, bucket = bucket, file = scratch, - version = store_gcp_version(path), + version = store_gcp_version_use(store, path), verbose = store$resources$gcp$verbose, max_tries = store$resources$gcp$max_tries ) @@ -101,7 +101,7 @@ store_exist_object.tar_gcp <- function(store, name = NULL) { gcp_gcs_exists( key = store_gcp_key(path), bucket = store_gcp_bucket(path), - version = store_gcp_version(path), + version = store_gcp_version_use(store, path), verbose = store$resources$gcp$verbose %|||% FALSE, max_tries = store$resources$gcp$max_tries %|||% 5L ) @@ -112,7 +112,7 @@ store_delete_object.tar_gcp <- function(store, name = NULL) { path <- store$file$path key <- store_gcp_key(path) bucket <- store_gcp_bucket(path) - version <- store_gcp_version(path) + version <- store_gcp_version_use(store, path) message <- paste( "could not delete target %s from gcp bucket %s key %s.", "Either delete the object manually in the gcp web console", @@ -145,7 +145,7 @@ store_delete_objects.tar_gcp <- function(store, meta, batch_size, verbose) { for (index in seq_len(nrow(subset))) { path <- subset$path[[index]] key <- store_gcp_key(path) - version <- store_gcp_version(path) + version <- store_gcp_version_use(store, path) message <- paste( "could not object %s from gcp bucket %s.", "You may need to delete it manually.\nMessage: " @@ -226,7 +226,7 @@ store_gcp_hash <- function(store) { head <- gcp_gcs_head( key = store_gcp_key(path), bucket = store_gcp_bucket(path), - version = store_gcp_version(path), + version = store_gcp_version_use(store, path), verbose = store$resources$gcp$verbose %|||% FALSE, max_tries = store$resources$gcp$max_tries %|||% 5L ) diff --git a/R/class_gcp_file.R b/R/class_gcp_file.R index dbfaf50c0..50e6fc666 100644 --- a/R/class_gcp_file.R +++ b/R/class_gcp_file.R @@ -53,7 +53,7 @@ 
store_read_object.tar_gcp_file <- function(store) { key = key, bucket = bucket, file = scratch, - version = store_gcp_version(path), + version = store_gcp_version_use(store, path), verbose = store$resources$gcp$verbose, max_tries = store$resources$gcp$max_tries ) diff --git a/R/tar_delete.R b/R/tar_delete.R index d07ee3c30..8db633a21 100644 --- a/R/tar_delete.R +++ b/R/tar_delete.R @@ -19,7 +19,7 @@ #' will be deleted. For patterns no longer in the metadata, #' branches are left alone. #' @inheritSection tar_meta Storage access -#' @inheritSection tar_unversion Deleting cloud target data +#' @inheritSection tar_read Cloud target data versioning #' @inheritParams tar_validate #' @param names Names of the targets to remove from `_targets/objects/`. #' You can supply symbols diff --git a/R/tar_destroy.R b/R/tar_destroy.R index 412763427..8a9ff9b30 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -26,7 +26,7 @@ #' The next run of the pipeline will start from scratch, #' and it will not skip any targets. #' @inheritSection tar_meta Storage access -#' @inheritSection tar_unversion Deleting cloud target data +#' @inheritSection tar_read Cloud target data versioning #' @return `NULL` (invisibly). #' @inheritParams tar_delete #' @inheritParams tar_validate diff --git a/R/tar_load.R b/R/tar_load.R index 2f1ba682a..1255a4e71 100644 --- a/R/tar_load.R +++ b/R/tar_load.R @@ -9,6 +9,7 @@ #' to load all targets. #' @return Nothing. #' @inheritSection tar_meta Storage access +#' @inheritSection tar_read Cloud target data versioning #' @inheritParams tar_load_raw #' @param names Names of the targets to load. #' You may supply `tidyselect` helpers like [any_of()] and [starts_with()]. diff --git a/R/tar_load_raw.R b/R/tar_load_raw.R index 7ef72cb3d..0268e89b3 100644 --- a/R/tar_load_raw.R +++ b/R/tar_load_raw.R @@ -6,6 +6,7 @@ #' or `tarchetypes::tar_render()`. #' @return Nothing. 
#' @inheritSection tar_meta Storage access +#' @inheritSection tar_read Cloud target data versioning #' @inheritParams tar_read #' @param names Character vector, names of the targets to load. #' Names are expected to appear in the metadata in `_targets/meta`. diff --git a/R/tar_prune.R b/R/tar_prune.R index dddf2dbbc..a8a63b6dc 100644 --- a/R/tar_prune.R +++ b/R/tar_prune.R @@ -6,6 +6,7 @@ #' and remove target metadata from `_targets/meta/meta` #' for targets that are no longer part of the pipeline. #' @inheritSection tar_meta Storage access +#' @inheritSection tar_read Cloud target data versioning #' @details #' `tar_prune()` is useful if you recently worked through #' multiple changes to your project and are now trying to diff --git a/R/tar_read.R b/R/tar_read.R index aa3654936..9a4caae17 100644 --- a/R/tar_read.R +++ b/R/tar_read.R @@ -5,6 +5,32 @@ #' `_targets/objects/`. For dynamic files (i.e. `format = "file"`) #' the paths are returned. #' @inheritSection tar_meta Storage access +#' @section Cloud target data versioning: +#' Some buckets in Amazon S3 or Google Cloud Storage are "versioned", +#' which means they track historical versions of each data object. +#' If you use `targets` with cloud storage +#' (<https://books.ropensci.org/targets/cloud-storage.html>) +#' and versioning is turned on, then `targets` will record each +#' version of each target in its metadata. +#' +#' However, by default, +#' `targets` *uses* only the latest version in the bucket. +#' You may instead want to +#' use the specific version of the target recorded in the local metadata +#' (for example, if you previously committed the metadata file +#' `_targets/meta/meta` to version control, and now you want to roll +#' back the code and data together to an earlier point in time). +#' To do this, you will +#' need to modify the `resources` argument of [tar_target()] and/or +#' [tar_option_set()] via [tar_resources()]. In [tar_resources_aws()] +#' or [tar_resources_gcp()], set the `version` argument to `"meta"`.
+#' Modifying your code this way in `_targets.R` will control functions that +#' read `_targets.R` when they run, such as [tar_make()], [tar_outdated()], +#' and [tar_visnetwork()]. To apply `version = "meta"` to functions that +#' do not read `_targets.R`, such as [tar_read()] and [tar_load()], +#' set `resources` in [tar_option_set()] in your local R session. +#' You can do this manually, or if you coded those options in `_targets.R`, +#' you can manually run `_targets.R` using [tar_load_globals()]. #' @return The target's return value from its file in #' `_targets/objects/`, or the paths to the custom files and directories #' if `format = "file"` was set. diff --git a/R/tar_read_raw.R b/R/tar_read_raw.R index 7dc7ef525..cde84e406 100644 --- a/R/tar_read_raw.R +++ b/R/tar_read_raw.R @@ -8,6 +8,7 @@ #' `_targets/objects/`, or the paths to the custom files and directories #' if `format = "file"` was set. #' @inheritSection tar_meta Storage access +#' @inheritSection tar_read Cloud target data versioning #' @inheritParams tar_validate #' @param name Character, name of the target to read. #' @param branches Integer of indices of the branches to load diff --git a/R/tar_unversion.R b/R/tar_unversion.R deleted file mode 100644 index 727967b8b..000000000 --- a/R/tar_unversion.R +++ /dev/null @@ -1,55 +0,0 @@ -#' @title Delete cloud object version IDs from local metadata. -#' @export -#' @family clean -#' @description Delete version IDs from local metadata. -#' @section Deleting cloud target data: -#' Some buckets in Amazon S3 or Google Cloud Storage are "versioned", -#' which means they track historical versions of each data object. -#' If you use `targets` with cloud storage -#' (<https://books.ropensci.org/targets/cloud-storage.html>) -#' and versioning is turned on, then `targets` will record each -#' version of each target in its metadata. -#' -#' By default, functions [tar_delete()] and [tar_destroy()] only remove -#' the current version ID of each target as recorded in the local -#' metadata.
Extra steps are required to remove the *latest* version -#' of each object, whatever that version may be: -#' -#' 1. Make sure your local copy of the metadata is current and -#' up to date. You may need to run [tar_meta_download()] or -#' [tar_meta_sync()]. -#' 2. Run [tar_unversion()] to remove the recorded version IDs of -#' your targets in the local metadata. -#' 3. With the version IDs gone, [tar_delete()] and [tar_destroy()] -#' will delete all the versions of the affected targets. -#' @return `NULL` (invisibly). -#' @param names Tidyselect expression to identify the targets to drop -#' version IDs. -#' @inheritParams tar_validate -tar_unversion <- function( - names = tidyselect::everything(), - store = targets::tar_config_get("store") -) { - tar_assert_allow_meta("tar_unversion", store) - tar_assert_store(store = store) - tar_assert_path(path_meta(store)) - meta <- meta_init(path_store = store) - data <- as.data.frame(meta$database$read_condensed_data()) - names_quosure <- rlang::enquo(names) - names <- tar_tidyselect_eval(names_quosure, data$name) - tar_assert_chr(names, "names arg of tar_unversion() must eval to chr") - replacement <- "version=" - pattern <- paste0("^", replacement, ".*") - unversion <- data$name %in% names & - !is.na(data$repository) & - data$repository != "local" - for (index in which(unversion)) { - data$path[[index]] <- gsub( - pattern = pattern, - replacement = replacement, - x = data$path[[index]] - ) - } - meta$database$overwrite_storage(data) - invisible() -} diff --git a/_pkgdown.yml b/_pkgdown.yml index 52e3e28b1..de686c103 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -101,7 +101,6 @@ reference: - '`tar_prune`' - '`tar_prune_list`' - '`tar_unscript`' - - '`tar_unversion`' - title: Progress contents: - '`tar_poll`' diff --git a/man/tar_delete.Rd b/man/tar_delete.Rd index 7fe694377..b7370f993 100644 --- a/man/tar_delete.Rd +++ b/man/tar_delete.Rd @@ -88,7 +88,7 @@ target factories in the \code{tarchetypes} package such as 
\code{tar_render()} and \code{tar_quarto()}. } -\section{Deleting cloud target data}{ +\section{Cloud target data versioning}{ Some buckets in Amazon S3 or Google Cloud Storage are "versioned", which means they track historical versions of each data object. @@ -97,19 +97,24 @@ If you use \code{targets} with cloud storage and versioning is turned on, then \code{targets} will record each version of each target in its metadata. -By default, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove -the current version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: -\enumerate{ -\item Make sure your local copy of the metadata is current and -up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. -\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of -your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. -} +However, by default, +\code{targets} \emph{uses} only the latest version in the bucket. +You may instead want to +use the specific version of the target recorded in the local metadata +(for example, if you previously committed the metadata file +\verb{_targets/meta/meta} to version control, and now you want to roll +back the code and data together to an earlier point in time). +To do this, you will +need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or +\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. 
In \code{\link[=tar_resources_aws]{tar_resources_aws()}} +or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. +Modifying your code this way in \verb{_targets.R} will control functions that +read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, +and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that +do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, +set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. +You can do this manually, or if you coded those options in \verb{_targets.R}, +you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. } \examples{ @@ -133,7 +138,6 @@ Other clean: \code{\link{tar_destroy}()}, \code{\link{tar_invalidate}()}, \code{\link{tar_prune_list}()}, -\code{\link{tar_prune}()}, -\code{\link{tar_unversion}()} +\code{\link{tar_prune}()} } \concept{clean} diff --git a/man/tar_destroy.Rd b/man/tar_destroy.Rd index 4a346f4a3..218e29cf8 100644 --- a/man/tar_destroy.Rd +++ b/man/tar_destroy.Rd @@ -135,7 +135,7 @@ target factories in the \code{tarchetypes} package such as \code{tar_render()} and \code{tar_quarto()}. } -\section{Deleting cloud target data}{ +\section{Cloud target data versioning}{ Some buckets in Amazon S3 or Google Cloud Storage are "versioned", which means they track historical versions of each data object. @@ -144,19 +144,24 @@ If you use \code{targets} with cloud storage and versioning is turned on, then \code{targets} will record each version of each target in its metadata. -By default, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove -the current version ID of each target as recorded in the local -metadata. 
Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: -\enumerate{ -\item Make sure your local copy of the metadata is current and -up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. -\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of -your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. -} +However, by default, +\code{targets} \emph{uses} only the latest version in the bucket. +You may instead want to +use the specific version of the target recorded in the local metadata +(for example, if you previously committed the metadata file +\verb{_targets/meta/meta} to version control, and now you want to roll +back the code and data together to an earlier point in time). +To do this, you will +need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or +\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} +or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. +Modifying your code this way in \verb{_targets.R} will control functions that +read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, +and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that +do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, +set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. 
+You can do this manually, or if you coded those options in \verb{_targets.R}, +you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. } \examples{ @@ -174,7 +179,6 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_invalidate}()}, \code{\link{tar_prune_list}()}, -\code{\link{tar_prune}()}, -\code{\link{tar_unversion}()} +\code{\link{tar_prune}()} } \concept{clean} diff --git a/man/tar_invalidate.Rd b/man/tar_invalidate.Rd index a2e819f7a..471bea689 100644 --- a/man/tar_invalidate.Rd +++ b/man/tar_invalidate.Rd @@ -87,7 +87,6 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_destroy}()}, \code{\link{tar_prune_list}()}, -\code{\link{tar_prune}()}, -\code{\link{tar_unversion}()} +\code{\link{tar_prune}()} } \concept{clean} diff --git a/man/tar_load.Rd b/man/tar_load.Rd index ca0c16965..85f538ccf 100644 --- a/man/tar_load.Rd +++ b/man/tar_load.Rd @@ -90,6 +90,35 @@ target factories in the \code{tarchetypes} package such as \code{tar_render()} and \code{tar_quarto()}. } +\section{Cloud target data versioning}{ + +Some buckets in Amazon S3 or Google Cloud Storage are "versioned", +which means they track historical versions of each data object. +If you use \code{targets} with cloud storage +(\url{https://books.ropensci.org/targets/cloud-storage.html}) +and versioning is turned on, then \code{targets} will record each +version of each target in its metadata. + +However, by default, +\code{targets} \emph{uses} only the latest version in the bucket. +You may instead want to +use the specific version of the target recorded in the local metadata +(for example, if you previously committed the metadata file +\verb{_targets/meta/meta} to version control, and now you want to roll +back the code and data together to an earlier point in time). 
+To do this, you will +need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or +\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} +or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. +Modifying your code this way in \verb{_targets.R} will control functions that +read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, +and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that +do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, +set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. +You can do this manually, or if you coded those options in \verb{_targets.R}, +you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +} + \examples{ if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. diff --git a/man/tar_load_raw.Rd b/man/tar_load_raw.Rd index 123d9b42e..2a7f15e98 100644 --- a/man/tar_load_raw.Rd +++ b/man/tar_load_raw.Rd @@ -86,6 +86,35 @@ target factories in the \code{tarchetypes} package such as \code{tar_render()} and \code{tar_quarto()}. } +\section{Cloud target data versioning}{ + +Some buckets in Amazon S3 or Google Cloud Storage are "versioned", +which means they track historical versions of each data object. +If you use \code{targets} with cloud storage +(\url{https://books.ropensci.org/targets/cloud-storage.html}) +and versioning is turned on, then \code{targets} will record each +version of each target in its metadata. + +However, by default, +\code{targets} \emph{uses} only the latest version in the bucket. 
+You may instead want to +use the specific version of the target recorded in the local metadata +(for example, if you previously committed the metadata file +\verb{_targets/meta/meta} to version control, and now you want to roll +back the code and data together to an earlier point in time). +To do this, you will +need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or +\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} +or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. +Modifying your code this way in \verb{_targets.R} will control functions that +read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, +and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that +do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, +set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. +You can do this manually, or if you coded those options in \verb{_targets.R}, +you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +} + \examples{ if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. diff --git a/man/tar_prune.Rd b/man/tar_prune.Rd index b3086e9d0..b0baa20b8 100644 --- a/man/tar_prune.Rd +++ b/man/tar_prune.Rd @@ -124,6 +124,35 @@ target factories in the \code{tarchetypes} package such as \code{tar_render()} and \code{tar_quarto()}. } +\section{Cloud target data versioning}{ + +Some buckets in Amazon S3 or Google Cloud Storage are "versioned", +which means they track historical versions of each data object. 
+If you use \code{targets} with cloud storage +(\url{https://books.ropensci.org/targets/cloud-storage.html}) +and versioning is turned on, then \code{targets} will record each +version of each target in its metadata. + +However, by default, +\code{targets} \emph{uses} only the latest version in the bucket. +You may instead want to +use the specific version of the target recorded in the local metadata +(for example, if you previously committed the metadata file +\verb{_targets/meta/meta} to version control, and now you want to roll +back the code and data together to an earlier point in time). +To do this, you will +need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or +\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} +or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. +Modifying your code this way in \verb{_targets.R} will control functions that +read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, +and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that +do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, +set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. +You can do this manually, or if you coded those options in \verb{_targets.R}, +you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +} + \examples{ if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. 
@@ -149,7 +178,6 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_destroy}()}, \code{\link{tar_invalidate}()}, -\code{\link{tar_prune_list}()}, -\code{\link{tar_unversion}()} +\code{\link{tar_prune_list}()} } \concept{clean} diff --git a/man/tar_prune_list.Rd b/man/tar_prune_list.Rd index 154dd0ab2..2b38fcbb4 100644 --- a/man/tar_prune_list.Rd +++ b/man/tar_prune_list.Rd @@ -98,7 +98,6 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_destroy}()}, \code{\link{tar_invalidate}()}, -\code{\link{tar_prune}()}, -\code{\link{tar_unversion}()} +\code{\link{tar_prune}()} } \concept{clean} diff --git a/man/tar_read.Rd b/man/tar_read.Rd index 151bd881e..039aa083a 100644 --- a/man/tar_read.Rd +++ b/man/tar_read.Rd @@ -42,6 +42,35 @@ Read a target's return value from its file in \verb{_targets/objects/}. For dynamic files (i.e. \code{format = "file"}) the paths are returned. } +\section{Cloud target data versioning}{ + +Some buckets in Amazon S3 or Google Cloud Storage are "versioned", +which means they track historical versions of each data object. +If you use \code{targets} with cloud storage +(\url{https://books.ropensci.org/targets/cloud-storage.html}) +and versioning is turned on, then \code{targets} will record each +version of each target in its metadata. + +However, by default, +\code{targets} \emph{uses} only the latest version in the bucket. +You may instead want to +use the specific version of the target recorded in the local metadata +(for example, if you previously committed the metadata file +\verb{_targets/meta/meta} to version control, and now you want to roll +back the code and data together to an earlier point in time). +To do this, you will +need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or +\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. 
In \code{\link[=tar_resources_aws]{tar_resources_aws()}} +or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. +Modifying your code this way in \verb{_targets.R} will control functions that +read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, +and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that +do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, +set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. +You can do this manually, or if you coded those options in \verb{_targets.R}, +you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +} + \section{Storage access}{ Several functions like \code{tar_make()}, \code{tar_read()}, \code{tar_load()}, diff --git a/man/tar_read_raw.Rd b/man/tar_read_raw.Rd index 0e392299c..381db3749 100644 --- a/man/tar_read_raw.Rd +++ b/man/tar_read_raw.Rd @@ -68,6 +68,35 @@ target factories in the \code{tarchetypes} package such as \code{tar_render()} and \code{tar_quarto()}. } +\section{Cloud target data versioning}{ + +Some buckets in Amazon S3 or Google Cloud Storage are "versioned", +which means they track historical versions of each data object. +If you use \code{targets} with cloud storage +(\url{https://books.ropensci.org/targets/cloud-storage.html}) +and versioning is turned on, then \code{targets} will record each +version of each target in its metadata. + +However, by default, +\code{targets} \emph{uses} only the latest version in the bucket. 
+You may instead want to +use the specific version of the target recorded in the local metadata +(for example, if you previously committed the metadata file +\verb{_targets/meta/meta} to version control, and now you want to roll +back the code and data together to an earlier point in time). +To do this, you will +need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or +\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} +or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. +Modifying your code this way in \verb{_targets.R} will control functions that +read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, +and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that +do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, +set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. +You can do this manually, or if you coded those options in \verb{_targets.R}, +you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +} + \examples{ if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. 
diff --git a/man/tar_unversion.Rd b/man/tar_unversion.Rd deleted file mode 100644 index 27fd60502..000000000 --- a/man/tar_unversion.Rd +++ /dev/null @@ -1,63 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tar_unversion.R -\name{tar_unversion} -\alias{tar_unversion} -\title{Delete cloud object version IDs from local metadata.} -\usage{ -tar_unversion( - names = tidyselect::everything(), - store = targets::tar_config_get("store") -) -} -\arguments{ -\item{names}{Tidyselect expression to identify the targets to drop -version IDs.} - -\item{store}{Character of length 1, path to the -\code{targets} data store. Defaults to \code{tar_config_get("store")}, -which in turn defaults to \verb{_targets/}. -When you set this argument, the value of \code{tar_config_get("store")} -is temporarily changed for the current function call. -See \code{\link[=tar_config_get]{tar_config_get()}} and \code{\link[=tar_config_set]{tar_config_set()}} for details -about how to set the data store path persistently -for a project.} -} -\value{ -\code{NULL} (invisibly). -} -\description{ -Delete version IDs from local metadata. -} -\section{Deleting cloud target data}{ - -Some buckets in Amazon S3 or Google Cloud Storage are "versioned", -which means they track historical versions of each data object. -If you use \code{targets} with cloud storage -(\url{https://books.ropensci.org/targets/cloud-storage.html}) -and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. - -By default, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove -the current version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: -\enumerate{ -\item Make sure your local copy of the metadata is current and -up to date. 
You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. -\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of -your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. -} -} - -\seealso{ -Other clean: -\code{\link{tar_delete}()}, -\code{\link{tar_destroy}()}, -\code{\link{tar_invalidate}()}, -\code{\link{tar_prune_list}()}, -\code{\link{tar_prune}()} -} -\concept{clean} diff --git a/tests/testthat/test-tar_unversion.R b/tests/testthat/test-tar_unversion.R deleted file mode 100644 index a9195306f..000000000 --- a/tests/testthat/test-tar_unversion.R +++ /dev/null @@ -1,71 +0,0 @@ -tar_test("multiplication works", { - skip_cran() - lines <- c( - paste0( - "name|type|data|command|depend|seed|path|time|size|bytes|format|", - "repository|iteration|parent|children|seconds|warnings|error" - ), - "f|function|42ed6c6cf429ec42", - "resources2|object|c7fa586ec71716f7", - "resources|object|40cf4ff97d03671e", - paste0( - "z2|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-2091466813", - "|bucket=targets-test-versioned*region=NULL*key=_targets/objects/z2*", - "endpoint=TlVMTA*version=4MeJDr09__xWul7SY4p40bB30UpV_sfT|t19648.", - "5414026703s||50|rds|aws|vector|||0.001||" - ), - paste0( - "x|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999", - "|-1032428690||t19648.5414097547s|ded833868582137a|50", - "|rds|local|vector|||0||" - ), - paste0( - "y|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-1963496355|", - "bucket=targets-test-unversioned*region=NULL*key=_targets/objects/y", - "*endpoint=TlVMTA*version=|t19648.5414098716s||50|rds|aws|vector|||0||" - ), - paste0( - "z|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-499386612|", - 
"bucket=targets-test-unversioned*region=NULL*key=_targets/objects/z*", - "endpoint=TlVMTA*version=|t19648.5414154381s||50|rds|aws|vector|||0||" - ), - paste0( - "y2|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-62670671|", - "bucket=targets-test-versioned*region=NULL*key=_targets/objects/y2*", - "endpoint=TlVMTA*version=P5eyZdO.JwR__kS5fdxWcBgJLH4BmFIP|", - "t19648.5414172763s||50|rds|aws|vector|||0.001||" - ) - ) - dir_create(path_meta_dir(path_store_default())) - writeLines(lines, path_meta(path_store_default())) - before <- tar_meta() - tar_unversion(names = tidyselect::any_of(c("x", "z2"))) - after <- tar_meta() - expect_equal(before$name, after$name) - names <- c( - c( - "f", - "resources", - "resources2", - "x", - "y", - "y2", - "z" - ) - ) - for (name in names) { - index <- which(before$name == name) - expect_equal(before$path[[index]], after$path[[index]]) - } - before <- before$path[[which(before$name == "z2")]] - after <- after$path[[which(after$name == "z2")]] - for (index in which(!grepl("^version=", before))) { - expect_equal(before[index], after[index]) - } - index <- which(grepl("^version=", before)) - expect_false(before[index] == after[index]) - expect_equal(after[index], "version=") - expect_gt(nchar(before[index]), nchar(after[index])) - expect_equal(nchar(store_aws_version(before)), 32L) - expect_null(store_aws_version(after)) -}) From d32d92797b45a9e38c32107766139d9412a97d6d Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 14:30:10 -0500 Subject: [PATCH 13/48] inventory class --- R/class_inventory.R | 39 +++++++++++++++++++++++++++ tests/testthat/test-class_inventory.R | 22 +++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 R/class_inventory.R create mode 100644 tests/testthat/test-class_inventory.R diff --git a/R/class_inventory.R b/R/class_inventory.R new file mode 100644 index 000000000..d2ccf67a1 --- /dev/null +++ b/R/class_inventory.R @@ -0,0 +1,39 @@ +inventory_init <- function() { + out <- 
inventory_new() + out$reset() + out +} + +inventory_new <- function() { + inventory_class$new() +} + +inventory_class <- R6::R6Class( + classname = "tar_inventory", + class = FALSE, + portable = FALSE, + cloneable = FALSE, + public = list( + hashes = NULL, + reset = function() { + self$hashes <- new.env(parent = emptyenv()) + }, + list = function() { + names(self$hashes) + }, + hash = function(store) { + store_validate(store) + self$update(store) + self$hashes[[store$file$path]] + }, + update = function(store) { + store_validate(store) + self$hashes[[store$file$path]] <- store$file$hash + invisible() + }, + validate = function() { + tar_assert_envir(self$hashes) + invisible() + } + ) +) diff --git a/tests/testthat/test-class_inventory.R b/tests/testthat/test-class_inventory.R new file mode 100644 index 000000000..f4c431f56 --- /dev/null +++ b/tests/testthat/test-class_inventory.R @@ -0,0 +1,22 @@ +tar_test("inventory class is valid", { + expect_silent(inventory_init()$validate()) +}) + +tar_test("inventory class abstract class usage", { + x <- inventory_init() + store <- store_init() + store$file <- file_init(path = tempfile()) + writeLines("x", store$file$path) + file_ensure_hash(store$file) + expect_true(nzchar(store$file$hash)) + expect_equal(x$list(), character(0L)) + out <- x$hash(store) + out2 <- x$hash(store) + expect_equal(out, out2) + expect_equal(out, store$file$hash) + expect_equal(x$list(), store$file$path) + expect_silent(x$validate()) + x$reset() + expect_equal(x$list(), character(0L)) + expect_silent(x$validate()) +}) From cfee2e3fc135a3b409226239c949b04553c0fd2f Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 14:57:28 -0500 Subject: [PATCH 14/48] cache more nuanced inventory --- R/class_inventory.R | 51 ++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/R/class_inventory.R b/R/class_inventory.R index d2ccf67a1..f5352bb60 100644 --- a/R/class_inventory.R +++ 
b/R/class_inventory.R @@ -1,3 +1,6 @@ +# This is an abstract inventory. Methods cache_key() and +# cache_prefix() are for testing only. Only the subclasses +# have serious versions of these methods. inventory_init <- function() { out <- inventory_new() out$reset() @@ -14,25 +17,45 @@ inventory_class <- R6::R6Class( portable = FALSE, cloneable = FALSE, public = list( - hashes = NULL, - reset = function() { - self$hashes <- new.env(parent = emptyenv()) + cache = NULL, + cache_key = function(key, bucket, store) { + name <- self$name(key = key, bucket = bucket) + self$cache[[name]] <- paste(store$file$hash, "key") }, - list = function() { - names(self$hashes) + cache_prefix = function(key, bucket, store) { + name <- self$name(key = key, bucket = bucket) + self$cache[[name]] <- paste(store$file$hash, "prefix") }, - hash = function(store) { - store_validate(store) - self$update(store) - self$hashes[[store$file$path]] + name = function(key, bucket) { + paste(bucket, key, sep = "|") + } + hash = function(key, bucket, version, store) { + name <- self$name(key = key, bucket = bucket) + if (!exists(x = name, envir = self$cache)) { + if (is.null(version)) { + self$cache_prefix( + key = key, + bucket = bucket, + store = store + ) + } else { + self$cache_key( + key = key, + bucket = bucket, + store = store + ) + } + } + self$cache[[name]] }, - update = function(store) { - store_validate(store) - self$hashes[[store$file$path]] <- store$file$hash - invisible() + reset = function() { + self$cache <- new.env(parent = emptyenv()) + }, + list = function() { + names(self$cache) }, validate = function() { - tar_assert_envir(self$hashes) + tar_assert_envir(self$cache) invisible() } ) From fce1c78c9c58e24c3468ca187b305049a00723ea Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 14:57:51 -0500 Subject: [PATCH 15/48] reorder methods --- R/class_inventory.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/class_inventory.R 
b/R/class_inventory.R index f5352bb60..91282869d 100644 --- a/R/class_inventory.R +++ b/R/class_inventory.R @@ -28,7 +28,13 @@ inventory_class <- R6::R6Class( }, name = function(key, bucket) { paste(bucket, key, sep = "|") - } + }, + list = function() { + names(self$cache) + }, + reset = function() { + self$cache <- new.env(parent = emptyenv()) + }, hash = function(key, bucket, version, store) { name <- self$name(key = key, bucket = bucket) if (!exists(x = name, envir = self$cache)) { @@ -48,12 +54,6 @@ inventory_class <- R6::R6Class( } self$cache[[name]] }, - reset = function() { - self$cache <- new.env(parent = emptyenv()) - }, - list = function() { - names(self$cache) - }, validate = function() { tar_assert_envir(self$cache) invisible() From 10b8f29ab99ca3a3a871e6bf5803dbed37d0210f Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 14:57:59 -0500 Subject: [PATCH 16/48] rename a method --- R/class_inventory.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/class_inventory.R b/R/class_inventory.R index 91282869d..3e066b97b 100644 --- a/R/class_inventory.R +++ b/R/class_inventory.R @@ -35,7 +35,7 @@ inventory_class <- R6::R6Class( reset = function() { self$cache <- new.env(parent = emptyenv()) }, - hash = function(key, bucket, version, store) { + get = function(key, bucket, version, store) { name <- self$name(key = key, bucket = bucket) if (!exists(x = name, envir = self$cache)) { if (is.null(version)) { From d5fe53781340fa732b173867add81b6d5b3cd30d Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 15:04:22 -0500 Subject: [PATCH 17/48] Update abstract inventory class test --- tests/testthat/test-class_inventory.R | 36 +++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-class_inventory.R b/tests/testthat/test-class_inventory.R index f4c431f56..4c8a7668f 100644 --- a/tests/testthat/test-class_inventory.R +++ b/tests/testthat/test-class_inventory.R @@ -2,7 
+2,7 @@ tar_test("inventory class is valid", { expect_silent(inventory_init()$validate()) }) -tar_test("inventory class abstract class usage", { +tar_test("inventory abstract class basic methods", { x <- inventory_init() store <- store_init() store$file <- file_init(path = tempfile()) @@ -10,11 +10,37 @@ tar_test("inventory class abstract class usage", { file_ensure_hash(store$file) expect_true(nzchar(store$file$hash)) expect_equal(x$list(), character(0L)) - out <- x$hash(store) - out2 <- x$hash(store) + expect_equal(x$name(key = "x/y", bucket = "z"), "z|x/y") +}) + +tar_test("inventory abstract class null versions", { + x <- inventory_init() + store <- store_init() + store$file <- file_init(path = tempfile()) + writeLines("x", store$file$path) + file_ensure_hash(store$file) + out <- x$get(key = "x/y", bucket = "z", version = NULL, store = store) + out2 <- x$get(key = "x/y", bucket = "z", version = "abc123", store = store) + expect_equal(out, out2) + expect_equal(out, paste(store$file$hash, "prefix")) + expect_equal(x$list(), "z|x/y") + expect_silent(x$validate()) + x$reset() + expect_equal(x$list(), character(0L)) + expect_silent(x$validate()) +}) + +tar_test("inventory abstract class null versions", { + x <- inventory_init() + store <- store_init() + store$file <- file_init(path = tempfile()) + writeLines("x", store$file$path) + file_ensure_hash(store$file) + out <- x$get(key = "x/y", bucket = "z", version = "abc123", store = store) + out2 <- x$get(key = "x/y", bucket = "z", version = NULL, store = store) expect_equal(out, out2) - expect_equal(out, store$file$hash) - expect_equal(x$list(), store$file$path) + expect_equal(out, paste(store$file$hash, "key")) + expect_equal(x$list(), "z|x/y") expect_silent(x$validate()) x$reset() expect_equal(x$list(), character(0L)) From d6d532abcf9ee618d8de8d0495ffbc1db0ae9aa7 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 15:41:26 -0500 Subject: [PATCH 18/48] Sketch AWS inventory class --- R/utils_aws.R | 3 
++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/utils_aws.R b/R/utils_aws.R index 65fd4ee14..9acaed027 100644 --- a/R/utils_aws.R +++ b/R/utils_aws.R @@ -68,7 +68,6 @@ aws_s3_list_etags <- function( verbose = TRUE, region = NULL, endpoint = NULL, - version = NULL, args = list(), max_tries = NULL, seconds_timeout = NULL, @@ -85,6 +84,8 @@ aws_s3_list_etags <- function( ) args$Bucket <- bucket args$Prefix <- prefix + page_size <- page_size %|||% 1000L + verbose <- verbose %|||% TRUE args <- supported_args(fun = client$list_objects_v2, args = args) if (verbose) { tar_message_run( From 0749a82bb18d119fab162e08e6c89d94f5b74475 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 15:42:33 -0500 Subject: [PATCH 19/48] add aws inventory class (sketch) --- R/class_inventory_aws.R | 63 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 R/class_inventory_aws.R diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R new file mode 100644 index 000000000..484db102c --- /dev/null +++ b/R/class_inventory_aws.R @@ -0,0 +1,63 @@ +# This is an abstract inventory. Methods cache_key() and +# cache_prefix() are for testing only. Only the subclasses +# have serious versions of these methods. 
+inventory_aws_init <- function() { + out <- inventory_new() + out$reset() + out +} + +inventory_aws_new <- function() { + inventory_aws_class$new() +} + +inventory_aws_class <- R6::R6Class( + classname = "tar_inventory_aws", + inherit = inventory_class, + class = FALSE, + portable = FALSE, + cloneable = FALSE, + public = list( + cache_key = function(key, bucket, store) { + path <- store$file$path + aws <- store$resources$aws + head <- aws_s3_head( + key = store_aws_key(path), + bucket = store_aws_bucket(path), + region = store_aws_region(path), + endpoint = store_aws_endpoint(path), + version = store_aws_version_use(store, path), + args = aws$args, + max_tries = aws$max_tries, + seconds_timeout = aws$seconds_timeout, + close_connection = aws$close_connection, + s3_force_path_style = aws$s3_force_path_style + ) + digest_chr64(head$ETag) + name <- self$name(key = key, bucket = bucket) + self$cache[[name]] <- digest_chr64(head$ETag) + }, + cache_prefix = function(key, bucket, store) { + path <- store$file$path + bucket <- store_aws_bucket(path) + aws <- store$resources$aws + results <- aws_s3_list_etags( + prefix = dirname(store_aws_key(path)), + bucket = bucket, + page_size = aws$page_size, + verbose = aws$verbose, + region = store_aws_region(path), + endpoint = store_aws_endpoint(path), + args = aws$args, + max_tries = aws$max_tries, + seconds_timeout = aws$seconds_timeout, + close_connection = aws$close_connection, + s3_force_path_style = aws$s3_force_path_style + ) + for (key in names(results)) { + name <- self$name(key = key, bucket = bucket) + self$cache[[name]] <- digest_chr64(results[[key]]) + } + } + ) +) From f95eb3f848b56c352680a6526e7a454bfe573786 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 15:44:12 -0500 Subject: [PATCH 20/48] Only get version once --- R/class_inventory.R | 3 ++- R/class_inventory_aws.R | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/class_inventory.R b/R/class_inventory.R index 
3e066b97b..b375818ab 100644 --- a/R/class_inventory.R +++ b/R/class_inventory.R @@ -18,7 +18,7 @@ inventory_class <- R6::R6Class( cloneable = FALSE, public = list( cache = NULL, - cache_key = function(key, bucket, store) { + cache_key = function(key, bucket, version, store) { name <- self$name(key = key, bucket = bucket) self$cache[[name]] <- paste(store$file$hash, "key") }, @@ -48,6 +48,7 @@ inventory_class <- R6::R6Class( self$cache_key( key = key, bucket = bucket, + version = version, store = store ) } diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index 484db102c..4f1767704 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ -18,7 +18,7 @@ inventory_aws_class <- R6::R6Class( portable = FALSE, cloneable = FALSE, public = list( - cache_key = function(key, bucket, store) { + cache_key = function(key, bucket, version, store) { path <- store$file$path aws <- store$resources$aws head <- aws_s3_head( @@ -26,7 +26,7 @@ inventory_aws_class <- R6::R6Class( bucket = store_aws_bucket(path), region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version_use(store, path), + version = version, args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, From f3d0e91c94745aa7cbf96fab6ff167abc9206b68 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 16:25:52 -0500 Subject: [PATCH 21/48] test aws inventory class --- R/class_aws.R | 3 +- R/class_gcp.R | 3 +- R/class_inventory_aws.R | 9 +- tests/aws/test-class_inventory_aws.R | 161 +++++++++++++++++++++++++++ tests/testthat/test-class_aws.R | 8 +- tests/testthat/test-class_gcp.R | 8 +- 6 files changed, 171 insertions(+), 21 deletions(-) create mode 100644 tests/aws/test-class_inventory_aws.R diff --git a/R/class_aws.R b/R/class_aws.R index a398b4e19..2c761f1c6 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -103,8 +103,7 @@ store_aws_version <- function(path) { store_aws_version_use <- function(store, path) { if_any( - 
is.null(store$resources$aws$version) || - store$resources$aws$version == "latest", + store$resources$aws$version == "meta", store_aws_version(path), NULL ) diff --git a/R/class_gcp.R b/R/class_gcp.R index fe9eb8f43..b78983a26 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -59,8 +59,7 @@ store_gcp_version <- function(path) { store_gcp_version_use <- function(store, path) { if_any( - is.null(store$resources$gcp$version) || - store$resources$gcp$version == "latest", + store$resources$gcp$version == "meta", store_gcp_version(path), NULL ) diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index 4f1767704..b23536e09 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ -2,7 +2,7 @@ # cache_prefix() are for testing only. Only the subclasses # have serious versions of these methods. inventory_aws_init <- function() { - out <- inventory_new() + out <- inventory_aws_new() out$reset() out } @@ -22,8 +22,8 @@ inventory_aws_class <- R6::R6Class( path <- store$file$path aws <- store$resources$aws head <- aws_s3_head( - key = store_aws_key(path), - bucket = store_aws_bucket(path), + key = key, + bucket = bucket, region = store_aws_region(path), endpoint = store_aws_endpoint(path), version = version, @@ -39,10 +39,9 @@ inventory_aws_class <- R6::R6Class( }, cache_prefix = function(key, bucket, store) { path <- store$file$path - bucket <- store_aws_bucket(path) aws <- store$resources$aws results <- aws_s3_list_etags( - prefix = dirname(store_aws_key(path)), + prefix = dirname(key), bucket = bucket, page_size = aws$page_size, verbose = aws$verbose, diff --git a/tests/aws/test-class_inventory_aws.R b/tests/aws/test-class_inventory_aws.R new file mode 100644 index 000000000..d4c46f27b --- /dev/null +++ b/tests/aws/test-class_inventory_aws.R @@ -0,0 +1,161 @@ +tar_test("inventory_aws class with versioning from the buckets", { + bucket1 <- random_bucket_name() + bucket2 <- random_bucket_name() + client <- paws.storage::s3() + client$create_bucket(Bucket = 
bucket1) + client$create_bucket(Bucket = bucket2) + client$put_bucket_versioning( + Bucket = bucket2, + VersioningConfiguration = list(Status = "Enabled") + ) + on.exit({ + aws_s3_delete_bucket(bucket1) + aws_s3_delete_bucket(bucket2) + }) + head1 <- list() + head2 <- list() + prefix <- path_objects_dir(path_store_default()) + for (key in file.path(prefix, c("w", "x", "y", "z"))) { + head1[[key]] <- client$put_object( + Body = charToRaw(key), + Key = key, + Bucket = bucket1 + ) + head2[[key]] <- client$put_object( + Body = charToRaw(key), + Key = key, + Bucket = bucket2 + ) + } + inventory1 <- inventory_aws_init() + inventory2 <- inventory_aws_init() + resources <- tar_resources( + aws = tar_resources_aws( + bucket = bucket1, + prefix = path_store_default() + ) + ) + store <- store_init(repository = "aws", resources = resources) + store$file$path <- store_produce_aws_path( + store = store, + name = "x", + path_store = path_store_default() + ) + key <- store_aws_key(store$file$path) + for (index in seq_len(2L)) { + out1 <- inventory1$get( + key = key, + bucket = bucket1, + version = head1[[key]]$VersionId %||% NULL, + store = store + ) + out2 <- inventory2$get( + key = key, + bucket = bucket2, + version = head2[[key]]$VersionId %||% NULL, + store = store + ) + expect_equal(out1, out2) + expect_false(anyNA(out1)) + expect_true(is.character(out1)) + expect_equal(length(out1), 1L) + expect_true(nzchar(out1)) + expect_equal(length(as.list(inventory1$cache)), 4L) + expect_equal(length(as.list(inventory2$cache)), 1L) + } +}) + +tar_test("inventory_aws class with versioning from resource settings", { + bucket1 <- random_bucket_name() + bucket2 <- random_bucket_name() + client <- paws.storage::s3() + client$create_bucket(Bucket = bucket1) + client$create_bucket(Bucket = bucket2) + client$put_bucket_versioning( + Bucket = bucket1, + VersioningConfiguration = list(Status = "Enabled") + ) + client$put_bucket_versioning( + Bucket = bucket2, + VersioningConfiguration = 
list(Status = "Enabled") + ) + on.exit({ + aws_s3_delete_bucket(bucket1) + aws_s3_delete_bucket(bucket2) + }) + head1 <- list() + head2 <- list() + prefix <- path_objects_dir(path_store_default()) + for (key in file.path(prefix, c("w", "x", "y", "z"))) { + head1[[key]] <- client$put_object( + Body = charToRaw(key), + Key = key, + Bucket = bucket1 + ) + head2[[key]] <- client$put_object( + Body = charToRaw(key), + Key = key, + Bucket = bucket2 + ) + expect_true(nzchar(head1[[key]]$VersionId)) + expect_true(nzchar(head2[[key]]$VersionId)) + } + inventory1 <- inventory_aws_init() + inventory2 <- inventory_aws_init() + resources1 <- tar_resources( + aws = tar_resources_aws( + bucket = bucket1, + prefix = path_store_default(), + version = "latest" + ) + ) + resources2 <- tar_resources( + aws = tar_resources_aws( + bucket = bucket2, + prefix = path_store_default(), + version = "meta" + ) + ) + store1 <- store_init(repository = "aws", resources = resources1) + store2 <- store_init(repository = "aws", resources = resources2) + store1$file$path <- store_produce_aws_path( + store = store1, + name = "x", + path_store = path_store_default() + ) + store2$file$path <- store_produce_aws_path( + store = store2, + name = "x", + path_store = path_store_default() + ) + key <- store_aws_key(store1$file$path) + store1$file$path <- c( + store1$file$path, + paste0("version=", head1[[key]]$VersionId) + ) + store2$file$path <- c( + store2$file$path, + paste0("version=", head2[[key]]$VersionId) + ) + for (index in seq_len(2L)) { + out1 <- inventory1$get( + key = key, + bucket = bucket1, + version = store_aws_version_use(store1, store1$file$path), + store = store1 + ) + out2 <- inventory2$get( + key = key, + bucket = bucket2, + version = store_aws_version_use(store2, store2$file$path), + store = store2 + ) + expect_equal(out1, out2) + expect_false(anyNA(out1)) + expect_true(is.character(out1)) + expect_equal(length(out1), 1L) + expect_true(nzchar(out1)) + 
expect_equal(length(as.list(inventory1$cache)), 4L) + expect_equal(length(as.list(inventory2$cache)), 1L) + } +}) diff --git a/tests/testthat/test-class_aws.R b/tests/testthat/test-class_aws.R index 9ff6c35fc..758b09877 100644 --- a/tests/testthat/test-class_aws.R +++ b/tests/testthat/test-class_aws.R @@ -67,10 +67,6 @@ tar_test("store_aws_version()", { }) tar_test("store_aws_version_use()", { - target <- tar_target(x, 1, repository = "aws") - path <- c("bucket=b", "version=number") - expect_equal(store_aws_version_use(target$store, path), "number") - expect_null(store_aws_version_use(target$store, letters)) resources <- tar_resources( aws = tar_resources_aws( bucket = "x", @@ -79,7 +75,7 @@ tar_test("store_aws_version_use()", { ) ) target <- tar_target(x, 1, repository = "aws", resources = resources) - expect_equal(store_aws_version_use(target$store, path), "number") + expect_null(store_aws_version_use(target$store, path)) expect_null(store_aws_version_use(target$store, letters)) resources <- tar_resources( aws = tar_resources_aws( @@ -89,7 +85,7 @@ tar_test("store_aws_version_use()", { ) ) target <- tar_target(x, 1, repository = "aws", resources = resources) - expect_null(store_aws_version_use(target$store, path)) + expect_equal(store_aws_version_use(target$store, path), "number") expect_null(store_aws_version_use(target$store, letters)) }) diff --git a/tests/testthat/test-class_gcp.R b/tests/testthat/test-class_gcp.R index 8211083ac..8016d7f17 100644 --- a/tests/testthat/test-class_gcp.R +++ b/tests/testthat/test-class_gcp.R @@ -33,10 +33,6 @@ tar_test("store_gcp_version()", { }) tar_test("store_gcp_version_use()", { - target <- tar_target(x, 1, repository = "gcp") - path <- c("bucket=b", "version=number") - expect_equal(store_gcp_version_use(target$store, path), "number") - expect_null(store_gcp_version_use(target$store, letters)) resources <- tar_resources( gcp = tar_resources_gcp( bucket = "x", @@ -45,7 +41,7 @@ tar_test("store_gcp_version_use()", { ) ) 
target <- tar_target(x, 1, repository = "gcp", resources = resources) - expect_equal(store_gcp_version_use(target$store, path), "number") + expect_null(store_gcp_version_use(target$store, path)) expect_null(store_gcp_version_use(target$store, letters)) resources <- tar_resources( gcp = tar_resources_gcp( @@ -55,7 +51,7 @@ tar_test("store_gcp_version_use()", { ) ) target <- tar_target(x, 1, repository = "gcp", resources = resources) - expect_null(store_gcp_version_use(target$store, path)) + expect_equal(store_gcp_version_use(target$store, path), "number") expect_null(store_gcp_version_use(target$store, letters)) }) From 0e0ad09a2320c6050fd161ecfb78e936f653a29b Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 16:27:07 -0500 Subject: [PATCH 22/48] coverage --- R/class_inventory_aws.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index b23536e09..515a5dc72 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ -1,6 +1,5 @@ -# This is an abstract inventory. Methods cache_key() and -# cache_prefix() are for testing only. Only the subclasses -# have serious versions of these methods. +# Covered in tests/aws/test-class_inventory_aws.R. 
+# nocov start inventory_aws_init <- function() { out <- inventory_aws_new() out$reset() @@ -60,3 +59,4 @@ inventory_aws_class <- R6::R6Class( } ) ) +# nocov end From 6bc7bf1adc23edd152b495fc40ab390644e162a2 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Fri, 10 Nov 2023 16:27:47 -0500 Subject: [PATCH 23/48] Fix tests --- tests/testthat/test-class_aws.R | 1 + tests/testthat/test-class_gcp.R | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/testthat/test-class_aws.R b/tests/testthat/test-class_aws.R index 758b09877..0593466c8 100644 --- a/tests/testthat/test-class_aws.R +++ b/tests/testthat/test-class_aws.R @@ -67,6 +67,7 @@ tar_test("store_aws_version()", { }) tar_test("store_aws_version_use()", { + path <- c("bucket=b", "version=number") resources <- tar_resources( aws = tar_resources_aws( bucket = "x", diff --git a/tests/testthat/test-class_gcp.R b/tests/testthat/test-class_gcp.R index 8016d7f17..a02c19156 100644 --- a/tests/testthat/test-class_gcp.R +++ b/tests/testthat/test-class_gcp.R @@ -33,6 +33,7 @@ tar_test("store_gcp_version()", { }) tar_test("store_gcp_version_use()", { + path <- c("bucket=b", "version=number") resources <- tar_resources( gcp = tar_resources_gcp( bucket = "x", From f1a2acd607c0fe449996c54675534c7deee5deff Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 07:51:56 -0500 Subject: [PATCH 24/48] revert version setting --- R/class_inventory.R | 2 +- R/class_resources_aws.R | 41 ++++++++++++------------- R/class_resources_gcp.R | 15 +++------ R/tar_read.R | 2 +- R/tar_resources_aws.R | 12 -------- R/tar_resources_gcp.R | 3 -- man/tar_resources_aws.Rd | 11 ------- man/tar_resources_gcp.Rd | 11 ------- tests/aws/test-class_aws_qs.R | 18 +++++++++-- tests/aws/test-delete.R | 6 +++- tests/testthat/test-tar_resources_aws.R | 39 ----------------------- tests/testthat/test-tar_resources_gcp.R | 39 ----------------------- 12 files changed, 47 insertions(+), 152 deletions(-) diff --git a/R/class_inventory.R 
b/R/class_inventory.R index b375818ab..3e19dda5d 100644 --- a/R/class_inventory.R +++ b/R/class_inventory.R @@ -52,7 +52,7 @@ inventory_class <- R6::R6Class( store = store ) } - } + } self$cache[[name]] }, validate = function() { diff --git a/R/class_resources_aws.R b/R/class_resources_aws.R index d9149d625..88a407c6b 100644 --- a/R/class_resources_aws.R +++ b/R/class_resources_aws.R @@ -4,7 +4,6 @@ resources_aws_init <- function( region = NULL, endpoint = NULL, s3_force_path_style = NULL, - version = "latest", part_size = 5 * (2 ^ 20), page_size = 1000L, max_tries = NULL, @@ -19,7 +18,6 @@ resources_aws_init <- function( region = region, endpoint = endpoint, s3_force_path_style = s3_force_path_style, - version = version, part_size = part_size, page_size = page_size, max_tries = max_tries, @@ -36,7 +34,6 @@ resources_aws_new <- function( region = NULL, endpoint = NULL, s3_force_path_style = NULL, - version = NULL, part_size = NULL, page_size = NULL, max_tries = NULL, @@ -50,7 +47,6 @@ resources_aws_new <- function( force(region) force(endpoint) force(s3_force_path_style) - force(version) force(part_size) force(page_size) force(max_tries) @@ -63,29 +59,32 @@ resources_aws_new <- function( #' @export resources_validate.tar_resources_aws <- function(resources) { - for (field in c("bucket", "prefix", "version")) { - tar_assert_scalar(resources[[field]]) - tar_assert_chr(resources[[field]]) - tar_assert_none_na(resources[[field]]) - tar_assert_nzchar(resources[[field]]) + for (field in c("bucket", "prefix")) { + msg <- paste("invalid AWS S3", field) + tar_assert_scalar(resources[[field]], msg = msg) + tar_assert_chr(resources[[field]], msg = msg) + tar_assert_none_na(resources[[field]], msg = msg) + tar_assert_nzchar(resources[[field]], msg = msg) } for (field in c("region", "endpoint")) { - tar_assert_scalar(resources[[field]] %|||% "x") - tar_assert_chr(resources[[field]] %|||% "x") - tar_assert_none_na(resources[[field]] %|||% "x") + msg <- paste("invalid AWS S3", 
field) + tar_assert_scalar(resources[[field]] %|||% "x", msg = msg) + tar_assert_chr(resources[[field]] %|||% "x", msg = msg) + tar_assert_none_na(resources[[field]] %|||% "x", msg = msg) } for (field in c("part_size", "page_size", "max_tries", "seconds_timeout")) { - tar_assert_scalar(resources[[field]] %|||% 1L) - tar_assert_dbl(resources[[field]] %|||% 1L) - tar_assert_none_na(resources[[field]] %|||% 1L) - tar_assert_ge(resources[[field]] %|||% 1L, 0L) + msg <- paste("invalid AWS S3", field) + tar_assert_scalar(resources[[field]] %|||% 1L, msg = msg) + tar_assert_dbl(resources[[field]] %|||% 1L, msg = msg) + tar_assert_none_na(resources[[field]] %|||% 1L, msg = msg) + tar_assert_ge(resources[[field]] %|||% 1L, 0L, msg = msg) } for (field in c("close_connection", "s3_force_path_style", "verbose")) { - tar_assert_scalar(resources[[field]] %|||% TRUE) - tar_assert_lgl(resources[[field]] %|||% TRUE) - tar_assert_none_na(resources[[field]] %|||% TRUE) + msg <- paste("invalid AWS S3", field) + tar_assert_scalar(resources[[field]] %|||% TRUE, msg = msg) + tar_assert_lgl(resources[[field]] %|||% TRUE, msg = msg) + tar_assert_none_na(resources[[field]] %|||% TRUE, msg = msg) } - tar_assert_in(resources$version, c("latest", "meta")) resources_aws_validate_args(resources$args) } @@ -100,7 +99,7 @@ resources_aws_validate_args <- function(args) { setdiff(names(formals(tar_resources_aws)), "..."), "bucket", "Bucket", "key", "Key", "prefix", "region", "part_size", "endpoint", - "version", "VersionId", "body", "Body", + "VersionId", "body", "Body", "metadata", "Metadata", "UploadId", "MultipartUpload", "PartNumber" ) diff --git a/R/class_resources_gcp.R b/R/class_resources_gcp.R index 8a3d19a2c..bafe7f303 100644 --- a/R/class_resources_gcp.R +++ b/R/class_resources_gcp.R @@ -1,7 +1,6 @@ resources_gcp_init <- function( bucket = NULL, prefix = tar_path_objects_dir_cloud(), - version = "latest", predefined_acl = "private", max_tries = 5L, verbose = FALSE @@ -9,7 +8,6 @@ 
resources_gcp_init <- function( resources_gcp_new( bucket = bucket, prefix = prefix, - version = version, predefined_acl = predefined_acl, max_tries = max_tries, verbose = verbose @@ -19,14 +17,12 @@ resources_gcp_init <- function( resources_gcp_new <- function( bucket = NULL, prefix = NULL, - version = NULL, predefined_acl = NULL, max_tries = NULL, verbose = NULL ) { force(bucket) force(prefix) - force(version) force(predefined_acl) force(max_tries) force(verbose) @@ -36,18 +32,17 @@ resources_gcp_new <- function( #' @export resources_validate.tar_resources_gcp <- function(resources) { for (field in c("bucket", "prefix", "predefined_acl")) { - message <- paste("GCP resources require a valid", field) - tar_assert_scalar(resources[[field]], msg = message) - tar_assert_chr(resources[[field]], msg = message) - tar_assert_none_na(resources[[field]], msg = message) - tar_assert_nzchar(resources[[field]], msg = message) + msg <- paste("invalid GCP GCS", field) + tar_assert_scalar(resources[[field]], msg = msg) + tar_assert_chr(resources[[field]], msg = msg) + tar_assert_none_na(resources[[field]], msg = msg) + tar_assert_nzchar(resources[[field]], msg = msg) } tar_assert_scalar(resources$max_tries %|||% 1L) tar_assert_dbl(resources$max_tries %|||% 1L) tar_assert_none_na(resources$max_tries %|||% 1L) tar_assert_ge(resources$max_tries %|||% 1L, 0L) tar_assert_scalar(resources$verbose) - tar_assert_in(resources$version, c("latest", "meta")) tar_assert_lgl(resources$verbose) } diff --git a/R/tar_read.R b/R/tar_read.R index 9a4caae17..5778cd124 100644 --- a/R/tar_read.R +++ b/R/tar_read.R @@ -15,7 +15,7 @@ #' #' However, by default, #' `targets` *uses* only the latest version in the bucket. 
-#' You may instead want to +#' You may instead want to #' use the specific version of the target recorded in the local metadata #' (for example, if you previously committed the metadata file #' `_targets/meta/meta` to version control, and now you want to roll diff --git a/R/tar_resources_aws.R b/R/tar_resources_aws.R index 4cbbee8b0..05f4e3a01 100644 --- a/R/tar_resources_aws.R +++ b/R/tar_resources_aws.R @@ -42,15 +42,6 @@ #' incompatibility. #' @param s3_force_path_style Logical of length 1, whether to use path-style #' addressing for S3 requests. -#' @param version Character of length 1: `"latest"` to read the latest -#' version of the target in the bucket (default), or `"meta"` to -#' read the version recorded in the metadata. This affects how `targets` -#' downloads target data and makes sure it is up to date. `"latest"` -#' is sufficient for most cases. Use `"meta"` if you are reverting to -#' a historical copy of the metadata (`_targets/meta/meta`) and wish to use -#' `targets` to use the corresponding old copies of versioned data in a -#' versioned bucket. The `version` argument is only applicable if -#' the bucket has versioning enabled. #' @param part_size Positive numeric of length 1, number of bytes #' for each part of a multipart upload. (Except the last part, #' which is the remainder.) 
In a multipart upload, each part @@ -107,7 +98,6 @@ tar_resources_aws <- function( s3_force_path_style = targets::tar_option_get( "resources" )$aws$s3_force_path_style, - version = targets::tar_option_get("resources")$aws$version, part_size = targets::tar_option_get("resources")$aws$part_size, page_size = targets::tar_option_get("resources")$aws$page_size, max_tries = targets::tar_option_get("resources")$aws$max_tries, @@ -121,7 +111,6 @@ tar_resources_aws <- function( prefix <- path_store_default() } prefix <- prefix %|||% targets::tar_path_objects_dir_cloud() - version <- version %|||% "latest" part_size <- part_size %|||% (5 * (2 ^ 20)) page_size <- page_size %|||% 1000L verbose <- verbose %|||% TRUE @@ -136,7 +125,6 @@ tar_resources_aws <- function( region = region, endpoint = endpoint, s3_force_path_style = s3_force_path_style, - version = version, part_size = part_size, page_size = page_size, max_tries = max_tries, diff --git a/R/tar_resources_gcp.R b/R/tar_resources_gcp.R index 8a98fc8e6..8dce358bf 100644 --- a/R/tar_resources_gcp.R +++ b/R/tar_resources_gcp.R @@ -36,7 +36,6 @@ tar_resources_gcp <- function( bucket = targets::tar_option_get("resources")$gcp$bucket, prefix = targets::tar_option_get("resources")$gcp$prefix, - version = targets::tar_option_get("resources")$gcp$version, predefined_acl = targets::tar_option_get("resources")$gcp$predefined_acl, max_tries = targets::tar_option_get("resources")$gcp$max_tries, verbose = targets::tar_option_get("resources")$gcp$verbose @@ -45,13 +44,11 @@ tar_resources_gcp <- function( tar_warn_prefix() prefix <- path_store_default() } - version <- version %|||% "latest" predefined_acl <- predefined_acl %|||% "private" verbose <- verbose %|||% FALSE out <- resources_gcp_init( bucket = bucket, prefix = prefix, - version = version, predefined_acl = predefined_acl, max_tries = max_tries, verbose = verbose diff --git a/man/tar_resources_aws.Rd b/man/tar_resources_aws.Rd index 239c5a93f..946a9a4d7 100644 --- 
a/man/tar_resources_aws.Rd +++ b/man/tar_resources_aws.Rd @@ -10,7 +10,6 @@ tar_resources_aws( region = targets::tar_option_get("resources")$aws$region, endpoint = targets::tar_option_get("resources")$aws$endpoint, s3_force_path_style = targets::tar_option_get("resources")$aws$s3_force_path_style, - version = targets::tar_option_get("resources")$aws$version, part_size = targets::tar_option_get("resources")$aws$part_size, page_size = targets::tar_option_get("resources")$aws$page_size, max_tries = targets::tar_option_get("resources")$aws$max_tries, @@ -57,16 +56,6 @@ incompatibility.} \item{s3_force_path_style}{Logical of length 1, whether to use path-style addressing for S3 requests.} -\item{version}{Character of length 1: \code{"latest"} to read the latest -version of the target in the bucket (default), or \code{"meta"} to -read the version recorded in the metadata. This affects how \code{targets} -downloads target data and makes sure it is up to date. \code{"latest"} -is sufficient for most cases. Use \code{"meta"} if you are reverting to -a historical copy of the metadata (\verb{_targets/meta/meta}) and wish to use -\code{targets} to use the corresponding old copies of versioned data in a -versioned bucket. The \code{version} argument is only applicable if -the bucket has versioning enabled.} - \item{part_size}{Positive numeric of length 1, number of bytes for each part of a multipart upload. (Except the last part, which is the remainder.) 
In a multipart upload, each part diff --git a/man/tar_resources_gcp.Rd b/man/tar_resources_gcp.Rd index bad55f3d7..ad67a2992 100644 --- a/man/tar_resources_gcp.Rd +++ b/man/tar_resources_gcp.Rd @@ -8,7 +8,6 @@ Google Cloud Storage (GCS)} tar_resources_gcp( bucket = targets::tar_option_get("resources")$gcp$bucket, prefix = targets::tar_option_get("resources")$gcp$prefix, - version = targets::tar_option_get("resources")$gcp$version, predefined_acl = targets::tar_option_get("resources")$gcp$predefined_acl, max_tries = targets::tar_option_get("resources")$gcp$max_tries, verbose = targets::tar_option_get("resources")$gcp$verbose @@ -27,16 +26,6 @@ In the future, \code{targets} will begin requiring explicitly user-supplied prefixes. (This last note was added on 2023-08-24: \code{targets} version 1.2.2.9000.)} -\item{version}{Character of length 1: \code{"latest"} to read the latest -version of the target in the bucket (default), or \code{"meta"} to -read the version recorded in the metadata. This affects how \code{targets} -downloads target data and makes sure it is up to date. \code{"latest"} -is sufficient for most cases. Use \code{"meta"} if you are reverting to -a historical copy of the metadata (\verb{_targets/meta/meta}) and wish to use -\code{targets} to use the corresponding old copies of versioned data in a -versioned bucket. The \code{version} argument is only applicable if -the bucket has versioning enabled.} - \item{predefined_acl}{Character of length 1, user access to the object. See \code{?googleCloudStorageR::gcs_upload} for possible values. 
Defaults to \code{"private"}.} diff --git a/tests/aws/test-class_aws_qs.R b/tests/aws/test-class_aws_qs.R index f5865e530..13b3ed36d 100644 --- a/tests/aws/test-class_aws_qs.R +++ b/tests/aws/test-class_aws_qs.R @@ -473,7 +473,11 @@ tar_test("aws_qs format versioning", { expr <- quote({ tar_option_set( resources = tar_resources( - aws = tar_resources_aws(bucket = !!bucket_name, prefix = "_targets") + aws = tar_resources_aws( + bucket = !!bucket_name, + prefix = "_targets", + version = "meta" + ) ) ) list( @@ -495,7 +499,11 @@ tar_test("aws_qs format versioning", { expr <- quote({ tar_option_set( resources = tar_resources( - aws = tar_resources_aws(bucket = !!bucket_name, prefix = "_targets") + aws = tar_resources_aws( + bucket = !!bucket_name, + prefix = "_targets", + version = "meta" + ) ) ) list( @@ -519,7 +527,11 @@ tar_test("aws_qs format versioning", { expr <- quote({ tar_option_set( resources = tar_resources( - aws = tar_resources_aws(bucket = !!bucket_name, prefix = "_targets") + aws = tar_resources_aws( + bucket = !!bucket_name, + prefix = "_targets", + version = "meta" + ) ) ) list( diff --git a/tests/aws/test-delete.R b/tests/aws/test-delete.R index 2c15f9e42..0e5fab325 100644 --- a/tests/aws/test-delete.R +++ b/tests/aws/test-delete.R @@ -71,7 +71,11 @@ tar_test("same with versioning", { expr <- quote({ tar_option_set( resources = tar_resources( - aws = tar_resources_aws(bucket = !!bucket_name, prefix = "_targets") + aws = tar_resources_aws( + bucket = !!bucket_name, + prefix = "_targets", + version = "meta" + ) ) ) write_file <- function(path) { diff --git a/tests/testthat/test-tar_resources_aws.R b/tests/testthat/test-tar_resources_aws.R index 28bc8f822..da1074784 100644 --- a/tests/testthat/test-tar_resources_aws.R +++ b/tests/testthat/test-tar_resources_aws.R @@ -249,42 +249,3 @@ tar_test("tar_resources_aws() page_size", { out <- tar_resources_aws() expect_equal(out$page_size, 3L) }) - -tar_test("tar_resources_aws() version", { - skip_cran() - 
skip_on_os("windows") - skip_if_not_installed("paws.storage") - tar_option_set( - resources = tar_resources( - aws = tar_resources_aws( - prefix = "x", - bucket = "x" - ) - ) - ) - out <- tar_resources_aws() - expect_equal(out$version, "latest") - tar_option_set( - resources = tar_resources( - aws = tar_resources_aws( - version = "meta", - prefix = "x", - bucket = "x" - ) - ) - ) - out <- tar_resources_aws() - expect_equal(out$version, "meta") - expect_error( - tar_option_set( - resources = tar_resources( - aws = tar_resources_aws( - version = "nope", - prefix = "x", - bucket = "x" - ) - ) - ), - class = "tar_condition_validate" - ) -}) diff --git a/tests/testthat/test-tar_resources_gcp.R b/tests/testthat/test-tar_resources_gcp.R index e80984f14..0f8fa4110 100644 --- a/tests/testthat/test-tar_resources_gcp.R +++ b/tests/testthat/test-tar_resources_gcp.R @@ -74,42 +74,3 @@ tar_test("tar_resources_gcp() wants a prefix", { class = "tar_condition_deprecate" ) }) - -tar_test("tar_resources_gcp() version", { - skip_cran() - skip_on_os("windows") - skip_if_not_installed("googleCloudStorageR") - tar_option_set( - resources = tar_resources( - gcp = tar_resources_gcp( - prefix = "x", - bucket = "x" - ) - ) - ) - out <- tar_resources_gcp() - expect_equal(out$version, "latest") - tar_option_set( - resources = tar_resources( - gcp = tar_resources_gcp( - version = "meta", - prefix = "x", - bucket = "x" - ) - ) - ) - out <- tar_resources_gcp() - expect_equal(out$version, "meta") - expect_error( - tar_option_set( - resources = tar_resources( - gcp = tar_resources_gcp( - version = "nope", - prefix = "x", - bucket = "x" - ) - ) - ), - class = "tar_condition_validate" - ) -}) From 1ba46b1593bdd49aacf5f247165fcd11ec12172a Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 07:53:15 -0500 Subject: [PATCH 25/48] revert some tests --- tests/aws/test-class_aws_qs.R | 9 +++------ tests/aws/test-class_inventory_aws.R | 6 ++---- tests/aws/test-delete.R | 3 +-- 3 files 
changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/aws/test-class_aws_qs.R b/tests/aws/test-class_aws_qs.R index 13b3ed36d..18696db4b 100644 --- a/tests/aws/test-class_aws_qs.R +++ b/tests/aws/test-class_aws_qs.R @@ -475,8 +475,7 @@ tar_test("aws_qs format versioning", { resources = tar_resources( aws = tar_resources_aws( bucket = !!bucket_name, - prefix = "_targets", - version = "meta" + prefix = "_targets" ) ) ) @@ -501,8 +500,7 @@ tar_test("aws_qs format versioning", { resources = tar_resources( aws = tar_resources_aws( bucket = !!bucket_name, - prefix = "_targets", - version = "meta" + prefix = "_targets" ) ) ) @@ -529,8 +527,7 @@ tar_test("aws_qs format versioning", { resources = tar_resources( aws = tar_resources_aws( bucket = !!bucket_name, - prefix = "_targets", - version = "meta" + prefix = "_targets" ) ) ) diff --git a/tests/aws/test-class_inventory_aws.R b/tests/aws/test-class_inventory_aws.R index d4c46f27b..60059c8ab 100644 --- a/tests/aws/test-class_inventory_aws.R +++ b/tests/aws/test-class_inventory_aws.R @@ -105,15 +105,13 @@ tar_test("inventory_aws class with versioning from resource settings", { resources1 <- tar_resources( aws = tar_resources_aws( bucket = bucket1, - prefix = path_store_default(), - version = "latest" + prefix = path_store_default() ) ) resources2 <- tar_resources( aws = tar_resources_aws( bucket = bucket2, - prefix = path_store_default(), - version = "meta" + prefix = path_store_default() ) ) store1 <- store_init(repository = "aws", resources = resources1) diff --git a/tests/aws/test-delete.R b/tests/aws/test-delete.R index 0e5fab325..7d40cdd29 100644 --- a/tests/aws/test-delete.R +++ b/tests/aws/test-delete.R @@ -73,8 +73,7 @@ tar_test("same with versioning", { resources = tar_resources( aws = tar_resources_aws( bucket = !!bucket_name, - prefix = "_targets", - version = "meta" + prefix = "_targets" ) ) ) From 297a95e6f1214c1902dfe7c2d101271571a2772e Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 
07:56:45 -0500 Subject: [PATCH 26/48] migrate tests --- R/class_aws.R | 16 ++++------------ R/class_aws_file.R | 2 +- R/class_gcp.R | 18 +++++------------- R/class_gcp_file.R | 2 +- tests/testthat/test-class_aws.R | 24 ------------------------ tests/testthat/test-class_gcp.R | 24 ------------------------ 6 files changed, 11 insertions(+), 75 deletions(-) diff --git a/R/class_aws.R b/R/class_aws.R index 2c761f1c6..0eedb88ac 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -101,14 +101,6 @@ store_aws_version <- function(path) { if_any(length(out) && nzchar(out), out, NULL) } -store_aws_version_use <- function(store, path) { - if_any( - store$resources$aws$version == "meta", - store_aws_version(path), - NULL - ) -} - store_aws_path_field <- function(path, pattern) { path <- store_aws_split_colon(path) keyvalue_field(x = path, pattern = pattern) @@ -146,7 +138,7 @@ store_read_object.tar_aws <- function(store) { file = scratch, region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version_use(store, path), + version = store_aws_version(path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, @@ -165,7 +157,7 @@ store_exist_object.tar_aws <- function(store, name = NULL) { bucket = store_aws_bucket(path), region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version_use(store, path), + version = store_aws_version(path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, @@ -188,7 +180,7 @@ store_delete_objects.tar_aws <- function(store, meta, batch_size, verbose) { subset$path, ~list( Key = store_aws_key(.x), - VersionId = store_aws_version_use(store, .x) + VersionId = store_aws_version(.x) ) ) message <- paste( @@ -275,7 +267,7 @@ store_aws_hash <- function(store) { bucket = store_aws_bucket(path), region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version_use(store, path), + version = 
store_aws_version(path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, diff --git a/R/class_aws_file.R b/R/class_aws_file.R index 2f85373c1..2626b79d7 100644 --- a/R/class_aws_file.R +++ b/R/class_aws_file.R @@ -66,7 +66,7 @@ store_read_object.tar_aws_file <- function(store) { file = scratch, region = store_aws_region(path), endpoint = store_aws_endpoint(path), - version = store_aws_version_use(store, path), + version = store_aws_version(path), args = aws$args, max_tries = aws$max_tries, seconds_timeout = aws$seconds_timeout, diff --git a/R/class_gcp.R b/R/class_gcp.R index b78983a26..9eeb2f0f1 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -57,14 +57,6 @@ store_gcp_version <- function(path) { if_any(length(out) && nzchar(out), out, NULL) } -store_gcp_version_use <- function(store, path) { - if_any( - store$resources$gcp$version == "meta", - store_gcp_version(path), - NULL - ) -} - store_gcp_path_field <- function(path, pattern) { keyvalue_field(x = path, pattern = pattern) } @@ -87,7 +79,7 @@ store_read_object.tar_gcp <- function(store) { key = key, bucket = bucket, file = scratch, - version = store_gcp_version_use(store, path), + version = store_gcp_version(path), verbose = store$resources$gcp$verbose, max_tries = store$resources$gcp$max_tries ) @@ -100,7 +92,7 @@ store_exist_object.tar_gcp <- function(store, name = NULL) { gcp_gcs_exists( key = store_gcp_key(path), bucket = store_gcp_bucket(path), - version = store_gcp_version_use(store, path), + version = store_gcp_version(path), verbose = store$resources$gcp$verbose %|||% FALSE, max_tries = store$resources$gcp$max_tries %|||% 5L ) @@ -111,7 +103,7 @@ store_delete_object.tar_gcp <- function(store, name = NULL) { path <- store$file$path key <- store_gcp_key(path) bucket <- store_gcp_bucket(path) - version <- store_gcp_version_use(store, path) + version <- store_gcp_version(path) message <- paste( "could not delete target %s from gcp bucket %s key %s.", "Either delete the 
object manually in the gcp web console", @@ -144,7 +136,7 @@ store_delete_objects.tar_gcp <- function(store, meta, batch_size, verbose) { for (index in seq_len(nrow(subset))) { path <- subset$path[[index]] key <- store_gcp_key(path) - version <- store_gcp_version_use(store, path) + version <- store_gcp_version(path) message <- paste( "could not object %s from gcp bucket %s.", "You may need to delete it manually.\nMessage: " @@ -225,7 +217,7 @@ store_gcp_hash <- function(store) { head <- gcp_gcs_head( key = store_gcp_key(path), bucket = store_gcp_bucket(path), - version = store_gcp_version_use(store, path), + version = store_gcp_version(path), verbose = store$resources$gcp$verbose %|||% FALSE, max_tries = store$resources$gcp$max_tries %|||% 5L ) diff --git a/R/class_gcp_file.R b/R/class_gcp_file.R index 50e6fc666..dbfaf50c0 100644 --- a/R/class_gcp_file.R +++ b/R/class_gcp_file.R @@ -53,7 +53,7 @@ store_read_object.tar_gcp_file <- function(store) { key = key, bucket = bucket, file = scratch, - version = store_gcp_version_use(store, path), + version = store_gcp_version(path), verbose = store$resources$gcp$verbose, max_tries = store$resources$gcp$max_tries ) diff --git a/tests/testthat/test-class_aws.R b/tests/testthat/test-class_aws.R index 0593466c8..980382773 100644 --- a/tests/testthat/test-class_aws.R +++ b/tests/testthat/test-class_aws.R @@ -66,30 +66,6 @@ tar_test("store_aws_version()", { expect_null(store_aws_version(letters)) }) -tar_test("store_aws_version_use()", { - path <- c("bucket=b", "version=number") - resources <- tar_resources( - aws = tar_resources_aws( - bucket = "x", - prefix = "y", - version = "latest" - ) - ) - target <- tar_target(x, 1, repository = "aws", resources = resources) - expect_null(store_aws_version_use(target$store, path)) - expect_null(store_aws_version_use(target$store, letters)) - resources <- tar_resources( - aws = tar_resources_aws( - bucket = "x", - prefix = "y", - version = "meta" - ) - ) - target <- tar_target(x, 1, 
repository = "aws", resources = resources) - expect_equal(store_aws_version_use(target$store, path), "number") - expect_null(store_aws_version_use(target$store, letters)) -}) - tar_test("store_aws_version() endpoint", { path <- c( "bucket=b", diff --git a/tests/testthat/test-class_gcp.R b/tests/testthat/test-class_gcp.R index a02c19156..53eb86c2a 100644 --- a/tests/testthat/test-class_gcp.R +++ b/tests/testthat/test-class_gcp.R @@ -32,30 +32,6 @@ tar_test("store_gcp_version()", { expect_null(store_gcp_version(letters)) }) -tar_test("store_gcp_version_use()", { - path <- c("bucket=b", "version=number") - resources <- tar_resources( - gcp = tar_resources_gcp( - bucket = "x", - prefix = "y", - version = "latest" - ) - ) - target <- tar_target(x, 1, repository = "gcp", resources = resources) - expect_null(store_gcp_version_use(target$store, path)) - expect_null(store_gcp_version_use(target$store, letters)) - resources <- tar_resources( - gcp = tar_resources_gcp( - bucket = "x", - prefix = "y", - version = "meta" - ) - ) - target <- tar_target(x, 1, repository = "gcp", resources = resources) - expect_equal(store_gcp_version_use(target$store, path), "number") - expect_null(store_gcp_version_use(target$store, letters)) -}) - tar_test("package detection", { skip_cran() target <- tar_target(x, "x_value", format = "feather", repository = "gcp") From 8f0691be3197219fbfca6f74c81719a1737139b9 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 08:15:21 -0500 Subject: [PATCH 27/48] redesign abstract inventory --- R/class_aws.R | 2 +- R/class_gcp.R | 2 +- R/class_inventory.R | 56 ++++++++++++--------------- tests/testthat/test-class_inventory.R | 46 +++------------------- 4 files changed, 32 insertions(+), 74 deletions(-) diff --git a/R/class_aws.R b/R/class_aws.R index 0eedb88ac..36a8b2b41 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -274,7 +274,7 @@ store_aws_hash <- function(store) { close_connection = aws$close_connection, s3_force_path_style = 
aws$s3_force_path_style ) - digest_chr64(head$ETag) + if_any(is.null(head), NULL, digest_chr64(head$ETag)) } # nocov end diff --git a/R/class_gcp.R b/R/class_gcp.R index 9eeb2f0f1..8203dd799 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -221,7 +221,7 @@ store_gcp_hash <- function(store) { verbose = store$resources$gcp$verbose %|||% FALSE, max_tries = store$resources$gcp$max_tries %|||% 5L ) - digest_chr64(head$md5) + if_any(is.null(head), NULL, digest_chr64(head$md5)) } # nocov end diff --git a/R/class_inventory.R b/R/class_inventory.R index 3e19dda5d..c850ce347 100644 --- a/R/class_inventory.R +++ b/R/class_inventory.R @@ -1,9 +1,10 @@ -# This is an abstract inventory. Methods cache_key() and -# cache_prefix() are for testing only. Only the subclasses +# This is an abstract inventory. The definitions of +# get_key(), get_bucket(), and set_cache() below +# are abstract and for testing only. Only the subclasses # have serious versions of these methods. inventory_init <- function() { out <- inventory_new() - out$reset() + out$reset_cache() out } @@ -18,43 +19,34 @@ inventory_class <- R6::R6Class( cloneable = FALSE, public = list( cache = NULL, - cache_key = function(key, bucket, version, store) { - name <- self$name(key = key, bucket = bucket) - self$cache[[name]] <- paste(store$file$hash, "key") + get_key = function(store) { + "example_key" }, - cache_prefix = function(key, bucket, store) { - name <- self$name(key = key, bucket = bucket) - self$cache[[name]] <- paste(store$file$hash, "prefix") + get_bucket = function(store) { + "example_bucket" }, - name = function(key, bucket) { + get_name = function(store) { + key <- self$get_key(store) + bucket <- self$get_bucket(store) paste(bucket, key, sep = "|") }, - list = function() { - names(self$cache) - }, - reset = function() { - self$cache <- new.env(parent = emptyenv()) - }, - get = function(key, bucket, version, store) { - name <- self$name(key = key, bucket = bucket) + get_cache = function(store) { + name <- 
self$get_name(store) if (!exists(x = name, envir = self$cache)) { - if (is.null(version)) { - self$cache_prefix( - key = key, - bucket = bucket, - store = store - ) - } else { - self$cache_key( - key = key, - bucket = bucket, - version = version, - store = store - ) - } + self$set_cache(store) } self$cache[[name]] }, + list_cache = function() { + names(self$cache) + }, + set_cache = function(store) { + name <- self$get_name(store) + self$cache[[name]] <- "example_hash" + }, + reset_cache = function() { + self$cache <- new.env(parent = emptyenv()) + }, validate = function() { tar_assert_envir(self$cache) invisible() diff --git a/tests/testthat/test-class_inventory.R b/tests/testthat/test-class_inventory.R index 4c8a7668f..3fb47281f 100644 --- a/tests/testthat/test-class_inventory.R +++ b/tests/testthat/test-class_inventory.R @@ -5,44 +5,10 @@ tar_test("inventory class is valid", { tar_test("inventory abstract class basic methods", { x <- inventory_init() store <- store_init() - store$file <- file_init(path = tempfile()) - writeLines("x", store$file$path) - file_ensure_hash(store$file) - expect_true(nzchar(store$file$hash)) - expect_equal(x$list(), character(0L)) - expect_equal(x$name(key = "x/y", bucket = "z"), "z|x/y") -}) - -tar_test("inventory abstract class null versions", { - x <- inventory_init() - store <- store_init() - store$file <- file_init(path = tempfile()) - writeLines("x", store$file$path) - file_ensure_hash(store$file) - out <- x$get(key = "x/y", bucket = "z", version = NULL, store = store) - out2 <- x$get(key = "x/y", bucket = "z", version = "abc123", store = store) - expect_equal(out, out2) - expect_equal(out, paste(store$file$hash, "prefix")) - expect_equal(x$list(), "z|x/y") - expect_silent(x$validate()) - x$reset() - expect_equal(x$list(), character(0L)) - expect_silent(x$validate()) -}) - -tar_test("inventory abstract class null versions", { - x <- inventory_init() - store <- store_init() - store$file <- file_init(path = tempfile()) - 
writeLines("x", store$file$path) - file_ensure_hash(store$file) - out <- x$get(key = "x/y", bucket = "z", version = "abc123", store = store) - out2 <- x$get(key = "x/y", bucket = "z", version = NULL, store = store) - expect_equal(out, out2) - expect_equal(out, paste(store$file$hash, "key")) - expect_equal(x$list(), "z|x/y") - expect_silent(x$validate()) - x$reset() - expect_equal(x$list(), character(0L)) - expect_silent(x$validate()) + expect_equal(x$list_cache(), character(0L)) + out <- x$get_cache(store) + expect_equal(x$list_cache(), "example_bucket|example_key") + expect_equal(out, "example_hash") + x$reset_cache() + expect_equal(x$list_cache(), character(0L)) }) From 7923ea80d2426531beaa3fa89c7a997c701bb307 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 08:17:49 -0500 Subject: [PATCH 28/48] redesign aws inventory --- R/class_inventory_aws.R | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index 515a5dc72..330cc7332 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ -17,30 +17,18 @@ inventory_aws_class <- R6::R6Class( portable = FALSE, cloneable = FALSE, public = list( - cache_key = function(key, bucket, version, store) { - path <- store$file$path - aws <- store$resources$aws - head <- aws_s3_head( - key = key, - bucket = bucket, - region = store_aws_region(path), - endpoint = store_aws_endpoint(path), - version = version, - args = aws$args, - max_tries = aws$max_tries, - seconds_timeout = aws$seconds_timeout, - close_connection = aws$close_connection, - s3_force_path_style = aws$s3_force_path_style - ) - digest_chr64(head$ETag) - name <- self$name(key = key, bucket = bucket) - self$cache[[name]] <- digest_chr64(head$ETag) + get_key = function(store) { + store_aws_key(store$file$path) + }, + get_bucket = function(store) { + store_aws_bucket(store$file$path) }, - cache_prefix = function(key, bucket, store) { + set_cache = 
function(key, bucket, store) { path <- store$file$path + bucket <- store_aws_bucket(path) aws <- store$resources$aws results <- aws_s3_list_etags( - prefix = dirname(key), + prefix = dirname(store_aws_key(path)), bucket = bucket, page_size = aws$page_size, verbose = aws$verbose, From 84266b8f49f7ebb6f1ef8b3aad9afea78a0cd639 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 08:36:13 -0500 Subject: [PATCH 29/48] Rewrite and test AWS inventory --- R/class_inventory.R | 14 ++- R/class_inventory_aws.R | 10 +- tests/aws/test-class_inventory_aws.R | 170 +++++--------------------- tests/testthat/test-class_inventory.R | 10 +- 4 files changed, 49 insertions(+), 155 deletions(-) diff --git a/R/class_inventory.R b/R/class_inventory.R index c850ce347..8da0d622e 100644 --- a/R/class_inventory.R +++ b/R/class_inventory.R @@ -19,20 +19,22 @@ inventory_class <- R6::R6Class( cloneable = FALSE, public = list( cache = NULL, + misses = NULL, get_key = function(store) { "example_key" }, get_bucket = function(store) { "example_bucket" }, - get_name = function(store) { - key <- self$get_key(store) - bucket <- self$get_bucket(store) + get_name = function(key, bucket) { paste(bucket, key, sep = "|") }, get_cache = function(store) { - name <- self$get_name(store) + key <- self$get_key(store) + bucket <- self$get_bucket(store) + name <- self$get_name(key = key, bucket = bucket) if (!exists(x = name, envir = self$cache)) { + self$misses <- (self$misses %|||% 0L) + 1L self$set_cache(store) } self$cache[[name]] @@ -41,7 +43,9 @@ inventory_class <- R6::R6Class( names(self$cache) }, set_cache = function(store) { - name <- self$get_name(store) + key <- self$get_key(store) + bucket <- self$get_bucket(store) + name <- self$get_name(key = key, bucket = bucket) self$cache[[name]] <- "example_hash" }, reset_cache = function() { diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index 330cc7332..c34476c62 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ 
-2,7 +2,7 @@ # nocov start inventory_aws_init <- function() { out <- inventory_aws_new() - out$reset() + out$reset_cache() out } @@ -23,12 +23,12 @@ inventory_aws_class <- R6::R6Class( get_bucket = function(store) { store_aws_bucket(store$file$path) }, - set_cache = function(key, bucket, store) { + set_cache = function(store) { path <- store$file$path - bucket <- store_aws_bucket(path) + bucket <- self$get_bucket(store) aws <- store$resources$aws results <- aws_s3_list_etags( - prefix = dirname(store_aws_key(path)), + prefix = dirname(self$get_key(store)), bucket = bucket, page_size = aws$page_size, verbose = aws$verbose, @@ -41,7 +41,7 @@ inventory_aws_class <- R6::R6Class( s3_force_path_style = aws$s3_force_path_style ) for (key in names(results)) { - name <- self$name(key = key, bucket = bucket) + name <- self$get_name(key = key, bucket = bucket) self$cache[[name]] <- digest_chr64(results[[key]]) } } diff --git a/tests/aws/test-class_inventory_aws.R b/tests/aws/test-class_inventory_aws.R index 60059c8ab..1be3ce399 100644 --- a/tests/aws/test-class_inventory_aws.R +++ b/tests/aws/test-class_inventory_aws.R @@ -1,159 +1,45 @@ tar_test("inventory_aws class with versioning from the buckets", { - bucket1 <- random_bucket_name() - bucket2 <- random_bucket_name() + bucket <- random_bucket_name() client <- paws.storage::s3() - client$create_bucket(Bucket = bucket1) - client$create_bucket(Bucket = bucket2) - client$put_bucket_versioning( - Bucket = bucket2, - VersioningConfiguration = list(Status = "Enabled") - ) - on.exit({ - aws_s3_delete_bucket(bucket1) - aws_s3_delete_bucket(bucket2) - }) - head1 <- list() - head2 <- list() + client$create_bucket(Bucket = bucket) + on.exit(aws_s3_delete_bucket(bucket)) prefix <- path_objects_dir(path_store_default()) + head <- list() for (key in file.path(prefix, c("w", "x", "y", "z"))) { - head1[[key]] <- client$put_object( + head[[key]] <- client$put_object( Body = charToRaw(key), Key = key, - Bucket = bucket1 - ) - head2[[key]] <- 
client$put_object( - Body = charToRaw(key), - Key = key, - Bucket = bucket2 + Bucket = bucket ) } - inventory1 <- inventory_aws_init() - inventory2 <- inventory_aws_init() + inventory <- inventory_aws_init() resources <- tar_resources( aws = tar_resources_aws( - bucket = bucket1, + bucket = bucket, prefix = path_store_default() ) ) store <- store_init(repository = "aws", resources = resources) - store$file$path <- store_produce_aws_path( - store = store, - name = "x", - path_store = path_store_default() - ) - key <- store_aws_key(store$file$path) - for (index in seq_len(2L)) { - out1 <- inventory1$get( - key = key, - bucket = bucket1, - version = head1[[key]]$VersionId %||% NULL, - store = store - ) - out2 <- inventory2$get( - key = key, - bucket = bucket2, - version = head2[[key]]$VersionId %||% NULL, - store = store - ) - expect_equal(out1, out2) - expect_false(anyNA(out1)) - expect_true(is.character(out1)) - expect_equal(length(out1), 1L) - expect_true(nzchar(out1)) - expect_equal(length(as.list(inventory1$cache)), 4L) - expect_equal(length(as.list(inventory2$cache)), 1L) - } -}) - -tar_test("inventory_aws class with versioning from resource settings", { - bucket1 <- random_bucket_name() - bucket2 <- random_bucket_name() - client <- paws.storage::s3() - client$create_bucket(Bucket = bucket1) - client$create_bucket(Bucket = bucket2) - client$put_bucket_versioning( - Bucket = bucket1, - VersioningConfiguration = list(Status = "Enabled") - ) - client$put_bucket_versioning( - Bucket = bucket2, - VersioningConfiguration = list(Status = "Enabled") - ) - on.exit({ - aws_s3_delete_bucket(bucket1) - aws_s3_delete_bucket(bucket2) - }) - head1 <- list() - head2 <- list() - prefix <- path_objects_dir(path_store_default()) - for (key in file.path(prefix, c("w", "x", "y", "z"))) { - head1[[key]] <- client$put_object( - Body = charToRaw(key), - Key = key, - Bucket = bucket1 - ) - head2[[key]] <- client$put_object( - Body = charToRaw(key), - Key = key, - Bucket = bucket2 - ) - 
expect_true(nzchar(head1[[key]]$VersionId)) - expect_true(nzchar(head2[[key]]$VersionId)) - } - inventory1 <- inventory_aws_init() - inventory2 <- inventory_aws_init() - resources1 <- tar_resources( - aws = tar_resources_aws( - bucket = bucket1, - prefix = path_store_default() - ) - ) - resources2 <- tar_resources( - aws = tar_resources_aws( - bucket = bucket2, - prefix = path_store_default() - ) - ) - store1 <- store_init(repository = "aws", resources = resources1) - store2 <- store_init(repository = "aws", resources = resources2) - store1$file$path <- store_produce_aws_path( - store = store1, - name = "x", - path_store = path_store_default() - ) - store2$file$path <- store_produce_aws_path( - store = store2, - name = "x", - path_store = path_store_default() - ) - key <- store_aws_key(store1$file$path) - store1$file$path <- c( - store1$file$path, - paste0("version=", head1[[key]]$VersionId) - ) - store2$file$path <- c( - store2$file$path, - paste0("version=", head2[[key]]$VersionId) - ) - for (index in seq_len(2L)) { - out1 <- inventory1$get( - key = key, - bucket = bucket1, - version = store_aws_version_use(store1, store1$file$path), - store = store1 - ) - out2 <- inventory2$get( - key = key, - bucket = bucket2, - version = store_aws_version_use(store2, store2$file$path), - store = store2 + expect_equal(inventory$list_cache(), character(0L)) + expect_null(inventory$misses) + for (key in rev(file.path(prefix, c("w", "x", "y", "z")))) { + store$file$path <- store_produce_aws_path( + store = store, + name = basename(key), + path_store = path_store_default() + ) + out <- inventory$get_cache(store) + expect_equal(inventory$misses, 1L) + expect_equal(out, digest_chr64(head[[key]]$ETag)) + expect_equal( + sort(inventory$list_cache()), + sort( + paste0( + bucket, + "|", + file.path(prefix, c("w", "x", "y", "z")) + ) + ) ) - expect_equal(out1, out2) - expect_false(anyNA(out1)) - expect_true(is.character(out1)) - expect_equal(length(out1), 1L) - expect_true(nzchar(out1)) - 
expect_equal(length(as.list(inventory1$cache)), 4L) - expect_equal(length(as.list(inventory2$cache)), 1L) } }) diff --git a/tests/testthat/test-class_inventory.R b/tests/testthat/test-class_inventory.R index 3fb47281f..56db7b183 100644 --- a/tests/testthat/test-class_inventory.R +++ b/tests/testthat/test-class_inventory.R @@ -6,9 +6,13 @@ tar_test("inventory abstract class basic methods", { x <- inventory_init() store <- store_init() expect_equal(x$list_cache(), character(0L)) - out <- x$get_cache(store) - expect_equal(x$list_cache(), "example_bucket|example_key") - expect_equal(out, "example_hash") + expect_null(x$misses) + for (index in seq_len(4L)) { + out <- x$get_cache(store) + expect_equal(x$misses, 1L) + expect_equal(out, "example_hash") + expect_equal(x$list_cache(), "example_bucket|example_key") + } x$reset_cache() expect_equal(x$list_cache(), character(0L)) }) From b05392b0456d586b3666d0115dc3c141e13c9108 Mon Sep 17 00:00:00 2001 From: wlandau Date: Mon, 13 Nov 2023 08:53:51 -0500 Subject: [PATCH 30/48] gcp inventory --- tests/aws/test-class_inventory_aws.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/aws/test-class_inventory_aws.R b/tests/aws/test-class_inventory_aws.R index 1be3ce399..6d2287cf9 100644 --- a/tests/aws/test-class_inventory_aws.R +++ b/tests/aws/test-class_inventory_aws.R @@ -1,4 +1,4 @@ -tar_test("inventory_aws class with versioning from the buckets", { +tar_test("inventory_aws class", { bucket <- random_bucket_name() client <- paws.storage::s3() client$create_bucket(Bucket = bucket) From a358a68f77aecfdffe13772d3058b8ae1b42acb8 Mon Sep 17 00:00:00 2001 From: wlandau Date: Mon, 13 Nov 2023 08:57:22 -0500 Subject: [PATCH 31/48] gcp inventory --- R/class_inventory_gcp.R | 43 ++++++++++++++++++++++++ tests/gcp/test-class_inventory_gcp.R | 49 ++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 R/class_inventory_gcp.R create mode 100644 tests/gcp/test-class_inventory_gcp.R diff --git 
a/R/class_inventory_gcp.R b/R/class_inventory_gcp.R new file mode 100644 index 000000000..666a50ad1 --- /dev/null +++ b/R/class_inventory_gcp.R @@ -0,0 +1,43 @@ +# Covered in tests/gcp/test-class_inventory_gcp.R. +# nocov start +inventory_gcp_init <- function() { + out <- inventory_gcp_new() + out$reset_cache() + out +} + +inventory_gcp_new <- function() { + inventory_gcp_class$new() +} + +inventory_gcp_class <- R6::R6Class( + classname = "tar_inventory_gcp", + inherit = inventory_class, + class = FALSE, + portable = FALSE, + cloneable = FALSE, + public = list( + get_key = function(store) { + store_gcp_key(store$file$path) + }, + get_bucket = function(store) { + store_gcp_bucket(store$file$path) + }, + set_cache = function(store) { + path <- store$file$path + bucket <- self$get_bucket(store) + gcp <- store$resources$gcp + results <- gcp_gcs_list_md5s( + prefix = dirname(self$get_key(store)), + bucket = bucket, + verbose = gcp$verbose, + max_tries = gcp$max_tries + ) + for (key in names(results)) { + name <- self$get_name(key = key, bucket = bucket) + self$cache[[name]] <- digest_chr64(results[[key]]) + } + } + ) +) +# nocov end diff --git a/tests/gcp/test-class_inventory_gcp.R b/tests/gcp/test-class_inventory_gcp.R new file mode 100644 index 000000000..c753cc710 --- /dev/null +++ b/tests/gcp/test-class_inventory_gcp.R @@ -0,0 +1,49 @@ +tar_test("inventory_gcp class", { + skip_if_no_gcp() + gcp_gcs_auth(max_tries = 5) + bucket <- random_bucket_name() + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + googleCloudStorageR::gcs_create_bucket(bucket, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket)) + prefix <- path_objects_dir(path_store_default()) + head <- list() + for (key in file.path(prefix, c("w", "x", "y", "z"))) { + file <- tempfile() + writeLines(key, file) + head[[key]] <- googleCloudStorageR::gcs_upload( + file = file, + name = key, + bucket = bucket + ) + } + inventory <- inventory_gcp_init() + resources <- tar_resources( + gcp = 
tar_resources_gcp( + bucket = bucket, + prefix = path_store_default() + ) + ) + store <- store_init(repository = "gcp", resources = resources) + expect_equal(inventory$list_cache(), character(0L)) + expect_null(inventory$misses) + for (key in rev(file.path(prefix, c("w", "x", "y", "z")))) { + store$file$path <- store_produce_gcp_path( + store = store, + name = basename(key), + path_store = path_store_default() + ) + out <- inventory$get_cache(store) + expect_equal(inventory$misses, 1L) + expect_equal(out, digest_chr64(head[[key]]$md5)) + expect_equal( + sort(inventory$list_cache()), + sort( + paste0( + bucket, + "|", + file.path(prefix, c("w", "x", "y", "z")) + ) + ) + ) + } +}) From 85f84f4d10d562277b086f7d953641b8330ff5a9 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 09:11:39 -0500 Subject: [PATCH 32/48] Bring back tar_unversion() --- NAMESPACE | 1 + R/tar_read.R | 35 ++++++++++++++++------------------- _pkgdown.yml | 1 + man/tar_delete.Rd | 39 +++++++++++++++++++-------------------- man/tar_destroy.Rd | 39 +++++++++++++++++++-------------------- man/tar_invalidate.Rd | 3 ++- man/tar_load.Rd | 36 +++++++++++++++++------------------- man/tar_load_raw.Rd | 36 +++++++++++++++++------------------- man/tar_prune.Rd | 41 ++++++++++++++++++++--------------------- man/tar_prune_list.Rd | 3 ++- man/tar_read.Rd | 36 +++++++++++++++++------------------- man/tar_read_raw.Rd | 36 +++++++++++++++++------------------- 12 files changed, 148 insertions(+), 158 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index eede48c4d..9b3f96669 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -495,6 +495,7 @@ export(tar_timestamp_raw) export(tar_toggle) export(tar_traceback) export(tar_unscript) +export(tar_unversion) export(tar_validate) export(tar_visnetwork) export(tar_warn_deprecate) diff --git a/R/tar_read.R b/R/tar_read.R index 5778cd124..683a42381 100644 --- a/R/tar_read.R +++ b/R/tar_read.R @@ -11,26 +11,23 @@ #' If you use `targets` with cloud storage #' () #' and 
versioning is turned on, then `targets` will record each -#' version of each target in its metadata. +#' version of each target in its metadata. Functions like [tar_read()] +#' and [tar_load()] load the version recorded in the local metadata, +#' which may not be the same as the "current" version of the +#' object in the bucket. #' -#' However, by default, -#' `targets` *uses* only the latest version in the bucket. -#' You may instead want to -#' use the specific version of the target recorded in the local metadata -#' (for example, if you previously committed the metadata file -#' `_targets/meta/meta` to version control, and now you want to roll -#' back the code and data together to an earlier point in time). -#' To do this, you will -#' need to modify the `resources` argument of [tar_target()] and/or -#' [tar_option_set()] via [tar_resources()]. In [tar_resources_aws()] -#' or [tar_resources_gcp()], set the `version` argument to `"meta"`. -#' Modifying your code this way in `_targets.R` will control functions that -#' read `_targets.R` when they run, such as [tar_make()], [tar_outdated()], -#' and [tar_visnetwork()]. To apply `version = "meta"` to functions that -#' do not read `_targets.R`, such as [tar_read()] and [tar_load()], -#' set `resources` in [tar_option_set()] in your local R session. -#' You can do this manually, or if you coded those options in `_targets.R`, -#' you can manually run `_targets.R` using [tar_load_globals()]. +#' Likewise, functions [tar_delete()] and [tar_destroy()] only remove +#' the version ID of each target as recorded in the local +#' metadata. Extra steps are required to remove the *latest* version +#' of each object, whatever that version may be: +#' +#' 1. Make sure your local copy of the metadata is current and +#' up to date. You may need to run [tar_meta_download()] or +#' [tar_meta_sync()]. +#' 2. Run [tar_unversion()] to remove the recorded version IDs of +#' your targets in the local metadata. +#' 3. 
With the version IDs gone, [tar_delete()] and [tar_destroy()] +#' will delete all the versions of the affected targets. #' @return The target's return value from its file in #' `_targets/objects/`, or the paths to the custom files and directories #' if `format = "file"` was set. diff --git a/_pkgdown.yml b/_pkgdown.yml index de686c103..52e3e28b1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -101,6 +101,7 @@ reference: - '`tar_prune`' - '`tar_prune_list`' - '`tar_unscript`' + - '`tar_unversion`' - title: Progress contents: - '`tar_poll`' diff --git a/man/tar_delete.Rd b/man/tar_delete.Rd index b7370f993..63a623778 100644 --- a/man/tar_delete.Rd +++ b/man/tar_delete.Rd @@ -95,26 +95,24 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. -However, by default, -\code{targets} \emph{uses} only the latest version in the bucket. -You may instead want to -use the specific version of the target recorded in the local metadata -(for example, if you previously committed the metadata file -\verb{_targets/meta/meta} to version control, and now you want to roll -back the code and data together to an earlier point in time). -To do this, you will -need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or -\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. 
In \code{\link[=tar_resources_aws]{tar_resources_aws()}} -or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. -Modifying your code this way in \verb{_targets.R} will control functions that -read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, -and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that -do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, -set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. -You can do this manually, or if you coded those options in \verb{_targets.R}, -you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. 
+} } \examples{ @@ -138,6 +136,7 @@ Other clean: \code{\link{tar_destroy}()}, \code{\link{tar_invalidate}()}, \code{\link{tar_prune_list}()}, -\code{\link{tar_prune}()} +\code{\link{tar_prune}()}, +\code{\link{tar_unversion}()} } \concept{clean} diff --git a/man/tar_destroy.Rd b/man/tar_destroy.Rd index 218e29cf8..0fc42ff78 100644 --- a/man/tar_destroy.Rd +++ b/man/tar_destroy.Rd @@ -142,26 +142,24 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. -However, by default, -\code{targets} \emph{uses} only the latest version in the bucket. -You may instead want to -use the specific version of the target recorded in the local metadata -(for example, if you previously committed the metadata file -\verb{_targets/meta/meta} to version control, and now you want to roll -back the code and data together to an earlier point in time). -To do this, you will -need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or -\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} -or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. -Modifying your code this way in \verb{_targets.R} will control functions that -read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, -and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. 
To apply \code{version = "meta"} to functions that -do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, -set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. -You can do this manually, or if you coded those options in \verb{_targets.R}, -you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. 
+} } \examples{ @@ -179,6 +177,7 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_invalidate}()}, \code{\link{tar_prune_list}()}, -\code{\link{tar_prune}()} +\code{\link{tar_prune}()}, +\code{\link{tar_unversion}()} } \concept{clean} diff --git a/man/tar_invalidate.Rd b/man/tar_invalidate.Rd index 471bea689..a2e819f7a 100644 --- a/man/tar_invalidate.Rd +++ b/man/tar_invalidate.Rd @@ -87,6 +87,7 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_destroy}()}, \code{\link{tar_prune_list}()}, -\code{\link{tar_prune}()} +\code{\link{tar_prune}()}, +\code{\link{tar_unversion}()} } \concept{clean} diff --git a/man/tar_load.Rd b/man/tar_load.Rd index 85f538ccf..4989bccb3 100644 --- a/man/tar_load.Rd +++ b/man/tar_load.Rd @@ -97,26 +97,24 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. -However, by default, -\code{targets} \emph{uses} only the latest version in the bucket. -You may instead want to -use the specific version of the target recorded in the local metadata -(for example, if you previously committed the metadata file -\verb{_targets/meta/meta} to version control, and now you want to roll -back the code and data together to an earlier point in time). -To do this, you will -need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or -\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. 
In \code{\link[=tar_resources_aws]{tar_resources_aws()}} -or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. -Modifying your code this way in \verb{_targets.R} will control functions that -read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, -and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that -do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, -set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. -You can do this manually, or if you coded those options in \verb{_targets.R}, -you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. +} } \examples{ diff --git a/man/tar_load_raw.Rd b/man/tar_load_raw.Rd index 2a7f15e98..ed930dc24 100644 --- a/man/tar_load_raw.Rd +++ b/man/tar_load_raw.Rd @@ -93,26 +93,24 @@ which means they track historical versions of each data object. 
If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. -However, by default, -\code{targets} \emph{uses} only the latest version in the bucket. -You may instead want to -use the specific version of the target recorded in the local metadata -(for example, if you previously committed the metadata file -\verb{_targets/meta/meta} to version control, and now you want to roll -back the code and data together to an earlier point in time). -To do this, you will -need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or -\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} -or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. -Modifying your code this way in \verb{_targets.R} will control functions that -read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, -and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that -do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, -set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. -You can do this manually, or if you coded those options in \verb{_targets.R}, -you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. 
+Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. +} } \examples{ diff --git a/man/tar_prune.Rd b/man/tar_prune.Rd index b0baa20b8..0ae439da3 100644 --- a/man/tar_prune.Rd +++ b/man/tar_prune.Rd @@ -131,26 +131,24 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. - -However, by default, -\code{targets} \emph{uses} only the latest version in the bucket. -You may instead want to -use the specific version of the target recorded in the local metadata -(for example, if you previously committed the metadata file -\verb{_targets/meta/meta} to version control, and now you want to roll -back the code and data together to an earlier point in time). -To do this, you will -need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or -\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. 
In \code{\link[=tar_resources_aws]{tar_resources_aws()}} -or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. -Modifying your code this way in \verb{_targets.R} will control functions that -read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, -and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that -do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, -set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. -You can do this manually, or if you coded those options in \verb{_targets.R}, -you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. + +Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. 
+} } \examples{ @@ -178,6 +176,7 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_destroy}()}, \code{\link{tar_invalidate}()}, -\code{\link{tar_prune_list}()} +\code{\link{tar_prune_list}()}, +\code{\link{tar_unversion}()} } \concept{clean} diff --git a/man/tar_prune_list.Rd b/man/tar_prune_list.Rd index 2b38fcbb4..154dd0ab2 100644 --- a/man/tar_prune_list.Rd +++ b/man/tar_prune_list.Rd @@ -98,6 +98,7 @@ Other clean: \code{\link{tar_delete}()}, \code{\link{tar_destroy}()}, \code{\link{tar_invalidate}()}, -\code{\link{tar_prune}()} +\code{\link{tar_prune}()}, +\code{\link{tar_unversion}()} } \concept{clean} diff --git a/man/tar_read.Rd b/man/tar_read.Rd index 039aa083a..3bb93d02a 100644 --- a/man/tar_read.Rd +++ b/man/tar_read.Rd @@ -49,26 +49,24 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. -However, by default, -\code{targets} \emph{uses} only the latest version in the bucket. -You may instead want to -use the specific version of the target recorded in the local metadata -(for example, if you previously committed the metadata file -\verb{_targets/meta/meta} to version control, and now you want to roll -back the code and data together to an earlier point in time). -To do this, you will -need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or -\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. 
In \code{\link[=tar_resources_aws]{tar_resources_aws()}} -or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. -Modifying your code this way in \verb{_targets.R} will control functions that -read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, -and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that -do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, -set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. -You can do this manually, or if you coded those options in \verb{_targets.R}, -you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. +Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. +} } \section{Storage access}{ diff --git a/man/tar_read_raw.Rd b/man/tar_read_raw.Rd index 381db3749..ed58f0f93 100644 --- a/man/tar_read_raw.Rd +++ b/man/tar_read_raw.Rd @@ -75,26 +75,24 @@ which means they track historical versions of each data object. 
If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. -However, by default, -\code{targets} \emph{uses} only the latest version in the bucket. -You may instead want to -use the specific version of the target recorded in the local metadata -(for example, if you previously committed the metadata file -\verb{_targets/meta/meta} to version control, and now you want to roll -back the code and data together to an earlier point in time). -To do this, you will -need to modify the \code{resources} argument of \code{\link[=tar_target]{tar_target()}} and/or -\code{\link[=tar_option_set]{tar_option_set()}} via \code{\link[=tar_resources]{tar_resources()}}. In \code{\link[=tar_resources_aws]{tar_resources_aws()}} -or \code{\link[=tar_resources_gcp]{tar_resources_gcp()}}, set the \code{version} argument to \code{"meta"}. -Modifying your code this way in \verb{_targets.R} will control functions that -read \verb{_targets.R} when they run, such as \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_outdated]{tar_outdated()}}, -and \code{\link[=tar_visnetwork]{tar_visnetwork()}}. To apply \code{version = "meta"} to functions that -do not read \verb{_targets.R}, such as \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}}, -set \code{resources} in \code{\link[=tar_option_set]{tar_option_set()}} in your local R session. -You can do this manually, or if you coded those options in \verb{_targets.R}, -you can manually run \verb{_targets.R} using \code{\link[=tar_load_globals]{tar_load_globals()}}. 
+Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. +} } \examples{ From 8882518be72f6947494b49cfcf88150a1f290ff4 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 09:20:43 -0500 Subject: [PATCH 33/48] news and docs --- NEWS.md | 15 ++++-- R/class_runtime.R | 7 ++- R/tar_unversion.R | 36 +++++++++++++++ man/tar_unversion.Rd | 66 +++++++++++++++++++++++++++ tests/testthat/test-tar_unversion.R | 71 +++++++++++++++++++++++++++++ 5 files changed, 190 insertions(+), 5 deletions(-) create mode 100644 R/tar_unversion.R create mode 100644 man/tar_unversion.Rd create mode 100644 tests/testthat/test-tar_unversion.R diff --git a/NEWS.md b/NEWS.md index 8fad9703a..0ed69df58 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,20 +6,27 @@ Because of the changes below, upgrading to this version of `targets` will unavoi * Use SHA512 during the creation of target-specific pseudo-random number generator seeds (#1139). This change decreases the risk of overlapping/correlated random number generator streams. See the "RNG overlap" section of the `tar_seed_create()` help file for details and justification. Unfortunately, this change will invalidate all currently built targets because the seeds will be different. 
To avoid rerunning your whole pipeline, set `cue = tar_cue(seed = FALSE)` in `tar_target()`. * For cloud storage: instead of the hash of the local file, use the ETag for AWS S3 targets and the MD5 hash for GCP GCS targets (#1172). Sanitize with `targets:::digest_chr64()` in both cases before storing the result in the metadata. +* For a cloud target to be truly up to date, the hash in the metadata now needs to match the *current* object in the bucket, not the version recorded in the metadata (#1172). In other words, `targets` now tries to ensure that the up-to-date data objects in the cloud are in their newest versions. So if you roll back the metadata to an older version, you will still be able to access historical data versions with e.g. `tar_read()`, but the pipeline will no longer be up to date. -## Other changes +## Other changes to seeds * Add a new exported function `tar_seed_create()` which creates target-specific pseudo-random number generator seeds. * Add an "RNG overlap" section in the `tar_seed_create()` help file to justify and defend how `targets` and `tarchetypes` approach pseudo-random numbers. * Add function `tar_seed_set()` which sets a seed and sets all the RNG algorithms to their defaults in the R installation of the user. Each target now uses `tar_seed_set()` function to set its seed before running its R command (#1139). * Deprecate `tar_seed()` in favor of the new `tar_seed_get()` function. -* Migrate to the changes in `clustermq` 0.9.0 (@mschubert). + +## Other cloud storage improvements + +* For all cloud targets, check hashes in batched LIST requests instead of individual HEAD requests (#1172). Dramatically speeds up the process of checking if cloud targets are up to date. * For AWS S3 targets, `tar_delete()`, `tar_destroy()`, and `tar_prune()` now use efficient batched calls to `delete_objects()` instead of costly individual calls to `delete_object()` (#1171). 
* Add a new `verbose` argument to `tar_delete()`, `tar_destroy()`, and `tar_prune()`. * Add a new `batch_size` argument to `tar_delete()`, `tar_destroy()`, and `tar_prune()`. -* Add new arguments `page_size`, `version`, and `verbose` to `tar_resources_aws()` (#1172). -* Add new argument `version` to `tar_resources_gcp()` (#1172). +* Add new arguments `page_size` and `verbose` to `tar_resources_aws()` (#1172). +* Add a new `tar_unversion()` function to remove version IDs from the metadata of cloud targets. This makes it easier to interact with just the current version of each target, as opposed to the version ID recorded in the local metadata. +## Other changes + +* Migrate to the changes in `clustermq` 0.9.0 (@mschubert). # targets 1.3.2 diff --git a/R/class_runtime.R b/R/class_runtime.R index 0a5f6f614..327290bda 100644 --- a/R/class_runtime.R +++ b/R/class_runtime.R @@ -10,7 +10,8 @@ runtime_new <- function( file_exist = NULL, file_info = NULL, file_info_exist = NULL, - nanonext = NULL + nanonext = NULL, + inventories = NULL ) { force(target) force(frames) @@ -24,6 +25,7 @@ runtime_new <- function( force(file_info) force(file_info_exist) force(nanonext) + force(inventories) environment() } @@ -78,6 +80,9 @@ runtime_validate <- function(x) { tar_assert_scalar(x$nanonext) tar_assert_lgl(x$nanonext) } + if (!is.null(x$inventories)) { + tar_assert_list(x$inventories) + } } #' @title Get the `tar_runtime` object. diff --git a/R/tar_unversion.R b/R/tar_unversion.R new file mode 100644 index 000000000..807d000c2 --- /dev/null +++ b/R/tar_unversion.R @@ -0,0 +1,36 @@ +#' @title Delete cloud object version IDs from local metadata. +#' @export +#' @family clean +#' @description Delete version IDs from local metadata. +#' @inheritSection tar_read Cloud target data versioning +#' @return `NULL` (invisibly). +#' @param names Tidyselect expression to identify the targets whose +#' version IDs should be dropped.
+#' @inheritParams tar_validate +tar_unversion <- function( + names = tidyselect::everything(), + store = targets::tar_config_get("store") +) { + tar_assert_allow_meta("tar_unversion", store) + tar_assert_store(store = store) + tar_assert_path(path_meta(store)) + meta <- meta_init(path_store = store) + data <- as.data.frame(meta$database$read_condensed_data()) + names_quosure <- rlang::enquo(names) + names <- tar_tidyselect_eval(names_quosure, data$name) + tar_assert_chr(names, "names arg of tar_unversion() must eval to chr") + replacement <- "version=" + pattern <- paste0("^", replacement, ".*") + unversion <- data$name %in% names & + !is.na(data$repository) & + data$repository != "local" + for (index in which(unversion)) { + data$path[[index]] <- gsub( + pattern = pattern, + replacement = replacement, + x = data$path[[index]] + ) + } + meta$database$overwrite_storage(data) + invisible() +} \ No newline at end of file diff --git a/man/tar_unversion.Rd b/man/tar_unversion.Rd new file mode 100644 index 000000000..d032ba8d2 --- /dev/null +++ b/man/tar_unversion.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tar_unversion.R +\name{tar_unversion} +\alias{tar_unversion} +\title{Delete cloud object version IDs from local metadata.} +\usage{ +tar_unversion( + names = tidyselect::everything(), + store = targets::tar_config_get("store") +) +} +\arguments{ +\item{names}{Tidyselect expression to identify the targets whose +version IDs should be dropped.} + +\item{store}{Character of length 1, path to the +\code{targets} data store. Defaults to \code{tar_config_get("store")}, +which in turn defaults to \verb{_targets/}. +When you set this argument, the value of \code{tar_config_get("store")} +is temporarily changed for the current function call.
+See \code{\link[=tar_config_get]{tar_config_get()}} and \code{\link[=tar_config_set]{tar_config_set()}} for details +about how to set the data store path persistently +for a project.} +} +\value{ +\code{NULL} (invisibly). +} +\description{ +Delete version IDs from local metadata. +} +\section{Cloud target data versioning}{ + +Some buckets in Amazon S3 or Google Cloud Storage are "versioned", +which means they track historical versions of each data object. +If you use \code{targets} with cloud storage +(\url{https://books.ropensci.org/targets/cloud-storage.html}) +and versioning is turned on, then \code{targets} will record each +version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, +which may not be the same as the "current" version of the +object in the bucket. + +Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +the version ID of each target as recorded in the local +metadata. Extra steps are required to remove the \emph{latest} version +of each object, whatever that version may be: +\enumerate{ +\item Make sure your local copy of the metadata is current and +up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or +\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of +your targets in the local metadata. +\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} +will delete all the versions of the affected targets. 
+} +} + +\seealso{ +Other clean: +\code{\link{tar_delete}()}, +\code{\link{tar_destroy}()}, +\code{\link{tar_invalidate}()}, +\code{\link{tar_prune_list}()}, +\code{\link{tar_prune}()} +} +\concept{clean} diff --git a/tests/testthat/test-tar_unversion.R b/tests/testthat/test-tar_unversion.R new file mode 100644 index 000000000..1dbf73a3b --- /dev/null +++ b/tests/testthat/test-tar_unversion.R @@ -0,0 +1,71 @@ +tar_test("tar_unversion()", { + skip_cran() + lines <- c( + paste0( + "name|type|data|command|depend|seed|path|time|size|bytes|format|", + "repository|iteration|parent|children|seconds|warnings|error" + ), + "f|function|42ed6c6cf429ec42", + "resources2|object|c7fa586ec71716f7", + "resources|object|40cf4ff97d03671e", + paste0( + "z2|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-2091466813", + "|bucket=targets-test-versioned*region=NULL*key=_targets/objects/z2*", + "endpoint=TlVMTA*version=4MeJDr09__xWul7SY4p40bB30UpV_sfT|t19648.", + "5414026703s||50|rds|aws|vector|||0.001||" + ), + paste0( + "x|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999", + "|-1032428690||t19648.5414097547s|ded833868582137a|50", + "|rds|local|vector|||0||" + ), + paste0( + "y|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-1963496355|", + "bucket=targets-test-unversioned*region=NULL*key=_targets/objects/y", + "*endpoint=TlVMTA*version=|t19648.5414098716s||50|rds|aws|vector|||0||" + ), + paste0( + "z|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-499386612|", + "bucket=targets-test-unversioned*region=NULL*key=_targets/objects/z*", + "endpoint=TlVMTA*version=|t19648.5414154381s||50|rds|aws|vector|||0||" + ), + paste0( + "y2|stem|164f6b22e6f7cb6a|0e2890e1b1d6be9c|ef46db3751d8e999|-62670671|", + "bucket=targets-test-versioned*region=NULL*key=_targets/objects/y2*", + "endpoint=TlVMTA*version=P5eyZdO.JwR__kS5fdxWcBgJLH4BmFIP|", + "t19648.5414172763s||50|rds|aws|vector|||0.001||" + ) + ) + dir_create(path_meta_dir(path_store_default())) + writeLines(lines, 
path_meta(path_store_default())) + before <- tar_meta() + tar_unversion(names = tidyselect::any_of(c("x", "z2"))) + after <- tar_meta() + expect_equal(before$name, after$name) + names <- c( + c( + "f", + "resources", + "resources2", + "x", + "y", + "y2", + "z" + ) + ) + for (name in names) { + index <- which(before$name == name) + expect_equal(before$path[[index]], after$path[[index]]) + } + before <- before$path[[which(before$name == "z2")]] + after <- after$path[[which(after$name == "z2")]] + for (index in which(!grepl("^version=", before))) { + expect_equal(before[index], after[index]) + } + index <- which(grepl("^version=", before)) + expect_false(before[index] == after[index]) + expect_equal(after[index], "version=") + expect_gt(nchar(before[index]), nchar(after[index])) + expect_equal(nchar(store_aws_version(before)), 32L) + expect_null(store_aws_version(after)) +}) From e28a40fd0d8140f08123105c22d3cbdb9f12b435 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 09:22:28 -0500 Subject: [PATCH 34/48] test inventories list in tar_runtime --- tests/testthat/test-class_runtime.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-class_runtime.R b/tests/testthat/test-class_runtime.R index a8659f274..04028577d 100644 --- a/tests/testthat/test-class_runtime.R +++ b/tests/testthat/test-class_runtime.R @@ -161,3 +161,14 @@ tar_test("detect bad fun", { x$fun <- "" expect_error(runtime_validate(x), class = "tar_condition_validate") }) + +tar_test("runtime inventories", { + x <- runtime_new() + expect_silent(runtime_validate(x)) + x$inventories <- list() + expect_silent(runtime_validate(x)) + x$inventories$aws <- inventory_init() + expect_silent(runtime_validate(x)) + x$inventories <- "" + expect_error(runtime_validate(x), class = "tar_condition_validate") +}) From 12e6f95a5e0c08a584dca2af865bbe13518eb3a1 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 09:54:53 -0500 Subject: [PATCH 35/48] Clean up callr 
utils and populate inventory list --- NAMESPACE | 6 ++-- R/class_pipeline.R | 23 ++++-------- R/class_runtime.R | 21 +++++++++++ R/tar_make_interactive.R | 2 +- R/utils_callr.R | 54 +++++++++++----------------- man/tar_as_pipeline.Rd | 18 ---------- man/tar_callr_inner_try.Rd | 3 +- man/tar_pipeline_validate_lite.Rd | 15 -------- tests/testthat/test-class_pipeline.R | 6 ++-- 9 files changed, 56 insertions(+), 92 deletions(-) delete mode 100644 man/tar_as_pipeline.Rd delete mode 100644 man/tar_pipeline_validate_lite.Rd diff --git a/NAMESPACE b/NAMESPACE index 9b3f96669..32bedb342 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,8 @@ S3method(hash_object,character) S3method(hash_object,default) S3method(imports_init,default) S3method(imports_init,tar_imports) +S3method(pipeline_from_list,default) +S3method(pipeline_from_list,tar_pipeline) S3method(print,tar_cue) S3method(print,tar_pattern) S3method(print,tar_pipeline) @@ -206,8 +208,6 @@ S3method(store_write_path,tar_store_custom) S3method(store_write_path,tar_store_file) S3method(store_write_path,tar_torch) S3method(store_write_path,tar_url) -S3method(tar_as_pipeline,default) -S3method(tar_as_pipeline,tar_pipeline) S3method(tar_make_interactive_load_target,tar_bud) S3method(tar_make_interactive_load_target,tar_target) S3method(target_bootstrap,tar_builder) @@ -314,7 +314,6 @@ export(rstudio_addin_tar_target) export(rstudio_addin_tar_visnetwork) export(starts_with) export(tar_active) -export(tar_as_pipeline) export(tar_assert_chr) export(tar_assert_dbl) export(tar_assert_df) @@ -444,7 +443,6 @@ export(tar_path_target) export(tar_pattern) export(tar_pid) export(tar_pipeline) -export(tar_pipeline_validate_lite) export(tar_poll) export(tar_print) export(tar_process) diff --git a/R/class_pipeline.R b/R/class_pipeline.R index f791247e7..5f0e15aa6 100644 --- a/R/class_pipeline.R +++ b/R/class_pipeline.R @@ -282,43 +282,32 @@ pipeline_validate_conflicts <- function(pipeline) { } pipeline_validate <- function(pipeline) { - 
tar_pipeline_validate_lite(pipeline) + pipeline_validate_lite(pipeline) pipeline_validate_targets(pipeline$targets) pipeline_validate_dag(pipeline_produce_igraph(pipeline)) counter_validate(pipeline$loaded) counter_validate(pipeline$transient) } -#' @title Abridged pipeline validation function. -#' @export -#' @keywords internal -#' @description Internal function. Do not invoke directly. -#' @param pipeline A pipeline object. -tar_pipeline_validate_lite <- function(pipeline) { +pipeline_validate_lite <- function(pipeline) { tar_assert_inherits(pipeline, "tar_pipeline", msg = "invalid pipeline.") tar_assert_correct_fields(pipeline, pipeline_new) pipeline_validate_conflicts(pipeline) } -#' @title Convert to a pipeline object. -#' @export -#' @keywords internal -#' @description Not a user-side function. Do not invoke directly. -#' @return An object of class `"tar_pipeline"`. -#' @param x A list of target objects or a pipeline object. -tar_as_pipeline <- function(x) { - UseMethod("tar_as_pipeline") +pipeline_from_list <- function(x) { + UseMethod("pipeline_from_list") } #' @export #' @keywords internal -tar_as_pipeline.tar_pipeline <- function(x) { +pipeline_from_list.tar_pipeline <- function(x) { x } #' @export #' @keywords internal -tar_as_pipeline.default <- function(x) { +pipeline_from_list.default <- function(x) { out <- unlist(list(x), recursive = TRUE) out <- fltr(out, ~inherits(x = .x, what = "tar_target")) pipeline_init(out) diff --git a/R/class_runtime.R b/R/class_runtime.R index 327290bda..f21d9f1a7 100644 --- a/R/class_runtime.R +++ b/R/class_runtime.R @@ -85,6 +85,27 @@ runtime_validate <- function(x) { } } +runtime_set_file_info <- function(runtime, store) { + objects <- list.files( + path = targets::tar_path_objects_dir(store), + all.files = TRUE, + full.names = TRUE, + no.. 
= TRUE + ) + file_info <- as.list(file_info(objects)[, c("size", "mtime_numeric")]) + names(file_info$size) <- objects + names(file_info$mtime_numeric) <- objects + runtime$file_info <- file_info + runtime$file_exist <- targets::tar_counter(names = objects) + runtime$file_info_exist <- targets::tar_counter(names = objects) +} + +runtime_reset <- function(x) { + for (field in names(x)) { + x[[field]] <- NULL + } +} + #' @title Get the `tar_runtime` object. #' @export #' @keywords internal diff --git a/R/tar_make_interactive.R b/R/tar_make_interactive.R index 254611f02..81fea2f94 100644 --- a/R/tar_make_interactive.R +++ b/R/tar_make_interactive.R @@ -13,7 +13,7 @@ #' } tar_make_interactive <- function(code) { targets <- eval(parse(text = code), envir = tar_option_get("envir")) - pipeline <- tar_as_pipeline(targets) + pipeline <- pipeline_from_list(targets) pipeline_reset_deployments(pipeline) queue <- if_any( pipeline_uses_priorities(pipeline), diff --git a/R/utils_callr.R b/R/utils_callr.R index a1f2c3d19..c14e91acc 100644 --- a/R/utils_callr.R +++ b/R/utils_callr.R @@ -109,7 +109,8 @@ callr_inner <- function( ) } -#' @title Invoke a `targets` task from inside a `callr` function. +#' @title Invoke a `targets` task from inside a `callr` function +#' (without error handling). #' @export #' @keywords internal #' @description Not a user-side function. Do not invoke directly. 
@@ -139,42 +140,29 @@ tar_callr_inner_try <- function( store, fun ) { - if (is.null(envir)) { - envir <- parent - } - old_envir <- targets::tar_option_get("envir") - targets::tar_option_set(envir = envir) - tar_runtime <- targets::tar_runtime_object() + old_options <- options(options) + old_envir <- tar_options$get_envir() + on.exit({ + options(old_options) + runtime_reset(tar_runtime) + tar_options$set_envir(old_envir) + }) + callr_set_runtime(script = script, store = store, fun = fun) + envir <- if_any(is.null(envir), parent, envir) + tar_options$set_envir(envir = envir) + targets <- eval(parse(file = script, keep.source = TRUE), envir = envir) + targets_arguments$pipeline <- pipeline_from_list(targets) + pipeline_validate_lite(targets_arguments$pipeline) + do.call(targets_function, targets_arguments) +} + +callr_set_runtime <- function(script, store, fun) { tar_runtime$script <- script tar_runtime$store <- store tar_runtime$working_directory <- getwd() tar_runtime$fun <- fun - objects <- list.files( - path = targets::tar_path_objects_dir(store), - all.files = TRUE, - full.names = TRUE, - no.. 
= TRUE - ) - tar_runtime$file_exist <- targets::tar_counter(names = objects) - tar_runtime$file_info_exist <- targets::tar_counter(names = objects) - file_info <- as.list(file_info(objects)[, c("size", "mtime_numeric")]) - names(file_info$size) <- objects - names(file_info$mtime_numeric) <- objects - tar_runtime$file_info <- file_info - on.exit(targets::tar_option_set(envir = old_envir)) - on.exit(tar_runtime$script <- NULL, add = TRUE) - on.exit(tar_runtime$store <- NULL, add = TRUE) - on.exit(tar_runtime$working_directory <- NULL, add = TRUE) - on.exit(tar_runtime$fun <- NULL, add = TRUE) - on.exit(tar_runtime$file_exist <- NULL, add = TRUE) - on.exit(tar_runtime$file_info <- NULL, add = TRUE) - on.exit(tar_runtime$file_info_exist <- NULL, add = TRUE) - old <- options(options) - on.exit(options(old), add = TRUE) - targets <- eval(parse(file = script, keep.source = TRUE), envir = envir) - targets_arguments$pipeline <- targets::tar_as_pipeline(targets) - targets::tar_pipeline_validate_lite(targets_arguments$pipeline) - do.call(targets_function, targets_arguments) + tar_runtime$inventories <- list() + runtime_set_file_info(tar_runtime, store) } callr_prepare_arguments <- function(callr_function, callr_arguments) { diff --git a/man/tar_as_pipeline.Rd b/man/tar_as_pipeline.Rd deleted file mode 100644 index 38839566c..000000000 --- a/man/tar_as_pipeline.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class_pipeline.R -\name{tar_as_pipeline} -\alias{tar_as_pipeline} -\title{Convert to a pipeline object.} -\usage{ -tar_as_pipeline(x) -} -\arguments{ -\item{x}{A list of target objects or a pipeline object.} -} -\value{ -An object of class \code{"tar_pipeline"}. -} -\description{ -Not a user-side function. Do not invoke directly. 
-} -\keyword{internal} diff --git a/man/tar_callr_inner_try.Rd b/man/tar_callr_inner_try.Rd index bc4475682..6c84aa66c 100644 --- a/man/tar_callr_inner_try.Rd +++ b/man/tar_callr_inner_try.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/utils_callr.R \name{tar_callr_inner_try} \alias{tar_callr_inner_try} -\title{Invoke a \code{targets} task from inside a \code{callr} function.} +\title{Invoke a \code{targets} task from inside a \code{callr} function +(without error handling).} \usage{ tar_callr_inner_try( targets_function, diff --git a/man/tar_pipeline_validate_lite.Rd b/man/tar_pipeline_validate_lite.Rd deleted file mode 100644 index 1fba5e7e2..000000000 --- a/man/tar_pipeline_validate_lite.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class_pipeline.R -\name{tar_pipeline_validate_lite} -\alias{tar_pipeline_validate_lite} -\title{Abridged pipeline validation function.} -\usage{ -tar_pipeline_validate_lite(pipeline) -} -\arguments{ -\item{pipeline}{A pipeline object.} -} -\description{ -Internal function. Do not invoke directly. 
-} -\keyword{internal} diff --git a/tests/testthat/test-class_pipeline.R b/tests/testthat/test-class_pipeline.R index 50edfbea4..fde8c0edf 100644 --- a/tests/testthat/test-class_pipeline.R +++ b/tests/testthat/test-class_pipeline.R @@ -240,19 +240,19 @@ tar_test("print method", { tar_test("validate a non-pipeline", { expect_error(pipeline_validate(stem_new()), class = "tar_condition_validate") expect_error( - tar_pipeline_validate_lite(stem_new()), + pipeline_validate_lite(stem_new()), class = "tar_condition_validate" ) }) tar_test("validate a nonempty pipeline", { expect_silent(pipeline_validate(pipeline_order())) - expect_silent(tar_pipeline_validate_lite(pipeline_order())) + expect_silent(pipeline_validate_lite(pipeline_order())) }) tar_test("validate an empty pipeline", { expect_silent(pipeline_validate(pipeline_init())) - expect_silent(tar_pipeline_validate_lite(pipeline_init())) + expect_silent(pipeline_validate_lite(pipeline_init())) }) tar_test("pipeline_validate(pipeline) with a bad target", { From 6a655f1cce4c160343caa4390657f2cfe9f3bdd5 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 10:09:20 -0500 Subject: [PATCH 36/48] Un-export hidden functions --- NAMESPACE | 2 -- R/class_resources_aws.R | 2 +- R/class_resources_gcp.R | 2 +- R/class_runtime.R | 6 ++--- R/tar_resources_aws.R | 4 ++-- R/utils_path.R | 19 ++------------- man/tar_path_objects_dir.Rd | 18 -------------- man/tar_path_objects_dir_cloud.Rd | 19 --------------- tests/testthat/test-class_runtime.R | 37 +++++++++++++++++++++++++++++ 9 files changed, 46 insertions(+), 63 deletions(-) delete mode 100644 man/tar_path_objects_dir.Rd delete mode 100644 man/tar_path_objects_dir_cloud.Rd diff --git a/NAMESPACE b/NAMESPACE index 32bedb342..906b813dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -434,8 +434,6 @@ export(tar_option_reset) export(tar_option_set) export(tar_outdated) export(tar_path) -export(tar_path_objects_dir) -export(tar_path_objects_dir_cloud) export(tar_path_script) 
export(tar_path_script_support) export(tar_path_store) diff --git a/R/class_resources_aws.R b/R/class_resources_aws.R index 88a407c6b..874e0e5e5 100644 --- a/R/class_resources_aws.R +++ b/R/class_resources_aws.R @@ -1,6 +1,6 @@ resources_aws_init <- function( bucket = NULL, - prefix = tar_path_objects_dir_cloud(), + prefix = path_objects_dir_cloud(), region = NULL, endpoint = NULL, s3_force_path_style = NULL, diff --git a/R/class_resources_gcp.R b/R/class_resources_gcp.R index bafe7f303..42d1f0a6a 100644 --- a/R/class_resources_gcp.R +++ b/R/class_resources_gcp.R @@ -1,6 +1,6 @@ resources_gcp_init <- function( bucket = NULL, - prefix = tar_path_objects_dir_cloud(), + prefix = path_objects_dir_cloud(), predefined_acl = "private", max_tries = 5L, verbose = FALSE diff --git a/R/class_runtime.R b/R/class_runtime.R index f21d9f1a7..efcaff0ea 100644 --- a/R/class_runtime.R +++ b/R/class_runtime.R @@ -87,7 +87,7 @@ runtime_validate <- function(x) { runtime_set_file_info <- function(runtime, store) { objects <- list.files( - path = targets::tar_path_objects_dir(store), + path = path_objects_dir(store), all.files = TRUE, full.names = TRUE, no.. 
= TRUE @@ -96,8 +96,8 @@ runtime_set_file_info <- function(runtime, store) { names(file_info$size) <- objects names(file_info$mtime_numeric) <- objects runtime$file_info <- file_info - runtime$file_exist <- targets::tar_counter(names = objects) - runtime$file_info_exist <- targets::tar_counter(names = objects) + runtime$file_exist <- tar_counter(names = objects) + runtime$file_info_exist <- tar_counter(names = objects) } runtime_reset <- function(x) { diff --git a/R/tar_resources_aws.R b/R/tar_resources_aws.R index 05f4e3a01..c97fa0bda 100644 --- a/R/tar_resources_aws.R +++ b/R/tar_resources_aws.R @@ -110,12 +110,12 @@ tar_resources_aws <- function( tar_warn_prefix() prefix <- path_store_default() } - prefix <- prefix %|||% targets::tar_path_objects_dir_cloud() + prefix <- prefix %|||% path_objects_dir_cloud() part_size <- part_size %|||% (5 * (2 ^ 20)) page_size <- page_size %|||% 1000L verbose <- verbose %|||% TRUE args <- list(...) - default_args <- targets::tar_option_get("resources")$aws$args + default_args <- tar_options$get_resources()$aws$args for (name in names(default_args)) { args[[name]] <- args[[name]] %|||% default_args[[name]] } diff --git a/R/utils_path.R b/R/utils_path.R index c3b4ca86b..b87a215a1 100644 --- a/R/utils_path.R +++ b/R/utils_path.R @@ -34,14 +34,7 @@ path_objects <- function(path_store, name) { file.path(path_objects_dir(path_store), name) } -#' @title Path to directory of saved targets -#' @export -#' @keywords internal -#' @description Internal function. Not for users. -#' @param path_store Path to the data store. 
-#' @examples -#' tar_path_objects_dir("_targets") -tar_path_objects_dir <- function(path_store) { +path_objects_dir <- function(path_store) { path_objects_dir(path_store = path_store) } @@ -49,15 +42,7 @@ path_objects_dir <- function(path_store) { file.path(path_store, "objects") } -#' @title Default pseudo-directory path of target data in the cloud -#' @export -#' @keywords internal -#' @description Not a user-side function. Do not invoke directly. -#' @return Character of length, -#' default pseudo-directory path of target data in the cloud. -#' @examples -#' tar_path_objects_dir_cloud() -tar_path_objects_dir_cloud <- function() { +path_objects_dir_cloud <- function() { file.path(path_store_default(), "objects", fsep = "/") } diff --git a/man/tar_path_objects_dir.Rd b/man/tar_path_objects_dir.Rd deleted file mode 100644 index f50948ed4..000000000 --- a/man/tar_path_objects_dir.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_path.R -\name{tar_path_objects_dir} -\alias{tar_path_objects_dir} -\title{Path to directory of saved targets} -\usage{ -tar_path_objects_dir(path_store) -} -\arguments{ -\item{path_store}{Path to the data store.} -} -\description{ -Internal function. Not for users. -} -\examples{ -tar_path_objects_dir("_targets") -} -\keyword{internal} diff --git a/man/tar_path_objects_dir_cloud.Rd b/man/tar_path_objects_dir_cloud.Rd deleted file mode 100644 index 01f1ece6c..000000000 --- a/man/tar_path_objects_dir_cloud.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_path.R -\name{tar_path_objects_dir_cloud} -\alias{tar_path_objects_dir_cloud} -\title{Default pseudo-directory path of target data in the cloud} -\usage{ -tar_path_objects_dir_cloud() -} -\value{ -Character of length, -default pseudo-directory path of target data in the cloud. -} -\description{ -Not a user-side function. Do not invoke directly. 
-} -\examples{ -tar_path_objects_dir_cloud() -} -\keyword{internal} diff --git a/tests/testthat/test-class_runtime.R b/tests/testthat/test-class_runtime.R index 04028577d..58d5eef14 100644 --- a/tests/testthat/test-class_runtime.R +++ b/tests/testthat/test-class_runtime.R @@ -144,6 +144,43 @@ tar_test("validate non-null store", { expect_silent(runtime_validate(x)) }) +tar_test("runtime reset", { + x <- runtime_new() + x$store <- "store" + x$script <- "script" + expect_equal(x$store, "store") + expect_equal(x$script, "script") + runtime_reset(x) + expect_null(x$store) + expect_null(x$script) +}) + +tar_test("runtime_set_file_info()", { + x <- runtime_new() + store <- path_store_default() + dir_create(path_objects_dir(store)) + writeLines("x", path_objects(store, "x")) + writeLines("y", path_objects(store, "y")) + runtime_set_file_info(x, store) + for (field in c("size", "mtime_numeric")) { + expect_true(is.numeric(x$file_info[[field]])) + expect_equal( + sort(names(x$file_info[[field]])), + sort(c(path_objects(store, "x"), path_objects(store, "y"))) + ) + } + for (field in c("file_exist", "file_info_exist")) { + expect_true(is.environment(x[[field]])) + expect_silent(counter_validate(x[[field]])) + expect_equal(x[[field]]$count, 2L) + expect_equal(as.logical(as.list(x[[field]]$envir)), c(TRUE, TRUE)) + expect_equal( + sort(names(as.list(x[[field]]$envir))), + sort(c(path_objects(store, "x"), path_objects(store, "y"))) + ) + } +}) + tar_test("detect bad store", { x <- runtime_new() x$store <- FALSE From 8b977a3fd9bfd6edbf1261304480f9ebdbd626ba Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 10:16:55 -0500 Subject: [PATCH 37/48] paws.common --- DESCRIPTION | 1 + R/tar_unversion.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 42b0ab7f4..5cce4f4ab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -92,6 +92,7 @@ Suggests: nanonext (>= 0.9.0), rmarkdown (>= 2.4), parallelly (>= 1.35.0), + paws.common 
(>= 0.5.4), paws.storage (>= 0.2.0), pingr (>= 2.0.1), pkgload (>= 1.1.0), diff --git a/R/tar_unversion.R b/R/tar_unversion.R index 807d000c2..4c55c99fa 100644 --- a/R/tar_unversion.R +++ b/R/tar_unversion.R @@ -33,4 +33,4 @@ tar_unversion <- function( } meta$database$overwrite_storage(data) invisible() -} \ No newline at end of file +} From 5c84cbb6b9b7beadb7c926cc5295e8ee0ae23d78 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 10:20:40 -0500 Subject: [PATCH 38/48] docs --- R/tar_read.R | 25 ++++++++++++++++--------- man/tar_delete.Rd | 25 ++++++++++++++++--------- man/tar_destroy.Rd | 25 ++++++++++++++++--------- man/tar_load.Rd | 25 ++++++++++++++++--------- man/tar_load_raw.Rd | 25 ++++++++++++++++--------- man/tar_prune.Rd | 25 ++++++++++++++++--------- man/tar_read.Rd | 25 ++++++++++++++++--------- man/tar_read_raw.Rd | 25 ++++++++++++++++--------- man/tar_unversion.Rd | 25 ++++++++++++++++--------- 9 files changed, 144 insertions(+), 81 deletions(-) diff --git a/R/tar_read.R b/R/tar_read.R index 683a42381..319f68b7f 100644 --- a/R/tar_read.R +++ b/R/tar_read.R @@ -11,23 +11,30 @@ #' If you use `targets` with cloud storage #' () #' and versioning is turned on, then `targets` will record each -#' version of each target in its metadata. Functions like [tar_read()] +#' version of each target in its metadata. +#' +#' Functions like [tar_read()] #' and [tar_load()] load the version recorded in the local metadata, #' which may not be the same as the "current" version of the -#' object in the bucket. -#' -#' Likewise, functions [tar_delete()] and [tar_destroy()] only remove +#' object in the bucket. Likewise, functions [tar_delete()] +#' and [tar_destroy()] only remove #' the version ID of each target as recorded in the local -#' metadata. Extra steps are required to remove the *latest* version -#' of each object, whatever that version may be: +#' metadata. 
+#' +#' If you want to interact with the *latest* version of an object +#' instead of the version ID recorded in the local metadata, +#' then you will need to delete the object from the metadata. #' #' 1. Make sure your local copy of the metadata is current and #' up to date. You may need to run [tar_meta_download()] or -#' [tar_meta_sync()]. +#' [tar_meta_sync()] first. #' 2. Run [tar_unversion()] to remove the recorded version IDs of #' your targets in the local metadata. -#' 3. With the version IDs gone, [tar_delete()] and [tar_destroy()] -#' will delete all the versions of the affected targets. +#' 3. With the version IDs gone from the local metadata, +#' functions like [tar_read()] and [tar_destroy()] will use the +#' *latest* version of each target data object. +#' 4. Optional: to back up the local metadata file with the version IDs +#' deleted, use [tar_meta_upload()]. #' @return The target's return value from its file in #' `_targets/objects/`, or the paths to the custom files and directories #' if `format = "file"` was set. diff --git a/man/tar_delete.Rd b/man/tar_delete.Rd index 63a623778..1eed63b48 100644 --- a/man/tar_delete.Rd +++ b/man/tar_delete.Rd @@ -95,23 +95,30 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. + +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. - -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. 
Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. + +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. +\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. +\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. } } diff --git a/man/tar_destroy.Rd b/man/tar_destroy.Rd index 0fc42ff78..55c8f3daa 100644 --- a/man/tar_destroy.Rd +++ b/man/tar_destroy.Rd @@ -142,23 +142,30 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. 
+ +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. - -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. + +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. +\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. +\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. 
} } diff --git a/man/tar_load.Rd b/man/tar_load.Rd index 4989bccb3..66cdee405 100644 --- a/man/tar_load.Rd +++ b/man/tar_load.Rd @@ -97,23 +97,30 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. + +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. - -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. + +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. 
+\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. +\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. } } diff --git a/man/tar_load_raw.Rd b/man/tar_load_raw.Rd index ed930dc24..7cbf88b8a 100644 --- a/man/tar_load_raw.Rd +++ b/man/tar_load_raw.Rd @@ -93,23 +93,30 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. + +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. - -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. + +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. 
+\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. +\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. +\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. } } diff --git a/man/tar_prune.Rd b/man/tar_prune.Rd index 0ae439da3..962e5ccae 100644 --- a/man/tar_prune.Rd +++ b/man/tar_prune.Rd @@ -131,23 +131,30 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. + +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. - -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. 
+ +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. +\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. +\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. } } diff --git a/man/tar_read.Rd b/man/tar_read.Rd index 3bb93d02a..74c69ccdb 100644 --- a/man/tar_read.Rd +++ b/man/tar_read.Rd @@ -49,23 +49,30 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. + +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. 
- -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. + +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. +\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. +\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. } } diff --git a/man/tar_read_raw.Rd b/man/tar_read_raw.Rd index ed58f0f93..4d1f1cb2a 100644 --- a/man/tar_read_raw.Rd +++ b/man/tar_read_raw.Rd @@ -75,23 +75,30 @@ which means they track historical versions of each data object. 
If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. + +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. - -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. + +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. -\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. +\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. 
+\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. } } diff --git a/man/tar_unversion.Rd b/man/tar_unversion.Rd index d032ba8d2..20e33e469 100644 --- a/man/tar_unversion.Rd +++ b/man/tar_unversion.Rd @@ -35,23 +35,30 @@ which means they track historical versions of each data object. If you use \code{targets} with cloud storage (\url{https://books.ropensci.org/targets/cloud-storage.html}) and versioning is turned on, then \code{targets} will record each -version of each target in its metadata. Functions like \code{\link[=tar_read]{tar_read()}} +version of each target in its metadata. + +Functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_load]{tar_load()}} load the version recorded in the local metadata, which may not be the same as the "current" version of the -object in the bucket. - -Likewise, functions \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} only remove +object in the bucket. Likewise, functions \code{\link[=tar_delete]{tar_delete()}} +and \code{\link[=tar_destroy]{tar_destroy()}} only remove the version ID of each target as recorded in the local -metadata. Extra steps are required to remove the \emph{latest} version -of each object, whatever that version may be: +metadata. + +If you want to interact with the \emph{latest} version of an object +instead of the version ID recorded in the local metadata, +then you will need to delete the object from the metadata. \enumerate{ \item Make sure your local copy of the metadata is current and up to date. You may need to run \code{\link[=tar_meta_download]{tar_meta_download()}} or -\code{\link[=tar_meta_sync]{tar_meta_sync()}}. +\code{\link[=tar_meta_sync]{tar_meta_sync()}} first. \item Run \code{\link[=tar_unversion]{tar_unversion()}} to remove the recorded version IDs of your targets in the local metadata. 
-\item With the version IDs gone, \code{\link[=tar_delete]{tar_delete()}} and \code{\link[=tar_destroy]{tar_destroy()}} -will delete all the versions of the affected targets. +\item With the version IDs gone from the local metadata, +functions like \code{\link[=tar_read]{tar_read()}} and \code{\link[=tar_destroy]{tar_destroy()}} will use the +\emph{latest} version of each target data object. +\item Optional: to back up the local metadata file with the version IDs +deleted, use \code{\link[=tar_meta_upload]{tar_meta_upload()}}. } } From 5253243f60329f9bd9a3336cb7448aaa9f5a0d58 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 10:34:55 -0500 Subject: [PATCH 39/48] Avoid local file hashes in cloud targets --- NAMESPACE | 1 + R/class_aws_file.R | 2 +- R/class_cloud.R | 3 +-- R/class_file.R | 9 +++++++++ R/class_gcp_file.R | 2 +- R/class_store.R | 2 +- tests/testthat/test-class_file.R | 21 +++++++++++++++++++++ 7 files changed, 35 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 906b813dd..9b575ca20 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,7 @@ S3method(store_assert_repository_setting,aws) S3method(store_assert_repository_setting,default) S3method(store_assert_repository_setting,gcp) S3method(store_assert_repository_setting,local) +S3method(store_cache_path,default) S3method(store_cache_path,tar_cloud) S3method(store_cache_path,tar_external) S3method(store_class_format,feather) diff --git a/R/class_aws_file.R b/R/class_aws_file.R index 2626b79d7..f1c890d27 100644 --- a/R/class_aws_file.R +++ b/R/class_aws_file.R @@ -49,7 +49,7 @@ store_hash_early.tar_aws_file <- function(store) { # nolint store$file$path <- store_aws_file_stage(store$file$path) on.exit(store$file$path <- old) tar_assert_path(store$file$path) - file_update_hash(store$file) + file_update_info(store$file) } #' @export diff --git a/R/class_cloud.R b/R/class_cloud.R index 7f13fc9c0..8ad23f523 100644 --- a/R/class_cloud.R +++ b/R/class_cloud.R @@ -33,8 +33,7 
@@ store_cache_path.tar_cloud <- function(store, path) { store_hash_late.tar_cloud <- function(store) { tar_assert_path(store$file$stage) file <- file_init(path = store$file$stage) - file_update_hash(file) - store$file$hash <- file$hash + file_update_info(file) store$file$bytes <- file$bytes store$file$time <- file$time } diff --git a/R/class_file.R b/R/class_file.R index 4becd6b25..7de097b33 100644 --- a/R/class_file.R +++ b/R/class_file.R @@ -49,6 +49,15 @@ file_exists_stage <- function(file) { all(file.exists(file$stage)) } +file_update_info <- function(file) { + files <- file_list_files(file$path) + info <- file_info(files) + file$time <- file_time(info) + file$bytes <- file_bytes(info) + file$size <- file_size(file$bytes) + invisible() +} + file_update_hash <- function(file) { files <- file_list_files(file$path) info <- file_info(files) diff --git a/R/class_gcp_file.R b/R/class_gcp_file.R index dbfaf50c0..891e47348 100644 --- a/R/class_gcp_file.R +++ b/R/class_gcp_file.R @@ -39,7 +39,7 @@ store_hash_early.tar_gcp_file <- function(store) { # nolint store$file$path <- store_gcp_file_stage(store$file$path) on.exit(store$file$path <- old) tar_assert_path(store$file$path) - file_update_hash(store$file) + file_update_info(store$file) } #' @export diff --git a/R/class_store.R b/R/class_store.R index 2e2b1ecc0..9b3702223 100644 --- a/R/class_store.R +++ b/R/class_store.R @@ -141,7 +141,7 @@ store_cache_path <- function(store, path) { UseMethod("store_cache_path") } -# @export +#' @export store_cache_path.default <- function(store, path) { cache <- tar_runtime$file_exist if (!is.null(cache)) { diff --git a/tests/testthat/test-class_file.R b/tests/testthat/test-class_file.R index 03eb02f11..99afaf2cb 100644 --- a/tests/testthat/test-class_file.R +++ b/tests/testthat/test-class_file.R @@ -119,6 +119,27 @@ tar_test("file_update_hash()", { expect_equal(length(file$time), 1L) }) +tar_test("file_update_info()", { + tmp <- tempfile() + file <- file_init(path = tmp) + 
writeLines("xyz", tmp) + file_update_info(file) + expect_true(is.na(file$hash)) + expect_gt(file$bytes, 0) + expect_true(is.character(file$time)) + expect_true(grepl("^t", file$time)) + expect_true(grepl("s$", file$time)) + expect_false(anyNA(file_time_posixct(file$time))) + expect_true(inherits(file_time_posixct(file$time), "POSIXct")) + expect_true(is.character(file$size)) + expect_equal(nchar(file$size), 16L) + expect_true(is.numeric(file$bytes)) + expect_true(is.finite(file$bytes)) + expect_equal(length(file$bytes), 1L) + expect_equal(length(file$time), 1L) + expect_equal(length(file$time), 1L) +}) + tar_test("file_update_hash() where two files exist", { tmp <- c(tempfile(), tempfile()) file <- file_init(path = tmp) From 8827ba731eab12c9e0b3a0fb293c6f819e3c9a5c Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 10:47:20 -0500 Subject: [PATCH 40/48] Avoid duplicate hashes for cloud files --- NAMESPACE | 2 ++ R/class_aws_file.R | 4 ++++ R/class_gcp_file.R | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 9b575ca20..659522088 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -126,7 +126,9 @@ S3method(store_hash_early,tar_gcp_file) S3method(store_hash_early,tar_store_file) S3method(store_hash_early,tar_url) S3method(store_hash_late,default) +S3method(store_hash_late,tar_aws_file) S3method(store_hash_late,tar_cloud) +S3method(store_hash_late,tar_gcp_file) S3method(store_hash_late,tar_store_file) S3method(store_hash_late,tar_url) S3method(store_marshal_object,default) diff --git a/R/class_aws_file.R b/R/class_aws_file.R index f1c890d27..aec88047c 100644 --- a/R/class_aws_file.R +++ b/R/class_aws_file.R @@ -52,6 +52,10 @@ store_hash_early.tar_aws_file <- function(store) { # nolint file_update_info(store$file) } +#' @export +store_hash_late.tar_aws_file <- function(store) { # nolint +} + #' @export store_read_object.tar_aws_file <- function(store) { path <- store$file$path diff --git a/R/class_gcp_file.R b/R/class_gcp_file.R 
index 891e47348..402fe1cf7 100644 --- a/R/class_gcp_file.R +++ b/R/class_gcp_file.R @@ -42,6 +42,10 @@ store_hash_early.tar_gcp_file <- function(store) { # nolint file_update_info(store$file) } +#' @export +store_hash_late.tar_gcp_file <- function(store) { # nolint +} + #' @export store_read_object.tar_gcp_file <- function(store) { path <- store$file$path From b32fd8ab42f784a00261cab747cd7ae8cdc70dd3 Mon Sep 17 00:00:00 2001 From: wlandau Date: Mon, 13 Nov 2023 10:56:44 -0500 Subject: [PATCH 41/48] paws.common --- R/class_aws.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/class_aws.R b/R/class_aws.R index 36a8b2b41..ec91e2dcb 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -280,5 +280,5 @@ store_aws_hash <- function(store) { #' @export store_get_packages.tar_aws <- function(store) { - c("paws.storage", NextMethod()) + c("paws.common", "paws.storage", NextMethod()) } From 87fd352695aef8be9c337fdcd125440ba1226b14 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 12:06:42 -0500 Subject: [PATCH 42/48] Download conditional on prefix --- R/class_aws.R | 21 ++++++--------------- R/class_inventory.R | 18 ++++++++++++++---- R/class_inventory_aws.R | 2 +- R/class_inventory_gcp.R | 2 +- tests/aws/test-class_inventory_aws.R | 12 +++++++++++- 5 files changed, 33 insertions(+), 22 deletions(-) diff --git a/R/class_aws.R b/R/class_aws.R index ec91e2dcb..dc85934f5 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -260,21 +260,12 @@ store_has_correct_hash.tar_aws <- function(store) { } store_aws_hash <- function(store) { - path <- store$file$path - aws <- store$resources$aws - head <- aws_s3_head( - key = store_aws_key(path), - bucket = store_aws_bucket(path), - region = store_aws_region(path), - endpoint = store_aws_endpoint(path), - version = store_aws_version(path), - args = aws$args, - max_tries = aws$max_tries, - seconds_timeout = aws$seconds_timeout, - close_connection = aws$close_connection, - s3_force_path_style = 
aws$s3_force_path_style - ) - if_any(is.null(head), NULL, digest_chr64(head$ETag)) + + browser() + + tar_runtime$inventories$aws <- tar_runtime$inventories$aws %|||% + inventory_aws_init() + tar_runtime$inventories$aws$get_cache(store = store) } # nocov end diff --git a/R/class_inventory.R b/R/class_inventory.R index 8da0d622e..f5bd800a7 100644 --- a/R/class_inventory.R +++ b/R/class_inventory.R @@ -4,7 +4,7 @@ # have serious versions of these methods. inventory_init <- function() { out <- inventory_new() - out$reset_cache() + out$reset() out } @@ -19,7 +19,9 @@ inventory_class <- R6::R6Class( cloneable = FALSE, public = list( cache = NULL, + prefixes = NULL, misses = NULL, + downloads = NULL, get_key = function(store) { "example_key" }, @@ -31,12 +33,17 @@ inventory_class <- R6::R6Class( }, get_cache = function(store) { key <- self$get_key(store) + prefix <- dirname(key) bucket <- self$get_bucket(store) name <- self$get_name(key = key, bucket = bucket) - if (!exists(x = name, envir = self$cache)) { - self$misses <- (self$misses %|||% 0L) + 1L + miss <- !exists(x = name, envir = self$cache) + download <- !counter_exists_name(counter = self$prefixes, name = prefix) + if (download) { + counter_set_name(counter = self$prefixes, name = prefix) self$set_cache(store) } + self$misses <- self$misses + as.integer(miss) + self$downloads <- self$downloads + as.integer(download) self$cache[[name]] }, list_cache = function() { @@ -48,8 +55,11 @@ inventory_class <- R6::R6Class( name <- self$get_name(key = key, bucket = bucket) self$cache[[name]] <- "example_hash" }, - reset_cache = function() { + reset = function() { self$cache <- new.env(parent = emptyenv()) + self$prefixes <- counter_init() + self$misses <- 0L + self$downloads <- 0L }, validate = function() { tar_assert_envir(self$cache) diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index c34476c62..a340c7575 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ -2,7 +2,7 @@ # nocov start 
inventory_aws_init <- function() { out <- inventory_aws_new() - out$reset_cache() + out$reset() out } diff --git a/R/class_inventory_gcp.R b/R/class_inventory_gcp.R index 666a50ad1..cd072a4d9 100644 --- a/R/class_inventory_gcp.R +++ b/R/class_inventory_gcp.R @@ -2,7 +2,7 @@ # nocov start inventory_gcp_init <- function() { out <- inventory_gcp_new() - out$reset_cache() + out$reset() out } diff --git a/tests/aws/test-class_inventory_aws.R b/tests/aws/test-class_inventory_aws.R index 6d2287cf9..e6970f33a 100644 --- a/tests/aws/test-class_inventory_aws.R +++ b/tests/aws/test-class_inventory_aws.R @@ -21,7 +21,8 @@ tar_test("inventory_aws class", { ) store <- store_init(repository = "aws", resources = resources) expect_equal(inventory$list_cache(), character(0L)) - expect_null(inventory$misses) + expect_equal(inventory$downloads, 0L) + expect_equal(inventory$misses, 0L) for (key in rev(file.path(prefix, c("w", "x", "y", "z")))) { store$file$path <- store_produce_aws_path( store = store, @@ -30,6 +31,7 @@ tar_test("inventory_aws class", { ) out <- inventory$get_cache(store) expect_equal(inventory$misses, 1L) + expect_equal(inventory$downloads, 1L) expect_equal(out, digest_chr64(head[[key]]$ETag)) expect_equal( sort(inventory$list_cache()), @@ -42,4 +44,12 @@ tar_test("inventory_aws class", { ) ) } + store$file$path <- store_produce_aws_path( + store = store, + name = "nope", + path_store = path_store_default() + ) + expect_null(inventory$get_cache(store)) + expect_equal(inventory$downloads, 1L) + expect_equal(inventory$misses, 2L) }) From 362268860309aff9bc900ecfca5837224d951b47 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 12:08:01 -0500 Subject: [PATCH 43/48] Same with gcp inventories --- tests/gcp/test-class_inventory_gcp.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/gcp/test-class_inventory_gcp.R b/tests/gcp/test-class_inventory_gcp.R index c753cc710..76112c5ed 100644 --- 
a/tests/gcp/test-class_inventory_gcp.R +++ b/tests/gcp/test-class_inventory_gcp.R @@ -25,7 +25,8 @@ tar_test("inventory_gcp class", { ) store <- store_init(repository = "gcp", resources = resources) expect_equal(inventory$list_cache(), character(0L)) - expect_null(inventory$misses) + expect_equal(inventory$downloads, 0L) + expect_equal(inventory$misses, 0L) for (key in rev(file.path(prefix, c("w", "x", "y", "z")))) { store$file$path <- store_produce_gcp_path( store = store, @@ -34,6 +35,7 @@ tar_test("inventory_gcp class", { ) out <- inventory$get_cache(store) expect_equal(inventory$misses, 1L) + expect_equal(inventory$downloads, 1L) expect_equal(out, digest_chr64(head[[key]]$md5)) expect_equal( sort(inventory$list_cache()), @@ -46,4 +48,12 @@ tar_test("inventory_gcp class", { ) ) } + store$file$path <- store_produce_gcp_path( + store = store, + name = "nope", + path_store = path_store_default() + ) + expect_null(inventory$get_cache(store)) + expect_equal(inventory$downloads, 1L) + expect_equal(inventory$misses, 2L) }) From 370aca6cada78700f70880aebb648d49d8460c81 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 12:12:56 -0500 Subject: [PATCH 44/48] Use inventories in AWS targets --- R/class_aws.R | 3 --- R/utils_aws.R | 3 +-- R/utils_gcp.R | 3 +-- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/R/class_aws.R b/R/class_aws.R index dc85934f5..e6c0d5ca0 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -260,9 +260,6 @@ store_has_correct_hash.tar_aws <- function(store) { } store_aws_hash <- function(store) { - - browser() - tar_runtime$inventories$aws <- tar_runtime$inventories$aws %|||% inventory_aws_init() tar_runtime$inventories$aws$get_cache(store = store) diff --git a/R/utils_aws.R b/R/utils_aws.R index 9acaed027..fc1765695 100644 --- a/R/utils_aws.R +++ b/R/utils_aws.R @@ -92,8 +92,7 @@ aws_s3_list_etags <- function( "Listing objects in AWS S3 bucket ", bucket, " prefix ", - prefix, - "..." 
+ prefix ) } pages <- paws.common::paginate( diff --git a/R/utils_gcp.R b/R/utils_gcp.R index c66d72d97..a1086b6b9 100644 --- a/R/utils_gcp.R +++ b/R/utils_gcp.R @@ -72,8 +72,7 @@ gcp_gcs_list_md5s <- function( "Listing objects in GCS bucket ", bucket, " prefix ", - prefix, - "..." + prefix ) } results <- googleCloudStorageR::gcs_list_objects( From 9db9e1097916753a628be6b6c682d54400f5e9e5 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 12:33:08 -0500 Subject: [PATCH 45/48] Update test --- tests/aws/test-class_aws_qs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/aws/test-class_aws_qs.R b/tests/aws/test-class_aws_qs.R index 18696db4b..461b37748 100644 --- a/tests/aws/test-class_aws_qs.R +++ b/tests/aws/test-class_aws_qs.R @@ -537,9 +537,9 @@ tar_test("aws_qs format versioning", { }) expr <- tar_tidy_eval(expr, environment(), TRUE) eval(as.call(list(`tar_script`, expr, ask = FALSE))) - expect_equal(targets::tar_outdated(callr_function = NULL), character(0)) + expect_equal(targets::tar_outdated(callr_function = NULL), "x") tar_make(callr_function = NULL) - expect_equal(tar_progress(x)$progress, "skipped") + expect_equal(tar_progress(x)$progress, "built") }) tar_test("cloud target paths are not in the file path cache", { From 5604ac8a9af2acfe2fc0fd8dae9add5e6b459ed1 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 12:39:18 -0500 Subject: [PATCH 46/48] fix tests --- tests/testthat/test-class_aws.R | 2 +- tests/testthat/test-class_aws_file.R | 5 ++++- tests/testthat/test-class_inventory.R | 6 ++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-class_aws.R b/tests/testthat/test-class_aws.R index 980382773..f44f0547f 100644 --- a/tests/testthat/test-class_aws.R +++ b/tests/testthat/test-class_aws.R @@ -102,7 +102,7 @@ tar_test("package detection", { skip_cran() target <- tar_target(x, "x_value", format = "feather", repository = "aws") out <- 
sort(store_get_packages(target$store)) - exp <- sort(c("paws.storage", "arrow")) + exp <- sort(c("paws.storage", "paws.common", "arrow")) expect_equal(out, exp) }) diff --git a/tests/testthat/test-class_aws_file.R b/tests/testthat/test-class_aws_file.R index 995536621..65ce1755b 100644 --- a/tests/testthat/test-class_aws_file.R +++ b/tests/testthat/test-class_aws_file.R @@ -39,7 +39,10 @@ tar_test("aws_file packages", { format = "file", repository = "aws" ) - expect_equal(store_get_packages(target$store), "paws.storage") + expect_equal( + sort(store_get_packages(target$store)), + sort(c("paws.common", "paws.storage")) + ) }) tar_test("inherits from tar_external", { diff --git a/tests/testthat/test-class_inventory.R b/tests/testthat/test-class_inventory.R index 56db7b183..dd365f530 100644 --- a/tests/testthat/test-class_inventory.R +++ b/tests/testthat/test-class_inventory.R @@ -6,13 +6,15 @@ tar_test("inventory abstract class basic methods", { x <- inventory_init() store <- store_init() expect_equal(x$list_cache(), character(0L)) - expect_null(x$misses) + expect_equal(x$misses, 0L) + expect_equal(x$downloads, 0L) for (index in seq_len(4L)) { out <- x$get_cache(store) expect_equal(x$misses, 1L) + expect_equal(x$downloads, 1L) expect_equal(out, "example_hash") expect_equal(x$list_cache(), "example_bucket|example_key") } - x$reset_cache() + x$reset() expect_equal(x$list_cache(), character(0L)) }) From 21f57305f87d42e3750a01d0a1c05a4a58f80b0d Mon Sep 17 00:00:00 2001 From: wlandau Date: Mon, 13 Nov 2023 13:16:11 -0500 Subject: [PATCH 47/48] Fix #1172 --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/class_gcp.R | 12 +++--------- R/utils_gcp.R | 2 +- tests/gcp/test-class_gcp_qs.R | 4 ++-- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5cce4f4ab..8f647ad76 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,7 @@ Description: Pipeline tools coordinate the pieces of computationally The methodology in this package borrows from GNU 
'Make' (2015, ISBN:978-9881443519) and 'drake' (2018, ). -Version: 1.3.2.9002 +Version: 1.3.2.9003 License: MIT + file LICENSE URL: https://docs.ropensci.org/targets/, https://github.com/ropensci/targets BugReports: https://github.com/ropensci/targets/issues diff --git a/NEWS.md b/NEWS.md index 0ed69df58..3ae1e3207 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# targets 1.3.2.9002 (development) +# targets 1.3.2.9003 (development) ## Invalidating changes diff --git a/R/class_gcp.R b/R/class_gcp.R index 8203dd799..31167a358 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -213,15 +213,9 @@ store_has_correct_hash.tar_gcp <- function(store) { } store_gcp_hash <- function(store) { - path <- store$file$path - head <- gcp_gcs_head( - key = store_gcp_key(path), - bucket = store_gcp_bucket(path), - version = store_gcp_version(path), - verbose = store$resources$gcp$verbose %|||% FALSE, - max_tries = store$resources$gcp$max_tries %|||% 5L - ) - if_any(is.null(head), NULL, digest_chr64(head$md5)) + tar_runtime$inventories$gcp <- tar_runtime$inventories$gcp %|||% + inventory_gcp_init() + tar_runtime$inventories$gcp$get_cache(store = store) } # nocov end diff --git a/R/utils_gcp.R b/R/utils_gcp.R index a1086b6b9..7daf6d3cb 100644 --- a/R/utils_gcp.R +++ b/R/utils_gcp.R @@ -58,7 +58,7 @@ gcp_gcs_list_md5s <- function( verbose = TRUE, max_tries = NULL ) { - verbose <- verbose %|||% FALSE + verbose <- verbose %|||% TRUE old_try_attempts <- getOption("googleAuthR.tryAttempts") on.exit(options(googleAuthR.tryAttempts = old_try_attempts), add = TRUE) if_any( diff --git a/tests/gcp/test-class_gcp_qs.R b/tests/gcp/test-class_gcp_qs.R index 4d6326677..87f5de81f 100644 --- a/tests/gcp/test-class_gcp_qs.R +++ b/tests/gcp/test-class_gcp_qs.R @@ -434,7 +434,7 @@ tar_test("gcp_qs format versioning", { }) expr <- tar_tidy_eval(expr, environment(), TRUE) eval(as.call(list(`tar_script`, expr, ask = FALSE))) - expect_equal(targets::tar_outdated(callr_function = NULL), character(0)) + 
expect_equal(targets::tar_outdated(callr_function = NULL), "x") tar_make(callr_function = NULL) - expect_equal(tar_progress(x)$progress, "skipped") + expect_equal(tar_progress(x)$progress, "built") }) From eea7a334335005a2ed3cd98ef3b3f86241097f2d Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 13 Nov 2023 13:46:18 -0500 Subject: [PATCH 48/48] lint --- R/tar_read.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/tar_read.R b/R/tar_read.R index 319f68b7f..7e0a07207 100644 --- a/R/tar_read.R +++ b/R/tar_read.R @@ -12,7 +12,7 @@ #' () #' and versioning is turned on, then `targets` will record each #' version of each target in its metadata. -#' +#' #' Functions like [tar_read()] #' and [tar_load()] load the version recorded in the local metadata, #' which may not be the same as the "current" version of the @@ -20,7 +20,7 @@ #' and [tar_destroy()] only remove #' the version ID of each target as recorded in the local #' metadata. -#' +#' #' If you want to interact with the *latest* version of an object #' instead of the version ID recorded in the local metadata, #' then you will need to delete the object from the metadata.