From b978993df807af4e1abf328deefab63c338a5e37 Mon Sep 17 00:00:00 2001 From: wlandau Date: Sun, 10 Nov 2024 10:04:44 -0500 Subject: [PATCH] Fix #1371, robustify error = 'null' --- DESCRIPTION | 2 +- NEWS.md | 3 ++- R/class_builder.R | 2 +- R/class_options.R | 4 ++-- R/class_target.R | 23 ++++++++++++++++++-- R/tar_target.R | 32 +++++++++++++++++----------- R/tar_target_raw.R | 2 +- R/utils_condition.R | 5 ++++- man/tar_option_set.Rd | 15 +++++++++---- man/tar_target.Rd | 15 +++++++++---- tests/testthat/test-class_builder.R | 1 + tests/testthat/test-class_options.R | 4 ++-- tests/testthat/test-tar_option_set.R | 4 ++-- tests/testthat/test-tar_target.R | 19 +++++++++++++++++ 14 files changed, 98 insertions(+), 33 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ecd5f072..b9c1dad9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,7 @@ Description: Pipeline tools coordinate the pieces of computationally The methodology in this package borrows from GNU 'Make' (2015, ISBN:978-9881443519) and 'drake' (2018, ). -Version: 1.8.0.9010 +Version: 1.8.0.9011 License: MIT + file LICENSE URL: https://docs.ropensci.org/targets/, https://github.com/ropensci/targets BugReports: https://github.com/ropensci/targets/issues diff --git a/NEWS.md b/NEWS.md index 289541c4..2306eaba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# targets 1.8.0.9010 (development) +# targets 1.8.0.9011 (development) * Un-break workflows that use `format = "file_fast"` (#1339, @koefoeden). * Fix deadlock in `error = "trim"` (#1340, @koefoeden). @@ -14,6 +14,7 @@ * Improve speed and reduce memory consumption by avoiding deep copies of inner environments of target definition objects (#1368). * Reduce memory consumption by storing buds and branches as lightweight references when `memory` is `"transient"` (#1364). * Replace the `memory` class with the new `lookup` class. +* Implement `memory = "auto"` to select transient memory for dynamic branches and persistent memory for other targets (#1371). # targets 1.8.0 diff --git a/R/class_builder.R b/R/class_builder.R index ab801026..831f783b 100644 --- a/R/class_builder.R +++ b/R/class_builder.R @@ -506,7 +506,7 @@ builder_update_object <- function(target) { builder_expect_storage <- function(target) { error_null <- identical(target$settings$error, "null") && metrics_has_error(target$metrics) - !metrics_terminated_early(target$metrics) || error_null + !metrics_terminated_early(target$metrics) && !error_null } builder_ensure_object <- function(target, storage) { diff --git a/R/class_options.R b/R/class_options.R index b4914cc5..528450d3 100644 --- a/R/class_options.R +++ b/R/class_options.R @@ -314,7 +314,7 @@ options_class <- R6::R6Class( self$error %|||% "stop" }, get_memory = function() { - self$memory %|||% "persistent" + self$memory %|||% "auto" }, get_garbage_collection = function() { self$garbage_collection %|||% 1000L @@ -521,7 +521,7 @@ options_class <- R6::R6Class( ) }, validate_memory = function(memory) { - tar_assert_flag(memory, c("persistent", "transient")) + tar_assert_flag(memory, c("auto", "persistent", "transient")) }, validate_garbage_collection = function(garbage_collection) { tar_assert_dbl(garbage_collection) diff --git a/R/class_target.R b/R/class_target.R index 114ce5b3..79d33a5f 100644 --- a/R/class_target.R +++ b/R/class_target.R @@ -10,7 +10,7 @@ target_init <- function( pattern = NULL, iteration = "vector", error = "stop", - memory = "persistent", + memory = "auto", garbage_collection = FALSE, deployment = "worker", priority = 0, @@ -23,6 +23,9 @@ target_init <- function( seed <- tar_seed_create(name) deps <- deps <- deps %|||% deps_function(embody_expr(expr)) command <- command_init(expr, packages, library, string) + if (identical(memory, "auto")) { + memory <- if_any(is.null(pattern), "persistent", "transient") + } cue <- cue %|||% cue_init() if (any(grepl("^aws_", format))) { format <- gsub("^aws_", "", format) @@ -117,8 +120,24 @@ target_ensure_deps <- function(target, pipeline) { ) } +target_value_null <- function(target) { + value_init( + object = NULL, + iteration = target$settings$iteration + ) +} + target_load_value <- function(target, pipeline) { - target$value <- target_read_value(target, pipeline) + target$value <- tryCatch( + target_read_value(target, pipeline), + error = function(condition) { + if_any( + identical(target$settings$error, "null"), + target_value_null(target), + tar_throw_run(conditionMessage(condition)) + ) + } + ) pipeline_set_target(pipeline, target) pipeline_register_loaded(pipeline, target_get_name(target)) } diff --git a/R/tar_target.R b/R/tar_target.R index e29e85a1..5960206b 100644 --- a/R/tar_target.R +++ b/R/tar_target.R @@ -238,7 +238,9 @@ #' * `"continue"`: the whole pipeline keeps going. #' * `"null"`: The errored target continues and returns `NULL`. #' The data hash is deliberately wrong so the target is not -#' up to date for the next run of the pipeline. +#' up to date for the next run of the pipeline. In addition, +#' as of version 1.8.0.9011, a value of `NULL` is given +#' to upstream dependencies with `error = "null"` if loading fails. #' * `"abridge"`: any currently running targets keep running, #' but no new targets launch after that. #' * `"trim"`: all currently running targets stay running. A queued @@ -254,19 +256,25 @@ #' to begin running. #' (Visit #' to learn how to debug targets using saved workspaces.) -#' @param memory Character of length 1, memory strategy. -#' If `"persistent"`, the target stays in memory -#' until the end of the pipeline (unless `storage` is `"worker"`, -#' in which case `targets` unloads the value from memory -#' right after storing it in order to avoid sending -#' copious data over a network). -#' If `"transient"`, the target gets unloaded -#' after every new target completes. -#' Either way, the target gets automatically loaded into memory -#' whenever another target needs the value. +#' @param memory Character of length 1, memory strategy. Possible values: +#' +#' * `"auto"`: new in `targets` version 1.8.0.9011, `memory = "auto"` +#' is equivalent to `memory = "transient"` for dynamic branching +#' (a non-null `pattern` argument) and `memory = "persistent"` +#' for targets that do not use dynamic branching. +#' * `"persistent"`: the target stays in memory +#' until the end of the pipeline (unless `storage` is `"worker"`, +#' in which case `targets` unloads the value from memory +#' right after storing it in order to avoid sending +#' copious data over a network). +#' * `"transient"`: the target gets unloaded +#' after every new target completes. +#' Either way, the target gets automatically loaded into memory +#' whenever another target needs the value. +#' #' For cloud-based dynamic files #' (e.g. `format = "file"` with `repository = "aws"`), -#' this memory strategy applies to the +#' the `memory` option applies to the #' temporary local copy of the file: #' `"persistent"` means it remains until the end of the pipeline #' and is then deleted, diff --git a/R/tar_target_raw.R b/R/tar_target_raw.R index cd7b6ddf..31c513f0 100644 --- a/R/tar_target_raw.R +++ b/R/tar_target_raw.R @@ -51,7 +51,7 @@ tar_target_raw <- function( c("stop", "continue", "null", "abridge", "trim", "workspace") ) deprecate_error_workspace(error) - tar_assert_flag(memory, c("persistent", "transient")) + tar_assert_flag(memory, c("auto", "persistent", "transient")) garbage_collection <- isTRUE(garbage_collection) tar_assert_lgl(garbage_collection) tar_assert_scalar(garbage_collection) diff --git a/R/utils_condition.R b/R/utils_condition.R index 61c15aab..ddb8f769 100644 --- a/R/utils_condition.R +++ b/R/utils_condition.R @@ -213,6 +213,9 @@ safe_condition_class <- function(class) { } is_unsafe_condition_class <- function(class) { - condition <- try(rlang::inform(message = "x", class = class), silent = TRUE) + condition <- try( + suppressMessages(rlang::inform(message = "x", class = class)), + silent = TRUE + ) inherits(condition, "try-error") } diff --git a/man/tar_option_set.Rd b/man/tar_option_set.Rd index bd4f7236..84d257b4 100644 --- a/man/tar_option_set.Rd +++ b/man/tar_option_set.Rd @@ -196,19 +196,26 @@ to begin running. to learn how to debug targets using saved workspaces.) }} -\item{memory}{Character of length 1, memory strategy. -If \code{"persistent"}, the target stays in memory +\item{memory}{Character of length 1, memory strategy. Possible values: +\itemize{ +\item \code{"auto"}: new in \code{targets} version 1.8.0.9011, \code{memory = "auto"} +is equivalent to \code{memory = "transient"} for dynamic branching +(a non-null \code{pattern} argument) and \code{memory = "persistent"} +for targets that do not use dynamic branching. +\item \code{"persistent"}: the target stays in memory until the end of the pipeline (unless \code{storage} is \code{"worker"}, in which case \code{targets} unloads the value from memory right after storing it in order to avoid sending copious data over a network). -If \code{"transient"}, the target gets unloaded +\item \code{"transient"}: the target gets unloaded after every new target completes. Either way, the target gets automatically loaded into memory whenever another target needs the value. +} + For cloud-based dynamic files (e.g. \code{format = "file"} with \code{repository = "aws"}), -this memory strategy applies to the +the \code{memory} option applies to the temporary local copy of the file: \code{"persistent"} means it remains until the end of the pipeline and is then deleted, diff --git a/man/tar_target.Rd b/man/tar_target.Rd index abf2a01e..b8ee9b93 100644 --- a/man/tar_target.Rd +++ b/man/tar_target.Rd @@ -179,19 +179,26 @@ to begin running. to learn how to debug targets using saved workspaces.) }} -\item{memory}{Character of length 1, memory strategy. -If \code{"persistent"}, the target stays in memory +\item{memory}{Character of length 1, memory strategy. Possible values: +\itemize{ +\item \code{"auto"}: new in \code{targets} version 1.8.0.9011, \code{memory = "auto"} +is equivalent to \code{memory = "transient"} for dynamic branching +(a non-null \code{pattern} argument) and \code{memory = "persistent"} +for targets that do not use dynamic branching. +\item \code{"persistent"}: the target stays in memory until the end of the pipeline (unless \code{storage} is \code{"worker"}, in which case \code{targets} unloads the value from memory right after storing it in order to avoid sending copious data over a network). -If \code{"transient"}, the target gets unloaded +\item \code{"transient"}: the target gets unloaded after every new target completes. Either way, the target gets automatically loaded into memory whenever another target needs the value. +} + For cloud-based dynamic files (e.g. \code{format = "file"} with \code{repository = "aws"}), -this memory strategy applies to the +the \code{memory} option applies to the temporary local copy of the file: \code{"persistent"} means it remains until the end of the pipeline and is then deleted, diff --git a/tests/testthat/test-class_builder.R b/tests/testthat/test-class_builder.R index 9da303f2..fde3bfab 100644 --- a/tests/testthat/test-class_builder.R +++ b/tests/testthat/test-class_builder.R @@ -592,6 +592,7 @@ tar_test("error = \"null\" with branching", { skip_cran() tar_script({ library(targets) + tar_option_set(memory = "transient") f <- function(x) { stopifnot(x < 1.5) x diff --git a/tests/testthat/test-class_options.R b/tests/testthat/test-class_options.R index 9a2b69f3..c4749d25 100644 --- a/tests/testthat/test-class_options.R +++ b/tests/testthat/test-class_options.R @@ -272,11 +272,11 @@ tar_test("deprecated error = \"workspace\"", { tar_test("memory", { x <- options_init() - expect_equal(x$get_memory(), "persistent") + expect_equal(x$get_memory(), "auto") x$set_memory("transient") expect_equal(x$get_memory(), "transient") x$reset() - expect_equal(x$get_memory(), "persistent") + expect_equal(x$get_memory(), "auto") expect_error(x$set_memory("invalid"), class = "tar_condition_validate") }) diff --git a/tests/testthat/test-tar_option_set.R b/tests/testthat/test-tar_option_set.R index d7946d4d..6895857e 100644 --- a/tests/testthat/test-tar_option_set.R +++ b/tests/testthat/test-tar_option_set.R @@ -153,11 +153,11 @@ tar_test("deprecated error = \"workspace\"", { }) tar_test("memory", { - expect_equal(tar_option_get("memory"), "persistent") + expect_equal(tar_option_get("memory"), "auto") tar_option_set(memory = "transient") expect_equal(tar_option_get("memory"), "transient") tar_option_reset() - expect_equal(tar_option_get("memory"), "persistent") + expect_equal(tar_option_get("memory"), "auto") expect_error( tar_option_set(memory = "invalid"), class = "tar_condition_validate" diff --git a/tests/testthat/test-tar_target.R b/tests/testthat/test-tar_target.R index 0aa020db..39213886 100644 --- a/tests/testthat/test-tar_target.R +++ b/tests/testthat/test-tar_target.R @@ -75,3 +75,22 @@ tar_test("url format has local repository", { target <- tar_target(x, 1, repository = "aws", format = "url") expect_equal(target$settings$repository, "local") }) + +tar_test("memory = 'auto' sets the correct setting", { + expect_equal( + tar_target(x, 1, memory = "auto")$settings$memory, + "persistent" + ) + expect_equal( + tar_target(x, 1, pattern = map(y), memory = "auto")$settings$memory, + "transient" + ) + expect_equal( + tar_target(x, 1, memory = "persistent")$settings$memory, + "persistent" + ) + expect_equal( + tar_target(x, 1, pattern = map(y), memory = "transient")$settings$memory, + "transient" + ) +})