diff --git a/DESCRIPTION b/DESCRIPTION index e8ff5ca2..3b9852d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,7 @@ Description: Pipeline tools coordinate the pieces of computationally The methodology in this package borrows from GNU 'Make' (2015, ISBN:978-9881443519) and 'drake' (2018, ). -Version: 1.8.0.9013 +Version: 1.8.0.9014 License: MIT + file LICENSE URL: https://docs.ropensci.org/targets/, https://github.com/ropensci/targets BugReports: https://github.com/ropensci/targets/issues @@ -97,7 +97,7 @@ Suggests: paws.storage (>= 0.4.0), pkgload (>= 1.1.0), processx (>= 3.4.3), - qs (>= 0.24.1), + qs2, reprex (>= 2.0.0), rstudioapi (>= 0.11), R.utils (>= 2.6.0), diff --git a/NEWS.md b/NEWS.md index bf7fa0ed..4a51cb30 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# targets 1.8.0.9013 (development) +# targets 1.8.0.9014 (development) * Un-break workflows that use `format = "file_fast"` (#1339, @koefoeden). * Fix deadlock in `error = "trim"` (#1340, @koefoeden). @@ -19,6 +19,7 @@ * Omit whole stem targets from branch subpipelines when `retrieval` is `"main"` and only a bud is actually used. The same cannot be done with branches because each branch may need to be (un)marshaled individually. * Compress branches into references when `retrieval` is `"worker"` and the whole pattern is part of the subpipeline. * Avoid duplicated branch aggregation: just send the branches over the network. +* Back-compatibly switch `format = "qs"` from `qs` to `qs2` (#1373). 
# targets 1.8.0 diff --git a/R/class_qs.R b/R/class_qs.R index 4bb31737..757f7ee8 100644 --- a/R/class_qs.R +++ b/R/class_qs.R @@ -11,18 +11,37 @@ store_assert_format_setting.qs <- function(format) { #' @export store_read_path.tar_qs <- function(store, path) { - qs::qread(file = path, use_alt_rep = TRUE) + tryCatch( + qs2::qs_read( + file = path, + validate_checksum = FALSE, + nthreads = store$resources$qs$nthreads %|||% 1L + ), + # nocov start + error = function(condition) { + getNamespace("qs")$qread( + file = path, + use_alt_rep = TRUE, + nthreads = store$resources$qs$nthreads %|||% 1L + ) + } + # nocov end + ) } #' @export store_write_path.tar_qs <- function(store, object, path) { - preset <- store$resources$qs$preset %|||% - store$resources$preset %|||% - "high" - qs::qsave(x = object, file = path, preset = preset) + # preset is deprecated and not forwarded; qs2 uses the settings below. + qs2::qs_save( + object = object, + file = path, + compress_level = store$resources$qs$compress_level %|||% 3L, + shuffle = store$resources$qs$shuffle %|||% TRUE, + nthreads = store$resources$qs$nthreads %|||% 1L + ) } #' @export store_get_packages.tar_qs <- function(store) { - "qs" + "qs2" } diff --git a/R/class_resources_qs.R b/R/class_resources_qs.R index 4c9c569f..dafd88d9 100644 --- a/R/class_resources_qs.R +++ b/R/class_resources_qs.R @@ -1,16 +1,24 @@ resources_qs_init <- function( - preset = "high" + compress_level = 3L, + shuffle = TRUE, + nthreads = 1L ) { resources_qs_new( - preset = preset + compress_level = compress_level, + shuffle = shuffle, + nthreads = nthreads ) } resources_qs_new <- function( - preset = NULL + compress_level = NULL, + shuffle = NULL, + nthreads = NULL ) { out <- new.env(parent = emptyenv(), hash = FALSE) - out$preset <- preset + out$compress_level <- compress_level + out$shuffle <- shuffle + out$nthreads <- nthreads enclass(out, resources_qs_s3_class) } @@ -18,8 +26,15 @@ resources_qs_s3_class <- c("tar_resources_qs", "tar_resources") #' @export resources_validate.tar_resources_qs <- 
function(resources) { - tar_assert_scalar(resources$preset) - tar_assert_chr(resources$preset) + for (field in c("compress_level", "nthreads")) { + tar_assert_scalar(resources[[field]]) + tar_assert_int(resources[[field]]) + tar_assert_finite(resources[[field]]) + tar_assert_none_na(resources[[field]]) + } + tar_assert_scalar(resources$shuffle) + tar_assert_lgl(resources$shuffle) + tar_assert_none_na(resources$shuffle) } #' @export diff --git a/R/tar_resources.R b/R/tar_resources.R index e47e1d15..42951ba7 100644 --- a/R/tar_resources.R +++ b/R/tar_resources.R @@ -91,9 +91,8 @@ #' Applies to all formats ending with the `"_parquet"` suffix. #' For details on formats, see the `format` argument of [tar_target()]. #' @param qs Output of function `tar_resources_qs()`. -#' Non-default arguments to `qs::qread()` and -#' `qs::qsave()` for `qs`-based storage formats. -#' Applies to all formats ending with the `"_qs"` suffix. +#' Non-default arguments to `qs2::qs_read()` and +#' `qs2::qs_save()` for targets with `format = "qs"`. #' For details on formats, see the `format` argument of [tar_target()]. #' @param repository_cas Output of function [tar_resources_repository_cas()] #' with configuration details for [tar_repository_cas()] storage diff --git a/R/tar_resources_qs.R b/R/tar_resources_qs.R index 9b01674a..43d9ed00 100644 --- a/R/tar_resources_qs.R +++ b/R/tar_resources_qs.R @@ -8,8 +8,15 @@ #' @inheritSection tar_resources Resources #' @return Object of class `"tar_resources_qs"`, to be supplied #' to the qs argument of `tar_resources()`. -#' @param preset Character of length 1, `preset` -#' argument of `qs::qsave()`. Defaults to `"high"`. +#' @param compress_level Positive integer, `compress_level` argument of +#' [qs2::qs_save()] to control the compression level. +#' @param shuffle `TRUE` to use byte shuffling in +#' [qs2::qs_save()] to improve compression at the cost of some +#' computation time, `FALSE` to forgo byte shuffling. 
+#' @param nthreads Positive integer, number of threads to use for +#' functions in the `qs2` package to save and read the data. +#' @param preset Deprecated in `targets` version 1.8.0.9014 (2024-11-11) +#' and not used. #' @examples #' # Somewhere in you target script file (usually _targets.R): #' tar_target( @@ -21,11 +28,15 @@ #' ) #' ) tar_resources_qs <- function( - preset = targets::tar_option_get("resources")$qs$preset + compress_level = targets::tar_option_get("resources")$qs$compress_level, + shuffle = targets::tar_option_get("resources")$qs$shuffle, + nthreads = targets::tar_option_get("resources")$qs$nthreads, + preset = NULL ) { - preset <- preset %|||% "high" out <- resources_qs_init( - preset = preset + compress_level = compress_level %|||% 3L, + shuffle = shuffle %|||% TRUE, + nthreads = nthreads %|||% 1L ) resources_validate(out) out diff --git a/R/tar_target.R b/R/tar_target.R index 5960206b..52c66755 100644 --- a/R/tar_target.R +++ b/R/tar_target.R @@ -47,9 +47,15 @@ #' existing files (and/or directories), then the format becomes #' `"file"` before [tar_make()] saves the target. Otherwise, #' the format becomes `"qs"`. -#' * `"qs"`: Uses `qs::qsave()` and `qs::qread()`. Should work for -#' most objects, much faster than `"rds"`. Optionally set the -#' preset for `qsave()` through `tar_resources()` and `tar_resources_qs()`. +#' * `"qs"`: Uses `qs2::qs_save()` and `qs2::qs_read()`. Should work for +#' most objects, much faster than `"rds"`. Optionally configure settings +#' through `tar_resources()` and `tar_resources_qs()`. +#' +#' Prior to `targets` version 1.8.0.9014, `format = "qs"` used the `qs` +#' package. `qs` has since been superseded in favor of `qs2`, and so +#' later versions of `targets` use `qs2` to save new data. To read +#' existing data, `targets` first attempts [qs2::qs_read()], and then if +#' that fails, it falls back on [qs::qread()]. #' * `"feather"`: Uses `arrow::write_feather()` and #' `arrow::read_feather()` (version 2.0). 
Much faster than `"rds"`, #' but the value must be a data frame. Optionally set diff --git a/man/tar_option_set.Rd b/man/tar_option_set.Rd index 1f7d1ff2..8e8cc4b3 100644 --- a/man/tar_option_set.Rd +++ b/man/tar_option_set.Rd @@ -428,9 +428,15 @@ of the target. If the return value is a character vector of existing files (and/or directories), then the format becomes \code{"file"} before \code{\link[=tar_make]{tar_make()}} saves the target. Otherwise, the format becomes \code{"qs"}. -\item \code{"qs"}: Uses \code{qs::qsave()} and \code{qs::qread()}. Should work for -most objects, much faster than \code{"rds"}. Optionally set the -preset for \code{qsave()} through \code{tar_resources()} and \code{tar_resources_qs()}. +\item \code{"qs"}: Uses \code{qs2::qs_save()} and \code{qs2::qs_read()}. Should work for +most objects, much faster than \code{"rds"}. Optionally configure settings +through \code{tar_resources()} and \code{tar_resources_qs()}. + +Prior to \code{targets} version 1.8.0.9014, \code{format = "qs"} used the \code{qs} +package. \code{qs} has since been superseded in favor of \code{qs2}, and so +later versions of \code{targets} use \code{qs2} to save new data. To read +existing data, \code{targets} first attempts \code{\link[qs2:qs_read]{qs2::qs_read()}}, and then if +that fails, it falls back on \code{\link[qs:qread]{qs::qread()}}. \item \code{"feather"}: Uses \code{arrow::write_feather()} and \code{arrow::read_feather()} (version 2.0). Much faster than \code{"rds"}, but the value must be a data frame. Optionally set diff --git a/man/tar_resources.Rd b/man/tar_resources.Rd index b2128ba6..e2f65140 100644 --- a/man/tar_resources.Rd +++ b/man/tar_resources.Rd @@ -96,9 +96,8 @@ Applies to all formats ending with the \code{"_parquet"} suffix. For details on formats, see the \code{format} argument of \code{\link[=tar_target]{tar_target()}}.} \item{qs}{Output of function \code{tar_resources_qs()}. 
-Non-default arguments to \code{qs::qread()} and -\code{qs::qsave()} for \code{qs}-based storage formats. -Applies to all formats ending with the \code{"_qs"} suffix. +Non-default arguments to \code{qs2::qs_read()} and +\code{qs2::qs_save()} for targets with \code{format = "qs"}. For details on formats, see the \code{format} argument of \code{\link[=tar_target]{tar_target()}}.} \item{repository_cas}{Output of function \code{\link[=tar_resources_repository_cas]{tar_resources_repository_cas()}} diff --git a/man/tar_resources_qs.Rd b/man/tar_resources_qs.Rd index 30d508a2..3d7ec1d6 100644 --- a/man/tar_resources_qs.Rd +++ b/man/tar_resources_qs.Rd @@ -4,11 +4,26 @@ \alias{tar_resources_qs} \title{Target resources: qs storage formats} \usage{ -tar_resources_qs(preset = targets::tar_option_get("resources")$qs$preset) +tar_resources_qs( + compress_level = targets::tar_option_get("resources")$qs$compress_level, + shuffle = targets::tar_option_get("resources")$qs$shuffle, + nthreads = targets::tar_option_get("resources")$qs$nthreads, + preset = NULL +) } \arguments{ -\item{preset}{Character of length 1, \code{preset} -argument of \code{qs::qsave()}. Defaults to \code{"high"}.} +\item{compress_level}{Positive integer, \code{compress_level} argument of +\code{\link[qs2:qs_save]{qs2::qs_save()}} to control the compression level.} + +\item{shuffle}{\code{TRUE} to use byte shuffling in +\code{\link[qs2:qs_save]{qs2::qs_save()}} to improve compression at the cost of some +computation time, \code{FALSE} to forgo byte shuffling.} + +\item{nthreads}{Positive integer, number of threads to use for +functions in the \code{qs2} package to save and read the data.} + +\item{preset}{Deprecated in \code{targets} version 1.8.0.9014 (2024-11-11) +and not used.} } \value{ Object of class \code{"tar_resources_qs"}, to be supplied diff --git a/man/tar_target.Rd b/man/tar_target.Rd index e69eb6c5..ac66cf31 100644 --- a/man/tar_target.Rd +++ b/man/tar_target.Rd @@ -368,9 +368,15 @@ of the target. 
If the return value is a character vector of existing files (and/or directories), then the format becomes \code{"file"} before \code{\link[=tar_make]{tar_make()}} saves the target. Otherwise, the format becomes \code{"qs"}. -\item \code{"qs"}: Uses \code{qs::qsave()} and \code{qs::qread()}. Should work for -most objects, much faster than \code{"rds"}. Optionally set the -preset for \code{qsave()} through \code{tar_resources()} and \code{tar_resources_qs()}. +\item \code{"qs"}: Uses \code{qs2::qs_save()} and \code{qs2::qs_read()}. Should work for +most objects, much faster than \code{"rds"}. Optionally configure settings +through \code{tar_resources()} and \code{tar_resources_qs()}. + +Prior to \code{targets} version 1.8.0.9014, \code{format = "qs"} used the \code{qs} +package. \code{qs} has since been superseded in favor of \code{qs2}, and so +later versions of \code{targets} use \code{qs2} to save new data. To read +existing data, \code{targets} first attempts \code{\link[qs2:qs_read]{qs2::qs_read()}}, and then if +that fails, it falls back on \code{\link[qs:qread]{qs::qread()}}. \item \code{"feather"}: Uses \code{arrow::write_feather()} and \code{arrow::read_feather()} (version 2.0). Much faster than \code{"rds"}, but the value must be a data frame. 
Optionally set diff --git a/tests/testthat/test-class_resources_qs.R b/tests/testthat/test-class_resources_qs.R index 564622af..e5809334 100644 --- a/tests/testthat/test-class_resources_qs.R +++ b/tests/testthat/test-class_resources_qs.R @@ -1,15 +1,15 @@ tar_test("create tar_resources_qs object", { - x <- resources_qs_init(preset = "high") + x <- resources_qs_init() expect_silent(resources_validate(x)) }) tar_test("prohibit empty tar_resources_qs object", { - x <- resources_qs_init(preset = NULL) + x <- resources_qs_init(compress_level = NULL) expect_error(resources_validate(x), class = "tar_condition_validate") }) tar_test("print tar_resources_qs object", { - x <- resources_qs_init(preset = "high") + x <- resources_qs_init() out <- utils::capture.output(print(x)) expect_true(any(grepl("tar_resources_qs", out))) }) diff --git a/tests/testthat/test-tar_resources_qs.R b/tests/testthat/test-tar_resources_qs.R index 9681efe3..76749438 100644 --- a/tests/testthat/test-tar_resources_qs.R +++ b/tests/testthat/test-tar_resources_qs.R @@ -3,14 +3,14 @@ tar_test("tar_resources_qs()", { expect_silent(resources_validate(out)) }) -tar_test("tar_resources_qs() default preset", { +tar_test("tar_resources_qs() non-default compression level", { tar_option_set( resources = tar_resources( qs = tar_resources_qs( - preset = "low" + compress_level = 4L ) ) ) out <- tar_resources_qs() - expect_equal(out$preset, "low") + expect_equal(out$compress_level, 4L) })