From 1281be5518b80a68d4cecae9ca3651fa95e86d7d Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 10:09:39 -0400 Subject: [PATCH 01/12] 64-bit hashing --- R/class_pattern.R | 2 +- R/class_value.R | 2 +- R/utils_digest.R | 17 ----------------- tests/testthat/test-class_group.R | 4 ++-- tests/testthat/test-class_list.R | 6 +++--- tests/testthat/test-class_vector.R | 4 ++-- 6 files changed, 9 insertions(+), 26 deletions(-) diff --git a/R/class_pattern.R b/R/class_pattern.R index 8c792bcec..2619202d5 100644 --- a/R/class_pattern.R +++ b/R/class_pattern.R @@ -361,7 +361,7 @@ pattern_combine_niblings_siblings <- function(niblings, siblings) { pattern_name_branches <- function(parent, niblings) { tuples <- do.call(paste, niblings) - suffixes <- map_chr(tuples, digest_chr32) + suffixes <- map_chr(tuples, digest_chr64) paste0(parent, "_", suffixes) } diff --git a/R/class_value.R b/R/class_value.R index 0cd96f80b..945808d88 100644 --- a/R/class_value.R +++ b/R/class_value.R @@ -13,7 +13,7 @@ value_new <- function(object = NULL) { } value_hash_slice <- function(value, index) { - digest_obj32(value_produce_slice_kernel(value, index)) + digest_obj64(value_produce_slice_kernel(value, index)) } value_hash_slices <- function(value) { diff --git a/R/utils_digest.R b/R/utils_digest.R index e4563cc97..2ef1d97c9 100644 --- a/R/utils_digest.R +++ b/R/utils_digest.R @@ -1,7 +1,3 @@ -digest_chr32 <- function(object, ...) { - vdigest32(object, serialize = FALSE, file = FALSE, seed = 0L, ...) -} - digest_chr64 <- function(object, ...) { vdigest64(object, serialize = FALSE, file = FALSE, seed = 0L, ...) } @@ -19,17 +15,6 @@ digest_file64 <- function(object, ...) { ) } -digest_obj32 <- function(object, ...) { - vdigest32( - object = list(object), - serialize = TRUE, - serializeVersion = 3L, - file = FALSE, - seed = 0L, - ... - ) -} - digest_obj64 <- function(object, ...) { vdigest64( object = list(object), @@ -41,8 +26,6 @@ digest_obj64 <- function(object, ...) { ) } -vdigest32 <- digest::getVDigest(algo = "xxhash32") - vdigest64 <- digest::getVDigest(algo = "xxhash64") vdigest64_file <- digest::getVDigest(algo = "xxhash64", errormode = "warn") diff --git a/tests/testthat/test-class_group.R b/tests/testthat/test-class_group.R index 3d76834fa..b10931711 100644 --- a/tests/testthat/test-class_group.R +++ b/tests/testthat/test-class_group.R @@ -33,7 +33,7 @@ tar_test("value_hash_slice(group)", { for (index in seq_len(3)) { exp_object <- object[object$tar_group == index, ] exp_object$tar_group <- NULL - exp <- digest_obj32(exp_object) + exp <- digest_obj64(exp_object) expect_equiv(value_hash_slice(x, index), exp) } }) @@ -47,7 +47,7 @@ tar_test("value_hash_slices(group)", { exp <- map_chr(seq_len(3), function(index) { exp_object <- object[object$tar_group == index, ] exp_object$tar_group <- NULL - digest_obj32(exp_object) + digest_obj64(exp_object) }) expect_equal(value_hash_slices(x), exp) }) diff --git a/tests/testthat/test-class_list.R b/tests/testthat/test-class_list.R index 37d7f7e97..1fb71c217 100644 --- a/tests/testthat/test-class_list.R +++ b/tests/testthat/test-class_list.R @@ -26,14 +26,14 @@ tar_test("value_produce_slice(list)", { tar_test("value_hash_slice(list)", { x <- value_init(object = "abc", iteration = "list") x$object <- data_frame(x = seq_len(26), y = letters) - expect_equal(value_hash_slice(x, 1L), digest_obj32(seq_len(26))) - expect_equal(value_hash_slice(x, 2L), digest_obj32(letters)) + expect_equal(value_hash_slice(x, 1L), digest_obj64(seq_len(26))) + expect_equal(value_hash_slice(x, 2L), digest_obj64(letters)) }) tar_test("value_hash_slices(list)", { x <- value_init(object = "abc", iteration = "list") x$object <- data_frame(x = seq_len(26), y = letters) - exp <- c(digest_obj32(seq_len(26)), digest_obj32(letters)) + exp <- c(digest_obj64(seq_len(26)), digest_obj64(letters)) expect_equal(value_hash_slices(x), exp) }) diff --git a/tests/testthat/test-class_vector.R b/tests/testthat/test-class_vector.R index 87b7dab0d..054706d2d 100644 --- a/tests/testthat/test-class_vector.R +++ b/tests/testthat/test-class_vector.R @@ -21,7 +21,7 @@ tar_test("value_hash_slice(vector)", { x$object <- object for (index in seq_len(nrow(object))) { slice <- vctrs::vec_slice(x = object, i = index) - expect_equal(value_hash_slice(x, index), digest_obj32(slice)) + expect_equal(value_hash_slice(x, index), digest_obj64(slice)) } }) @@ -33,7 +33,7 @@ tar_test("value_hash_slices(vector)", { expect_equal(length(out), 26) for (index in seq_len(nrow(object))) { slice <- vctrs::vec_slice(x = object, i = index) - exp <- digest_obj32(slice) + exp <- digest_obj64(slice) expect_equal(out[index], exp) } }) From ffb43eaf03dbd4d36fdec9b35eed1a6833829753 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 10:40:45 -0400 Subject: [PATCH 02/12] Rename hashing functions and files --- NAMESPACE | 6 +++--- R/class_aws.R | 2 +- R/class_command.R | 2 +- R/class_file.R | 8 ++++---- R/class_gcp.R | 2 +- R/class_inventory_aws.R | 2 +- R/class_inventory_gcp.R | 2 +- R/class_mermaid.R | 2 +- R/class_meta.R | 2 +- R/class_pattern.R | 6 +++--- R/class_value.R | 2 +- R/tar_cue.R | 2 +- R/{utils_digest.R => utils_hash.R} | 8 ++++---- R/utils_imports.R | 18 +++++++++--------- R/utils_url.R | 2 +- man/tar_cue.Rd | 2 +- tests/testthat/test-class_group.R | 4 ++-- tests/testthat/test-class_list.R | 6 +++--- tests/testthat/test-class_meta.R | 2 +- tests/testthat/test-class_vector.R | 4 ++-- tests/testthat/test-utils_digest.R | 7 ------- tests/testthat/test-utils_hash.R | 7 +++++++ 22 files changed, 49 insertions(+), 49 deletions(-) rename R/{utils_digest.R => utils_hash.R} (77%) delete mode 100644 tests/testthat/test-utils_digest.R create mode 100644 tests/testthat/test-utils_hash.R diff --git a/NAMESPACE b/NAMESPACE index 99ff311a9..34163c096 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,9 +2,9 @@ S3method(future_value_target,condition) S3method(future_value_target,tar_target) -S3method(hash_object,"function") -S3method(hash_object,character) -S3method(hash_object,default) +S3method(hash_import_object,"function") +S3method(hash_import_object,character) +S3method(hash_import_object,default) S3method(imports_init,default) S3method(imports_init,tar_imports) S3method(pipeline_from_list,default) diff --git a/R/class_aws.R b/R/class_aws.R index e8960048b..38cbc7e43 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -258,7 +258,7 @@ store_upload_object_aws <- function(store) { invert = TRUE ) store$file$path <- c(path, paste0("version=", head$VersionId)) - store$file$hash <- digest_chr64(head$ETag) + store$file$hash <- hash_character(head$ETag) invisible() } diff --git a/R/class_command.R b/R/class_command.R index eaf221b0c..f5d2a7792 100644 --- a/R/class_command.R +++ b/R/class_command.R @@ -9,7 +9,7 @@ command_init <- function( expr <- as.expression(expr) deps <- deps %|||% deps_function(embody_expr(expr)) string <- string %|||% mask_pointers(tar_deparse_safe(expr)) - hash <- digest_chr64(string) + hash <- hash_character(string) command_new(expr, packages, library, deps, seed, string, hash) } diff --git a/R/class_file.R b/R/class_file.R index 7de097b33..d1a463884 100644 --- a/R/class_file.R +++ b/R/class_file.R @@ -150,13 +150,13 @@ file_list_files <- function(path) { file_hash <- function(files) { n <- length(files) if (identical(n, 0L)) { - return(null64) + return(hash_null) } - hash <- digest_file64(files) + hash <- hash_file(files) if (identical(n, 1L)) { return(hash) } - digest_chr64(paste(hash, collapse = "")) + hash_character(paste(hash, collapse = "")) } file_info <- function(files) { @@ -189,7 +189,7 @@ file_bytes <- function(info) { } file_size <- function(bytes) { - digest_obj64(bytes) + hash_object(bytes) } file_diff_chr <- function(dbl) { diff --git a/R/class_gcp.R b/R/class_gcp.R index fb83ae75e..513aebf4b 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -209,7 +209,7 @@ store_upload_object_gcp <- function(store) { invert = TRUE ) store$file$path <- c(path, paste0("version=", head$generation)) - store$file$hash <- digest_chr64(head$md5) + store$file$hash <- hash_character(head$md5) invisible() } diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index a340c7575..f0c62e038 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ -42,7 +42,7 @@ inventory_aws_class <- R6::R6Class( ) for (key in names(results)) { name <- self$get_name(key = key, bucket = bucket) - self$cache[[name]] <- digest_chr64(results[[key]]) + self$cache[[name]] <- hash_character(results[[key]]) } } ) diff --git a/R/class_inventory_gcp.R b/R/class_inventory_gcp.R index cd072a4d9..d2b4cc0c2 100644 --- a/R/class_inventory_gcp.R +++ b/R/class_inventory_gcp.R @@ -35,7 +35,7 @@ inventory_gcp_class <- R6::R6Class( ) for (key in names(results)) { name <- self$get_name(key = key, bucket = bucket) - self$cache[[name]] <- digest_chr64(results[[key]]) + self$cache[[name]] <- hash_character(results[[key]]) } } ) diff --git a/R/class_mermaid.R b/R/class_mermaid.R index f0baaa5bf..e8569a54a 100644 --- a/R/class_mermaid.R +++ b/R/class_mermaid.R @@ -122,7 +122,7 @@ mermaid_class <- R6::R6Class( produce_mermaid_vertices = function(data) { sprintf( "%s%s%s%s:::%s", - sprintf("x%s", as.character(map_chr(data$name, digest_chr64))), + sprintf("x%s", as.character(map_chr(data$name, hash_character))), data$open, sprintf("\"%s\"", data$label), data$close, diff --git a/R/class_meta.R b/R/class_meta.R index 5a3b00f68..d20c0e1b9 100644 --- a/R/class_meta.R +++ b/R/class_meta.R @@ -86,7 +86,7 @@ meta_class <- R6::R6Class( ) hashes <- hashes[nzchar(hashes)] string <- paste(c(names(hashes), hashes), collapse = "") - digest_chr64(string) + hash_character(string) }, produce_depend = function(target, pipeline) { self$hash_deps(target$command$deps, pipeline) diff --git a/R/class_pattern.R b/R/class_pattern.R index 2619202d5..9dfd7bae8 100644 --- a/R/class_pattern.R +++ b/R/class_pattern.R @@ -91,7 +91,7 @@ target_branches_over.tar_pattern <- function(target, name) { #' @export target_update_depend.tar_pattern <- function(target, pipeline, meta) { depends <- meta$depends - memory_set_object(depends, target_get_name(target), null64) + memory_set_object(depends, target_get_name(target), hash_null) } #' @export @@ -259,7 +259,7 @@ pattern_priority <- function() { pattern_produce_data_hash <- function(target, pipeline, meta) { hash_branches <- meta$hash_deps(target_get_children(target), pipeline) - digest_chr64(paste(target$settings$iteration, hash_branches)) + hash_character(paste(target$settings$iteration, hash_branches)) } pattern_conclude_initial <- function(target, pipeline, scheduler, meta) { @@ -361,7 +361,7 @@ pattern_combine_niblings_siblings <- function(niblings, siblings) { pattern_name_branches <- function(parent, niblings) { tuples <- do.call(paste, niblings) - suffixes <- map_chr(tuples, digest_chr64) + suffixes <- map_chr(tuples, hash_character) paste0(parent, "_", suffixes) } diff --git a/R/class_value.R b/R/class_value.R index 945808d88..76828514e 100644 --- a/R/class_value.R +++ b/R/class_value.R @@ -13,7 +13,7 @@ value_new <- function(object = NULL) { } value_hash_slice <- function(value, index) { - digest_obj64(value_produce_slice_kernel(value, index)) + hash_object(value_produce_slice_kernel(value, index)) } value_hash_slices <- function(value) { diff --git a/R/tar_cue.R b/R/tar_cue.R index 9d35ce96e..c151bf129 100644 --- a/R/tar_cue.R +++ b/R/tar_cue.R @@ -57,7 +57,7 @@ #' Append the hashes of those dependencies to the string representation #' of the current function. #' 1. Compute the hash of the final string representation using -#' `targets:::digest_chr64()`. +#' `targets:::hash_character()`. #' #' Above, (3) is important because user-defined functions #' have dependencies of their own, such as other user-defined diff --git a/R/utils_digest.R b/R/utils_hash.R similarity index 77% rename from R/utils_digest.R rename to R/utils_hash.R index 2ef1d97c9..20b802906 100644 --- a/R/utils_digest.R +++ b/R/utils_hash.R @@ -1,8 +1,8 @@ -digest_chr64 <- function(object, ...) { +hash_character <- function(object, ...) { vdigest64(object, serialize = FALSE, file = FALSE, seed = 0L, ...) } -digest_file64 <- function(object, ...) { +hash_file <- function(object, ...) { vapply( X = object, FUN = vdigest64_file, @@ -15,7 +15,7 @@ digest_file64 <- function(object, ...) { ) } -digest_obj64 <- function(object, ...) { +hash_object <- function(object, ...) { vdigest64( object = list(object), serialize = TRUE, @@ -30,4 +30,4 @@ vdigest64 <- digest::getVDigest(algo = "xxhash64") vdigest64_file <- digest::getVDigest(algo = "xxhash64", errormode = "warn") -null64 <- digest_obj64(NULL) +hash_null <- hash_object(NULL) diff --git a/R/utils_imports.R b/R/utils_imports.R index 6678b3ad4..00dc02579 100644 --- a/R/utils_imports.R +++ b/R/utils_imports.R @@ -62,31 +62,31 @@ rep_to <- function(index, names, lengths) { hash_import <- function(name, hashes, envir, graph) { value <- base::get(x = name, envir = envir, inherits = FALSE) - hash_object(value, name, hashes, graph) + hash_import_object(value, name, hashes, graph) } -hash_object <- function(value, name, hashes, graph) { - UseMethod("hash_object") +hash_import_object <- function(value, name, hashes, graph) { + UseMethod("hash_import_object") } #' @export -hash_object.character <- function(value, name, hashes, graph) { +hash_import_object.character <- function(value, name, hashes, graph) { base <- paste(value, collapse = " ") - assign(x = name, value = digest_chr64(base), envir = hashes) + assign(x = name, value = hash_character(base), envir = hashes) } #' @export -hash_object.function <- function(value, name, hashes, graph) { +hash_import_object.function <- function(value, name, hashes, graph) { str <- mask_pointers(tar_deparse_safe(value)) deps <- sort_chr( names(igraph::neighbors(graph = graph, v = name, mode = "in")) ) dep_hashes <- unlist(lapply(deps, get_field, collection = hashes)) base <- paste(c(str, dep_hashes), collapse = " ") - assign(x = name, value = digest_chr64(base), envir = hashes) + assign(x = name, value = hash_character(base), envir = hashes) } #' @export -hash_object.default <- function(value, name, hashes, graph) { - assign(x = name, value = digest_obj64(value), envir = hashes) +hash_import_object.default <- function(value, name, hashes, graph) { + assign(x = name, value = hash_object(value), envir = hashes) } diff --git a/R/utils_url.R b/R/utils_url.R index 150eac21d..00a10c28b 100644 --- a/R/utils_url.R +++ b/R/utils_url.R @@ -69,7 +69,7 @@ url_hash <- function( verbose ) { envir <- new.env(parent = emptyenv()) - digest_obj64( + hash_object( lapply( url, url_hash_impl, diff --git a/man/tar_cue.Rd b/man/tar_cue.Rd index cde21c4cb..9e84fb396 100644 --- a/man/tar_cue.Rd +++ b/man/tar_cue.Rd @@ -110,7 +110,7 @@ and global objects that the current function depends on. Append the hashes of those dependencies to the string representation of the current function. \item Compute the hash of the final string representation using -\code{targets:::digest_chr64()}. +\code{targets:::hash_character()}. } Above, (3) is important because user-defined functions diff --git a/tests/testthat/test-class_group.R b/tests/testthat/test-class_group.R index b10931711..f897f1cc1 100644 --- a/tests/testthat/test-class_group.R +++ b/tests/testthat/test-class_group.R @@ -33,7 +33,7 @@ tar_test("value_hash_slice(group)", { for (index in seq_len(3)) { exp_object <- object[object$tar_group == index, ] exp_object$tar_group <- NULL - exp <- digest_obj64(exp_object) + exp <- hash_object(exp_object) expect_equiv(value_hash_slice(x, index), exp) } }) @@ -47,7 +47,7 @@ tar_test("value_hash_slices(group)", { exp <- map_chr(seq_len(3), function(index) { exp_object <- object[object$tar_group == index, ] exp_object$tar_group <- NULL - digest_obj64(exp_object) + hash_object(exp_object) }) expect_equal(value_hash_slices(x), exp) }) diff --git a/tests/testthat/test-class_list.R b/tests/testthat/test-class_list.R index 1fb71c217..7cbd49146 100644 --- a/tests/testthat/test-class_list.R +++ b/tests/testthat/test-class_list.R @@ -26,14 +26,14 @@ tar_test("value_produce_slice(list)", { tar_test("value_hash_slice(list)", { x <- value_init(object = "abc", iteration = "list") x$object <- data_frame(x = seq_len(26), y = letters) - expect_equal(value_hash_slice(x, 1L), digest_obj64(seq_len(26))) - expect_equal(value_hash_slice(x, 2L), digest_obj64(letters)) + expect_equal(value_hash_slice(x, 1L), hash_object(seq_len(26))) + expect_equal(value_hash_slice(x, 2L), hash_object(letters)) }) tar_test("value_hash_slices(list)", { x <- value_init(object = "abc", iteration = "list") x$object <- data_frame(x = seq_len(26), y = letters) - exp <- c(digest_obj64(seq_len(26)), digest_obj64(letters)) + exp <- c(hash_object(seq_len(26)), hash_object(letters)) expect_equal(value_hash_slices(x), exp) }) diff --git a/tests/testthat/test-class_meta.R b/tests/testthat/test-class_meta.R index 1dbb7129f..061b79c0b 100644 --- a/tests/testthat/test-class_meta.R +++ b/tests/testthat/test-class_meta.R @@ -243,7 +243,7 @@ tar_test("data hash of pattern updates", { hash <- data$data[data$name == "map"] expect_equal(length(hash), 1L) expect_false(is.na(hash)) - expect_false(hash == null64) + expect_false(hash == hash_null) pipeline <- pipeline_init( list( target_init( diff --git a/tests/testthat/test-class_vector.R b/tests/testthat/test-class_vector.R index 054706d2d..2f3e60c70 100644 --- a/tests/testthat/test-class_vector.R +++ b/tests/testthat/test-class_vector.R @@ -21,7 +21,7 @@ tar_test("value_hash_slice(vector)", { x$object <- object for (index in seq_len(nrow(object))) { slice <- vctrs::vec_slice(x = object, i = index) - expect_equal(value_hash_slice(x, index), digest_obj64(slice)) + expect_equal(value_hash_slice(x, index), hash_object(slice)) } }) @@ -33,7 +33,7 @@ tar_test("value_hash_slices(vector)", { expect_equal(length(out), 26) for (index in seq_len(nrow(object))) { slice <- vctrs::vec_slice(x = object, i = index) - exp <- digest_obj64(slice) + exp <- hash_object(slice) expect_equal(out[index], exp) } }) diff --git a/tests/testthat/test-utils_digest.R b/tests/testthat/test-utils_digest.R deleted file mode 100644 index d0c50f5c5..000000000 --- a/tests/testthat/test-utils_digest.R +++ /dev/null @@ -1,7 +0,0 @@ -tar_test("digest functions", { - expect_silent(tar_assert_chr(digest_chr64("x"))) - expect_silent(tar_assert_scalar(digest_chr64("x"))) - expect_silent(tar_assert_chr(digest_obj64(0L))) - expect_silent(tar_assert_scalar(digest_obj64(0L))) - expect_error(digest_chr64(0L)) -}) diff --git a/tests/testthat/test-utils_hash.R b/tests/testthat/test-utils_hash.R new file mode 100644 index 000000000..8f53301a2 --- /dev/null +++ b/tests/testthat/test-utils_hash.R @@ -0,0 +1,7 @@ +tar_test("hashing utilities", { + expect_silent(tar_assert_chr(hash_character("x"))) + expect_silent(tar_assert_scalar(hash_character("x"))) + expect_silent(tar_assert_chr(hash_object(0L))) + expect_silent(tar_assert_scalar(hash_object(0L))) + expect_error(hash_character(0L)) +}) From 8539be9fb858d73f1b705afd9ab011c789d643ba Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 12:16:07 -0400 Subject: [PATCH 03/12] switch hashes --- DESCRIPTION | 5 ++--- NAMESPACE | 3 +-- NEWS.md | 4 +++- R/class_aws.R | 2 +- R/class_command.R | 2 +- R/class_file.R | 4 ++-- R/class_gcp.R | 2 +- R/class_inventory_aws.R | 2 +- R/class_inventory_gcp.R | 2 +- R/class_mermaid.R | 2 +- R/class_meta.R | 2 +- R/class_pattern.R | 4 ++-- R/tar_cue.R | 2 +- R/tar_package.R | 3 +-- R/utils_hash.R | 35 ++++++-------------------------- R/utils_imports.R | 4 ++-- man/tar_cue.Rd | 2 +- tests/testthat/test-utils_hash.R | 28 +++++++++++++++++++------ 18 files changed, 50 insertions(+), 58 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7e5588c91..3a918a1c4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,7 @@ Description: Pipeline tools coordinate the pieces of computationally The methodology in this package borrows from GNU 'Make' (2015, ISBN:978-9881443519) and 'drake' (2018, ). -Version: 1.6.0.9000 +Version: 1.6.0.9001 License: MIT + file LICENSE URL: https://docs.ropensci.org/targets/, https://github.com/ropensci/targets BugReports: https://github.com/ropensci/targets/issues @@ -60,13 +60,12 @@ Imports: cli (>= 2.0.2), codetools (>= 0.2.16), data.table (>= 1.12.8), - digest (>= 0.6.25), igraph (>= 2.0.0), knitr (>= 1.34), ps, R6 (>= 2.4.1), rlang (>= 1.0.0), - secretbase, + secretbase (>= 0.4.0), stats, tibble (>= 3.0.1), tidyselect (>= 1.1.0), diff --git a/NAMESPACE b/NAMESPACE index 34163c096..fc02a05a6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -532,8 +532,6 @@ importFrom(data.table,fread) importFrom(data.table,fwrite) importFrom(data.table,rbindlist) importFrom(data.table,set) -importFrom(digest,digest) -importFrom(digest,getVDigest) importFrom(igraph,V) importFrom(igraph,adjacent_vertices) importFrom(igraph,as_edgelist) @@ -557,6 +555,7 @@ importFrom(rlang,is_installed) importFrom(rlang,quo_squash) importFrom(rlang,warn) importFrom(secretbase,sha3) +importFrom(secretbase,siphash13) importFrom(stats,complete.cases) importFrom(stats,runif) importFrom(tibble,as_tibble) diff --git a/NEWS.md b/NEWS.md index bd2972612..6a2ae90bd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ -# targets 1.6.0.9000 +# targets 1.6.0.9001 +## Invalidating changes +* Use `secretbase::siphash13()` instead of `digest(algo = "xxhash64", serializationVersion = 3)` so hashes of in-memory objects no longer depend on serialization version 3 headers (#1244, @shikokuchuo). Unfortunately, pipelines built with earlier versions of `targets` will need to rerun. However, `tar_make()` now prompts the user so a decision can be made to continue or downgrade in the interactive case. # targets 1.6.0 diff --git a/R/class_aws.R b/R/class_aws.R index 38cbc7e43..6f61fb277 100644 --- a/R/class_aws.R +++ b/R/class_aws.R @@ -258,7 +258,7 @@ store_upload_object_aws <- function(store) { invert = TRUE ) store$file$path <- c(path, paste0("version=", head$VersionId)) - store$file$hash <- hash_character(head$ETag) + store$file$hash <- hash_object(head$ETag) invisible() } diff --git a/R/class_command.R b/R/class_command.R index f5d2a7792..fa85b95bd 100644 --- a/R/class_command.R +++ b/R/class_command.R @@ -9,7 +9,7 @@ command_init <- function( expr <- as.expression(expr) deps <- deps %|||% deps_function(embody_expr(expr)) string <- string %|||% mask_pointers(tar_deparse_safe(expr)) - hash <- hash_character(string) + hash <- hash_object(string) command_new(expr, packages, library, deps, seed, string, hash) } diff --git a/R/class_file.R b/R/class_file.R index d1a463884..25caaf65b 100644 --- a/R/class_file.R +++ b/R/class_file.R @@ -152,11 +152,11 @@ file_hash <- function(files) { if (identical(n, 0L)) { return(hash_null) } - hash <- hash_file(files) + hash <- map_chr(x = files, f = hash_file, USE.NAMES = FALSE) if (identical(n, 1L)) { return(hash) } - hash_character(paste(hash, collapse = "")) + hash_object(paste(hash, collapse = "")) } file_info <- function(files) { diff --git a/R/class_gcp.R b/R/class_gcp.R index 513aebf4b..5f6cbf4fc 100644 --- a/R/class_gcp.R +++ b/R/class_gcp.R @@ -209,7 +209,7 @@ store_upload_object_gcp <- function(store) { invert = TRUE ) store$file$path <- c(path, paste0("version=", head$generation)) - store$file$hash <- hash_character(head$md5) + store$file$hash <- hash_object(head$md5) invisible() } diff --git a/R/class_inventory_aws.R b/R/class_inventory_aws.R index f0c62e038..646a23238 100644 --- a/R/class_inventory_aws.R +++ b/R/class_inventory_aws.R @@ -42,7 +42,7 @@ inventory_aws_class <- R6::R6Class( ) for (key in names(results)) { name <- self$get_name(key = key, bucket = bucket) - self$cache[[name]] <- hash_character(results[[key]]) + self$cache[[name]] <- hash_object(results[[key]]) } } ) diff --git a/R/class_inventory_gcp.R b/R/class_inventory_gcp.R index d2b4cc0c2..3d3d72a9a 100644 --- a/R/class_inventory_gcp.R +++ b/R/class_inventory_gcp.R @@ -35,7 +35,7 @@ inventory_gcp_class <- R6::R6Class( ) for (key in names(results)) { name <- self$get_name(key = key, bucket = bucket) - self$cache[[name]] <- hash_character(results[[key]]) + self$cache[[name]] <- hash_object(results[[key]]) } } ) diff --git a/R/class_mermaid.R b/R/class_mermaid.R index e8569a54a..dcaffadf1 100644 --- a/R/class_mermaid.R +++ b/R/class_mermaid.R @@ -122,7 +122,7 @@ mermaid_class <- R6::R6Class( produce_mermaid_vertices = function(data) { sprintf( "%s%s%s%s:::%s", - sprintf("x%s", as.character(map_chr(data$name, hash_character))), + sprintf("x%s", as.character(map_chr(data$name, hash_object))), data$open, sprintf("\"%s\"", data$label), data$close, diff --git a/R/class_meta.R b/R/class_meta.R index d20c0e1b9..c908395b2 100644 --- a/R/class_meta.R +++ b/R/class_meta.R @@ -86,7 +86,7 @@ meta_class <- R6::R6Class( ) hashes <- hashes[nzchar(hashes)] string <- paste(c(names(hashes), hashes), collapse = "") - hash_character(string) + hash_object(string) }, produce_depend = function(target, pipeline) { self$hash_deps(target$command$deps, pipeline) diff --git a/R/class_pattern.R b/R/class_pattern.R index 9dfd7bae8..1ef1e8e3a 100644 --- a/R/class_pattern.R +++ b/R/class_pattern.R @@ -259,7 +259,7 @@ pattern_priority <- function() { pattern_produce_data_hash <- function(target, pipeline, meta) { hash_branches <- meta$hash_deps(target_get_children(target), pipeline) - hash_character(paste(target$settings$iteration, hash_branches)) + hash_object(paste(target$settings$iteration, hash_branches)) } pattern_conclude_initial <- function(target, pipeline, scheduler, meta) { @@ -361,7 +361,7 @@ pattern_combine_niblings_siblings <- function(niblings, siblings) { pattern_name_branches <- function(parent, niblings) { tuples <- do.call(paste, niblings) - suffixes <- map_chr(tuples, hash_character) + suffixes <- map_chr(tuples, hash_object) paste0(parent, "_", suffixes) } diff --git a/R/tar_cue.R b/R/tar_cue.R index c151bf129..43aaaa254 100644 --- a/R/tar_cue.R +++ b/R/tar_cue.R @@ -57,7 +57,7 @@ #' Append the hashes of those dependencies to the string representation #' of the current function. #' 1. Compute the hash of the final string representation using -#' `targets:::hash_character()`. +#' `targets:::hash_object()`. #' #' Above, (3) is important because user-defined functions #' have dependencies of their own, such as other user-defined diff --git a/R/tar_package.R b/R/tar_package.R index 98c40c6cb..9c047359d 100644 --- a/R/tar_package.R +++ b/R/tar_package.R @@ -18,7 +18,6 @@ #' num_ansi_colors symbol #' @importFrom codetools findGlobals #' @importFrom data.table data.table fread fwrite rbindlist set -#' @importFrom digest digest getVDigest #' @importFrom igraph adjacent_vertices as_edgelist gorder #' graph_from_data_frame igraph_opt igraph_options is_dag simplify topo_sort #' V @@ -27,7 +26,7 @@ #' @importFrom R6 R6Class #' @importFrom rlang abort as_function check_installed enquo inform #' is_installed quo_squash warn -#' @importFrom secretbase sha3 +#' @importFrom secretbase sha3 siphash13 #' @importFrom stats complete.cases runif #' @importFrom tibble as_tibble #' @importFrom tidyselect all_of any_of contains ends_with everything diff --git a/R/utils_hash.R b/R/utils_hash.R index 20b802906..5fb0460f2 100644 --- a/R/utils_hash.R +++ b/R/utils_hash.R @@ -1,33 +1,10 @@ -hash_character <- function(object, ...) { - vdigest64(object, serialize = FALSE, file = FALSE, seed = 0L, ...) +hash_file <- function(path) { + stopifnot(length(path) == 1L) + secretbase::siphash13(file = path) } -hash_file <- function(object, ...) { - vapply( - X = object, - FUN = vdigest64_file, - serialize = FALSE, - file = TRUE, - seed = 0L, - ..., - FUN.VALUE = character(1L), - USE.NAMES = FALSE - ) +hash_object <- function(object) { + secretbase::siphash13(x = object) } -hash_object <- function(object, ...) { - vdigest64( - object = list(object), - serialize = TRUE, - serializeVersion = 3L, - file = FALSE, - seed = 0L, - ... - ) -} - -vdigest64 <- digest::getVDigest(algo = "xxhash64") - -vdigest64_file <- digest::getVDigest(algo = "xxhash64", errormode = "warn") - -hash_null <- hash_object(NULL) +hash_null <- hash_object(object = NULL) diff --git a/R/utils_imports.R b/R/utils_imports.R index 00dc02579..8e5e06a12 100644 --- a/R/utils_imports.R +++ b/R/utils_imports.R @@ -72,7 +72,7 @@ hash_import_object <- function(value, name, hashes, graph) { #' @export hash_import_object.character <- function(value, name, hashes, graph) { base <- paste(value, collapse = " ") - assign(x = name, value = hash_character(base), envir = hashes) + assign(x = name, value = hash_object(base), envir = hashes) } #' @export @@ -83,7 +83,7 @@ hash_import_object.function <- function(value, name, hashes, graph) { ) dep_hashes <- unlist(lapply(deps, get_field, collection = hashes)) base <- paste(c(str, dep_hashes), collapse = " ") - assign(x = name, value = hash_character(base), envir = hashes) + assign(x = name, value = hash_object(base), envir = hashes) } #' @export diff --git a/man/tar_cue.Rd b/man/tar_cue.Rd index 9e84fb396..312bfc980 100644 --- a/man/tar_cue.Rd +++ b/man/tar_cue.Rd @@ -110,7 +110,7 @@ and global objects that the current function depends on. Append the hashes of those dependencies to the string representation of the current function. \item Compute the hash of the final string representation using -\code{targets:::hash_character()}. +\code{targets:::hash_object()}. } Above, (3) is important because user-defined functions diff --git a/tests/testthat/test-utils_hash.R b/tests/testthat/test-utils_hash.R index 8f53301a2..1d91a5df5 100644 --- a/tests/testthat/test-utils_hash.R +++ b/tests/testthat/test-utils_hash.R @@ -1,7 +1,23 @@ -tar_test("hashing utilities", { - expect_silent(tar_assert_chr(hash_character("x"))) - expect_silent(tar_assert_scalar(hash_character("x"))) - expect_silent(tar_assert_chr(hash_object(0L))) - expect_silent(tar_assert_scalar(hash_object(0L))) - expect_error(hash_character(0L)) +tar_test("hash_object()", { + for (object in list("x", 0L)) { + out <- hash_object(object) + expect_true(is.character(out)) + expect_true(length(out) == 1L) + expect_false(anyNA(out)) + expect_true(nzchar(out)) + } +}) + +tar_test("hash_file()", { + for (object in list("x", 0L)) { + file <- tempfile() + saveRDS(object, file) + out <- hash_file(file) + expect_true(is.character(out)) + expect_true(length(out) == 1L) + expect_false(anyNA(out)) + expect_true(nzchar(out)) + expect_false(out == hash_object(file)) + unlink(file) + } }) From 8173799ab9e88f353a74e709490375dae38a5cc6 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 12:39:49 -0400 Subject: [PATCH 04/12] type safety in data.table::fread() --- NEWS.md | 4 +++ R/class_crew.R | 4 ++- R/class_database.R | 47 +++++++++++++++++++++++++--- R/class_database_aws.R | 4 +++ R/class_database_gcp.R | 4 +++ R/class_database_local.R | 4 +++ R/class_meta.R | 2 ++ R/class_progress.R | 1 + tests/testthat/test-class_database.R | 27 ++++++++++------ 9 files changed, 82 insertions(+), 15 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6a2ae90bd..6dc5f7408 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,10 @@ * Use `secretbase::siphash13()` instead of `digest(algo = "xxhash64", serializationVersion = 3)` so hashes of in-memory objects no longer depend on serialization version 3 headers (#1244, @shikokuchuo). Unfortunately, pipelines built with earlier versions of `targets` will need to rerun. However, `tar_make()` now prompts the user so a decision can be made to continue or downgrade in the interactive case. +## Other improvements + +* For type safety in the internal database class, read all columns as character vectors in `data.table::fread()`, then convert them to the correct types afterwards. + # targets 1.6.0 * Modernize `extras` in `tar_renv()`. diff --git a/R/class_crew.R b/R/class_crew.R index de61d55bb..6d12ffeb7 100644 --- a/R/class_crew.R +++ b/R/class_crew.R @@ -329,7 +329,9 @@ database_crew <- function(path_store) { database_init( path = file.path(path_meta_dir(path_store), "crew"), subkey = file.path(basename(path_meta("")), "crew"), - header = c("controller", "worker", "seconds", "targets") + header = c("controller", "worker", "seconds", "targets"), + integer_columns = "targets", + numeric_columns = "seconds" ) } diff --git a/R/class_database.R b/R/class_database.R index ed499edd9..0c0b4609c 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -2,6 +2,8 @@ database_init <- function( path = tempfile(), subkey = basename(tempfile()), header = "name", + integer_columns = character(0L), + numeric_columns = character(0L), list_columns = character(0L), list_column_modes = character(0L), repository = tar_options$get_repository_meta(), @@ -19,6 +21,8 @@ database_init <- function( path = path, key = key, header = header, + integer_columns = integer_columns, + numeric_columns = numeric_columns, list_columns = list_columns, list_column_modes = list_column_modes, resources = resources @@ -28,6 +32,8 @@ database_init <- function( path = path, key = key, header = header, + integer_columns = integer_columns, + numeric_columns = numeric_columns, list_columns = list_columns, list_column_modes = list_column_modes, resources = resources @@ -37,6 +43,8 @@ database_init <- function( path = path, key = key, header = header, + integer_columns = integer_columns, + numeric_columns = numeric_columns, list_columns = list_columns, list_column_modes = list_column_modes, resources = resources @@ -59,6 +67,8 @@ database_class <- R6::R6Class( path = NULL, key = NULL, header = NULL, + integer_columns = NULL, + numeric_columns = NULL, list_columns = NULL, list_column_modes = NULL, resources = NULL, @@ -69,6 +79,8 @@ database_class <- R6::R6Class( path = NULL, key = NULL, header = NULL, + integer_columns = NULL, + numeric_columns = NULL, list_columns = NULL, list_column_modes = NULL, resources = NULL, @@ -78,6 +90,8 @@ database_class <- R6::R6Class( self$path <- path self$key <- key self$header <- header + self$integer_columns <- integer_columns + self$numeric_columns <- numeric_columns self$list_columns <- list_columns self$list_column_modes <- list_column_modes self$resources <- resources @@ -275,9 +289,16 @@ database_class <- R6::R6Class( sep = database_sep_outer, fill = TRUE, na.strings = "", - encoding = encoding + encoding = encoding, + colClasses = "character" ) out <- as_data_frame(out) + for (name in self$integer_columns) { + out[[name]] <- as.integer(out[[name]]) + } + for (name in self$numeric_columns) { + out[[name]] <- as.numeric(out[[name]]) + } if (nrow(out) < 1L) { return(out) } @@ -387,9 +408,17 @@ database_class <- R6::R6Class( invisible() } }, - validate_columns = function(header, list_columns) { - if (!all(list_columns %in% header)) { - tar_throw_validate("all list columns must be in the header") + validate_columns = function( + header, + integer_columns, + numeric_columns, + list_columns + ) { + special_columns <- c(integer_columns, numeric_columns, list_columns) + if (!all(special_columns %in% header)) { + tar_throw_validate( + "all integer/numeric/list columns must be in the header" + ) } if (!is.null(header) && !("name" %in% header)) { tar_throw_validate("header must have a column called \"name\"") @@ -416,7 +445,6 @@ database_class <- R6::R6Class( }, validate = function() { memory_validate(self$memory) - self$validate_columns(self$header, self$list_columns) self$validate_file() tar_assert_chr(self$path) tar_assert_scalar(self$path) @@ -427,7 +455,16 @@ database_class <- R6::R6Class( tar_assert_none_na(self$key) tar_assert_nzchar(self$key) tar_assert_chr(self$header) + tar_assert_chr(self$integer_columns) + tar_assert_chr(self$numeric_columns) tar_assert_chr(self$list_columns) + tar_assert_chr(self$list_column_modes) + self$validate_columns( + self$header, + self$integer_columns, + self$numeric_columns, + self$list_columns + ) } ) ) diff --git a/R/class_database_aws.R b/R/class_database_aws.R index 285160743..0cdde0b33 100644 --- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -5,6 +5,8 @@ database_aws_new <- function( path = NULL, key = NULL, header = NULL, + numeric_columns = NULL, + integer_columns = NULL, list_columns = NULL, list_column_modes = NULL, buffer = NULL, @@ -15,6 +17,8 @@ database_aws_new <- function( path = path, key = key, header = header, + numeric_columns = numeric_columns, + integer_columns = integer_columns, list_columns = list_columns, list_column_modes = list_column_modes, buffer = buffer, diff --git a/R/class_database_gcp.R b/R/class_database_gcp.R index c83d90ece..2d9db923e 100644 --- a/R/class_database_gcp.R +++ b/R/class_database_gcp.R @@ -5,6 +5,8 @@ database_gcp_new <- function( path = NULL, key = NULL, header = NULL, + numeric_columns = NULL, + integer_columns = NULL, list_columns = NULL, list_column_modes = NULL, buffer = NULL, @@ -15,6 +17,8 @@ database_gcp_new <- function( path = path, key = key, header = header, + numeric_columns = numeric_columns, + integer_columns = integer_columns, list_columns = list_columns, list_column_modes = list_column_modes, buffer = buffer, diff --git a/R/class_database_local.R b/R/class_database_local.R index 7a34f50d5..5bce4461b 100644 --- a/R/class_database_local.R +++ b/R/class_database_local.R @@ -3,6 +3,8 @@ database_local_new <- function( path = NULL, key = NULL, header = NULL, + integer_columns = NULL, + numeric_columns = NULL, list_columns = NULL, list_column_modes = NULL, resources = NULL, @@ -13,6 +15,8 @@ database_local_new <- function( path = path, key = key, header = header, + integer_columns = integer_columns, + numeric_columns = numeric_columns, list_columns = list_columns, list_column_modes = list_column_modes, resources = resources, diff --git a/R/class_meta.R b/R/class_meta.R index c908395b2..8700029a8 100644 --- a/R/class_meta.R +++ b/R/class_meta.R @@ -160,6 +160,8 @@ database_meta <- function(path_store) { path = path_meta(path_store = path_store), subkey = file.path(basename(path_meta("")), "meta"), header = header_meta(), + integer_columns = "seed", + numeric_columns = c("bytes", "seconds"), list_columns = c("path", "children"), list_column_modes = c("character", "character") ) diff --git a/R/class_progress.R b/R/class_progress.R index 41f57bd32..0f0918774 100644 --- a/R/class_progress.R +++ b/R/class_progress.R @@ -239,6 +239,7 @@ database_progress <- function(path_store) { path = path_progress(path_store = path_store), subkey = file.path(basename(path_meta("")), "progress"), header = header_progress(), + integer_columns = "branches" ) } diff --git a/tests/testthat/test-class_database.R b/tests/testthat/test-class_database.R index d3f539854..6da392293 100644 --- a/tests/testthat/test-class_database.R +++ b/tests/testthat/test-class_database.R @@ -247,17 +247,19 @@ tar_test("database$preprocess() on empty data", { tar_test("database$preprocess()", { path <- tempfile() lines <- c( - "name|col2|col3|col4", - "x|e02|1*2|1", - "e|e12|e13*e14|2", - "e|e22|e23*e24*e25|3", - "f|e32|x|4" + "name|col2|col3|col4|col5", + "x|e02|1*2|1|1", + "e|e12|e13*e14|2|2", + "e|e22|e23*e24*e25|3|3", + "f|e32|x|4|4" ) writeLines(lines, path) db <- database_init( path = path, header = colnames(data), - list_columns = "col3" + list_columns = "col3", + integer_columns = "col4", + numeric_columns = "col5" ) db$preprocess(write = TRUE) out <- readLines(path) @@ -267,12 +269,19 @@ tar_test("database$preprocess()", { name = "e", col2 = "e22", col3 = c("e23", "e24", "e25"), - col4 = 3L + col4 = 3L, + col5 = 3 ) expect_equal(db$get_row("e"), exp) - exp <- list(name = "f", col2 = "e32", col3 = "x", col4 = 4L) + exp <- list(name = "f", col2 = "e32", col3 = "x", col4 = 4L, col5 = 4) expect_equal(db$get_row("f"), exp) - exp <- list(name = "x", col2 = "e02", col3 = c("1", "2"), col4 = 1L) + exp <- list( + name = "x", + col2 = "e02", + col3 = c("1", "2"), + col4 = 1L, + col5 = 1 + ) expect_equal(db$get_row("x"), exp) }) From 0ded6628b29c59ec61d2b44dd13f6c078d98a4b0 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 12:56:32 -0400 Subject: [PATCH 05/12] More database class safety --- R/class_database.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/class_database.R b/R/class_database.R index 0c0b4609c..048747685 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -294,10 +294,16 @@ database_class <- R6::R6Class( ) out <- as_data_frame(out) for (name in self$integer_columns) { - out[[name]] <- as.integer(out[[name]]) + value <- out[[name]] + if (!is.null(value)) { + out[[name]] <- as.integer(value) + } } for (name in self$numeric_columns) { - out[[name]] <- as.numeric(out[[name]]) + value <- out[[name]] + if (!is.null(value)) { + out[[name]] <- as.numeric(value) + } } if (nrow(out) < 1L) { return(out) From 60b50a154ff30a4811ce05882b07c79a33ad341c Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 13:00:24 -0400 Subject: [PATCH 06/12] another hash swap --- tests/testthat/helper-aws.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/helper-aws.R b/tests/testthat/helper-aws.R index 101fbb87b..b85c1d158 100644 --- a/tests/testthat/helper-aws.R +++ b/tests/testthat/helper-aws.R @@ -11,7 +11,7 @@ random_bucket_name <- function() { paste0( "targets-test-bucket-", substr( - digest::digest(tempfile(), algo = "sha256"), + secretbase::siphash13(tempfile()), start = 0, stop = 43 ) From c8165b4db332dd543fac9dae378d01f71c33b470 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 13:42:08 -0400 Subject: [PATCH 07/12] Add menu/message on pkg upgrade that causes pipelines to rerun --- R/utils_assert.R | 42 +++++++++++++++++++ R/utils_callr.R | 14 ++++++- .../test-tar_assert_store_noninvalidating.R | 19 +++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 tests/interactive/test-tar_assert_store_noninvalidating.R diff --git a/R/utils_assert.R b/R/utils_assert.R index 3018100aa..cdb771dec 100644 --- a/R/utils_assert.R +++ b/R/utils_assert.R @@ -587,6 +587,48 @@ tar_assert_store <- function(store) { ) } +# Tested in tests/interactive/test-tar_assert_store_noninvalidating.R +# nocov start +tar_assert_store_noninvalidating <- function(store, threshold, prompt) { + process <- tar_process( + names = tidyselect::any_of("version_targets"), + store = store + ) + version_old <- process$value + if (!length(version_old)) { + return() + } + version_current <- as.character(utils::packageVersion("targets")) + if (utils::compareVersion(a = version_old, b = threshold) > 0L) { + return() + } + tar_message_run( + "You are running {targets} version ", + version_current, + ", and the pipeline was last run with version ", + version_old, + ". Just after version ", + threshold, + ", {targets} made changes that cause the targets in old pipelines ", + "to rerun. For details, please see ", + "https://github.com/ropensci/targets/blob/main/NEWS.md. Sorry for the ", + "inconvenience. As a workaround, you can either rerun this pipeline ", + "from scratch, or you can stop/interrupt the pipeline and downgrade ", + "to {targets} version ", + threshold, + " to keep your work up to date in the short term." + ) + choice <- NULL + if (prompt) { + choice <- utils::menu( + title = "\nStop the pipeline?", + choices = c("Yes", "No") + ) + } + choice +} +# nocov end + #' @export #' @rdname tar_assert tar_assert_target <- function(x, msg = NULL) { diff --git a/R/utils_callr.R b/R/utils_callr.R index f9105f2ca..38d83721e 100644 --- a/R/utils_callr.R +++ b/R/utils_callr.R @@ -8,10 +8,22 @@ callr_outer <- function( store, fun ) { + tar_assert_script(script) tar_assert_scalar(store) tar_assert_chr(store) tar_assert_nzchar(store) - tar_assert_script(script) + choice <- tar_assert_store_noninvalidating( + store, + threshold = "1.6.0", + prompt = grepl("^tar_make", fun) + ) + # Tested in tests/interactive/test-tar_assert_store_noninvalidating.R + # nocov start + if (identical(choice, 1L)) { + tar_message_run("Pipeline stopped.") + return(invisible()) + } + # nocov end out <- callr_dispatch( targets_function = targets_function, targets_arguments = targets_arguments, diff --git a/tests/interactive/test-tar_assert_store_noninvalidating.R b/tests/interactive/test-tar_assert_store_noninvalidating.R new file mode 100644 index 000000000..5431200c2 --- /dev/null +++ b/tests/interactive/test-tar_assert_store_noninvalidating.R @@ -0,0 +1,19 @@ +tar_test("tar_assert_store_noninvalidating()", { + rstudioapi::restartSession() + # remotes::install_version("targets", version = "1.6.0") # nolint + rstudioapi::restartSession() + targets::tar_script(tar_target(x, 1)) + targets::tar_make() + rstudioapi::restartSession() + # pkgload::load_all() # nolint + expect_equal(tar_outdated(callr_function = NULL), "x") + expect_message( + tar_outdated(callr_function = NULL), + class = "tar_condition_run" + ) + tar_make() # Select 1. Should stop pipeline. + tar_make(callr_function = NULL) # Select 2. Should rerun pipeline. + expect_equal(tar_outdated(callr_function = NULL), character(0L)) + expect_silent(tar_outdated(callr_function = NULL)) + expect_silent(tar_make(callr_function = NULL, reporter = "silent")) +}) From 98490c1af0937cfb6c8cf4ce9374e3ffdcdeb20f Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 13:45:22 -0400 Subject: [PATCH 08/12] Fix #1244 --- NAMESPACE | 1 + NEWS.md | 3 ++- R/tar_package.R | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fc02a05a6..1f484dff5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -572,6 +572,7 @@ importFrom(tidyselect,starts_with) importFrom(tools,file_path_sans_ext) importFrom(utils,browseURL) importFrom(utils,capture.output) +importFrom(utils,compareVersion) importFrom(utils,data) importFrom(utils,globalVariables) importFrom(utils,head) diff --git a/NEWS.md b/NEWS.md index 6dc5f7408..1b49f5702 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,11 @@ ## Invalidating changes -* Use `secretbase::siphash13()` instead of `digest(algo = "xxhash64", serializationVersion = 3)` so hashes of in-memory objects no longer depend on serialization version 3 headers (#1244, @shikokuchuo). Unfortunately, pipelines built with earlier versions of `targets` will need to rerun. However, `tar_make()` now prompts the user so a decision can be made to continue or downgrade in the interactive case. +* Use `secretbase::siphash13()` instead of `digest(algo = "xxhash64", serializationVersion = 3)` so hashes of in-memory objects no longer depend on serialization version 3 headers (#1244, @shikokuchuo). Unfortunately, pipelines built with earlier versions of `targets` will need to rerun. ## Other improvements +* Inform and prompt the user when the pipeline was built with an old version of `targets` and changes to the package will cause the current work to rerun (#1244). For the `tar_make*()` functions, `utils::menu()` prompts the user to give people a chance to downgrade if necessary. * For type safety in the internal database class, read all columns as character vectors in `data.table::fread()`, then convert them to the correct types afterwards. # targets 1.6.0 diff --git a/R/tar_package.R b/R/tar_package.R index 9c047359d..948593333 100644 --- a/R/tar_package.R +++ b/R/tar_package.R @@ -32,8 +32,8 @@ #' @importFrom tidyselect all_of any_of contains ends_with everything #' last_col matches num_range one_of starts_with #' @importFrom tools file_path_sans_ext -#' @importFrom utils browseURL capture.output data globalVariables head menu -#' packageVersion stack tail +#' @importFrom utils browseURL capture.output compareVersion data +#' globalVariables head menu packageVersion stack tail #' @importFrom vctrs vec_c vec_rbind vec_size vec_slice #' @importFrom yaml read_yaml NULL From 27af7f850176f0fc15622b2731c0208ea03eab6c Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 13:53:23 -0400 Subject: [PATCH 09/12] Migrate cloud tests --- tests/aws/test-class_inventory_aws.R | 2 +- tests/gcp/test-class_inventory_gcp.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/aws/test-class_inventory_aws.R b/tests/aws/test-class_inventory_aws.R index e6970f33a..62105aed7 100644 --- a/tests/aws/test-class_inventory_aws.R +++ b/tests/aws/test-class_inventory_aws.R @@ -32,7 +32,7 @@ tar_test("inventory_aws class", { out <- inventory$get_cache(store) expect_equal(inventory$misses, 1L) expect_equal(inventory$downloads, 1L) - expect_equal(out, digest_chr64(head[[key]]$ETag)) + expect_equal(out, hash_object(head[[key]]$ETag)) expect_equal( sort(inventory$list_cache()), sort( diff --git a/tests/gcp/test-class_inventory_gcp.R b/tests/gcp/test-class_inventory_gcp.R index 76112c5ed..57e7e6fdf 100644 --- a/tests/gcp/test-class_inventory_gcp.R +++ b/tests/gcp/test-class_inventory_gcp.R @@ -36,7 +36,7 @@ tar_test("inventory_gcp class", { out <- inventory$get_cache(store) expect_equal(inventory$misses, 1L) expect_equal(inventory$downloads, 1L) - expect_equal(out, digest_chr64(head[[key]]$md5)) + expect_equal(out, hash_object(head[[key]]$md5)) expect_equal( sort(inventory$list_cache()), sort( From 9052f71be4faec39fb2a32683f1bc95c8fb9d955 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 14:16:37 -0400 Subject: [PATCH 10/12] logical db columns --- R/class_database.R | 25 +++++++- R/class_database_aws.R | 6 +- R/class_database_gcp.R | 6 +- R/class_database_local.R | 2 + tests/testthat/test-class_database.R | 90 ++++++++++++++++------------ 5 files changed, 85 insertions(+), 44 deletions(-) diff --git a/R/class_database.R b/R/class_database.R index 048747685..a5c9e8dec 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -2,6 +2,7 @@ database_init <- function( path = tempfile(), subkey = basename(tempfile()), header = "name", + logical_columns = character(0L), integer_columns = character(0L), numeric_columns = character(0L), list_columns = character(0L), @@ -21,6 +22,7 @@ database_init <- function( path = path, key = key, header = header, + logical_columns = logical_columns, integer_columns = integer_columns, numeric_columns = numeric_columns, list_columns = list_columns, @@ -32,6 +34,7 @@ database_init <- function( path = path, key = key, header = header, + logical_columns = logical_columns, integer_columns = integer_columns, numeric_columns = numeric_columns, list_columns = list_columns, @@ -43,6 +46,7 @@ database_init <- function( path = path, key = key, header = header, + logical_columns = logical_columns, integer_columns = integer_columns, numeric_columns = numeric_columns, list_columns = list_columns, @@ -67,6 +71,7 @@ database_class <- R6::R6Class( path = NULL, key = NULL, header = NULL, + logical_columns = NULL, integer_columns = NULL, numeric_columns = NULL, list_columns = NULL, @@ -79,6 +84,7 @@ database_class <- R6::R6Class( path = NULL, key = NULL, header = NULL, + logical_columns = NULL, integer_columns = NULL, numeric_columns = NULL, list_columns = NULL, @@ -90,6 +96,7 @@ database_class <- R6::R6Class( self$path <- path self$key <- key self$header <- header + self$logical_columns <- logical_columns self$integer_columns <- integer_columns self$numeric_columns <- numeric_columns self$list_columns <- list_columns @@ -293,6 +300,12 @@ database_class <- R6::R6Class( colClasses = "character" ) out <- as_data_frame(out) + for (name in self$logical_columns) { + value <- out[[name]] + if (!is.null(value)) { + out[[name]] <- as.logical(value) + } + } for (name in self$integer_columns) { value <- out[[name]] if (!is.null(value)) { @@ -416,14 +429,20 @@ database_class <- R6::R6Class( }, validate_columns = function( header, + logical_columns, integer_columns, numeric_columns, list_columns ) { - special_columns <- c(integer_columns, numeric_columns, list_columns) + special_columns <- c( + logical_columns, + integer_columns, + numeric_columns, + list_columns + ) if (!all(special_columns %in% header)) { tar_throw_validate( - "all integer/numeric/list columns must be in the header" + "all logical/integer/numeric/list columns must be in the header" ) } if (!is.null(header) && !("name" %in% header)) { @@ -461,12 +480,14 @@ database_class <- R6::R6Class( tar_assert_none_na(self$key) tar_assert_nzchar(self$key) tar_assert_chr(self$header) + tar_assert_chr(self$logical_columns) tar_assert_chr(self$integer_columns) tar_assert_chr(self$numeric_columns) tar_assert_chr(self$list_columns) tar_assert_chr(self$list_column_modes) self$validate_columns( self$header, + self$logical_columns, self$integer_columns, self$numeric_columns, self$list_columns diff --git a/R/class_database_aws.R b/R/class_database_aws.R index 0cdde0b33..f8db244a2 100644 --- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -5,8 +5,9 @@ database_aws_new <- function( path = NULL, key = NULL, header = NULL, - numeric_columns = NULL, + logical_columns = NULL, integer_columns = NULL, + numeric_columns = NULL, list_columns = NULL, list_column_modes = NULL, buffer = NULL, @@ -17,8 +18,9 @@ database_aws_new <- function( path = path, key = key, header = header, - numeric_columns = numeric_columns, + logical_columns = logical_columns, integer_columns = integer_columns, + numeric_columns = numeric_columns, list_columns = list_columns, list_column_modes = list_column_modes, buffer = buffer, diff --git a/R/class_database_gcp.R b/R/class_database_gcp.R index 2d9db923e..4350359b5 100644 --- a/R/class_database_gcp.R +++ b/R/class_database_gcp.R @@ -5,8 +5,9 @@ database_gcp_new <- function( path = NULL, key = NULL, header = NULL, - numeric_columns = NULL, + logical_columns = NULL, integer_columns = NULL, + numeric_columns = NULL, list_columns = NULL, list_column_modes = NULL, buffer = NULL, @@ -17,8 +18,9 @@ database_gcp_new <- function( path = path, key = key, header = header, - numeric_columns = numeric_columns, + logical_columns = logical_columns, integer_columns = integer_columns, + numeric_columns = numeric_columns, list_columns = list_columns, list_column_modes = list_column_modes, buffer = buffer, diff --git a/R/class_database_local.R b/R/class_database_local.R index 5bce4461b..9a5da88e8 100644 --- a/R/class_database_local.R +++ b/R/class_database_local.R @@ -3,6 +3,7 @@ database_local_new <- function( path = NULL, key = NULL, header = NULL, + logical_columns = NULL, integer_columns = NULL, numeric_columns = NULL, list_columns = NULL, @@ -15,6 +16,7 @@ database_local_new <- function( path = path, key = key, header = header, + logical_columns = logical_columns, integer_columns = integer_columns, numeric_columns = numeric_columns, list_columns = list_columns, diff --git a/tests/testthat/test-class_database.R b/tests/testthat/test-class_database.R index 6da392293..9dbff715a 100644 --- a/tests/testthat/test-class_database.R +++ b/tests/testthat/test-class_database.R @@ -118,7 +118,7 @@ tar_test("database$overwrite_storage()", { col2 = c("e12", "e22"), col3 = list(c("e13", "e14"), c("e23", "e24", "e25")) ) - + db <- database_init(header = c("name", "col2", "col3")) exp <- c( "name|col2|col3", @@ -244,45 +244,59 @@ tar_test("database$preprocess() on empty data", { expect_equal(readLines(db$path), "name|col3") }) -tar_test("database$preprocess()", { +tar_test("database$preprocess() on different column types", { path <- tempfile() + on.exit(unlink(path)) lines <- c( - "name|col2|col3|col4|col5", - "x|e02|1*2|1|1", - "e|e12|e13*e14|2|2", - "e|e22|e23*e24*e25|3|3", - "f|e32|x|4|4" - ) - writeLines(lines, path) - db <- database_init( - path = path, - header = colnames(data), - list_columns = "col3", - integer_columns = "col4", - numeric_columns = "col5" - ) - db$preprocess(write = TRUE) - out <- readLines(path) - expect_equal(out, lines[-3]) - expect_equal(sort(db$memory$names), sort(c("e", "f", "x"))) - exp <- list( - name = "e", - col2 = "e22", - col3 = c("e23", "e24", "e25"), - col4 = 3L, - col5 = 3 - ) - expect_equal(db$get_row("e"), exp) - exp <- list(name = "f", col2 = "e32", col3 = "x", col4 = 4L, col5 = 4) - expect_equal(db$get_row("f"), exp) - exp <- list( - name = "x", - col2 = "e02", - col3 = c("1", "2"), - col4 = 1L, - col5 = 1 - ) - expect_equal(db$get_row("x"), exp) + "name|col2|col3|col4|col5|col6", + "x|e02|1*2|1|1|TRUE", + "e|e12|e13*e14|2|2|FALSE", + "e|e22|e23*e24*e25|3|3|TRUE", + "f|e32|x|4|4|FALSE" + ) + for (repository in c("local", "aws", "gcp")) { + writeLines(lines, path) + db <- database_init( + path = path, + header = colnames(data), + list_columns = "col3", + logical_columns = "col6", + integer_columns = "col4", + numeric_columns = "col5", + repository = repository + ) + db$preprocess(write = TRUE) + out <- readLines(path) + expect_equal(out, lines[-3]) + expect_equal(sort(db$memory$names), sort(c("e", "f", "x"))) + exp <- list( + name = "e", + col2 = "e22", + col3 = c("e23", "e24", "e25"), + col4 = 3L, + col5 = 3, + col6 = TRUE + ) + expect_equal(db$get_row("e"), exp) + exp <- list( + name = "f", + col2 = "e32", + col3 = "x", + col4 = 4L, + col5 = 4, + col6 = FALSE + ) + expect_equal(db$get_row("f"), exp) + exp <- list( + name = "x", + col2 = "e02", + col3 = c("1", "2"), + col4 = 1L, + col5 = 1, + col6 = TRUE + ) + expect_equal(db$get_row("x"), exp) + } }) tar_test("database$write_row()", { From ed5bc5d8204900cfabeca431e0c238177971db99 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 14:19:25 -0400 Subject: [PATCH 11/12] space --- tests/testthat/test-class_database.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-class_database.R b/tests/testthat/test-class_database.R index 9dbff715a..ba46be69e 100644 --- a/tests/testthat/test-class_database.R +++ b/tests/testthat/test-class_database.R @@ -118,7 +118,6 @@ tar_test("database$overwrite_storage()", { col2 = c("e12", "e22"), col3 = list(c("e13", "e14"), c("e23", "e24", "e25")) ) - db <- database_init(header = c("name", "col2", "col3")) exp <- c( "name|col2|col3", From 11076278d27d82c356da0cf4d2a7fea9207e19b8 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 14:42:49 -0400 Subject: [PATCH 12/12] more cleanup --- codemeta.json | 88 ++++++++++++++++------------ tests/testthat/test-class_imports.R | 10 ++-- tests/testthat/test-utils_packages.R | 2 +- 3 files changed, 56 insertions(+), 44 deletions(-) diff --git a/codemeta.json b/codemeta.json index 7c789a377..17fac8e50 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,13 +8,13 @@ "codeRepository": "https://github.com/ropensci/targets", "issueTracker": "https://github.com/ropensci/targets/issues", "license": "https://spdx.org/licenses/MIT", - "version": "1.3.2.9002", + "version": "1.6.0.9001", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.3.0 (2023-04-21)", + "runtimePlatform": "R version 4.3.2 (2023-10-31)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -60,19 +60,6 @@ } ], "softwareSuggestions": [ - { - "@type": "SoftwareApplication", - "identifier": "arrow", - "name": "arrow", - "version": ">= 3.0.0", - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" - }, - "sameAs": "https://CRAN.R-project.org/package=arrow" - }, { "@type": "SoftwareApplication", "identifier": "bs4Dash", @@ -90,7 +77,7 @@ "@type": "SoftwareApplication", "identifier": "clustermq", "name": "clustermq", - "version": ">= 0.9.1", + "version": ">= 0.9.2", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -103,7 +90,7 @@ "@type": "SoftwareApplication", "identifier": "crew", "name": "crew", - "version": ">= 0.6.0", + "version": ">= 0.9.0", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -272,7 +259,7 @@ "@type": "SoftwareApplication", "identifier": "nanonext", "name": "nanonext", - "version": ">= 0.9.0", + "version": ">= 0.12.0", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -307,11 +294,24 @@ }, "sameAs": "https://CRAN.R-project.org/package=parallelly" }, + { + "@type": "SoftwareApplication", + "identifier": "paws.common", + "name": "paws.common", + "version": ">= 0.6.4", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=paws.common" + }, { "@type": "SoftwareApplication", "identifier": "paws.storage", "name": "paws.storage", - "version": ">= 0.2.0", + "version": ">= 0.4.0", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -577,42 +577,41 @@ }, "7": { "@type": "SoftwareApplication", - "identifier": "digest", - "name": "digest", - "version": ">= 0.6.25", + "identifier": "igraph", + "name": "igraph", + "version": ">= 2.0.0", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", "name": "Comprehensive R Archive Network (CRAN)", "url": "https://cran.r-project.org" }, - "sameAs": "https://CRAN.R-project.org/package=digest" + "sameAs": "https://CRAN.R-project.org/package=igraph" }, "8": { "@type": "SoftwareApplication", - "identifier": "igraph", - "name": "igraph", - "version": ">= 1.2.5", + "identifier": "knitr", + "name": "knitr", + "version": ">= 1.34", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", "name": "Comprehensive R Archive Network (CRAN)", "url": "https://cran.r-project.org" }, - "sameAs": "https://CRAN.R-project.org/package=igraph" + "sameAs": "https://CRAN.R-project.org/package=knitr" }, "9": { "@type": "SoftwareApplication", - "identifier": "knitr", - "name": "knitr", - "version": ">= 1.34", + "identifier": "ps", + "name": "ps", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", "name": "Comprehensive R Archive Network (CRAN)", "url": "https://cran.r-project.org" }, - "sameAs": "https://CRAN.R-project.org/package=knitr" + "sameAs": "https://CRAN.R-project.org/package=ps" }, "10": { "@type": "SoftwareApplication", @@ -641,11 +640,24 @@ "sameAs": "https://CRAN.R-project.org/package=rlang" }, "12": { + "@type": "SoftwareApplication", + "identifier": "secretbase", + "name": "secretbase", + "version": ">= 0.4.0", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=secretbase" + }, + "13": { "@type": "SoftwareApplication", "identifier": "stats", "name": "stats" }, - "13": { + "14": { "@type": "SoftwareApplication", "identifier": "tibble", "name": "tibble", @@ -658,7 +670,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=tibble" }, - "14": { + "15": { "@type": "SoftwareApplication", "identifier": "tidyselect", "name": "tidyselect", @@ -671,17 +683,17 @@ }, "sameAs": "https://CRAN.R-project.org/package=tidyselect" }, - "15": { + "16": { "@type": "SoftwareApplication", "identifier": "tools", "name": "tools" }, - "16": { + "17": { "@type": "SoftwareApplication", "identifier": "utils", "name": "utils" }, - "17": { + "18": { "@type": "SoftwareApplication", "identifier": "vctrs", "name": "vctrs", @@ -694,7 +706,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=vctrs" }, - "18": { + "19": { "@type": "SoftwareApplication", "identifier": "yaml", "name": "yaml", @@ -709,7 +721,7 @@ }, "SystemRequirements": null }, - "fileSize": "2565.143KB", + "fileSize": "2608.25KB", "citation": [ { "@type": "ScholarlyArticle", diff --git a/tests/testthat/test-class_imports.R b/tests/testthat/test-class_imports.R index 180fd101e..7c85d991f 100644 --- a/tests/testthat/test-class_imports.R +++ b/tests/testthat/test-class_imports.R @@ -41,23 +41,23 @@ tar_test("imports_set_datasets()", { }) tar_test("imports_init()", { - tar_option_set(imports = c("utils", "digest")) + tar_option_set(imports = c("utils", "secretbase")) envir <- new.env(parent = emptyenv()) envir$head <- "abc" expect_null(envir$tail) - expect_null(envir$digest) + expect_null(envir$siphash13) imports <- imports_init(envir) expect_equal(imports$head, "abc") expect_true(is.function(imports$tail)) - expect_true(is.function(imports$digest)) + expect_true(is.function(imports$siphash13)) expect_null(envir$tail) - expect_null(envir$digest) + expect_null(envir$siphash13) expect_true(inherits(imports, "tar_imports")) expect_false(inherits(envir, "tar_imports")) }) tar_test("imports_init() idempotence", { - tar_option_set(imports = c("utils", "digest")) + tar_option_set(imports = c("utils", "secretbase")) imports <- imports_init(imports_new(new.env(parent = emptyenv()))) expect_true(inherits(imports, "tar_imports")) expect_equal(length(imports), 0L) diff --git a/tests/testthat/test-utils_packages.R b/tests/testthat/test-utils_packages.R index 1550f9f90..334eeaaf7 100644 --- a/tests/testthat/test-utils_packages.R +++ b/tests/testthat/test-utils_packages.R @@ -1,5 +1,5 @@ tar_test("load packages", { - expect_silent(load_packages(packages = "digest", library = NULL)) + expect_silent(load_packages(packages = "secretbase", library = NULL)) expect_error( suppressWarnings( load_packages(packages = "does;not;exist", library = NULL)