Skip to content

Commit

Permalink
Merge pull request #1262 from ropensci/1244
Browse files Browse the repository at this point in the history
Switch to secretbase::siphash13()
  • Loading branch information
wlandau authored Apr 5, 2024
2 parents d71b987 + e89878b commit fd96c4e
Show file tree
Hide file tree
Showing 42 changed files with 363 additions and 188 deletions.
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Description: Pipeline tools coordinate the pieces of computationally
The methodology in this package
borrows from GNU 'Make' (2015, ISBN:978-9881443519)
and 'drake' (2018, <doi:10.21105/joss.00550>).
Version: 1.6.0.9000
Version: 1.6.0.9001
License: MIT + file LICENSE
URL: https://docs.ropensci.org/targets/, https://github.com/ropensci/targets
BugReports: https://github.com/ropensci/targets/issues
Expand Down Expand Up @@ -60,13 +60,12 @@ Imports:
cli (>= 2.0.2),
codetools (>= 0.2.16),
data.table (>= 1.12.8),
digest (>= 0.6.25),
igraph (>= 2.0.0),
knitr (>= 1.34),
ps,
R6 (>= 2.4.1),
rlang (>= 1.0.0),
secretbase,
secretbase (>= 0.4.0),
stats,
tibble (>= 3.0.1),
tidyselect (>= 1.1.0),
Expand Down
10 changes: 5 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

S3method(future_value_target,condition)
S3method(future_value_target,tar_target)
S3method(hash_object,"function")
S3method(hash_object,character)
S3method(hash_object,default)
S3method(hash_import_object,"function")
S3method(hash_import_object,character)
S3method(hash_import_object,default)
S3method(imports_init,default)
S3method(imports_init,tar_imports)
S3method(pipeline_from_list,default)
Expand Down Expand Up @@ -532,8 +532,6 @@ importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(digest,digest)
importFrom(digest,getVDigest)
importFrom(igraph,V)
importFrom(igraph,adjacent_vertices)
importFrom(igraph,as_edgelist)
Expand All @@ -557,6 +555,7 @@ importFrom(rlang,is_installed)
importFrom(rlang,quo_squash)
importFrom(rlang,warn)
importFrom(secretbase,sha3)
importFrom(secretbase,siphash13)
importFrom(stats,complete.cases)
importFrom(stats,runif)
importFrom(tibble,as_tibble)
Expand All @@ -573,6 +572,7 @@ importFrom(tidyselect,starts_with)
importFrom(tools,file_path_sans_ext)
importFrom(utils,browseURL)
importFrom(utils,capture.output)
importFrom(utils,compareVersion)
importFrom(utils,data)
importFrom(utils,globalVariables)
importFrom(utils,head)
Expand Down
9 changes: 8 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# targets 1.6.0.9000
# targets 1.6.0.9001

## Invalidating changes

* Use `secretbase::siphash13()` instead of `digest(algo = "xxhash64", serializationVersion = 3)` so hashes of in-memory objects no longer depend on serialization version 3 headers (#1244, @shikokuchuo). Unfortunately, pipelines built with earlier versions of `targets` will need to rerun.

## Other improvements

* Inform and prompt the user when the pipeline was built with an old version of `targets` and changes to the package will cause the current work to rerun (#1244). For the `tar_make*()` functions, `utils::menu()` prompts the user, giving them a chance to downgrade if necessary.
* For type safety in the internal database class, read all columns as character vectors in `data.table::fread()`, then convert them to the correct types afterwards.

# targets 1.6.0

Expand Down
2 changes: 1 addition & 1 deletion R/class_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ store_upload_object_aws <- function(store) {
invert = TRUE
)
store$file$path <- c(path, paste0("version=", head$VersionId))
store$file$hash <- digest_chr64(head$ETag)
store$file$hash <- hash_object(head$ETag)
invisible()
}

Expand Down
2 changes: 1 addition & 1 deletion R/class_command.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ command_init <- function(
expr <- as.expression(expr)
deps <- deps %|||% deps_function(embody_expr(expr))
string <- string %|||% mask_pointers(tar_deparse_safe(expr))
hash <- digest_chr64(string)
hash <- hash_object(string)
command_new(expr, packages, library, deps, seed, string, hash)
}

Expand Down
4 changes: 3 additions & 1 deletion R/class_crew.R
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,9 @@ database_crew <- function(path_store) {
database_init(
path = file.path(path_meta_dir(path_store), "crew"),
subkey = file.path(basename(path_meta("")), "crew"),
header = c("controller", "worker", "seconds", "targets")
header = c("controller", "worker", "seconds", "targets"),
integer_columns = "targets",
numeric_columns = "seconds"
)
}

Expand Down
74 changes: 69 additions & 5 deletions R/class_database.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ database_init <- function(
path = tempfile(),
subkey = basename(tempfile()),
header = "name",
logical_columns = character(0L),
integer_columns = character(0L),
numeric_columns = character(0L),
list_columns = character(0L),
list_column_modes = character(0L),
repository = tar_options$get_repository_meta(),
Expand All @@ -19,6 +22,9 @@ database_init <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources
Expand All @@ -28,6 +34,9 @@ database_init <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources
Expand All @@ -37,6 +46,9 @@ database_init <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources
Expand All @@ -59,6 +71,9 @@ database_class <- R6::R6Class(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
resources = NULL,
Expand All @@ -69,6 +84,9 @@ database_class <- R6::R6Class(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
resources = NULL,
Expand All @@ -78,6 +96,9 @@ database_class <- R6::R6Class(
self$path <- path
self$key <- key
self$header <- header
self$logical_columns <- logical_columns
self$integer_columns <- integer_columns
self$numeric_columns <- numeric_columns
self$list_columns <- list_columns
self$list_column_modes <- list_column_modes
self$resources <- resources
Expand Down Expand Up @@ -275,9 +296,28 @@ database_class <- R6::R6Class(
sep = database_sep_outer,
fill = TRUE,
na.strings = "",
encoding = encoding
encoding = encoding,
colClasses = "character"
)
out <- as_data_frame(out)
for (name in self$logical_columns) {
value <- out[[name]]
if (!is.null(value)) {
out[[name]] <- as.logical(value)
}
}
for (name in self$integer_columns) {
value <- out[[name]]
if (!is.null(value)) {
out[[name]] <- as.integer(value)
}
}
for (name in self$numeric_columns) {
value <- out[[name]]
if (!is.null(value)) {
out[[name]] <- as.numeric(value)
}
}
if (nrow(out) < 1L) {
return(out)
}
Expand Down Expand Up @@ -387,9 +427,23 @@ database_class <- R6::R6Class(
invisible()
}
},
validate_columns = function(header, list_columns) {
if (!all(list_columns %in% header)) {
tar_throw_validate("all list columns must be in the header")
validate_columns = function(
header,
logical_columns,
integer_columns,
numeric_columns,
list_columns
) {
special_columns <- c(
logical_columns,
integer_columns,
numeric_columns,
list_columns
)
if (!all(special_columns %in% header)) {
tar_throw_validate(
"all logical/integer/numeric/list columns must be in the header"
)
}
if (!is.null(header) && !("name" %in% header)) {
tar_throw_validate("header must have a column called \"name\"")
Expand All @@ -416,7 +470,6 @@ database_class <- R6::R6Class(
},
validate = function() {
memory_validate(self$memory)
self$validate_columns(self$header, self$list_columns)
self$validate_file()
tar_assert_chr(self$path)
tar_assert_scalar(self$path)
Expand All @@ -427,7 +480,18 @@ database_class <- R6::R6Class(
tar_assert_none_na(self$key)
tar_assert_nzchar(self$key)
tar_assert_chr(self$header)
tar_assert_chr(self$logical_columns)
tar_assert_chr(self$integer_columns)
tar_assert_chr(self$numeric_columns)
tar_assert_chr(self$list_columns)
tar_assert_chr(self$list_column_modes)
self$validate_columns(
self$header,
self$logical_columns,
self$integer_columns,
self$numeric_columns,
self$list_columns
)
}
)
)
Expand Down
6 changes: 6 additions & 0 deletions R/class_database_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ database_aws_new <- function(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
buffer = NULL,
Expand All @@ -15,6 +18,9 @@ database_aws_new <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
buffer = buffer,
Expand Down
6 changes: 6 additions & 0 deletions R/class_database_gcp.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ database_gcp_new <- function(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
buffer = NULL,
Expand All @@ -15,6 +18,9 @@ database_gcp_new <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
buffer = buffer,
Expand Down
6 changes: 6 additions & 0 deletions R/class_database_local.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ database_local_new <- function(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
resources = NULL,
Expand All @@ -13,6 +16,9 @@ database_local_new <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources,
Expand Down
8 changes: 4 additions & 4 deletions R/class_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,13 @@ file_list_files <- function(path) {
file_hash <- function(files) {
n <- length(files)
if (identical(n, 0L)) {
return(null64)
return(hash_null)
}
hash <- digest_file64(files)
hash <- map_chr(x = files, f = hash_file, USE.NAMES = FALSE)
if (identical(n, 1L)) {
return(hash)
}
digest_chr64(paste(hash, collapse = ""))
hash_object(paste(hash, collapse = ""))
}

file_info <- function(files) {
Expand Down Expand Up @@ -189,7 +189,7 @@ file_bytes <- function(info) {
}

file_size <- function(bytes) {
digest_obj64(bytes)
hash_object(bytes)
}

file_diff_chr <- function(dbl) {
Expand Down
2 changes: 1 addition & 1 deletion R/class_gcp.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ store_upload_object_gcp <- function(store) {
invert = TRUE
)
store$file$path <- c(path, paste0("version=", head$generation))
store$file$hash <- digest_chr64(head$md5)
store$file$hash <- hash_object(head$md5)
invisible()
}

Expand Down
2 changes: 1 addition & 1 deletion R/class_inventory_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ inventory_aws_class <- R6::R6Class(
)
for (key in names(results)) {
name <- self$get_name(key = key, bucket = bucket)
self$cache[[name]] <- digest_chr64(results[[key]])
self$cache[[name]] <- hash_object(results[[key]])
}
}
)
Expand Down
2 changes: 1 addition & 1 deletion R/class_inventory_gcp.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ inventory_gcp_class <- R6::R6Class(
)
for (key in names(results)) {
name <- self$get_name(key = key, bucket = bucket)
self$cache[[name]] <- digest_chr64(results[[key]])
self$cache[[name]] <- hash_object(results[[key]])
}
}
)
Expand Down
2 changes: 1 addition & 1 deletion R/class_mermaid.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ mermaid_class <- R6::R6Class(
produce_mermaid_vertices = function(data) {
sprintf(
"%s%s%s%s:::%s",
sprintf("x%s", as.character(map_chr(data$name, digest_chr64))),
sprintf("x%s", as.character(map_chr(data$name, hash_object))),
data$open,
sprintf("\"%s\"", data$label),
data$close,
Expand Down
4 changes: 3 additions & 1 deletion R/class_meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ meta_class <- R6::R6Class(
)
hashes <- hashes[nzchar(hashes)]
string <- paste(c(names(hashes), hashes), collapse = "")
digest_chr64(string)
hash_object(string)
},
produce_depend = function(target, pipeline) {
self$hash_deps(target$command$deps, pipeline)
Expand Down Expand Up @@ -160,6 +160,8 @@ database_meta <- function(path_store) {
path = path_meta(path_store = path_store),
subkey = file.path(basename(path_meta("")), "meta"),
header = header_meta(),
integer_columns = "seed",
numeric_columns = c("bytes", "seconds"),
list_columns = c("path", "children"),
list_column_modes = c("character", "character")
)
Expand Down
Loading

0 comments on commit fd96c4e

Please sign in to comment.