diff --git a/NEWS.md b/NEWS.md index 521ab3907..b92608c39 100644 --- a/NEWS.md +++ b/NEWS.md @@ -93,6 +93,8 @@ 4. New option `options(datatable.quiet = TRUE)` turns off the package startup message, [#3489](https://github.com/Rdatatable/data.table/issues/3489). `suppressPackageStartupMessages()` continues to work too. Thanks to @leobarlach for the suggestion inspired by `options(tidyverse.quiet = TRUE)`. We don't know of a way to make a package respect the `quietly=` option of `library()` and `require()` because the `quietly=` isn't passed through for use by the package's own `.onAttach`. If you can see how to do that, please submit a patch to R. +5. When loading a `data.table` from disk (e.g. with `readRDS`), best practice is to run `setDT()` on the new object to assure it is correctly allocated memory for new column pointers. Barring this, unexpected behavior can follow; for example, if you assign a new column to `DT` from a function `f`, the new columns will only be assigned within `f` and `DT` will be unchanged. The `verbose` messaging in this situation is now more helpful, [#1729](https://github.com/Rdatatable/data.table/issues/1729). Thanks @vspinu for sharing his experience to spur this. + ### Changes in [v1.12.2](https://github.com/Rdatatable/data.table/milestone/14?closed=1) (07 Apr 2019) diff --git a/R/data.table.R b/R/data.table.R index 6772fba6f..b49a45b31 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1161,7 +1161,10 @@ replace_dot_alias <- function(e) { newnames=setdiff(lhs,names(x)) m[is.na(m)] = ncol(x)+seq_len(length(newnames)) cols = as.integer(m) - if ((ok<-selfrefok(x,verbose))==0L) # ok==0 so no warning when loaded from disk (-1) [-1 considered TRUE by R] + # don't pass verbose to selfrefok here -- only activated when + # ok=-1 which will trigger alloc.col with verbose in the next + # branch, which again calls _selfrefok and returns the message then + if ((ok<-selfrefok(x))==0L) # ok==0 so no warning when loaded from disk (-1) [-1 considered TRUE by R] warning("Invalid .internal.selfref detected and fixed by taking a (shallow) copy of the data.table so that := can add this new column by reference. At an earlier point, this data.table has been copied by R (or was created manually using structure() or similar). Avoid names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. If this message doesn't help, please report your use case to the data.table issue tracker so the root cause can be fixed or this message improved.") if ((ok<1L) || (truelength(x) < ncol(x)+length(newnames))) { DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove @@ -1169,7 +1172,10 @@ replace_dot_alias <- function(e) { # i.e. reallocate at the size as if the new columns were added followed by alloc.col(). name = substitute(x) if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk) - cat("Growing vector of column pointers from truelength ",truelength(x)," to ",n,". A shallow copy has been taken, see ?alloc.col. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could alloc.col() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n") + cat("Growing vector of column pointers from truelength ", truelength(x), " to ", n, ". A shallow copy has been taken, see ?alloc.col. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could alloc.col() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n") + # #1729 -- copying to the wrong environment here can cause some confusion + if (ok == -1L) cat("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n") + # Verbosity should not issue warnings, so cat rather than warning. # TO DO: Add option 'datatable.pedantic' to turn on warnings like this. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cd97c0cfc..7475680fa 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14370,6 +14370,20 @@ if (test_yaml) { # csvy; #1701 DT_yaml, warning = 'Combining a search.*YAML.*') } +# #1729 -- more helpful guidance when assigning before setDT() after readRDS() +DT = data.table(a = 1:3) +tmp = tempfile() +saveRDS(DT, tmp) +rm(DT) +DT = readRDS(tmp) +foo = function(x) { + x[ , b := 4:6, verbose = TRUE][] +} +test(2033.1, foo(DT), output = 'Please remember to always setDT()') +# no assignment was made to DT +test(2033.2, names(DT), 'a') +# _selrefok() verbose message was duplicated +test(2033.3, unname(table(unlist(strsplit(capture.output(foo(DT)), '\n|\\s+')))['ptr']), 1L) ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 89899d34d..75224bf37 100644 --- a/src/assign.c +++ b/src/assign.c @@ -120,7 +120,7 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) { } p = R_ExternalPtrAddr(v); if (p==NULL) { - if (verbose) Rprintf(".internal.selfref ptr is NULL. This is expected and normal for a data.table loaded from disk. If not, please report to data.table issue tracker.\n"); + if (verbose) Rprintf(".internal.selfref ptr is NULL. This is expected and normal for a data.table loaded from disk. Please remember to always setDT() immediately after loading to prevent unexpected behavior. If this table was not loaded from disk or you've already run setDT(), please report to data.table issue tracker.\n"); return -1; } if (!isNull(p)) error("Internal error: .internal.selfref ptr is not NULL or R_NilValue"); // # nocov