From f00aee717b14d43069057e878c10bde60263a194 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 24 May 2019 01:24:25 +0800 Subject: [PATCH 1/2] Closes #697 -- extend not-in-scope error message for when it might be an integer column --- NEWS.md | 1 + R/data.table.R | 10 ++++++---- inst/tests/tests.Rraw | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index ac1c30b9b..ef1db2f05 100644 --- a/NEWS.md +++ b/NEWS.md @@ -138,6 +138,7 @@ 8. Historically, `dcast` and `melt` were built as enhancements to `reshape2`'s own `dcast`/`melt`. We removed dependency on `reshape2` in v1.9.6 but maintained some backward compatibility. As that package has been deprecated since December 2017, we have now formally completed the split from `reshape2` by removing some last vestiges, [#3549](https://github.com/Rdatatable/data.table/issues/3549). We thank the `reshape2` authors for their original inspiration for these functions. +9. `DT[i]` queries, where `i` is an _`integer`_ column of `DT`, now fail more helpfully, [#697](https://github.com/Rdatatable/data.table/issues/697). The proper form of this query is `DT[(i)]` (or `DT[DT$i]` or `DT[DT[['i']]`) for simplicity in the case that `i` is a valid symbol outside of `DT` (i.e., when there is ambiguity about which `i` is referred to). This mirrors the similar error message nudged introduced in [#1884](https://github.com/Rdatatable/data.table/issues/1884) for the case where `i` is `logical`. ### Changes in [v1.12.2](https://github.com/Rdatatable/data.table/milestone/14?closed=1) (07 Apr 2019) diff --git a/R/data.table.R b/R/data.table.R index 4812019e2..7e24eb4da 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -452,10 +452,12 @@ replace_dot_alias <- function(e) { if (inherits(i,"try-error")) { # must be "not found" since isub is a mere symbol col = try(eval(isub, x), silent=TRUE) # is it a column name? - if (identical(typeof(col),"logical")) - stop(as.character(isub)," is not found in calling scope but it is a column of type logical. If you wish to select rows where that column is TRUE, either wrap the symbol with '()' or use ==TRUE to be clearest to readers of your code.") - else - stop(as.character(isub)," is not found in calling scope and it is not a column of type logical. When the first argument inside DT[...] is a single symbol, data.table looks for it in calling scope.") + msg_tail = + switch(typeof(col), + logical = "but it is a column of type logical. If you wish to select rows where that column is TRUE, either wrap the symbol with '()' or use ==TRUE to be clearest to readers of your code.", + integer = "but it is a column of type integer. If you wish to index rows using that column, either wrap the symbol with '()' or extract it with `[[` or `$` to be clearest to readers of your code.", + "and it is not a column of type logical or integer. When the first argument inside DT[...] is a single symbol, data.table looks for it in calling scope.") + stop(as.character(isub), " is not found in calling scope ", msg_tail) } } if (restore.N) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ee35ec302..6f78f077f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11038,7 +11038,8 @@ if (exists("B")) rm(B) if (exists("NOTEXIST")) rm(NOTEXIST) DT <- data.table(A = c(FALSE, TRUE), B = 1:2) test(1773.1, DT[A], error = "not found in calling scope.*wrap the symbol with") -test(1773.2, DT[B], error = "not found in calling scope and it is not a column of type logical") +# #697 -- potential attempts to subset using an integer column are caught and advised in the error +test(1773.2, DT[B], error = 'B is not found in calling scope but it is a column of type integer') test(1773.3, DT[NOTEXIST], error = "not found in calling scope and it is not a column of type logical") test(1773.4, DT[(A)], DT[2]) @@ -14856,7 +14857,6 @@ test(2050.4, rbind(DT[0], DT[0])[,levels(f)], c("a","b","c")) # now ok again (o test(2050.5, rbindlist(list(DT[0], DT[0]))[,levels(f)], c("a","b","c")) # now ok again test(2050.6, rbind(DT[1], data.table(f=factor(letters[10:11]))[0])[,levels(f)], c("a","b","c","j","k")) # now includes "j","k" again - ################################### # Add new tests above this line # ################################### From d2c70f748678f5a1a205aeae128aaca7c9f34f26 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 23 May 2019 14:15:23 -0700 Subject: [PATCH 2/2] Extended to double too, consolidated the message into one, and shortened news item --- NEWS.md | 2 +- R/data.table.R | 13 ++++++------- inst/tests/tests.Rraw | 20 +++++++++++++------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/NEWS.md b/NEWS.md index ef1db2f05..a0dbdba02 100644 --- a/NEWS.md +++ b/NEWS.md @@ -138,7 +138,7 @@ 8. Historically, `dcast` and `melt` were built as enhancements to `reshape2`'s own `dcast`/`melt`. We removed dependency on `reshape2` in v1.9.6 but maintained some backward compatibility. As that package has been deprecated since December 2017, we have now formally completed the split from `reshape2` by removing some last vestiges, [#3549](https://github.com/Rdatatable/data.table/issues/3549). We thank the `reshape2` authors for their original inspiration for these functions. -9. `DT[i]` queries, where `i` is an _`integer`_ column of `DT`, now fail more helpfully, [#697](https://github.com/Rdatatable/data.table/issues/697). The proper form of this query is `DT[(i)]` (or `DT[DT$i]` or `DT[DT[['i']]`) for simplicity in the case that `i` is a valid symbol outside of `DT` (i.e., when there is ambiguity about which `i` is referred to). This mirrors the similar error message nudged introduced in [#1884](https://github.com/Rdatatable/data.table/issues/1884) for the case where `i` is `logical`. +9. `DT[col]` where `col` is a column containing row numbers of itself to select, now suggests the correct syntax (`DT[(col)]` or `DT[DT$col]`), [#697](https://github.com/Rdatatable/data.table/issues/697). This expands the message introduced in [#1884](https://github.com/Rdatatable/data.table/issues/1884) for the case where `col` is type `logical` and `DT[col==TRUE]` is suggested. ### Changes in [v1.12.2](https://github.com/Rdatatable/data.table/milestone/14?closed=1) (07 Apr 2019) diff --git a/R/data.table.R b/R/data.table.R index 7e24eb4da..238c5ec72 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -452,12 +452,11 @@ replace_dot_alias <- function(e) { if (inherits(i,"try-error")) { # must be "not found" since isub is a mere symbol col = try(eval(isub, x), silent=TRUE) # is it a column name? - msg_tail = - switch(typeof(col), - logical = "but it is a column of type logical. If you wish to select rows where that column is TRUE, either wrap the symbol with '()' or use ==TRUE to be clearest to readers of your code.", - integer = "but it is a column of type integer. If you wish to index rows using that column, either wrap the symbol with '()' or extract it with `[[` or `$` to be clearest to readers of your code.", - "and it is not a column of type logical or integer. When the first argument inside DT[...] is a single symbol, data.table looks for it in calling scope.") - stop(as.character(isub), " is not found in calling scope ", msg_tail) + msg = if (inherits(col,"try-error")) " and it is not a column name either." + else paste0(" but it is a column of type ", typeof(col),". If you wish to select rows where that column contains TRUE", + ", or perhaps that column contains row numbers of itself to select, try DT[(col)], DT[DT$col], or DT[col==TRUE] is particularly clear and is optimized.") + stop(as.character(isub), " is not found in calling scope", msg, + " When the first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table looks for var in calling scope.") } } if (restore.N) { @@ -635,7 +634,7 @@ replace_dot_alias <- function(e) { } # TO DO: TODO: Incorporate which_ here on DT[!i] where i is logical. Should avoid i = !i (above) - inefficient. # i is not a data.table - if (!is.logical(i) && !is.numeric(i)) stop("i has not evaluated to logical, integer or double") + if (!is.logical(i) && !is.numeric(i)) stop("i has evaluated to type ", typeof(i), ". Expecting logical, integer or double.") if (is.logical(i)) { if (length(i)==1L # to avoid unname copy when length(i)==nrow (normal case we don't want to slow down) && isTRUE(unname(i))) { irows=i=NULL } # unname() for #2152 - length 1 named logical vector. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6f78f077f..aab26a85b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11036,12 +11036,18 @@ test(1772.2, split(DT, by = c('a', 'b'), sorted = TRUE), if (exists("A")) rm(A) if (exists("B")) rm(B) if (exists("NOTEXIST")) rm(NOTEXIST) -DT <- data.table(A = c(FALSE, TRUE), B = 1:2) -test(1773.1, DT[A], error = "not found in calling scope.*wrap the symbol with") -# #697 -- potential attempts to subset using an integer column are caught and advised in the error -test(1773.2, DT[B], error = 'B is not found in calling scope but it is a column of type integer') -test(1773.3, DT[NOTEXIST], error = "not found in calling scope and it is not a column of type logical") -test(1773.4, DT[(A)], DT[2]) +if (exists("MyCol")) rm(MyCol) +DT <- data.table(A = c(FALSE, TRUE), B = 2:1, C=c(2,3), MyCol=c(2,2)) +test(1773.01, DT[A], error = "A is not found in calling scope but it is a column of type logical.*==TRUE.*When the first argument") +test(1773.02, DT[B], error = "B is not found in calling scope but it is a column of type integer.*DT\\[\\(col\\)\\].*When the first argument") # 697 +test(1773.03, DT[C], error = "i has evaluated to type closure. Expecting logical, integer or double") # C picks up stats::C in calling scope +test(1773.04, DT[MyCol], error="MyCol is not found in calling scope but it is a column of type double.*DT\\[\\(col\\)\\].*When the first argument") +test(1773.05, DT[NOTEXIST], error = "NOTEXIST is not found in calling scope and it is not a column name either. When the first argument") +test(1773.06, DT[(A)], DT[2]) +test(1773.07, DT[A==TRUE], DT[2]) +test(1773.08, DT[(B)], data.table(A=c(TRUE,FALSE), B=1:2, C=c(3,2), MyCol=2)) +test(1773.09, DT[(MyCol)], data.table(A=c(TRUE,TRUE), B=INT(1,1), C=c(3,3), MyCol=2)) +test(1773.10, DT[(C)], data.table(A=c(TRUE,NA), B=c(1L,NA), C=c(3,NA), MyCol=c(2,NA))) # New as.data.table.array method in v1.10.5 set.seed(1L) @@ -13400,7 +13406,7 @@ test(1967.48, x[ , b, .SDcols = 'a'], 6:10, test(1967.49, x[ , list(5) := 6], error = 'LHS of := must be a symbol') test(1967.50, x[ , 1 + 3i := 6], error = "LHS of := isn't column names") test(1967.511, x[ , .(5L), by = .EACHI, mult = 'all'], error='logical error. i is not data.table') -test(1967.512, x[1+3i], error='i has not evaluated to logical') +test(1967.512, x[1+3i], error='i has evaluated to type complex. Expecting logical, integer or double') test(1967.521, x[1:2, by=a], x[1:2,], warning="Ignoring by= because j= is not supplied") test(1967.522, x[, by=a], x, warning=c("Ignoring by= because j= is not supplied","i and j are both missing.*upgraded to error in future")) test(1967.523, x[by=a], x, warning=c("Ignoring by= because j= is not supplied","i and j are both missing.*upgraded to error in future"))