From dc57d065df3f14e13d227200c1405c87d21ad1b6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 8 Feb 2019 20:48:15 +0800 Subject: [PATCH 1/2] closes #3319 -- manually escape backslash in character by processing --- NEWS.md | 2 ++ R/data.table.R | 6 ++++-- inst/tests/tests.Rraw | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index c4ada9269..b0527e99f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,8 @@ 3. Default print output (top 5 and bottom 5 rows) when ncol>255 could display the columns in the wrong order, [#3306](https://github.com/Rdatatable/data.table/issues/3306). Thanks to Kun Ren for reporting. +4. Grouping `by = 'string_with_\\'` would fail, [#3319](https://github.com/Rdatatable/data.table/issues/3319). Thanks to @HughParsonage for reporting and @MichaelChirico for the fix. + #### NOTES 1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background. diff --git a/R/data.table.R b/R/data.table.R index 4626c0098..f4c2a7732 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -866,8 +866,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (length(bysub)>1L) stop("'by' is a character vector length ",length(bysub)," but one or more items include a comma. Either pass a vector of column names (which can contain spaces, but no commas), or pass a vector length 1 containing comma separated column names. See ?data.table for other possibilities.") bysub = strsplit(bysub,split=",")[[1L]] } - tt = grep("^[^`]+$",bysub) - if (length(tt)) bysub[tt] = paste0("`",bysub[tt],"`") + backtick_idx = grep("^[^`]+$",bysub) + if (length(backtick_idx)) bysub[backtick_idx] = paste0("`",bysub[backtick_idx],"`") + backslash_idx = grep("\\", bysub, fixed = TRUE) + if (length(backslash_idx)) bysub[backslash_idx] = gsub('\\', '\\\\', bysub[backslash_idx], fixed = TRUE) bysub = parse(text=paste0("list(",paste(bysub,collapse=","),")"))[[1L]] bysubl = as.list.default(bysub) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 560dda493..f2e001e4b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13320,6 +13320,12 @@ test(1982.1, tt[1L], "") tt = as.integer(tt[tt!=""]) test(1982.2, tt, seq_along(tt)) +# parse(text = 'list(`\\phantom{.}`)') fails, #3319 +library(data.table) +DT <- data.table(x = 1, y = 1:5) +setnames(DT, "x", "\\phantom{.}") +test(1983.1, DT[, .(y = mean(y)), keyby = "\\phantom{.}"], + data.table(`\\phantom{.}` = 1, y = 3, key = '\\phantom{.}')) ################################### # Add new tests above this line # From 916a25d429c1974cec92677ea51c5fe8a5b65690 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 8 Feb 2019 20:53:10 +0800 Subject: [PATCH 2/2] use fixed = TRUE in gsub|grepl? when possible --- R/data.table.R | 4 ++-- R/fread.R | 2 +- R/onLoad.R | 4 ++-- R/setkey.R | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index f4c2a7732..2b0c0f72c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -862,7 +862,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } else if (is.call(bysub) && bysub[[1L]] == ".") bysub[[1L]] = quote(list) if (mode(bysub) == "character") { - if (length(grep(",",bysub))) { + if (length(grep(",", bysub, fixed = TRUE))) { if (length(bysub)>1L) stop("'by' is a character vector length ",length(bysub)," but one or more items include a comma. Either pass a vector of column names (which can contain spaces, but no commas), or pass a vector length 1 containing comma separated column names. See ?data.table for other possibilities.") bysub = strsplit(bysub,split=",")[[1L]] } @@ -1844,7 +1844,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { attrs = attr(x, 'index') skeys = names(attributes(attrs)) if (!is.null(skeys)) { - hits = unlist(lapply(paste0("__", names(x)[cols]), function(x) grep(x, skeys))) + hits = unlist(lapply(paste0("__", names(x)[cols]), function(x) grep(x, skeys, fixed = TRUE))) hits = skeys[unique(hits)] for (i in seq_along(hits)) setattr(attrs, hits[i], NULL) # does by reference } diff --git a/R/fread.R b/R/fread.R index 47c02c733..26dcfca3c 100644 --- a/R/fread.R +++ b/R/fread.R @@ -72,7 +72,7 @@ fread <- function(input="",file=NULL,text=NULL,cmd=NULL,sep="auto",sep2="auto",d on.exit(unlink(tmpFile), add=TRUE) # nocov end } - else if (length(grep(' ', input)) && !file.exists(input)) { # file name or path containing spaces is not a command + else if (length(grep(' ', input, fixed = TRUE)) && !file.exists(input)) { # file name or path containing spaces is not a command cmd = input if (input_has_vars && getOption("datatable.fread.input.cmd.message", TRUE)) { message("Taking input= as a system command ('",cmd,"') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.") diff --git a/R/onLoad.R b/R/onLoad.R index 558e9c7d1..e5a950d50 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -16,7 +16,7 @@ ss = body(tt) if (class(ss)[1L]!="{") ss = as.call(c(as.name("{"), ss)) prefix = if (!missing(pkgname)) "data.table::" else "" # R provides the arguments when it calls .onLoad, I don't in dev/test - if (!length(grep("data.table",ss[[2L]]))) { + if (!length(grep("data.table", ss[[2L]], fixed = TRUE))) { ss = ss[c(1L, NA, 2L:length(ss))] ss[[2L]] = parse(text=paste0("if (!identical(class(..1),'data.frame')) for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,"data.table(...)) }"))[[1]] body(tt)=ss @@ -27,7 +27,7 @@ tt = base::rbind.data.frame ss = body(tt) if (class(ss)[1L]!="{") ss = as.call(c(as.name("{"), ss)) - if (!length(grep("data.table",ss[[2L]]))) { + if (!length(grep("data.table", ss[[2L]], fixed = TRUE))) { ss = ss[c(1L, NA, 2L:length(ss))] ss[[2L]] = parse(text=paste0("for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,".rbind.data.table(...)) }"))[[1L]] # fix for #4995 body(tt)=ss diff --git a/R/setkey.R b/R/setkey.R index 6f5ea7c46..a7a03cdac 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -49,7 +49,7 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR cols = colnames(x) # All columns in the data.table, usually a few when used in this form } else { # remove backticks from cols - cols <- gsub("`", "", cols) + cols <- gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=",")) } @@ -321,7 +321,7 @@ setorderv <- function(x, cols = colnames(x), order=1L, na.last=FALSE) } if (!all(nzchar(cols))) stop("cols contains some blanks.") # TODO: probably I'm checking more than necessary here.. there are checks in 'forderv' as well # remove backticks from cols - cols <- gsub("`", "", cols) + cols <- gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=",")) if (".xi" %chin% colnames(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.")