From dc57d065df3f14e13d227200c1405c87d21ad1b6 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 8 Feb 2019 20:48:15 +0800
Subject: [PATCH 1/2] closes #3319 -- manually escape backslash in character by
 processing

---
 NEWS.md               | 2 ++
 R/data.table.R        | 6 ++++--
 inst/tests/tests.Rraw | 6 ++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index c4ada9269..b0527e99f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -14,6 +14,8 @@
 
 3. Default print output (top 5 and bottom 5 rows) when ncol>255 could display the columns in the wrong order, [#3306](https://github.com/Rdatatable/data.table/issues/3306). Thanks to Kun Ren for reporting.
 
+4. Grouping `by = 'string_with_\\'` would fail, [#3319](https://github.com/Rdatatable/data.table/issues/3319). Thanks to @HughParsonage for reporting and @MichaelChirico for the fix.
+
 #### NOTES
 
 1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background.
diff --git a/R/data.table.R b/R/data.table.R
index 4626c0098..f4c2a7732 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -866,8 +866,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
             if (length(bysub)>1L) stop("'by' is a character vector length ",length(bysub)," but one or more items include a comma. Either pass a vector of column names (which can contain spaces, but no commas), or pass a vector length 1 containing comma separated column names. See ?data.table for other possibilities.")
             bysub = strsplit(bysub,split=",")[[1L]]
           }
-          tt = grep("^[^`]+$",bysub)
-          if (length(tt)) bysub[tt] = paste0("`",bysub[tt],"`")
+          backtick_idx = grep("^[^`]+$",bysub)
+          if (length(backtick_idx)) bysub[backtick_idx] = paste0("`",bysub[backtick_idx],"`")
+          backslash_idx = grep("\\", bysub, fixed = TRUE)
+          if (length(backslash_idx)) bysub[backslash_idx] = gsub('\\', '\\\\', bysub[backslash_idx], fixed = TRUE)
           bysub = parse(text=paste0("list(",paste(bysub,collapse=","),")"))[[1L]]
           bysubl = as.list.default(bysub)
         }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 560dda493..f2e001e4b 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13320,6 +13320,12 @@ test(1982.1, tt[1L], "")
 tt = as.integer(tt[tt!=""])
 test(1982.2, tt, seq_along(tt))
 
+# parse(text = 'list(`\\phantom{.}`)') fails, #3319
+library(data.table)
+DT <- data.table(x = 1, y = 1:5)
+setnames(DT, "x", "\\phantom{.}")
+test(1983.1, DT[, .(y = mean(y)), keyby = "\\phantom{.}"],
+     data.table(`\\phantom{.}` = 1, y = 3, key = '\\phantom{.}'))
 
 ###################################
 #  Add new tests above this line  #

From 916a25d429c1974cec92677ea51c5fe8a5b65690 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 8 Feb 2019 20:53:10 +0800
Subject: [PATCH 2/2] use fixed = TRUE in gsub|grepl? when possible

---
 R/data.table.R | 4 ++--
 R/fread.R      | 2 +-
 R/onLoad.R     | 4 ++--
 R/setkey.R     | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index f4c2a7732..2b0c0f72c 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -862,7 +862,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
         } else if (is.call(bysub) && bysub[[1L]] == ".") bysub[[1L]] = quote(list)
 
         if (mode(bysub) == "character") {
-          if (length(grep(",",bysub))) {
+          if (length(grep(",", bysub, fixed = TRUE))) {
             if (length(bysub)>1L) stop("'by' is a character vector length ",length(bysub)," but one or more items include a comma. Either pass a vector of column names (which can contain spaces, but no commas), or pass a vector length 1 containing comma separated column names. See ?data.table for other possibilities.")
             bysub = strsplit(bysub,split=",")[[1L]]
           }
@@ -1844,7 +1844,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
     attrs = attr(x, 'index')
     skeys = names(attributes(attrs))
     if (!is.null(skeys)) {
-      hits  = unlist(lapply(paste0("__", names(x)[cols]), function(x) grep(x, skeys)))
+      hits  = unlist(lapply(paste0("__", names(x)[cols]), function(x) grep(x, skeys, fixed = TRUE)))
       hits  = skeys[unique(hits)]
       for (i in seq_along(hits)) setattr(attrs, hits[i], NULL) # does by reference
     }
diff --git a/R/fread.R b/R/fread.R
index 47c02c733..26dcfca3c 100644
--- a/R/fread.R
+++ b/R/fread.R
@@ -72,7 +72,7 @@ fread <- function(input="",file=NULL,text=NULL,cmd=NULL,sep="auto",sep2="auto",d
         on.exit(unlink(tmpFile), add=TRUE)
         # nocov end
       }
-      else if (length(grep(' ', input)) && !file.exists(input)) {  # file name or path containing spaces is not a command
+      else if (length(grep(' ', input, fixed = TRUE)) && !file.exists(input)) {  # file name or path containing spaces is not a command
         cmd = input
         if (input_has_vars && getOption("datatable.fread.input.cmd.message", TRUE)) {
           message("Taking input= as a system command ('",cmd,"') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.")
diff --git a/R/onLoad.R b/R/onLoad.R
index 558e9c7d1..e5a950d50 100644
--- a/R/onLoad.R
+++ b/R/onLoad.R
@@ -16,7 +16,7 @@
   ss = body(tt)
   if (class(ss)[1L]!="{") ss = as.call(c(as.name("{"), ss))
   prefix = if (!missing(pkgname)) "data.table::" else ""  # R provides the arguments when it calls .onLoad, I don't in dev/test
-  if (!length(grep("data.table",ss[[2L]]))) {
+  if (!length(grep("data.table", ss[[2L]], fixed = TRUE))) {
     ss = ss[c(1L, NA, 2L:length(ss))]
     ss[[2L]] = parse(text=paste0("if (!identical(class(..1),'data.frame')) for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,"data.table(...)) }"))[[1]]
     body(tt)=ss
@@ -27,7 +27,7 @@
   tt = base::rbind.data.frame
   ss = body(tt)
   if (class(ss)[1L]!="{") ss = as.call(c(as.name("{"), ss))
-  if (!length(grep("data.table",ss[[2L]]))) {
+  if (!length(grep("data.table", ss[[2L]], fixed = TRUE))) {
     ss = ss[c(1L, NA, 2L:length(ss))]
     ss[[2L]] = parse(text=paste0("for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,".rbind.data.table(...)) }"))[[1L]] # fix for #4995
     body(tt)=ss
diff --git a/R/setkey.R b/R/setkey.R
index 6f5ea7c46..a7a03cdac 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -49,7 +49,7 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
     cols = colnames(x)   # All columns in the data.table, usually a few when used in this form
   } else {
     # remove backticks from cols
-    cols <- gsub("`", "", cols)
+    cols <- gsub("`", "", cols, fixed = TRUE)
     miss = !(cols %chin% colnames(x))
     if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=","))
   }
@@ -321,7 +321,7 @@ setorderv <- function(x, cols = colnames(x), order=1L, na.last=FALSE)
   }
   if (!all(nzchar(cols))) stop("cols contains some blanks.")     # TODO: probably I'm checking more than necessary here.. there are checks in 'forderv' as well
   # remove backticks from cols
-  cols <- gsub("`", "", cols)
+  cols <- gsub("`", "", cols, fixed = TRUE)
   miss = !(cols %chin% colnames(x))
   if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=","))
   if (".xi" %chin% colnames(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.")