From 3c487ed60e21aeaad49877f533ba78245bd23149 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sat, 4 Apr 2020 01:43:53 +0100
Subject: [PATCH 1/8] int i and nomatch arg, closes #3109

---
 NEWS.md               | 11 +++++
 R/data.table.R        |  6 ++-
 inst/tests/tests.Rraw | 15 +++++++
 src/subset.c          | 97 +++++++++++++++++++++++++++++++------------
 4 files changed, 101 insertions(+), 28 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index 71fd76aa6..9662351a7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -81,6 +81,17 @@ unit = "s")
 
 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR.
 
+15. It is now possible to use `nomatch=NULL` argument and integer `i` to specify rows to subset but return only those rows which exists in data.table, closes [#3109](https://github.com/Rdatatable/data.table/issues/3109), [#3666](https://github.com/Rdatatable/data.table/issues/3666). Thanks @mllg and @hadley for feature request.
+
+```r
+DT = data.table(x = 1:4)
+DT[c(1L, NA_integer_, 3L, 5L), nomatch = 0L]
+#       x
+#   <int>
+#1:     1
+#2:     3
+```
+
 ## BUG FIXES
 
 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085).
diff --git a/R/data.table.R b/R/data.table.R
index 9292ee940..933f30a52 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -567,8 +567,10 @@ replace_dot_alias = function(e) {
         else stop("i evaluates to a logical vector length ", length(i), " but there are ", nrow(x), " rows. Recycling of logical i is no longer allowed as it hides more bugs than is worth the rare convenience. Explicitly use rep(...,length=.N) if you really need to recycle.")
       } else {
         irows = as.integer(i)  # e.g. DT[c(1,3)] and DT[c(-1,-3)] ok but not DT[c(1,-3)] (caught as error)
-        irows = .Call(CconvertNegAndZeroIdx, irows, nrow(x), is.null(jsub) || root!=":=")  # last argument is allowOverMax (NA when selecting, error when assigning)
-        # simplifies logic from here on: can assume positive subscripts (no zeros)
+        irows = .Call(CconvertNegAndZeroIdx, irows, nrow(x),
+                      is.null(jsub) || root!=":=",   # allowOverMax (NA when selecting, error when assigning)
+                      identical(nomatch,0L))         # nomatchArg, keeps only shuffle #3109
+        # simplifies logic from here on: can assume positive subscripts (no zeros), for a nomatch=0L also no NAs
         # maintains Arun's fix for #2697 (test 1042)
         # efficient in C with more detailed helpful messages when user mixes positives and negatives
         # falls through quickly (no R level allocs) if all items are within range [1,max] with no zeros or negatives
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 7cc6819e8..a26e26c36 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16846,3 +16846,18 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN))
 test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
 A = data.table(A=as.complex(rep(NA, 5)))
 test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
+
+# Respect `nomatch = 0L` for integer i #3109, also #3666
+dt = data.table(x = 1:4)
+test(2139.01, dt[c(1L, 5L, NA_integer_)], data.table(x=c(1L,NA_integer_,NA_integer_))) # test the default nomatch
+test(2139.02, dt[c(1L, 5L, NA_integer_), nomatch=NULL], data.table(x = 1L))
+test(2139.03, dt[c(1L, 5L, NA_integer_), x, nomatch=NULL], 1L)
+test(2139.04, dt[c(1:4,1:4), nomatch=NULL], data.table(x=c(1:4,1:4))) # early stopping convertNegAndZeroIdx
+test(2139.05, dt[c(1:4,-1L), nomatch=NULL], error="i argument must not be negative")
+test(2139.06, dt[c(1:4,NA_integer_,-1L), nomatch=NULL], error="i argument must not be negative")
+test(2139.07, dt[NA, nomatch=NULL], data.table(x=integer()))
+test(2139.08, dt[0L, nomatch=NULL], data.table(x=integer()))
+test(2139.09, dt[0:1, nomatch=NULL], data.table(x=1L))
+test(2139.10, dt[-1L, nomatch=NULL], error="i argument must not be negative")
+test(2139.11, data.table()[1L, nomatch=NULL], data.table())
+test(2139.12, data.table()[-1L, nomatch=NULL], error="i argument must not be negative")
diff --git a/src/subset.c b/src/subset.c
index d9fea2800..bde0033db 100644
--- a/src/subset.c
+++ b/src/subset.c
@@ -106,51 +106,79 @@ static const char *check_idx(SEXP idx, int max, bool *anyNA_out, bool *orderedSu
   return NULL;
 }
 
-SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax)
+SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatchArg)
 {
   // called from [.data.table to massage user input, creating a new strictly positive idx if there are any negatives or zeros
   // + more precise and helpful error messages telling user exactly where the problem is (saving user debugging time)
   // + a little more efficient than negativeSubscript in src/main/subscript.c (it's private to R so we can't call it anyway)
   // allowOverMaxArg is false when := (test 1024), otherwise true for selecting
+  // nomatchArg is true (nomatch==0L) when we want to skip nomatch'ing rows, it also excludes NAs #3109
 
   if (!isInteger(idx)) error(_("Internal error. 'idx' is type '%s' not 'integer'"), type2char(TYPEOF(idx))); // # nocov
   if (!isInteger(maxArg) || length(maxArg)!=1) error(_("Internal error. 'maxArg' is type '%s' and length %d, should be an integer singleton"), type2char(TYPEOF(maxArg)), length(maxArg)); // # nocov
   if (!isLogical(allowOverMax) || LENGTH(allowOverMax)!=1 || LOGICAL(allowOverMax)[0]==NA_LOGICAL) error(_("Internal error: allowOverMax must be TRUE/FALSE"));  // # nocov
-  int max = INTEGER(maxArg)[0], n=LENGTH(idx);
+  if (!isLogical(nomatchArg) || LENGTH(nomatchArg)!=1 || LOGICAL(nomatchArg)[0]==NA_LOGICAL) error(_("Internal error: nomatchArg must be TRUE/FALSE"));  // # nocov
+  int max = INTEGER(maxArg)[0], n=LENGTH(idx), nomatch = LOGICAL(nomatchArg)[0];
   if (max<0) error(_("Internal error. max is %d, must be >= 0."), max); // # nocov    includes NA which will print as INT_MIN
   int *idxp = INTEGER(idx);
 
   bool stop = false;
-  #pragma omp parallel for num_threads(getDTthreads())
-  for (int i=0; i<n; i++) {
-    if (stop) continue;
-    int elem = idxp[i];
-    if ((elem<1 && elem!=NA_INTEGER) || elem>max) stop=true;
+  if (!nomatch) {
+    #pragma omp parallel for num_threads(getDTthreads())
+    for (int i=0; i<n; i++) {
+      if (stop)
+        continue;
+      int elem = idxp[i];
+      if ((elem<1 && elem!=NA_INTEGER) || elem>max)
+        stop = true;
+    }
+  } else { // nomatch
+    #pragma omp parallel for num_threads(getDTthreads())
+    for (int i=0; i<n; i++) {
+      if (stop)
+        continue;
+      int elem = idxp[i];
+      if (elem<1 || elem>max)
+        stop = true;
+    }
   }
-  if (!stop) return(idx); // most common case to return early: no 0, no negative; all idx either NA or in range [1-max]
+  if (!stop)
+    return(idx); // most common case to return early: no 0, no negative; all idx either NA (for !nomatch) or in range [1-max]
 
   // ---------
-  // else massage the input to a standard idx where all items are either NA or in range [1,max] ...
+  // else massage the input to a standard idx where all items are either NA (for !nomatch) or in range [1,max] ...
 
-  int countNeg=0, countZero=0, countNA=0, firstOverMax=0;
+  int countNeg=0, countZero=0, countNA=0, countOverMax=0, firstOverMax=0;
   for (int i=0; i<n; i++) {
     int elem = idxp[i];
-    if (elem==NA_INTEGER) countNA++;
-    else if (elem<0) countNeg++;
-    else if (elem==0) countZero++;
-    else if (elem>max && firstOverMax==0) firstOverMax=i+1;
+    if (elem==NA_INTEGER)
+      countNA++;
+    else if (elem<0)
+      countNeg++;
+    else if (elem==0)
+      countZero++;
+    else if (elem>max) {
+      countOverMax++;
+      if (firstOverMax==0)
+        firstOverMax=i+1;
+    }
   }
   if (firstOverMax && LOGICAL(allowOverMax)[0]==FALSE) {
     error(_("i[%d] is %d which is out of range [1,nrow=%d]"), firstOverMax, idxp[firstOverMax-1], max);
   }
+  if (countNeg && nomatch) {
+    error(_("i argument must not be negative when used together with nomatch=NULL"));
+  }
 
   int countPos = n-countNeg-countZero-countNA;
   if (countPos && countNeg) {
     int i=0, firstNeg=0, firstPos=0;
     while (i<n && (firstNeg==0 || firstPos==0)) {
       int elem = idxp[i];
-      if (firstPos==0 && elem>0) firstPos=i+1;
-      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1;
+      if (firstPos==0 && elem>0)
+        firstPos=i+1;
+      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER)
+        firstNeg=i+1;
       i++;
     }
     error(_("Item %d of i is %d and item %d is %d. Cannot mix positives and negatives."), firstNeg, idxp[firstNeg-1], firstPos, idxp[firstPos-1]);
@@ -159,40 +187,56 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax)
     int i=0, firstNeg=0, firstNA=0;
     while (i<n && (firstNeg==0 || firstNA==0)) {
       int elem = idxp[i];
-      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1;
-      if (firstNA==0 && elem==NA_INTEGER) firstNA=i+1;
+      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER)
+        firstNeg=i+1;
+      if (firstNA==0 && elem==NA_INTEGER)
+        firstNA=i+1;
       i++;
     }
     error(_("Item %d of i is %d and item %d is NA. Cannot mix negatives and NA."), firstNeg, idxp[firstNeg-1], firstNA);
   }
 
   SEXP ans;
-  if (countNeg==0) {
+  if (nomatch) { // also countNeg==0 checked before
+    ans = PROTECT(allocVector(INTSXP, n-countZero-countNA-countOverMax));
+    int *ansp = INTEGER(ans);
+    for (int i=0, ansi=0; i<n; i++) {
+      int elem = idxp[i];
+      if (elem<1 || elem>max)
+        continue;
+      ansp[ansi++] = elem;
+    }
+  } else if (countNeg==0) {
     // just zeros to remove, or >max to convert to NA
     ans = PROTECT(allocVector(INTSXP, n - countZero));
     int *ansp = INTEGER(ans);
     for (int i=0, ansi=0; i<n; i++) {
       int elem = idxp[i];
-      if (elem==0) continue;
+      if (elem==0)
+        continue;
       ansp[ansi++] = elem>max ? NA_INTEGER : elem;
     }
   } else {
     // idx is all negative without any NA but perhaps some zeros
     bool *keep = (bool *)R_alloc(max, sizeof(bool));    // 4 times less memory that INTSXP in src/main/subscript.c
-    for (int i=0; i<max; i++) keep[i] = true;
+    for (int i=0; i<max; i++)
+      keep[i] = true;
     int countRemoved=0, countDup=0, countBeyond=0;   // idx=c(-10,-5,-10) removing row 10 twice
     int firstBeyond=0, firstDup=0;
     for (int i=0; i<n; i++) {
       int elem = -idxp[i];
-      if (elem==0) continue;
+      if (elem==0)
+        continue;
       if (elem>max) {
         countBeyond++;
-        if (firstBeyond==0) firstBeyond=i+1;
+        if (firstBeyond==0)
+          firstBeyond=i+1;
         continue;
       }
       if (!keep[elem-1]) {
         countDup++;
-        if (firstDup==0) firstDup=i+1;
+        if (firstDup==0)
+          firstDup=i+1;
       } else {
         keep[elem-1] = false;
         countRemoved++;
@@ -206,7 +250,8 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax)
     ans = PROTECT(allocVector(INTSXP, ansn));
     int *ansp = INTEGER(ans);
     for (int i=0, ansi=0; i<max; i++) {
-      if (keep[i]) ansp[ansi++] = i+1;
+      if (keep[i])
+        ansp[ansi++] = i+1;
     }
   }
   UNPROTECT(1);
@@ -247,7 +292,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md
   bool anyNA=false, orderedSubset=true;   // true for when rows==null (meaning all rows)
   if (!isNull(rows) && check_idx(rows, nrow, &anyNA, &orderedSubset)!=NULL) {
     SEXP max = PROTECT(ScalarInteger(nrow)); nprotect++;
-    rows = PROTECT(convertNegAndZeroIdx(rows, max, ScalarLogical(TRUE))); nprotect++;
+    rows = PROTECT(convertNegAndZeroIdx(rows, max, ScalarLogical(TRUE), ScalarLogical(FALSE))); nprotect++;
     const char *err = check_idx(rows, nrow, &anyNA, &orderedSubset);
     if (err!=NULL) error(err);
   }

From 4aa050778cce87bf483aa5b6bdeaa3c76bea3d97 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sat, 4 Apr 2020 02:09:11 +0100
Subject: [PATCH 2/8] afix test 798, notjoin and negative subset

---
 inst/tests/tests.Rraw | 19 ++++++++++---------
 src/subset.c          | 41 ++++++++++++++++++++---------------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index a26e26c36..b0f1abb1f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2217,7 +2217,7 @@ test(794, DT[!"b"], DT[c("a","c")])
 test(795, DT[!0], DT)
 test(796, DT[!NULL], DT[NULL])
 test(797, DT[!integer()], DT)
-test(798, DT[!-1], DT[1])
+test(798, DT[!-1], DT[1]) # this passed nomatch==0 to convertNegAndZeroIdx #3109
 test(799, DT[--1], DT[1])
 myi = c("a","c")
 test(800, DT[!myi], DT["b"])
@@ -16853,11 +16853,12 @@ test(2139.01, dt[c(1L, 5L, NA_integer_)], data.table(x=c(1L,NA_integer_,NA_integ
 test(2139.02, dt[c(1L, 5L, NA_integer_), nomatch=NULL], data.table(x = 1L))
 test(2139.03, dt[c(1L, 5L, NA_integer_), x, nomatch=NULL], 1L)
 test(2139.04, dt[c(1:4,1:4), nomatch=NULL], data.table(x=c(1:4,1:4))) # early stopping convertNegAndZeroIdx
-test(2139.05, dt[c(1:4,-1L), nomatch=NULL], error="i argument must not be negative")
-test(2139.06, dt[c(1:4,NA_integer_,-1L), nomatch=NULL], error="i argument must not be negative")
-test(2139.07, dt[NA, nomatch=NULL], data.table(x=integer()))
-test(2139.08, dt[0L, nomatch=NULL], data.table(x=integer()))
-test(2139.09, dt[0:1, nomatch=NULL], data.table(x=1L))
-test(2139.10, dt[-1L, nomatch=NULL], error="i argument must not be negative")
-test(2139.11, data.table()[1L, nomatch=NULL], data.table())
-test(2139.12, data.table()[-1L, nomatch=NULL], error="i argument must not be negative")
+test(2139.05, dt[c(1:4,-1L), nomatch=NULL], error="Cannot mix positives and negatives")
+test(2139.06, dt[c(1:4,NA_integer_,-1L), nomatch=NULL], error="Cannot mix positives and negatives")
+test(2139.07, dt[c(-1L,NA_integer_), nomatch=NULL], error="Cannot mix negatives and NA")
+test(2139.08, dt[NA, nomatch=NULL], data.table(x=integer()))
+test(2139.09, dt[0L, nomatch=NULL], data.table(x=integer()))
+test(2139.10, dt[0:1, nomatch=NULL], data.table(x=1L))
+test(2139.11, dt[-1L, nomatch=NULL], data.table(x=2:4))
+test(2139.12, data.table()[1L, nomatch=NULL], data.table())
+test(2139.13, data.table()[-1L, nomatch=NULL], data.table(), warning="there are only 0 rows")
diff --git a/src/subset.c b/src/subset.c
index bde0033db..f9366b47b 100644
--- a/src/subset.c
+++ b/src/subset.c
@@ -166,9 +166,6 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
   if (firstOverMax && LOGICAL(allowOverMax)[0]==FALSE) {
     error(_("i[%d] is %d which is out of range [1,nrow=%d]"), firstOverMax, idxp[firstOverMax-1], max);
   }
-  if (countNeg && nomatch) {
-    error(_("i argument must not be negative when used together with nomatch=NULL"));
-  }
 
   int countPos = n-countNeg-countZero-countNA;
   if (countPos && countNeg) {
@@ -197,24 +194,26 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
   }
 
   SEXP ans;
-  if (nomatch) { // also countNeg==0 checked before
-    ans = PROTECT(allocVector(INTSXP, n-countZero-countNA-countOverMax));
-    int *ansp = INTEGER(ans);
-    for (int i=0, ansi=0; i<n; i++) {
-      int elem = idxp[i];
-      if (elem<1 || elem>max)
-        continue;
-      ansp[ansi++] = elem;
-    }
-  } else if (countNeg==0) {
-    // just zeros to remove, or >max to convert to NA
-    ans = PROTECT(allocVector(INTSXP, n - countZero));
-    int *ansp = INTEGER(ans);
-    for (int i=0, ansi=0; i<n; i++) {
-      int elem = idxp[i];
-      if (elem==0)
-        continue;
-      ansp[ansi++] = elem>max ? NA_INTEGER : elem;
+  if (countNeg==0) {
+    if (nomatch) {
+      ans = PROTECT(allocVector(INTSXP, n-countZero-countNA-countOverMax));
+      int *ansp = INTEGER(ans);
+      for (int i=0, ansi=0; i<n; i++) {
+        int elem = idxp[i];
+        if (elem<1 || elem>max)
+          continue;
+        ansp[ansi++] = elem;
+      }
+    } else {
+      // just zeros to remove, or >max to convert to NA
+      ans = PROTECT(allocVector(INTSXP, n - countZero));
+      int *ansp = INTEGER(ans);
+      for (int i=0, ansi=0; i<n; i++) {
+        int elem = idxp[i];
+        if (elem==0)
+          continue;
+        ansp[ansi++] = elem>max ? NA_INTEGER : elem;
+      }
     }
   } else {
     // idx is all negative without any NA but perhaps some zeros

From 8879054889b1194028b487ad605298a84d59d216 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sat, 4 Apr 2020 02:12:56 +0100
Subject: [PATCH 3/8] use proper nomatch value

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 9662351a7..4bf31abe8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -85,7 +85,7 @@ unit = "s")
 
 ```r
 DT = data.table(x = 1:4)
-DT[c(1L, NA_integer_, 3L, 5L), nomatch = 0L]
+DT[c(1L, NA_integer_, 3L, 5L), nomatch = NULL]
 #       x
 #   <int>
 #1:     1

From 8c7ff0a27fc71ec513ca8730732f62316e3dba63 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sat, 4 Apr 2020 11:16:07 +0100
Subject: [PATCH 4/8] fix missing codecov

---
 inst/tests/tests.Rraw | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b0f1abb1f..a605dc83e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16862,3 +16862,6 @@ test(2139.10, dt[0:1, nomatch=NULL], data.table(x=1L))
 test(2139.11, dt[-1L, nomatch=NULL], data.table(x=2:4))
 test(2139.12, data.table()[1L, nomatch=NULL], data.table())
 test(2139.13, data.table()[-1L, nomatch=NULL], data.table(), warning="there are only 0 rows")
+test(2139.14, dt[-c(1L,0L)], data.table(x=2:4)) # codecov gap, not related to nomatch
+test(2139.15, dt[-c(1L,0L), nomatch=NULL], data.table(x=2:4))
+

From 7434ed6a1ede1c08be543cfe354c6be9559ec912 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Sat, 21 Aug 2021 13:47:45 -0600
Subject: [PATCH 5/8] undo line splitting so I can see what logic changes are;
 pending C same-line-branch codecov
 https://github.com/Rdatatable/data.table/pull/4301#issuecomment-903161605

---
 src/subset.c | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/src/subset.c b/src/subset.c
index 576428a3c..b601eb5e7 100644
--- a/src/subset.c
+++ b/src/subset.c
@@ -160,8 +160,7 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
         stop = true;
     }
   }
-  if (!stop)
-    return(idx); // most common case to return early: no 0, no negative; all idx either NA (for !nomatch) or in range [1-max]
+  if (!stop) return(idx); // most common case to return early: no 0, no negative; all idx either NA (for !nomatch) or in range [1-max]
 
   // ---------
   // else massage the input to a standard idx where all items are either NA (for !nomatch) or in range [1,max] ...
@@ -169,12 +168,9 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
   int countNeg=0, countZero=0, countNA=0, countOverMax=0, firstOverMax=0;
   for (int i=0; i<n; i++) {
     int elem = idxp[i];
-    if (elem==NA_INTEGER)
-      countNA++;
-    else if (elem<0)
-      countNeg++;
-    else if (elem==0)
-      countZero++;
+    if (elem==NA_INTEGER) countNA++;
+    else if (elem<0) countNeg++;
+    else if (elem==0) countZero++;
     else if (elem>max) {
       countOverMax++;
       if (firstOverMax==0)
@@ -190,10 +186,8 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
     int i=0, firstNeg=0, firstPos=0;
     while (i<n && (firstNeg==0 || firstPos==0)) {
       int elem = idxp[i];
-      if (firstPos==0 && elem>0)
-        firstPos=i+1;
-      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER)
-        firstNeg=i+1;
+      if (firstPos==0 && elem>0) firstPos=i+1;
+      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1;
       i++;
     }
     error(_("Item %d of i is %d and item %d is %d. Cannot mix positives and negatives."), firstNeg, idxp[firstNeg-1], firstPos, idxp[firstPos-1]);
@@ -202,10 +196,8 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
     int i=0, firstNeg=0, firstNA=0;
     while (i<n && (firstNeg==0 || firstNA==0)) {
       int elem = idxp[i];
-      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER)
-        firstNeg=i+1;
-      if (firstNA==0 && elem==NA_INTEGER)
-        firstNA=i+1;
+      if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1;
+      if (firstNA==0 && elem==NA_INTEGER) firstNA=i+1;
       i++;
     }
     error(_("Item %d of i is %d and item %d is NA. Cannot mix negatives and NA."), firstNeg, idxp[firstNeg-1], firstNA);
@@ -236,24 +228,20 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
   } else {
     // idx is all negative without any NA but perhaps some zeros
     bool *keep = (bool *)R_alloc(max, sizeof(bool));    // 4 times less memory that INTSXP in src/main/subscript.c
-    for (int i=0; i<max; i++)
-      keep[i] = true;
+    for (int i=0; i<max; i++) keep[i] = true;
     int countRemoved=0, countDup=0, countBeyond=0;   // idx=c(-10,-5,-10) removing row 10 twice
     int firstBeyond=0, firstDup=0;
     for (int i=0; i<n; i++) {
       int elem = -idxp[i];
-      if (elem==0)
-        continue;
+      if (elem==0) continue;
       if (elem>max) {
         countBeyond++;
-        if (firstBeyond==0)
-          firstBeyond=i+1;
+        if (firstBeyond==0) firstBeyond=i+1;
         continue;
       }
       if (!keep[elem-1]) {
         countDup++;
-        if (firstDup==0)
-          firstDup=i+1;
+        if (firstDup==0) firstDup=i+1;
       } else {
         keep[elem-1] = false;
         countRemoved++;
@@ -267,8 +255,7 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
     ans = PROTECT(allocVector(INTSXP, ansn));
     int *ansp = INTEGER(ans);
     for (int i=0, ansi=0; i<max; i++) {
-      if (keep[i])
-        ansp[ansi++] = i+1;
+      if (keep[i]) ansp[ansi++] = i+1;
     }
   }
   UNPROTECT(1);

From 98021b9acc34f7d6d31236aef6e71848bca5d3ed Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Sat, 21 Aug 2021 23:12:01 -0600
Subject: [PATCH 6/8] reduce new code, rename nomatch to allowNA

---
 R/data.table.R |  2 +-
 src/subset.c   | 73 +++++++++++++++++++-------------------------------
 2 files changed, 29 insertions(+), 46 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 3f90abbcb..adc7198f9 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -621,7 +621,7 @@ replace_dot_alias = function(e) {
         irows = as.integer(i)  # e.g. DT[c(1,3)] and DT[c(-1,-3)] ok but not DT[c(1,-3)] (caught as error)
         irows = .Call(CconvertNegAndZeroIdx, irows, nrow(x),
                       is.null(jsub) || root!=":=",   # allowOverMax (NA when selecting, error when assigning)
-                      identical(nomatch,0L))         # nomatchArg, keeps only shuffle #3109
+                      is.na(nomatch))                # allowNA=false when nomatch=NULL (converted to 0L at the top currently) #3109 #3666
         # simplifies logic from here on: can assume positive subscripts (no zeros), for a nomatch=0L also no NAs
         # maintains Arun's fix for #2697 (test 1042)
         # efficient in C with more detailed helpful messages when user mixes positives and negatives
diff --git a/src/subset.c b/src/subset.c
index b601eb5e7..a04fcc9ec 100644
--- a/src/subset.c
+++ b/src/subset.c
@@ -124,58 +124,42 @@ const char *check_idx(SEXP idx, int max, bool *anyNA_out, bool *orderedSubset_ou
   return NULL;
 }
 
-SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatchArg)
+SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP allowNAArg)
 {
   // called from [.data.table to massage user input, creating a new strictly positive idx if there are any negatives or zeros
   // + more precise and helpful error messages telling user exactly where the problem is (saving user debugging time)
   // + a little more efficient than negativeSubscript in src/main/subscript.c (it's private to R so we can't call it anyway)
   // allowOverMaxArg is false when := (test 1024), otherwise true for selecting
-  // nomatchArg is true (nomatch==0L) when we want to skip nomatch'ing rows, it also excludes NAs #3109
+  // allowNAArg is false when nomatch=NULL #3109 #3666
 
   if (!isInteger(idx)) error(_("Internal error. 'idx' is type '%s' not 'integer'"), type2char(TYPEOF(idx))); // # nocov
   if (!isInteger(maxArg) || length(maxArg)!=1) error(_("Internal error. 'maxArg' is type '%s' and length %d, should be an integer singleton"), type2char(TYPEOF(maxArg)), length(maxArg)); // # nocov
   if (!isLogical(allowOverMax) || LENGTH(allowOverMax)!=1 || LOGICAL(allowOverMax)[0]==NA_LOGICAL) error(_("Internal error: allowOverMax must be TRUE/FALSE"));  // # nocov
-  if (!isLogical(nomatchArg) || LENGTH(nomatchArg)!=1 || LOGICAL(nomatchArg)[0]==NA_LOGICAL) error(_("Internal error: nomatchArg must be TRUE/FALSE"));  // # nocov
-  int max = INTEGER(maxArg)[0], n=LENGTH(idx), nomatch = LOGICAL(nomatchArg)[0];
+  const int max = INTEGER(maxArg)[0], n=LENGTH(idx);
   if (max<0) error(_("Internal error. max is %d, must be >= 0."), max); // # nocov    includes NA which will print as INT_MIN
-  int *idxp = INTEGER(idx);
+  if (!isLogical(allowNAArg) || LENGTH(allowNAArg)!=1 || LOGICAL(allowNAArg)[0]==NA_LOGICAL) error(_("Internal error: allowNAArg must be TRUE/FALSE"));  // # nocov
+  const bool allowNA = LOGICAL(allowNAArg)[0];
 
+  const int *idxp = INTEGER(idx);
   bool stop = false;
-  if (!nomatch) {
-    #pragma omp parallel for num_threads(getDTthreads(n, true))
-    for (int i=0; i<n; i++) {
-      if (stop)
-        continue;
-      int elem = idxp[i];
-      if ((elem<1 && elem!=NA_INTEGER) || elem>max)
-        stop = true;
-    }
-  } else {
-    #pragma omp parallel for num_threads(getDTthreads(n, true))
-    for (int i=0; i<n; i++) {
-      if (stop)
-        continue;
-      int elem = idxp[i];
-      if (elem<1 || elem>max)
-        stop = true;
-    }
+  #pragma omp parallel for num_threads(getDTthreads(n, true))
+  for (int i=0; i<n; ++i) {
+    if (stop) continue;
+    int elem = idxp[i];
+    if ((elem<1 && (elem!=NA_INTEGER || !allowNA)) || elem>max) stop=true;
   }
-  if (!stop) return(idx); // most common case to return early: no 0, no negative; all idx either NA (for !nomatch) or in range [1-max]
+  if (!stop) return(idx); // most common case to return early: no 0, no negative; all idx either NA (if allowNA) or in range [1-max]
 
   // ---------
-  // else massage the input to a standard idx where all items are either NA (for !nomatch) or in range [1,max] ...
+  // else massage the input to a standard idx where all items are either in range [1,max], or NA (if allowNA)
 
-  int countNeg=0, countZero=0, countNA=0, countOverMax=0, firstOverMax=0;
-  for (int i=0; i<n; i++) {
+  int countNeg=0, countZero=0, countNA=0, firstOverMax=0, countOverMax=0;
+  for (int i=0; i<n; ++i) {
     int elem = idxp[i];
     if (elem==NA_INTEGER) countNA++;
     else if (elem<0) countNeg++;
     else if (elem==0) countZero++;
-    else if (elem>max) {
-      countOverMax++;
-      if (firstOverMax==0)
-        firstOverMax=i+1;
-    }
+    else if (elem>max && ++countOverMax && firstOverMax==0) firstOverMax=i+1;
   }
   if (firstOverMax && LOGICAL(allowOverMax)[0]==FALSE) {
     error(_("i[%d] is %d which is out of range [1,nrow=%d]"), firstOverMax, idxp[firstOverMax-1], max);
@@ -205,24 +189,23 @@ SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax, SEXP nomatch
 
   SEXP ans;
   if (countNeg==0) {
-    if (nomatch) {
-      ans = PROTECT(allocVector(INTSXP, n-countZero-countNA-countOverMax));
+    if (allowNA) {
+      // remove zeros, convert >max to NA
+      ans = PROTECT(allocVector(INTSXP, n-countZero));
       int *ansp = INTEGER(ans);
-      for (int i=0, ansi=0; i<n; i++) {
+      for (int i=0, ansi=0; i<n; ++i) {
         int elem = idxp[i];
-        if (elem<1 || elem>max)
-          continue;
-        ansp[ansi++] = elem;
+        if (elem==0) continue;
+        ansp[ansi++] = elem>max ? NA_INTEGER : elem;
       }
     } else {
-      // just zeros to remove, or >max to convert to NA
-      ans = PROTECT(allocVector(INTSXP, n - countZero));
+      // remove zeros, NA and >max
+      ans = PROTECT(allocVector(INTSXP, n-countZero-countNA-countOverMax));
       int *ansp = INTEGER(ans);
-      for (int i=0, ansi=0; i<n; i++) {
+      for (int i=0, ansi=0; i<n; ++i) {
         int elem = idxp[i];
-        if (elem==0)
-          continue;
-        ansp[ansi++] = elem>max ? NA_INTEGER : elem;
+        if (elem<1 || elem>max) continue;
+        ansp[ansi++] = elem;
       }
     }
   } else {
@@ -296,7 +279,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md
   bool anyNA=false, orderedSubset=true;   // true for when rows==null (meaning all rows)
   if (!isNull(rows) && check_idx(rows, nrow, &anyNA, &orderedSubset)!=NULL) {
     SEXP max = PROTECT(ScalarInteger(nrow)); nprotect++;
-    rows = PROTECT(convertNegAndZeroIdx(rows, max, ScalarLogical(TRUE), ScalarLogical(FALSE))); nprotect++;
+    rows = PROTECT(convertNegAndZeroIdx(rows, max, ScalarLogical(TRUE), ScalarLogical(TRUE))); nprotect++;
     const char *err = check_idx(rows, nrow, &anyNA, &orderedSubset);
     if (err!=NULL) error(err);
   }

From 4ffe3d0a96dd7af80229ff74446275e683580800 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Sun, 22 Aug 2021 13:06:49 -0600
Subject: [PATCH 7/8] reworked news item, warning for nomatch=0 just in this
 case which was ignored before, convert nomatch=0 to NULL instead of NULL to 0
 in internals

---
 NEWS.md                 | 21 ++++++++++++------
 R/data.table.R          | 37 +++++++++++++++----------------
 inst/tests/tests.Rraw   | 48 +++++++++++++++++++++++++----------------
 src/bmerge.c            |  3 +--
 src/nqrecreateindices.c |  6 +++---
 5 files changed, 67 insertions(+), 48 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e82ba16ca..f0581ccb5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -115,15 +115,22 @@
     mtcars |> DT(mpg>20, .(mean_hp=mean(hp)), by=cyl)
     ```
     
-23. It is now possible to use `nomatch=NULL` argument and integer `i` to specify rows to subset but return only those rows which exists in data.table, closes [#3109](https://github.com/Rdatatable/data.table/issues/3109), [#3666](https://github.com/Rdatatable/data.table/issues/3666). Thanks @mllg and @hadley for feature request.
+23. `DT[i, nomatch=NULL]` where `i` contains row numbers now excludes `NA` and any outside the range [1,nrow], [#3109](https://github.com/Rdatatable/data.table/issues/3109) [#3666](https://github.com/Rdatatable/data.table/issues/3666). Before, `NA` rows were returned always for such values; i.e. `nomatch=0|NULL` was ignored. Thanks Michel Lang and Hadley Wickham for the requests, and Jan Gorecki for the PR. Using `nomatch=0` in this case when `i` is row numbers generates the warning `Please use nomatch=NULL instead of nomatch=0; see news item 5 in v1.12.0 (Jan 2019)`.
 
     ```R
-    DT = data.table(x = 1:4)
-    DT[c(1L, NA_integer_, 3L, 5L), nomatch = NULL]
-    #       x
-    #   <int>
-    #1:     1
-    #2:     3
+    DT = data.table(A=1:3)
+    DT[c(1L, NA, 3L, 5L)]  # default nomatch=NA
+    #        A
+    #    <int>
+    # 1:     1
+    # 2:    NA
+    # 3:     3
+    # 4:    NA
+    DT[c(1L, NA, 3L, 5L), nomatch=NULL]
+    #        A
+    #    <int>
+    # 1:     1
+    # 2:     3
     ```
 
 ## BUG FIXES
diff --git a/R/data.table.R b/R/data.table.R
index e8bf7ca69..10a138129 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -221,12 +221,12 @@ replace_dot_alias = function(e) {
   # TO DO (document/faq/example). Removed for now ... if ((roll || rolltolast) && missing(mult)) mult="last" # for when there is exact match to mult. This does not control cases where the roll is mult, that is always the last one.
   .unsafe.opt() #3585
   missingnomatch = missing(nomatch)
-  if (is.null(nomatch)) nomatch = 0L # allow nomatch=NULL API already now, part of: https://github.com/Rdatatable/data.table/issues/857
-  if (!is.na(nomatch) && nomatch!=0L) stopf("nomatch= must be either NA or NULL (or 0 for backwards compatibility which is the same as NULL)")
-  nomatch = as.integer(nomatch)
+  nomatch0 = identical(nomatch,0) || identical(nomatch,0L)  # for warning with row-numbers in i; #4353
+  if (nomatch0) nomatch=NULL  # retain nomatch=0 backwards compatibility; #857
+  if (!is.na(nomatch) && !is.null(nomatch)) stopf("nomatch= must be either NA or NULL (or 0 for backwards compatibility which is the same as NULL but please use NULL)")
   if (!is.logical(which) || length(which)>1L) stopf("which= must be a logical vector length 1. Either FALSE, TRUE or NA.")
   if ((isTRUE(which)||is.na(which)) && !missing(j)) stopf("which==%s (meaning return row numbers) but j is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.", which)
-  if (!is.na(nomatch) && is.na(which)) stopf("which=NA with nomatch=0 would always return an empty vector. Please change or remove either which or nomatch.")
+  if (is.null(nomatch) && is.na(which)) stopf("which=NA with nomatch=0|NULL would always return an empty vector. Please change or remove either which or nomatch.")
   if (!with && missing(j)) stopf("j must be provided when with=FALSE")
   irows = NULL  # Meaning all rows. We avoid creating 1:nrow(x) for efficiency.
   notjoin = FALSE
@@ -313,7 +313,7 @@ replace_dot_alias = function(e) {
         stopf(":= with keyby is only possible when i is not supplied since you can't setkey on a subset of rows. Either change keyby to by or remove i")
       if (!missingnomatch) {
         warningf("nomatch isn't relevant together with :=, ignoring nomatch")
-        nomatch=0L
+        nomatch=NULL
       }
     }
   }
@@ -369,7 +369,7 @@ replace_dot_alias = function(e) {
     if (isub %iscall% "!") {
       notjoin = TRUE
       if (!missingnomatch) stopf("not-join '!' prefix is present on i but nomatch is provided. Please remove nomatch.");
-      nomatch = 0L
+      nomatch = NULL
       isub = isub[[2L]]
       # #932 related so that !(v1 == 1) becomes v1 == 1 instead of (v1 == 1) after removing "!"
       if (isub %iscall% "(" && !is.name(isub[[2L]]))
@@ -389,7 +389,7 @@ replace_dot_alias = function(e) {
       on = o$on
       ## the following two are ignored if i is not a data.table.
       ## Since we are converting i to data.table, it is important to set them properly.
-      nomatch = 0L
+      nomatch = NULL
       mult = "all"
     }
     else if (!is.name(isub)) {
@@ -509,8 +509,7 @@ replace_dot_alias = function(e) {
       len__ = ans$lens
       allGrp1 = all(ops==1L) # was previously 'ans$allGrp1'. Fixing #1991. TODO: Revisit about allGrp1 possibility for speedups in certain cases when I find some time.
       indices__ = if (length(ans$indices)) ans$indices else seq_along(f__) # also for #1991 fix
-      # length of input nomatch (single 0 or NA) is 1 in both cases.
-      # When no match, len__ is 0 for nomatch=0 and 1 for nomatch=NA, so len__ isn't .N
+      # When no match, len__ is 0 for nomatch=NULL and 1 for nomatch=NA, so len__ isn't .N
       # If using secondary key of x, f__ will refer to xo
       if (is.na(which)) {
         w = if (notjoin) f__!=0L else is.na(f__)
@@ -533,7 +532,7 @@ replace_dot_alias = function(e) {
           # Fix for #1092 and #1074
           # TODO: implement better version of "any"/"all"/"which" to avoid
           # unnecessary construction of logical vectors
-          if (identical(nomatch, 0L) && allLen1) irows = irows[irows != 0L]
+          if (is.null(nomatch) && allLen1) irows = irows[irows != 0L]
         } else {
           if (length(xo) && missing(on))
             stopf("Internal error. Cannot by=.EACHI when joining to an index, yet") # nocov
@@ -553,10 +552,10 @@ replace_dot_alias = function(e) {
       } else {
         if (!byjoin) { #1287 and #1271
           irows = f__ # len__ is set to 1 as well, no need for 'pmin' logic
-          if (identical(nomatch,0L)) irows = irows[len__>0L]  # 0s are len 0, so this removes -1 irows
+          if (is.null(nomatch)) irows = irows[len__>0L]  # 0s are len 0, so this removes -1 irows
         }
         # TODO: when nomatch=NA, len__ need not be allocated / set at all for mult="first"/"last"?
-        # TODO: how about when nomatch=0L, can we avoid allocating then as well?
+        # TODO: how about when nomatch=NULL, can we avoid allocating then as well?
       }
       if (length(xo) && length(irows)) {
         irows = xo[irows]   # TO DO: fsort here?
@@ -619,10 +618,12 @@ replace_dot_alias = function(e) {
         else stopf("i evaluates to a logical vector length %d but there are %d rows. Recycling of logical i is no longer allowed as it hides more bugs than is worth the rare convenience. Explicitly use rep(...,length=.N) if you really need to recycle.", length(i), nrow(x))
       } else {
         irows = as.integer(i)  # e.g. DT[c(1,3)] and DT[c(-1,-3)] ok but not DT[c(1,-3)] (caught as error)
+        if (nomatch0) warning("Please use nomatch=NULL instead of nomatch=0; see news item 5 in v1.12.0 (Jan 2019)")
+                      # warning only for this case where nomatch was ignored before v1.14.2; #3109
         irows = .Call(CconvertNegAndZeroIdx, irows, nrow(x),
                       is.null(jsub) || root!=":=",   # allowOverMax (NA when selecting, error when assigning)
-                      is.na(nomatch))                # allowNA=false when nomatch=NULL (converted to 0L at the top currently) #3109 #3666
-        # simplifies logic from here on: can assume positive subscripts (no zeros), for a nomatch=0L also no NAs
+                      !is.null(nomatch))             # allowNA=false when nomatch=NULL #3109, true when nomatch=NA #3666
+        # simplifies logic from here on: can assume positive subscripts (no zeros), for nomatch=NULL also no NAs
         # maintains Arun's fix for #2697 (test 1042)
         # efficient in C with more detailed helpful messages when user mixes positives and negatives
         # falls through quickly (no R level allocs) if all items are within range [1,max] with no zeros or negatives
@@ -630,7 +631,7 @@ replace_dot_alias = function(e) {
       }
     }
     if (notjoin) {
-      if (byjoin || !is.integer(irows) || is.na(nomatch)) stopf("Internal error: notjoin but byjoin or !integer or nomatch==NA") # nocov
+      if (byjoin || !is.integer(irows) || !is.null(nomatch)) stopf("Internal error: notjoin but byjoin or !integer or nomatch==NA") # nocov
       irows = irows[irows!=0L]
       if (verbose) {last.started.at=proc.time();catf("Inverting irows for notjoin done in ... ");flush.console()}
       i = irows = if (length(irows)) seq_len(nrow(x))[-irows] else NULL  # NULL meaning all rows i.e. seq_len(nrow(x))
@@ -845,7 +846,7 @@ replace_dot_alias = function(e) {
           # in 1.8.3, but this failed test 876.
           # TO DO: Add a test like X[i,sum(v),by=i.x2], or where by includes a join column (both where some i don't match).
           # TO DO: Make xss directly, rather than recursive call.
-          if (!is.na(nomatch)) irows = irows[irows!=0L]   # TO DO: can be removed now we have CisSortedSubset
+          if (is.null(nomatch)) irows = irows[irows!=0L]   # TO DO: can be removed now we have CisSortedSubset
           if (length(allbyvars)) {    ###############  TO DO  TO DO  TO DO  ###############
             if (verbose) catf("i clause present and columns used in by detected, only these subset: %s\n", brackify(allbyvars))
             xss = `[.data.table`(x,irows,allbyvars,with=FALSE,nomatch=nomatch,mult=mult,roll=roll,rollends=rollends)
@@ -1292,7 +1293,7 @@ replace_dot_alias = function(e) {
         }
         ans = vector("list", length(ansvars))
         ii = rep.int(indices__, len__) # following #1991 fix
-        # TODO: if (allLen1 && allGrp1 && (is.na(nomatch) || !any(f__==0L))) then ii will be 1:nrow(i)  [nomatch=0 should drop rows in i that have no match]
+        # TODO: if (allLen1 && allGrp1 && (!is.null(nomatch) || !any(f__==0L))) then ii will be 1:nrow(i)  [nomatch=NULL should drop rows in i that have no match]
         #       But rather than that complex logic here at R level to catch that and do a shallow copy for efficiency, just do the check inside CsubsetDT
         #       to see if it passed 1:nrow(x) and then CsubsetDT should do the shallow copy safely and centrally.
         #       That R level branch was taken out in PR #3213
@@ -1723,7 +1724,7 @@ replace_dot_alias = function(e) {
     }
     dotN = function(x) is.name(x) && x==".N" # For #334. TODO: Rprof() showed dotN() may be the culprit if iterated (#1470)?; avoid the == which converts each x to character?
     # FR #971, GForce kicks in on all subsets, no joins yet. Although joins could work with
-    # nomatch=0L even now.. but not switching it on yet, will deal it separately.
+    # nomatch=NULL even now.. but not switching it on yet, will deal it separately.
     if (getOption("datatable.optimize")>=2L && !is.data.table(i) && !byjoin && length(f__) && !length(lhs)) {
       if (!length(ansvars) && !use.I) {
         GForce = FALSE
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 7f3bbb3dc..4522fc693 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2293,7 +2293,8 @@ test(807, DT[!c("b","foo","c"),nomatch=0], error="not-join.*prefix is present on
 test(808, DT[c("b","foo","c"),which=TRUE,nomatch=NA], INT(3:4,NA,5:6))
 test(809, DT[c("b","foo","c"),which=TRUE,nomatch=0], INT(3:4,5:6))
 test(810, DT[c("b","foo","c"),which=NA,nomatch=NA], 2L)
-test(811, DT[c("b","foo","c"),which=NA,nomatch=0], error="which=NA with nomatch=0 would always return an empty vector[.] Please change or remove either which or nomatch")
+test(811.1, DT[c("b","foo","c"),which=NA,nomatch=0], error="which=NA with nomatch=0|NULL would always return an empty vector[.] Please change or remove either which or nomatch")
+test(811.2, DT[c("b","foo","c"),which=NA,nomatch=NULL], error="which=NA with nomatch=0|NULL would always return an empty vector[.] Please change or remove either which or nomatch")
 
 # New notj for column names and positions when with=FALSE, #1384
 DT = data.table(a=1:3,b=4:6,c=7:9)
@@ -17979,21 +17980,32 @@ test(2209.3, fread('A|B,C\n1|+,4\n2|-,5\n3|-,6\n', dec=','), data.table(A=1:3, '
 test(2209.4, fread('A|B|C\n1|0,4|a\n2|0,5|b\n', dec=','), data.table(A=1:2, B=c(0.4,0.5), C=c("a","b")))  # ok before
 test(2209.5, fread('A|B,C\n1|0,4\n2|0,5\n', dec=','), data.table(A=1:2, "B,C"=c(0.4,0.5)))
 
-# respect `nomatch = 0L` for integer i, #3109 #3666
-dt = data.table(x = 1:4)
-test(2210.01, dt[c(1L, 5L, NA_integer_)], data.table(x=c(1L,NA_integer_,NA_integer_))) # test the default nomatch
-test(2210.02, dt[c(1L, 5L, NA_integer_), nomatch=NULL], data.table(x = 1L))
-test(2210.03, dt[c(1L, 5L, NA_integer_), x, nomatch=NULL], 1L)
-test(2210.04, dt[c(1:4,1:4), nomatch=NULL], data.table(x=c(1:4,1:4))) # early stopping convertNegAndZeroIdx
-test(2210.05, dt[c(1:4,-1L), nomatch=NULL], error="Cannot mix positives and negatives")
-test(2210.06, dt[c(1:4,NA_integer_,-1L), nomatch=NULL], error="Cannot mix positives and negatives")
-test(2210.07, dt[c(-1L,NA_integer_), nomatch=NULL], error="Cannot mix negatives and NA")
-test(2210.08, dt[NA, nomatch=NULL], data.table(x=integer()))
-test(2210.09, dt[0L, nomatch=NULL], data.table(x=integer()))
-test(2210.10, dt[0:1, nomatch=NULL], data.table(x=1L))
-test(2210.11, dt[-1L, nomatch=NULL], data.table(x=2:4))
-test(2210.12, data.table()[1L, nomatch=NULL], data.table())
-test(2210.13, data.table()[-1L, nomatch=NULL], data.table(), warning="there are only 0 rows")
-test(2210.14, dt[-c(1L,0L)], data.table(x=2:4)) # codecov gap, not related to nomatch
-test(2210.15, dt[-c(1L,0L), nomatch=NULL], data.table(x=2:4))
+# respect `nomatch=NULL` for integer i, #3109 #3666
+DT = data.table(x = 1:4)
+test(2210.01, DT[c(1L, 5L, NA_integer_)], data.table(x=c(1L,NA_integer_,NA_integer_))) # default nomatch=NA
+test(2210.02, DT[c(1L, 5L, NA_integer_), nomatch=NULL], data.table(x = 1L))
+test(2210.03, DT[c(1L, 5L, NA_integer_), nomatch=0], data.table(x = 1L), warning="Please use nomatch=NULL")
+test(2210.04, DT[c(1L, 5L, NA_integer_), x, nomatch=NULL], 1L)
+test(2210.05, DT[c(1L, 5L, NA_integer_), x, nomatch=0], 1L, warning="Please use nomatch=NULL")
+test(2210.06, DT[c(1:4,1:4), nomatch=NULL], data.table(x=c(1:4,1:4))) # early stopping convertNegAndZeroIdx
+test(2210.07, DT[c(1:4,-1L), nomatch=NULL], error="Cannot mix positives and negatives")
+test(2210.08, DT[c(1:4,NA_integer_,-1L), nomatch=NULL], error="Cannot mix positives and negatives")
+test(2210.09, DT[c(1:4,NA_integer_,-1L), nomatch=0], error="Cannot mix positives and negatives", warning="Please use nomatch=NULL")
+test(2210.10, DT[c(-1L,NA_integer_), nomatch=NULL], error="Cannot mix negatives and NA")
+test(2210.11, DT[c(-1L,NA_integer_), nomatch=0], error="Cannot mix negatives and NA", warning="Please use nomatch=NULL")
+test(2210.12, DT[NA, nomatch=NULL], data.table(x=integer()))
+test(2210.13, DT[NA, nomatch=0], data.table(x=integer()), warning="Please use nomatch=NULL")
+test(2210.14, DT[0L, nomatch=NULL], data.table(x=integer()))
+test(2210.15, DT[0L, nomatch=0], data.table(x=integer()), warning="Please use nomatch=NULL")
+test(2210.16, DT[0:1, nomatch=NULL], data.table(x=1L))
+test(2210.17, DT[0:1, nomatch=0], data.table(x=1L), warning="Please use nomatch=NULL")
+test(2210.18, DT[-1L, nomatch=NULL], data.table(x=2:4))
+test(2210.19, DT[-1L, nomatch=0], data.table(x=2:4), warning="Please use nomatch=NULL")
+test(2210.20, data.table()[1L, nomatch=NULL], data.table())
+test(2210.21, data.table()[1L, nomatch=0], data.table(), warning="Please use nomatch=NULL")
+test(2210.22, data.table()[-1L, nomatch=NULL], data.table(), warning="there are only 0 rows")
+test(2210.23, data.table()[-1L, nomatch=0], data.table(), warning=c("Please use nomatch=NULL","there are only 0 rows"))
+test(2210.24, DT[-c(1L,0L)], data.table(x=2:4)) # codecov gap, not related to nomatch
+test(2210.25, DT[-c(1L,0L), nomatch=NULL], data.table(x=2:4))
+test(2210.26, DT[-c(1L,0L), nomatch=0], data.table(x=2:4), warning="Please use nomatch=NULL")
 
diff --git a/src/bmerge.c b/src/bmerge.c
index 83c5b167d..fac7ee281 100644
--- a/src/bmerge.c
+++ b/src/bmerge.c
@@ -85,8 +85,7 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SEXP
     error(_("rollends must be a length 2 logical vector"));
   rollends = LOGICAL(rollendsArg);
 
-  // nomatch arg
-  nomatch = INTEGER(nomatchArg)[0];
+  nomatch = isNull(nomatchArg) ? 0 : INTEGER(nomatchArg)[0];
 
   // mult arg
   if (!strcmp(CHAR(STRING_ELT(multArg, 0)), "all")) mult = ALL;
diff --git a/src/nqrecreateindices.c b/src/nqrecreateindices.c
index a07ce38c7..a750c093e 100644
--- a/src/nqrecreateindices.c
+++ b/src/nqrecreateindices.c
@@ -14,7 +14,7 @@ SEXP nqRecreateIndices(SEXP xo, SEXP len, SEXP indices, SEXP nArg, SEXP nomatch)
   const int *iindices = INTEGER(indices);
   const int *ilen = INTEGER(len);
   const int *ixo = INTEGER(xo);
-  const int *inomatch = INTEGER(nomatch);
+  const int inomatch = isNull(nomatch) ? 0 : INTEGER(nomatch)[0];
   int *inewstarts = INTEGER(newstarts);
 
   for (int i=0; i<n; ++i) inewlen[i] = 0;
@@ -28,8 +28,8 @@ SEXP nqRecreateIndices(SEXP xo, SEXP len, SEXP indices, SEXP nArg, SEXP nomatch)
   for (int i=0; i<n; ++i) {
     if (ixo[j] <= 0 || j >= xn) {
       // NA_integer_ = INT_MIN is checked in init.c
-      // j >= xn needed for special nomatch=0L case, see issue#4388 (due to xo[irows] from R removing '0' value in xo)
-      inewstarts[i] = inomatch[0];
+      // j >= xn needed for special nomatch=NULL case, see issue#4388 (due to xo[irows] from R removing '0' value in xo)
+      inewstarts[i] = inomatch;
       j++; // newlen will be 1 for xo=NA and 0 for xo=0 .. but we need to increment by 1 for both
     } else {
       inewstarts[i] = tmp+1;

From 442e0d35a6ee0b486da0aa3702a55e9952e02fc4 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Sun, 22 Aug 2021 13:19:27 -0600
Subject: [PATCH 8/8] remove new comment on unchanged test that I don't
 understand (nomatch wasn't passed to convertNegAndZeroIdx at all)

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 4522fc693..d477b13e7 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2277,7 +2277,7 @@ test(794, DT[!"b"], DT[c("a","c")])
 test(795, DT[!0], DT)
 test(796, DT[!NULL], DT[NULL])
 test(797, DT[!integer()], DT)
-test(798, DT[!-1], DT[1]) # this passed nomatch==0 to convertNegAndZeroIdx #3109
+test(798, DT[!-1], DT[1])
 test(799, DT[--1], DT[1])
 myi = c("a","c")
 test(800, DT[!myi], DT["b"])