From 8cd20774e2df468cba4b8f002c6cf28c6bbc69fe Mon Sep 17 00:00:00 2001 From: arunsrinivasan Date: Sat, 5 Mar 2016 22:01:41 +0100 Subject: [PATCH] Closes #1287 and closes #1271. by=.EACHI plays nice with mult="first"/"last". --- R/data.table.R | 11 +++++++---- README.md | 2 ++ inst/tests/tests.Rraw | 8 ++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 8a8ba6c43..38c9a353a 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -671,10 +671,13 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (length(irows)) stop("Internal error. irows has length in by=.EACHI") } } else { - irows = if (mult=="first") f__ else f__+len__-1L - if (identical(nomatch,0L)) irows = irows[len__>0L] # 0s are len 0, so this removes -1 irows - if (length(len__)) len__ = pmin(len__,1L) # for test 456, and consistency generally - # the if() is for R < 2.15.1 when pmin was enhanced, see v1.8.6. + if (!byjoin) { # fix for #1287 and #1271 + irows = if (mult=="first") f__ else f__+len__-1L + if (identical(nomatch,0L)) irows = irows[len__>0L] # 0s are len 0, so this removes -1 irows + if (length(len__)) len__ = pmin(len__,1L) + } else { if (mult == "last") f__ = f__+len__- 1L } # fix for #1287 and #1271 + # for test 456, and consistency generally. The if() is for R < 2.15.1 when pmin was enhanced, see v1.8.6. + if (length(len__)) len__ = pmin(len__, 1L) } if (length(xo) && length(irows)) irows = xo[irows] # TO DO: fsort here? } else { diff --git a/README.md b/README.md index cc2ae1b87..8617af9e5 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,8 @@ 40. Not join along with `mult="first"` and `mult="last"` is handled correctly, [#1571](https://github.com/Rdatatable/data.table/issues/1571). + 41. `by=.EACHI` works as expected along with `mult="first"` and `mult="last"`, [#1287](https://github.com/Rdatatable/data.table/issues/1287) and [#1271](https://github.com/Rdatatable/data.table/issues/1271). + #### NOTES 1. 
Updated error message on invalid joins to reflect the new `on=` syntax, [#1368](https://github.com/Rdatatable/data.table/issues/1368). Thanks @MichaelChirico. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index af1f70fb3..14c23ff6d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7618,6 +7618,14 @@ test(1615.1, x[!y, on="V1", mult="first"], data.table(V1=c(1,7), V2=INT(c(1,4))) test(1615.2, x[!y, on="V1", mult="last"], data.table(V1=c(1,7), V2=INT(c(2,9)))) test(1615.3, x[!y, on="V1", mult="all"], data.table(V1=c(1,1,7,7), V2=INT(c(1,2,4,9)))) +# fix for #1287 and #1271 +set.seed(1L) +dt = data.table(a=c(1,1,2), b=sample(10,3), c=sample(10,3)) +test(1616.1, dt[.(1:2), if (c-b > 0L) b, on="a", by=.EACHI, mult="first"], data.table(a=c(1,2), V1=c(3L,5L))) +test(1616.2, dt[.(1:2), if (c-b > 0L) b, on="a", by=.EACHI, mult="last"], data.table(a=c(2), V1=5L)) +test(1616.3, dt[.(1:2), c := if (c-b > 0L) b, by=.EACHI, mult="first", on="a"], + data.table(a=dt$a, b=dt$b, c=c(3L,2L,5L)) ) + ########################## # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time.