Skip to content

Commit

Permalink
Pass ... on to merge
Browse files Browse the repository at this point in the history
Fixes #41

And rewrite test
  • Loading branch information
hadley committed Jun 13, 2019
1 parent 68bdea7 commit d9c7583
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 59 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Description: This implements the data table back-end for 'dplyr' so that you
License: GPL (>= 2)
Imports:
dplyr (>= 0.5.0),
ellipsis,
tidyselect,
data.table (>= 1.9.6),
rlang
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# dtplyr 0.0.3.9000

* Joins now pass `...` on to data.table's `merge()` method (#41).

* Convert from lazyeval to tidy eval (@christophsax).

* `ungroup()` now copies its input (@christophsax, #54).
Expand All @@ -20,7 +22,6 @@
data.table 1.9.6. Avoids copy and allows joins by different keys (#20, #21,
@christophsax).


# dtplyr 0.0.2

- This is a compatibility release. It makes dtplyr compatible with
Expand Down
56 changes: 33 additions & 23 deletions R/joins.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
#'
#' @inheritParams dplyr::join
#' @param x,y tbls to join
#' @param ... Included for compatibility with generic; otherwise ignored.
#' @param ... For `inner_join()`, `left_join()`, `right_join()` and
#' `full_join()`, passed on to the data.table [merge()] method. For
#' `semi_join()` and `anti_join()`, included only for compatibility with
#' the generic; must be empty.
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
Expand All @@ -31,51 +34,56 @@
#' @name join.tbl_dt
NULL

join_using_merge <- function(x, y, by, copy, suffix,
join_using_merge <- function(x, y, ..., by, copy, suffix,
all.x = FALSE, all.y = FALSE){
by <- dplyr::common_by(by, x, y)
y <- dplyr::auto_copy(x, y, copy = copy)
out <- merge(
x, y,
by.x = by$x, by.y = by$y,
x, y,
by.x = by$x, by.y = by$y,
all.x = all.x, all.y = all.y,
suffixes = suffix,
allow.cartesian = TRUE
suffixes = suffix,
allow.cartesian = TRUE,
...
)
grouped_dt(out, groups(x))
grouped_dt(out, groups(x))
}

#' @rdname join.tbl_dt
inner_join.data.table <- function(x, y, by = NULL, copy = FALSE,
suffix = c(".x", ".y"), ...){
join_using_merge(x, y, by = by, copy = copy, suffix = suffix)
inner_join.data.table <- function(x, y, ..., by = NULL, copy = FALSE,
suffix = c(".x", ".y")){
join_using_merge(x, y, by = by, copy = copy, suffix = suffix, ...)
}

#' @rdname join.tbl_dt
left_join.data.table <- function(x, y, by = NULL, copy = FALSE,
suffix = c(".x", ".y"), ...){
join_using_merge(x, y, by = by, copy = copy, suffix = suffix, all.x = TRUE)
left_join.data.table <- function(x, y, ..., by = NULL, copy = FALSE,
suffix = c(".x", ".y")){
join_using_merge(x, y, by = by, copy = copy, suffix = suffix, all.x = TRUE, ...)
}

#' @rdname join.tbl_dt
right_join.data.table <- function(x, y, by = NULL, copy = FALSE,
suffix = c(".x", ".y"), ...){
join_using_merge(x, y, by = by, copy = copy, suffix = suffix, all.y = TRUE)
right_join.data.table <- function(x, y, ..., by = NULL, copy = FALSE,
suffix = c(".x", ".y")){
join_using_merge(x, y, by = by, copy = copy, suffix = suffix, all.y = TRUE, ...)
}

#' @rdname join.tbl_dt
full_join.data.table <- function(x, y, by = NULL, copy = FALSE,
suffix = c(".x", ".y"), ...){
join_using_merge(x, y,
by = by,
copy = copy,
suffix = suffix,
all.x = TRUE, all.y = TRUE
full_join.data.table <- function(x, y, ..., by = NULL, copy = FALSE,
suffix = c(".x", ".y")){
join_using_merge(x, y,
by = by,
copy = copy,
suffix = suffix,
all.x = TRUE,
all.y = TRUE,
...
)
}

#' @rdname join.tbl_dt
semi_join.data.table <- function(x, y, by = NULL, copy = FALSE, ...) {
ellipsis::check_dots_empty()

by <- dplyr::common_by(by, x, y)
y <- dplyr::auto_copy(x, y, copy = copy)
on <- set_names(by$y, by$x)
Expand All @@ -87,6 +95,8 @@ semi_join.data.table <- function(x, y, by = NULL, copy = FALSE, ...) {

#' @rdname join.tbl_dt
anti_join.data.table <- function(x, y, by = NULL, copy = FALSE, ...) {
ellipsis::check_dots_empty()

by <- dplyr::common_by(by, x, y)
y <- dplyr::auto_copy(x, y, copy = copy)
on <- set_names(by$y, by$x)
Expand Down
23 changes: 13 additions & 10 deletions man/join.tbl_dt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 12 additions & 25 deletions tests/testthat/test-joins.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,21 @@ context("joins")


test_that("joining data tables returns data tables (#470) and does not modify them (#659)", {
a <- data.table(x = c(1, 1, 2, 3), y = 4:1)
b <- data.table(x = c(1, 2, 2, 4), z = 1:4)

test_join <- function(join_fun, ak, bk) {
data.table::setkeyv(a, ak)
data.table::setkeyv(b, bk)
ac <- data.table::copy(a)
bc <- data.table::copy(b)

out <- join_fun(a, b, "x")
expect_is(out, "data.table")
expect_equal(a, ac)
expect_equal(b, bc)
}

for (ak in names(a)) {
for (bk in names(b)) {
test_join(left_join, ak, bk)
test_join(semi_join, ak, bk)
test_join(right_join, ak, bk)
test_join(full_join, ak, bk)
test_join(inner_join, ak, bk)
test_join(anti_join, ak, bk)
}
x <- data.table(x = c(1, 1, 2, 3), y = 4:1)
y <- data.table(x = c(1, 2, 2, 4), z = 1:4)

join_funs <- list(left_join, semi_join, anti_join)
for (join_fun in join_funs) {
xprime <- data.table::copy(x)
yprime <- data.table::copy(y)

out <- join_fun(xprime, yprime, "x")
expect_s3_class(out, "data.table")
expect_equal(xprime, x)
expect_equal(yprime, y)
}
})


test_that("joining data tables returns same result as dplyr", {
a_dt <- data.table(x = c(1, 1, 2, 3), y = 4:1)
b_dt <- data.table(x = c(1, 2, 2, 4), z = 1:4)
Expand Down

0 comments on commit d9c7583

Please sign in to comment.