diff --git a/NEWS.md b/NEWS.md index 1bbd7a935..da716bef5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -58,6 +58,7 @@ - `$glimpse()` for `DataFrame` has two new arguments `max_items_per_column` and `max_colname_length` (#1200). - New method `$list$sample()` (#1204). +- New argument `coalesce` in `$join_asof()` (#1205). ### Other changes diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index d54545d8a..fab5fd769 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1448,7 +1448,8 @@ DataFrame_join_asof = function( suffix = "_right", tolerance = NULL, allow_parallel = TRUE, - force_parallel = FALSE) { + force_parallel = FALSE, + coalesce = TRUE) { # convert other to LazyFrame, capture any Error as a result, and pass it on other_df_result = pcase( @@ -1469,7 +1470,8 @@ DataFrame_join_asof = function( force_parallel = force_parallel, suffix = suffix, strategy = strategy, - tolerance = tolerance + tolerance = tolerance, + coalesce = coalesce )$collect() } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index d3a1b55cd..da507e4db 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -1252,7 +1252,7 @@ RPolarsLazyFrame$group_by <- function(exprs, maintain_order) .Call(wrap__RPolars RPolarsLazyFrame$with_row_index <- function(name, offset) .Call(wrap__RPolarsLazyFrame__with_row_index, self, name, offset) -RPolarsLazyFrame$join_asof <- function(other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str) .Call(wrap__RPolarsLazyFrame__join_asof, self, other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str) +RPolarsLazyFrame$join_asof <- function(other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str, coalesce) .Call(wrap__RPolarsLazyFrame__join_asof, self, other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str, coalesce) RPolarsLazyFrame$join <- function(other, left_on, right_on, how, validate, join_nulls, suffix, allow_parallel, force_parallel, coalesce) .Call(wrap__RPolarsLazyFrame__join, self, other, left_on, right_on, how, validate, join_nulls, suffix, allow_parallel, force_parallel, coalesce) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 2d2839a7f..811e76fed 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -1440,6 +1440,12 @@ LazyFrame_sort = function( #' There may be a circumstance where R types are not sufficient to express a #' numeric tolerance. In that case, you can use the expression syntax like #' `tolerance = pl$lit(42)$cast(pl$Uint64)` +#' @param coalesce Coalescing behavior (merging of `on` / `left_on` / `right_on` +#' columns): +#' * `TRUE`: Always coalesce join columns; +#' * `FALSE`: Never coalesce join columns. +#' Note that joining on any other expressions than `col` will turn off coalescing. +#' #' @inheritSection polars_duration_string Polars duration string language #' @examples # #' # create two LazyFrame to join asof @@ -1488,19 +1494,27 @@ LazyFrame_join_asof = function( suffix = "_right", tolerance = NULL, allow_parallel = TRUE, - force_parallel = FALSE) { + force_parallel = FALSE, + coalesce = TRUE) { if (!is.null(by)) by_left = by_right = by if (!is.null(on)) left_on = right_on = on tolerance_str = if (is.character(tolerance)) tolerance else NULL tolerance_num = if (!is.character(tolerance)) tolerance else NULL .pr$LazyFrame$join_asof( - self, other, - left_on, right_on, - by_left, by_right, - allow_parallel, force_parallel, - suffix, strategy, - tolerance_num, tolerance_str + self = self, + other = other, + left_on = left_on, + right_on = right_on, + left_by = by_left, + right_by = by_right, + allow_parallel = allow_parallel, + force_parallel = force_parallel, + suffix = suffix, + strategy = strategy, + tolerance = tolerance_num, + tolerance_str = tolerance_str, + coalesce = coalesce ) |> unwrap("in join_asof( ):") } diff --git a/man/DataFrame_join_asof.Rd b/man/DataFrame_join_asof.Rd index da41a1905..4e6312cc7 100644 --- a/man/DataFrame_join_asof.Rd +++ b/man/DataFrame_join_asof.Rd @@ -17,7 +17,8 @@ DataFrame_join_asof( suffix = "_right", tolerance = NULL, allow_parallel = TRUE, - force_parallel = FALSE + force_parallel = FALSE, + coalesce = TRUE ) } \arguments{ @@ -67,6 +68,14 @@ computation of both DataFrames up to the join in parallel.} \item{force_parallel}{Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.} + +\item{coalesce}{Coalescing behavior (merging of \code{on} / \code{left_on} / \code{right_on} +columns): +\itemize{ +\item \code{TRUE}: Always coalesce join columns; +\item \code{FALSE}: Never coalesce join columns. +Note that joining on any other expressions than \code{col} will turn off coalescing. +}} } \value{ New joined DataFrame diff --git a/man/LazyFrame_join_asof.Rd b/man/LazyFrame_join_asof.Rd index d2f02b895..e2ce6542b 100644 --- a/man/LazyFrame_join_asof.Rd +++ b/man/LazyFrame_join_asof.Rd @@ -17,7 +17,8 @@ LazyFrame_join_asof( suffix = "_right", tolerance = NULL, allow_parallel = TRUE, - force_parallel = FALSE + force_parallel = FALSE, + coalesce = TRUE ) } \arguments{ @@ -67,6 +68,14 @@ computation of both DataFrames up to the join in parallel.} \item{force_parallel}{Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.} + +\item{coalesce}{Coalescing behavior (merging of \code{on} / \code{left_on} / \code{right_on} +columns): +\itemize{ +\item \code{TRUE}: Always coalesce join columns; +\item \code{FALSE}: Never coalesce join columns. +Note that joining on any other expressions than \code{col} will turn off coalescing. +}} } \description{ This is similar to a left-join except that we match on nearest key rather diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 483d04642..7566097ce 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -384,6 +384,7 @@ impl RPolarsLazyFrame { strategy: Robj, tolerance: Robj, tolerance_str: Robj, + coalesce: Robj, ) -> RResult { let left_by = robj_to!(Option, Vec, String, left_by)?; let right_by = robj_to!(Option, Vec, String, right_by)?; @@ -401,6 +402,13 @@ impl RPolarsLazyFrame { .map_err(|err| RPolarsErr::new().plain(err))?; let tolerance_str = robj_to!(Option, String, tolerance_str)?; + let coalesce = robj_to!(bool, coalesce)?; + let coalesce = if coalesce { + JoinCoalesce::CoalesceColumns + } else { + JoinCoalesce::KeepColumns + }; + Ok(self .0 .clone() @@ -418,6 +426,7 @@ impl RPolarsLazyFrame { tolerance, tolerance_str: tolerance_str.map(|s| s.into()), })) + .coalesce(coalesce) .suffix(robj_to!(str, suffix)?) .finish() .into()) diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 0538f035a..23bffcf30 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -978,6 +978,12 @@ test_that("join_asof_simple", { pl$lit(NA_character_)$alias("group_right") )$to_list() ) + + # arg coalesce works + expect_identical( + pop$join_asof(gdp, left_on = "date", right_on = "date", strategy = "backward", coalesce = FALSE)$to_list()[["date_right"]], + as.Date(c("2016-01-01", "2017-01-01", "2018-01-01", "2019-01-01")) + ) }) test_that("n_chunks", {