diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 466272db461..cec638bce80 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -518,6 +518,34 @@ register_bindings_datetime_parsers <- function() { coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions) - build_expr("assume_timezone", coalesce_output, options = list(timezone = tz)) + # we need this binding to be able to handle a NULL `tz`, which will then be + # used by bindings such as `ymd` to return, based on whether tz is NULL or + # not, a date or timestamp + if (!is.null(tz)) { + build_expr("assume_timezone", coalesce_output, options = list(timezone = tz)) + } else { + coalesce_output + } + }) + + ymd_parser_vec <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym") + + ymd_parser_map_factory <- function(order) { + force(order) + function(x, tz = NULL) { + parse_x <- call_binding("parse_date_time", x, order, tz) + if (is.null(tz)) { + # we cast so we can mimic the behaviour of the `tz` argument in lubridate + # "If NULL (default), a Date object is returned. Otherwise a POSIXct with + # time zone attribute set to tz." + parse_x <- parse_x$cast(date32()) + } + parse_x + } + } + + for (ymd_order in ymd_parser_vec) { + register_binding(ymd_order, ymd_parser_map_factory(ymd_order)) + } } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 859c20ce02f..42448e8243f 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1254,7 +1254,7 @@ test_that("`decimal_date()` and `date_decimal()`", { mutate( decimal_date_from_POSIXct = decimal_date(b), decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")), - decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")), + decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")), decimal_date_from_date = decimal_date(c), date_from_decimal = date_decimal(a), date_from_decimal_r_obj = date_decimal(2022.178) @@ -1640,7 +1640,8 @@ test_that("`as_datetime()`", { }) test_that("parse_date_time() works with year, month, and date components", { - # string processing requires RE2 library (not available on Windows with R 3.6) + # these functions' internals use some string processing which requires the + # RE2 library (not available on Windows with R 3.6) skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -1700,7 +1701,8 @@ test_that("parse_date_time() works with year, month, and date components", { }) test_that("parse_date_time() works with a mix of formats and orders", { - # string processing requires RE2 library (not available on Windows with R 3.6) + # these functions' internals use some string processing which requires the + # RE2 library (not available on Windows with R 3.6) skip_if_not_available("re2") test_df <- tibble( string_combi = c("2021-09-1", "2/09//2021", "09.3.2021") @@ -1732,3 +1734,45 @@ test_that("parse_date_time() doesn't work with hour, minutes, and second compone '"ymd_HMS" `orders` not supported in Arrow' ) }) + +test_that("year, month, day date/time parsers work", { + test_df <- tibble::tibble( + ymd_string = c("2022-05-11", "2022/05/12", "22.05-13"), + ydm_string = c("2022-11-05", "2022/12/05", "22.13-05"), + mdy_string = c("05-11-2022", "05/12/2022", "05.13-22"), + myd_string = c("05-2022-11", "05/2022/12", "05.22-14"), + dmy_string = c("11-05-2022", "12/05/2022", "13.05-22"), + dym_string = c("11-2022-05", "12/2022/05", "13.22-05") + ) + + # these functions' internals use some string processing which requires the + # RE2 library (not available on Windows with R 3.6) + skip_if_not_available("re2") + compare_dplyr_binding( + .input %>% + mutate( + ymd_date = ymd(ymd_string), + ydm_date = ydm(ydm_string), + mdy_date = mdy(mdy_string), + myd_date = myd(myd_string), + dmy_date = dmy(dmy_string), + dym_date = dym(dym_string) + ) %>% + collect(), + test_df + ) + + compare_dplyr_binding( + .input %>% + mutate( + ymd_date = ymd(ymd_string, tz = "Pacific/Marquesas"), + ydm_date = ydm(ydm_string, tz = "Pacific/Marquesas"), + mdy_date = mdy(mdy_string, tz = "Pacific/Marquesas"), + myd_date = myd(myd_string, tz = "Pacific/Marquesas"), + dmy_date = dmy(dmy_string, tz = "Pacific/Marquesas"), + dym_date = dym(dym_string, tz = "Pacific/Marquesas") + ) %>% + collect(), + test_df + ) +})