-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-41834: [R] Better error handling in dplyr code #41576
Merged
Merged
Changes from 11 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
5166cf7
arrow_not_supported() raises a classed error
nealrichardson 4e2c735
Better distinguish invalid from not supported; add try_arrow_dplyr wr…
nealrichardson fefbc0c
More classed error raising
nealrichardson be79caa
Use cli formatting so we can add bullets; wrap more in try_arrow_dplyr
nealrichardson 66ff9c0
Rename error functions. Start updating tests
nealrichardson 670e9e5
Handle assert_that and match.arg; implement alternative suggestions; …
nealrichardson 0bda4e3
Add some tests, fix some tests
nealrichardson 36bf23a
More test updating
nealrichardson 0cd2ff3
Add more direct tests of dplyr-eval; update remaining expectations
nealrichardson df9d081
Add some cases with alternatives suggested
nealrichardson 4df3a7d
Tidy up match.call
nealrichardson 20e218c
Remove writing_bindings.Rmd
nealrichardson File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,47 +19,46 @@ | |
# The following S3 methods are registered on load if dplyr is present | ||
|
||
arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) { | ||
call <- match.call() | ||
.data <- as_adq(.data) | ||
exprs <- expand_across(.data, quos(...)) | ||
try_arrow_dplyr({ | ||
.data <- as_adq(.data) | ||
exprs <- expand_across(.data, quos(...)) | ||
|
||
if (.by_group) { | ||
# when the data is grouped and .by_group is TRUE, order the result by | ||
# the grouping columns first | ||
exprs <- c(quos(!!!dplyr::groups(.data)), exprs) | ||
} | ||
if (length(exprs) == 0) { | ||
# Nothing to do | ||
return(.data) | ||
} | ||
.data <- as_adq(.data) | ||
# find and remove any dplyr::desc() and tidy-eval | ||
# the arrange expressions inside an Arrow data_mask | ||
sorts <- vector("list", length(exprs)) | ||
descs <- logical(0) | ||
mask <- arrow_mask(.data) | ||
for (i in seq_along(exprs)) { | ||
x <- find_and_remove_desc(exprs[[i]]) | ||
exprs[[i]] <- x[["quos"]] | ||
sorts[[i]] <- arrow_eval(exprs[[i]], mask) | ||
names(sorts)[i] <- format_expr(exprs[[i]]) | ||
if (inherits(sorts[[i]], "try-error")) { | ||
msg <- paste("Expression", names(sorts)[i], "not supported in Arrow") | ||
return(abandon_ship(call, .data, msg)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here's an example of "not just an indentation change": in the new code, we don't have to evaluate, catch the error, and re-raise in abandon_ship, we just let |
||
if (.by_group) { | ||
# when the data is grouped and .by_group is TRUE, order the result by | ||
# the grouping columns first | ||
exprs <- c(quos(!!!dplyr::groups(.data)), exprs) | ||
} | ||
if (length(mask$.aggregations)) { | ||
# dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it. | ||
# But we could, the same way it works in mutate() via join, if someone asks. | ||
# Until then, just error. | ||
# TODO: add a test for this | ||
msg <- paste("Expression", format_expr(expr), "not supported in arrange() in Arrow") | ||
return(abandon_ship(call, .data, msg)) | ||
if (length(exprs) == 0) { | ||
# Nothing to do | ||
return(.data) | ||
} | ||
descs[i] <- x[["desc"]] | ||
} | ||
.data$arrange_vars <- c(sorts, .data$arrange_vars) | ||
.data$arrange_desc <- c(descs, .data$arrange_desc) | ||
.data | ||
.data <- as_adq(.data) | ||
# find and remove any dplyr::desc() and tidy-eval | ||
# the arrange expressions inside an Arrow data_mask | ||
sorts <- vector("list", length(exprs)) | ||
descs <- logical(0) | ||
mask <- arrow_mask(.data) | ||
for (i in seq_along(exprs)) { | ||
x <- find_and_remove_desc(exprs[[i]]) | ||
exprs[[i]] <- x[["quos"]] | ||
sorts[[i]] <- arrow_eval(exprs[[i]], mask) | ||
names(sorts)[i] <- format_expr(exprs[[i]]) | ||
if (length(mask$.aggregations)) { | ||
# dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it. | ||
# But we could, the same way it works in mutate() via join, if someone asks. | ||
# Until then, just error. | ||
# TODO: add a test for this | ||
arrow_not_supported( | ||
.actual_msg = "Expression not supported in arrange() in Arrow", | ||
call = expr | ||
) | ||
} | ||
descs[i] <- x[["desc"]] | ||
} | ||
.data$arrange_vars <- c(sorts, .data$arrange_vars) | ||
.data$arrange_desc <- c(descs, .data$arrange_desc) | ||
.data | ||
}) | ||
} | ||
arrange.Dataset <- arrange.ArrowTabular <- arrange.RecordBatchReader <- arrange.arrow_dplyr_query | ||
|
||
|
@@ -73,10 +72,9 @@ find_and_remove_desc <- function(quosure) { | |
expr <- quo_get_expr(quosure) | ||
descending <- FALSE | ||
if (length(all.vars(expr)) < 1L) { | ||
stop( | ||
"Expression in arrange() does not contain any field names: ", | ||
deparse(expr), | ||
call. = FALSE | ||
validation_error( | ||
"Expression in arrange() does not contain any field names", | ||
call = quosure | ||
) | ||
} | ||
# Use a while loop to remove any number of nested pairs of enclosing | ||
|
@@ -90,7 +88,10 @@ find_and_remove_desc <- function(quosure) { | |
# ensure desc() has only one argument (when an R expression is a function | ||
# call, length == 2 means it has exactly one argument) | ||
if (length(expr) > 2) { | ||
stop("desc() expects only one argument", call. = FALSE) | ||
validation_error( | ||
"desc() expects only one argument", | ||
call = expr | ||
) | ||
} | ||
# remove desc() and toggle descending | ||
expr <- expr[[2]] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TIL about this making arrows!