Skip to content

Commit

Permalink
Add more direct tests of dplyr-eval; update remaining expectations
Browse files Browse the repository at this point in the history
  • Loading branch information
nealrichardson committed May 26, 2024
1 parent 36bf23a commit 0cd2ff3
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 71 deletions.
27 changes: 12 additions & 15 deletions r/R/dplyr-eval.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ arrow_eval <- function(expr, mask) {
# Raise the original error: it's actually helpful here
validation_error(msg, call = expr)
}
# 3b. Check to see if this is from match.arg. Retry with dplyr won't help.
if (is.language(e$call) && identical(as.character(e$call[[1]]), "match.arg")) {
# Raise the original error: it's actually helpful here
validation_error(msg, call = expr)
}

# 4. Check for NotImplemented error raised from Arrow C++ code.
# Not sure where exactly we may raise this, but if we see it, it means
Expand Down Expand Up @@ -123,16 +128,11 @@ add_user_functions_to_mask <- function(expr, mask) {
}

get_standard_error_messages <- function() {
patterns <- .cache$i18ized_error_pattern
if (is.null(patterns)) {
patterns <- i18ize_error_messages()
# Add to the patterns something for match.arg() errors
# (the function name won't be internationalized, so we can't just look for it)
patterns <- paste0(patterns, "|match\\.arg")
if (is.null(.cache$i18ized_error_pattern)) {
# Memoize it
.cache$i18ized_error_pattern <- patterns
.cache$i18ized_error_pattern <- i18ize_error_messages()
}
patterns
.cache$i18ized_error_pattern
}

i18ize_error_messages <- function() {
Expand Down Expand Up @@ -199,13 +199,10 @@ validation_error <- function(msg, ...) {
# * If it's another error, just stop, retry with regular dplyr won't help
try_arrow_dplyr <- function(expr) {
parent <- caller_env()
tryCatch(eval(expr, parent), error = function(e) {
if (inherits(e, "arrow_not_supported")) {
abandon_ship(e, parent)
} else {
stop(e)
}
})
tryCatch(
eval(expr, parent),
arrow_not_supported = function(e) abandon_ship(e, parent)
)
}

# Helper to handle unsupported dplyr features
Expand Down
27 changes: 27 additions & 0 deletions r/tests/testthat/_snaps/dplyr-eval.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# try_arrow_dplyr/abandon_ship adds the right message about collect()

Code
tester(ds, i)
Condition
Error in `validation_error()`:
! arg is 0

---

Code
tester(ds, i)
Condition
Error in `arrow_not_supported()`:
! arg == 1 not supported in Arrow
> Call collect() first to pull data into R.

---

Code
tester(ds, i)
Condition
Error in `arrow_not_supported()`:
! arg greater than 0 not supported in Arrow
> Try setting arg to -1
> Or, call collect() first to pull data into R.

25 changes: 25 additions & 0 deletions r/tests/testthat/_snaps/dplyr-mutate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# transmute() defuses dots arguments (ARROW-13262)

Code
tbl %>% Table$create() %>% transmute(a = stringr::str_c(padded_strings,
padded_strings), b = stringr::str_squish(a)) %>% collect()
Condition
Warning:
In stringr::str_squish(a):
i Expression not supported in Arrow
> Pulling data into R
Output
# A tibble: 10 x 2
a b
<chr> <chr>
1 " a a " a a
2 " b b " b b
3 " c c " c c
4 " d d " d d
5 " e e " e e
6 " f f " f f
7 " g g " g g
8 " h h " h h
9 " i i " i i
10 " j j " j j

4 changes: 3 additions & 1 deletion r/tests/testthat/_snaps/dplyr-query.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Scalars in expressions match the type of the field, if possible

Expression int == "5" not supported in Arrow; pulling data into R
In int == "5":
i Expression not supported in Arrow
> Pulling data into R

41 changes: 37 additions & 4 deletions r/tests/testthat/_snaps/dplyr-summarize.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,44 @@
Code
InMemoryDataset$create(tbl) %>% summarize(distinct = n_distinct())
Condition
Error:
! Error : In n_distinct(), n_distinct() with 0 arguments not supported in Arrow
Call collect() first to pull data into R.
Error in `n_distinct()`:
! n_distinct() with 0 arguments not supported in Arrow
> Call collect() first to pull data into R.

---

Error : In n_distinct(int, lgl), Multiple arguments to n_distinct() not supported in Arrow; pulling data into R
In n_distinct(int, lgl):
i Multiple arguments to n_distinct() not supported in Arrow
> Pulling data into R

# Expressions on aggregations

Code
record_batch(tbl) %>% summarise(any(any(lgl)))
Condition
Warning:
In any(any(lgl)):
i aggregate within aggregate expression not supported in Arrow
> Pulling data into R
Output
# A tibble: 1 x 1
`any(any(lgl))`
<lgl>
1 TRUE

# Can use across() within summarise()

Code
data.frame(x = 1, y = 2) %>% arrow_table() %>% group_by(x) %>% summarise(across(
everything())) %>% collect()
Condition
Warning:
In y:
i Expression is not a valid aggregation expression or is not supported in Arrow
> Pulling data into R
Output
# A tibble: 1 x 2
x y
<dbl> <dbl>
1 1 2

61 changes: 61 additions & 0 deletions r/tests/testthat/test-dplyr-eval.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

test_that("various paths in arrow_eval", {
expect_arrow_eval_error(
assert_is(1, "character"),
class = "validation_error"
)
expect_arrow_eval_error(
NoTaVaRiAbLe,
class = "validation_error"
)
expect_arrow_eval_error(
match.arg("z", c("a", "b")),
class = "validation_error"
)
expect_arrow_eval_error(
stop("something something NotImplementedError"),
class = "arrow_not_supported"
)
})

test_that("try_arrow_dplyr/abandon_ship adds the right message about collect()", {
tester <- function(.data, arg) {
call <- match.call()
try_arrow_dplyr({
if (arg == 0) {
# This one just stops and doesn't recommend calling collect()
validation_error("arg is 0")
} else if (arg == 1) {
# This one recommends calling collect()
arrow_not_supported("arg == 1")
} else {
# Because this one has an alternative suggested, it adds "Or, collect()"
arrow_not_supported(
"arg greater than 0",
body = c(">" = "Try setting arg to -1")
)
}
})
}

ds <- InMemoryDataset$create(arrow_table(x = 1))
for (i in 0:2) {
expect_snapshot(tester(ds, i), error = TRUE)
}
})
13 changes: 4 additions & 9 deletions r/tests/testthat/test-dplyr-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,14 @@ test_that("transmute() with unsupported arguments", {
})

test_that("transmute() defuses dots arguments (ARROW-13262)", {
expect_warning(
expect_snapshot(
tbl %>%
Table$create() %>%
transmute(
a = stringr::str_c(padded_strings, padded_strings),
b = stringr::str_squish(a)
) %>%
collect(),
"Expression stringr::str_squish(a) not supported in Arrow; pulling data into R",
fixed = TRUE
collect()
)
})

Expand Down Expand Up @@ -202,10 +200,7 @@ test_that("nchar() arguments", {
filter(line_lengths > 15) %>%
collect(),
tbl,
warning = paste0(
"In nchar\\(verses, type = \"bytes\", allowNA = TRUE\\), ",
"allowNA = TRUE not supported in Arrow; pulling data into R"
)
warning = "allowNA = TRUE not supported in Arrow"
)
})

Expand Down Expand Up @@ -538,7 +533,7 @@ test_that("Can't just add a vector column with mutate()", {
mutate(again = 1:10),
tibble::tibble(int = tbl$int, again = 1:10)
),
"In again = 1:10, only values of size one are recycled; pulling data into R"
"Recycling values of length != 1 not supported in Arrow"
)
})

Expand Down
55 changes: 13 additions & 42 deletions r/tests/testthat/test-dplyr-summarize.R
Original file line number Diff line number Diff line change
Expand Up @@ -832,28 +832,18 @@ test_that("Expressions on aggregations", {
)

# Aggregates on aggregates are not supported
expect_warning(
record_batch(tbl) %>% summarise(any(any(lgl))),
paste(
"In any\\(any\\(lgl\\)\\), aggregate within aggregate expression",
"not supported in Arrow"
)
expect_snapshot(
record_batch(tbl) %>% summarise(any(any(lgl)))
)

# Check aggregates on aggregates with more complex calls
expect_warning(
record_batch(tbl) %>% summarise(any(any(!lgl))),
paste(
"In any\\(any\\(!lgl\\)\\), aggregate within aggregate expression",
"not supported in Arrow"
)
"aggregate within aggregate expression not supported in Arrow"
)
expect_warning(
record_batch(tbl) %>% summarise(!any(any(lgl))),
paste(
"In \\!any\\(any\\(lgl\\)\\), aggregate within aggregate expression",
"not supported in Arrow"
)
"aggregate within aggregate expression not supported in Arrow"
)
})

Expand Down Expand Up @@ -965,7 +955,7 @@ test_that("Summarize with 0 arguments", {
)
})

test_that("Not (yet) supported: window functions", {
test_that("Not supported: window functions", {
compare_dplyr_binding(
.input %>%
group_by(some_grouping) %>%
Expand All @@ -974,10 +964,7 @@ test_that("Not (yet) supported: window functions", {
) %>%
collect(),
tbl,
warning = paste(
"In sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\), aggregate within",
"aggregate expression not supported in Arrow; pulling data into R"
)
warning = "aggregate within aggregate expression not supported in Arrow"
)
compare_dplyr_binding(
.input %>%
Expand All @@ -987,10 +974,7 @@ test_that("Not (yet) supported: window functions", {
) %>%
collect(),
tbl,
warning = paste(
"In sum\\(dbl - mean\\(dbl\\)\\), aggregate within aggregate expression",
"not supported in Arrow; pulling data into R"
)
warning = "aggregate within aggregate expression not supported in Arrow"
)
compare_dplyr_binding(
.input %>%
Expand All @@ -1000,10 +984,7 @@ test_that("Not (yet) supported: window functions", {
) %>%
collect(),
tbl,
warning = paste(
"In sqrt\\(sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\)/\\(n\\(\\) - 1L\\)\\), aggregate within",
"aggregate expression not supported in Arrow; pulling data into R"
)
warning = "aggregate within aggregate expression not supported in Arrow"
)

compare_dplyr_binding(
Expand All @@ -1012,10 +993,7 @@ test_that("Not (yet) supported: window functions", {
summarize(y - mean(y)) %>%
collect(),
data.frame(x = 1, y = 2),
warning = paste(
"Expression y - mean\\(y\\) is not a valid aggregation expression",
"or is not supported in Arrow; pulling data into R"
)
warning = "Expression is not a valid aggregation expression or is not supported in Arrow"
)

compare_dplyr_binding(
Expand All @@ -1024,10 +1002,7 @@ test_that("Not (yet) supported: window functions", {
summarize(y) %>%
collect(),
data.frame(x = 1, y = 2),
warning = paste(
"Expression y is not a valid aggregation expression",
"or is not supported in Arrow; pulling data into R"
)
warning = "Expression is not a valid aggregation expression or is not supported in Arrow"
)

# This one could possibly be supported--in mutate()
Expand All @@ -1037,10 +1012,7 @@ test_that("Not (yet) supported: window functions", {
summarize(x - y) %>%
collect(),
data.frame(x = 1, y = 2, z = 3),
warning = paste(
"Expression x - y is not a valid aggregation expression",
"or is not supported in Arrow; pulling data into R"
)
warning = "Expression is not a valid aggregation expression or is not supported in Arrow"
)
})

Expand Down Expand Up @@ -1274,13 +1246,12 @@ test_that("Can use across() within summarise()", {
)

# across() doesn't work in summarise when input expressions evaluate to bare field references
expect_warning(
expect_snapshot(
data.frame(x = 1, y = 2) %>%
arrow_table() %>%
group_by(x) %>%
summarise(across(everything())) %>%
collect(),
regexp = "Expression y is not a valid aggregation expression or is not supported in Arrow; pulling data into R"
collect()
)
})

Expand Down

0 comments on commit 0cd2ff3

Please sign in to comment.