diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index 1714af8a509..deb26859e8b 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -50,9 +50,13 @@ pull.arrow_dplyr_query <- function(.data, var = -1) { .data <- as_adq(.data) var <- vars_pull(names(.data), !!enquo(var)) .data$selected_columns <- set_names(.data$selected_columns[var], var) - dplyr::collect(.data)[[1]] + dplyr::compute(.data)[[1]] +} +pull.Dataset <- pull.RecordBatchReader <- pull.arrow_dplyr_query + +pull.ArrowTabular <- function(x, var = -1) { + x[[vars_pull(names(x), !!enquo(var))]] } -pull.Dataset <- pull.ArrowTabular <- pull.RecordBatchReader <- pull.arrow_dplyr_query restore_dplyr_features <- function(df, query) { # An arrow_dplyr_query holds some attributes that Arrow doesn't know about diff --git a/r/tests/testthat/test-dataset-write.R b/r/tests/testthat/test-dataset-write.R index 7a5f861ca57..468d11676bb 100644 --- a/r/tests/testthat/test-dataset-write.R +++ b/r/tests/testthat/test-dataset-write.R @@ -739,7 +739,8 @@ test_that("Dataset min_rows_per_group", { row_group_sizes <- ds %>% map_batches(~ record_batch(nrows = .$num_rows)) %>% - pull(nrows) + pull(nrows) %>% + as.vector() index <- 1 # We expect there to be 3 row groups since 11/5 = 2.2 and 11/4 = 2.75 @@ -778,6 +779,7 @@ test_that("Dataset write max rows per group", { row_group_sizes <- ds %>% map_batches(~ record_batch(nrows = .$num_rows)) %>% pull(nrows) %>% + as.vector() %>% sort() expect_equal(row_group_sizes, c(12, 18)) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index ea3e1365d6b..5e28a3ddd5b 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -67,7 +67,7 @@ test_that("IPC/Feather format data", { # Collecting virtual partition column works expect_equal( - ds %>% arrange(part) %>% pull(part), + ds %>% arrange(part) %>% pull(part) %>% as.vector(), c(rep(3, 10), rep(4, 10)) ) }) @@ -306,7 +306,7 @@ test_that("Simple interface for datasets", { # Collecting virtual partition column works expect_equal( - ds %>% arrange(part) %>% pull(part), + ds %>% arrange(part) %>% pull(part) %>% as.vector(), c(rep(1, 10), rep(2, 10)) ) }) @@ -625,8 +625,16 @@ test_that("scalar aggregates with many batches (ARROW-16904)", { ds <- open_dataset(tf) replicate(100, ds %>% summarize(min(x)) %>% pull()) - expect_true(all(replicate(100, ds %>% summarize(min(x)) %>% pull()) == 1)) - expect_true(all(replicate(100, ds %>% summarize(max(x)) %>% pull()) == 100)) + expect_true( + all( + replicate(100, ds %>% summarize(min(x)) %>% pull() %>% as.vector()) == 1 + ) + ) + expect_true( + all( + replicate(100, ds %>% summarize(max(x)) %>% pull() %>% as.vector()) == 100 + ) + ) }) test_that("map_batches", { @@ -650,6 +658,7 @@ test_that("map_batches", { select(int, lgl) %>% map_batches(~ record_batch(nrows = .$num_rows)) %>% pull(nrows) %>% + as.vector() %>% sort(), c(5, 10) ) @@ -1170,7 +1179,8 @@ test_that("FileSystemFactoryOptions with DirectoryPartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) @@ -1188,7 +1198,8 @@ test_that("FileSystemFactoryOptions with DirectoryPartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) @@ -1204,7 +1215,8 @@ test_that("FileSystemFactoryOptions with DirectoryPartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) @@ -1222,7 +1234,8 @@ test_that("FileSystemFactoryOptions with DirectoryPartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) @@ -1256,7 +1269,8 @@ test_that("FileSystemFactoryOptions with HivePartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) @@ -1272,7 +1286,8 @@ test_that("FileSystemFactoryOptions with HivePartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) @@ -1286,7 +1301,8 @@ test_that("FileSystemFactoryOptions with HivePartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) @@ -1302,7 +1318,8 @@ test_that("FileSystemFactoryOptions with HivePartitioning", { expect_equal( ds %>% arrange(cyl) %>% - pull(cyl), + pull(cyl) %>% + as.vector(), sort(mtcars$cyl) ) }) diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R index d8afcc5d4a8..3444e3ace5f 100644 --- a/r/tests/testthat/test-dplyr-arrange.R +++ b/r/tests/testthat/test-dplyr-arrange.R @@ -118,7 +118,8 @@ test_that("arrange() on integer, double, and character columns", { .input %>% group_by(grp) %>% arrange(.by_group = TRUE) %>% - pull(grp), + pull(grp) %>% + as.vector(), tbl ) compare_dplyr_binding( diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 25a844e0333..2608f9d6545 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -168,7 +168,8 @@ test_that("strptime", { mutate( x = strptime(x, format = "%m-%d-%Y") ) %>% - pull(), + pull() %>% + as.vector(), # R's strptime returns POSIXlt (list type) as.POSIXct(tstamp), ignore_attr = "tzone" diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index c40815df69d..db9a3bb30d0 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -70,22 +70,23 @@ See $.data for the source Arrow object', test_that("pull", { compare_dplyr_binding( - .input %>% pull(), + .input %>% pull() %>% as.vector(), tbl ) compare_dplyr_binding( - .input %>% pull(1), + .input %>% pull(1) %>% as.vector(), tbl ) compare_dplyr_binding( - .input %>% pull(chr), + .input %>% pull(chr) %>% as.vector(), tbl ) compare_dplyr_binding( .input %>% filter(int > 4) %>% rename(strng = chr) %>% - pull(strng), + pull(strng) %>% + as.vector(), tbl ) })