Skip to content

Commit

Permalink
ARROW-11766: [R] Better handling for missing compression codecs on Linux
Browse files Browse the repository at this point in the history
fixes merge problem in apache#9880

Closes apache#9893 from pachamaltese/arrow11766v3

Lead-authored-by: Mauricio Vargas <mvargas@dcc.uchile.cl>
Co-authored-by: Pachamaltese <mvargas@dcc.uchile.cl>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
  • Loading branch information
Mauricio Vargas authored and pull[bot] committed Dec 14, 2021
1 parent 090b923 commit 6b041c5
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 8 deletions.
11 changes: 7 additions & 4 deletions r/R/feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ write_feather <- function(x,
#'
#' @inheritParams read_ipc_stream
#' @inheritParams read_delim_arrow
#' @param ... additional parameters, passed to [FeatherReader$create()][FeatherReader]
#' @param ... additional parameters, passed to [make_readable_file()].
#'
#' @return A `data.frame` if `as_data_frame` is `TRUE` (the default), or an
#' Arrow [Table] otherwise
Expand All @@ -144,17 +144,20 @@ write_feather <- function(x,
#' }
read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
if (!inherits(file, "RandomAccessFile")) {
file <- make_readable_file(file)
file <- make_readable_file(file, ...)
on.exit(file$close())
}
reader <- FeatherReader$create(file, ...)
reader <- FeatherReader$create(file)

col_select <- enquo(col_select)
columns <- if (!quo_is_null(col_select)) {
vars_select(names(reader), !!col_select)
}

out <- reader$Read(columns)
out <- tryCatch(
reader$Read(columns),
error = read_compressed_error
)

if (isTRUE(as_data_frame)) {
out <- as.data.frame(out)
Expand Down
10 changes: 8 additions & 2 deletions r/R/parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,16 @@ read_parquet <- function(file,
schema <- reader$GetSchema()
names <- names(schema)
indices <- match(vars_select(names, !!col_select), names) - 1L
tab <- reader$ReadTable(indices)
tab <- tryCatch(
reader$ReadTable(indices),
error = read_compressed_error
)
} else {
# read all columns
tab <- reader$ReadTable()
tab <- tryCatch(
reader$ReadTable(),
error = read_compressed_error
)
}

if (as_data_frame) {
Expand Down
18 changes: 17 additions & 1 deletion r/R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,26 @@ is_constant <- function(expr) {
length(all_vars(expr)) == 0
}

# Error handler intended for `tryCatch(..., error = read_compressed_error)`
# around reads that can fail because a compression codec is not supported by
# this arrow installation.
#
# @param e The error condition caught by `tryCatch()`.
# @return Never returns normally: `e` is always re-raised with `stop()`.
#   When the message names the codec ("Support for codec '<name>' ..."),
#   reinstall instructions mentioning that codec are appended to the message
#   first. Any other error is re-raised unchanged.
read_compressed_error <- function(e) {
  msg <- conditionMessage(e)
  # Capture the codec name only up to the next quote. A greedy `(.*)` could
  # run past the closing quote, and `sub()` returns its input unchanged when
  # nothing matches, which would turn the whole message into the codec name.
  found <- regmatches(msg, regexec("Support for codec '([^']+)'", msg))[[1]]
  if (length(found) == 2L) {
    compression <- found[2]
    e$message <- paste0(
      msg,
      "\nIn order to read this file, you will need to reinstall arrow with additional features enabled.",
      "\nSet one of these environment variables before installing:",
      sprintf("\n\n * LIBARROW_MINIMAL=false (for all optional features, including '%s')", compression),
      sprintf("\n * ARROW_WITH_%s=ON (for just '%s')", toupper(compression), compression),
      "\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
    )
  }
  stop(e)
}

# Error handler for `tryCatch()`: re-raises the caught condition `e`, first
# appending a hint about `options(arrow.skip_nul = TRUE)` when the message
# contains the word "nul" surrounded by spaces.
handle_embedded_nul_error <- function(e) {
  original <- conditionMessage(e)
  # Anything that is not a nul-related message is passed through as-is.
  if (!grepl(" nul ", original)) {
    stop(e)
  }
  hint <- "; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)"
  e$message <- paste0(original, hint)
  stop(e)
}
}
2 changes: 1 addition & 1 deletion r/man/read_feather.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions r/tests/testthat/test-feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,23 @@ test_that("FeatherReader methods", {
})

unlink(feather_file)

# Path to the Feather fixture used by the codec tests below.
# NOTE(review): the file name suggests it was written by arrow 2.0.0 with lz4
# compression -- confirm against the golden-files directory.
ft_file <- test_path("golden-files/data-arrow_2.0.0_lz4.feather")

test_that("Error messages are shown when the compression algorithm lz4 is not found", {
  # Full message expected when lz4 support is missing: the original error
  # followed by the reinstall advice appended by read_compressed_error().
  msg <- "NotImplemented: Support for codec 'lz4' not built\nIn order to read this file, you will need to reinstall arrow with additional features enabled.\nSet one of these environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional features, including 'lz4')\n * ARROW_WITH_LZ4=ON (for just 'lz4')\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"

  if (codec_is_available("lz4")) {
    # With lz4 available the read must succeed and yield a data frame.
    d <- read_feather(ft_file)
    # expect_s3_class() replaces expect_is(), which testthat 3e deprecates.
    expect_s3_class(d, "data.frame")
  } else {
    expect_error(read_feather(ft_file), msg, fixed = TRUE)
  }
})

test_that("Error is created when feather reads a parquet file", {
  # Hand read_feather() a Parquet file looked up in the installed arrow
  # package; it must be rejected rather than parsed.
  parquet_path <- system.file("v0.7.1.parquet", package = "arrow")
  expect_error(read_feather(parquet_path), "Not a Feather V1 or Arrow IPC file")
})
18 changes: 18 additions & 0 deletions r/tests/testthat/test-parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,21 @@ test_that("ParquetFileReader $ReadRowGroup(s) methods", {
expect_true(reader$ReadRowGroups(c(0, 1), 0) == Table$create(x = 1:20))
expect_error(reader$ReadRowGroups(c(0, 1), 1))
})

test_that("Error messages are shown when the compression algorithm snappy is not found", {
  # Full message expected when snappy support is missing: the original error
  # followed by the reinstall advice appended by read_compressed_error().
  msg <- "NotImplemented: Support for codec 'snappy' not built\nIn order to read this file, you will need to reinstall arrow with additional features enabled.\nSet one of these environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional features, including 'snappy')\n * ARROW_WITH_SNAPPY=ON (for just 'snappy')\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"

  if (codec_is_available("snappy")) {
    # With snappy available the read must succeed and yield a data frame.
    d <- read_parquet(pq_file)
    # expect_s3_class() replaces expect_is(), which testthat 3e deprecates.
    expect_s3_class(d, "data.frame")
  } else {
    expect_error(read_parquet(pq_file), msg, fixed = TRUE)
  }
})

test_that("Error is created when parquet reads a feather file", {
  # Hand read_parquet() a Feather fixture; it must be rejected rather than
  # parsed.
  feather_path <- test_path("golden-files/data-arrow_2.0.0_lz4.feather")
  expect_error(read_parquet(feather_path), "Parquet magic bytes not found in footer")
})

0 comments on commit 6b041c5

Please sign in to comment.