diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 2df07e68744..d2ecde3b1c0 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -28,7 +28,6 @@ LinkingTo:
 Imports:
     assertthat,
     bit64,
-    fs,
     purrr,
     R6,
    Rcpp (>= 1.0.1),
@@ -39,6 +38,7 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.1
 Suggests:
     covr,
+    fs,
     hms,
     lubridate,
     rmarkdown,
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 3a413c0e802..d97fa4c2b58 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -11,14 +11,11 @@ S3method(BufferReader,"arrow::Buffer")
 S3method(BufferReader,default)
 S3method(CompressedInputStream,"arrow::io::InputStream")
 S3method(CompressedInputStream,character)
-S3method(CompressedInputStream,fs_path)
 S3method(CompressedOutputStream,"arrow::io::OutputStream")
 S3method(CompressedOutputStream,character)
-S3method(CompressedOutputStream,fs_path)
 S3method(FeatherTableReader,"arrow::io::RandomAccessFile")
 S3method(FeatherTableReader,"arrow::ipc::feather::TableReader")
 S3method(FeatherTableReader,character)
-S3method(FeatherTableReader,fs_path)
 S3method(FeatherTableReader,raw)
 S3method(FeatherTableWriter,"arrow::io::OutputStream")
 S3method(FixedSizeBufferWriter,"arrow::Buffer")
@@ -28,17 +25,14 @@ S3method(MessageReader,default)
 S3method(RecordBatchFileReader,"arrow::Buffer")
 S3method(RecordBatchFileReader,"arrow::io::RandomAccessFile")
 S3method(RecordBatchFileReader,character)
-S3method(RecordBatchFileReader,fs_path)
 S3method(RecordBatchFileReader,raw)
 S3method(RecordBatchFileWriter,"arrow::io::OutputStream")
 S3method(RecordBatchFileWriter,character)
-S3method(RecordBatchFileWriter,fs_path)
 S3method(RecordBatchStreamReader,"arrow::Buffer")
 S3method(RecordBatchStreamReader,"arrow::io::InputStream")
 S3method(RecordBatchStreamReader,raw)
 S3method(RecordBatchStreamWriter,"arrow::io::OutputStream")
 S3method(RecordBatchStreamWriter,character)
-S3method(RecordBatchStreamWriter,fs_path)
 S3method(as.data.frame,"arrow::RecordBatch")
 S3method(as.data.frame,"arrow::Table")
 S3method(as.raw,"arrow::Buffer")
@@ -52,19 +46,16 @@ S3method(csv_table_reader,"arrow::csv::TableReader")
 S3method(csv_table_reader,"arrow::io::InputStream")
 S3method(csv_table_reader,character)
 S3method(csv_table_reader,default)
-S3method(csv_table_reader,fs_path)
 S3method(dim,"arrow::RecordBatch")
 S3method(dim,"arrow::Table")
 S3method(json_table_reader,"arrow::io::InputStream")
 S3method(json_table_reader,"arrow::json::TableReader")
 S3method(json_table_reader,character)
 S3method(json_table_reader,default)
-S3method(json_table_reader,fs_path)
 S3method(length,"arrow::Array")
 S3method(names,"arrow::RecordBatch")
 S3method(parquet_file_reader,"arrow::io::RandomAccessFile")
 S3method(parquet_file_reader,character)
-S3method(parquet_file_reader,fs_path)
 S3method(parquet_file_reader,raw)
 S3method(print,"arrow-enum")
 S3method(read_message,"arrow::io::InputStream")
@@ -81,7 +72,6 @@ S3method(read_schema,raw)
 S3method(read_table,"arrow::ipc::RecordBatchFileReader")
 S3method(read_table,"arrow::ipc::RecordBatchStreamReader")
 S3method(read_table,character)
-S3method(read_table,fs_path)
 S3method(read_table,raw)
 S3method(type,"arrow::Array")
 S3method(type,"arrow::ChunkedArray")
@@ -89,7 +79,6 @@ S3method(type,"arrow::Column")
 S3method(type,default)
 S3method(write_arrow,"arrow::ipc::RecordBatchWriter")
 S3method(write_arrow,character)
-S3method(write_arrow,fs_path)
 S3method(write_arrow,raw)
 S3method(write_feather,"arrow::RecordBatch")
 S3method(write_feather,data.frame)
@@ -97,7 +86,6 @@ S3method(write_feather,default)
 S3method(write_feather_RecordBatch,"arrow::io::OutputStream")
 S3method(write_feather_RecordBatch,character)
 S3method(write_feather_RecordBatch,default)
-S3method(write_feather_RecordBatch,fs_path)
 export(BufferOutputStream)
 export(BufferReader)
 export(CompressedInputStream)
diff --git a/r/NEWS.md b/r/NEWS.md
index 5ab4e18794b..3cd2f32b362 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -21,6 +21,7 @@
 * `read_csv_arrow()` supports more parsing options, including `col_names` and `skip`
 * `read_parquet()` and `read_feather()` can ingest data from a `raw` vector ([ARROW-6278](https://issues.apache.org/jira/browse/ARROW-6278))
+* File readers now properly handle paths that need expanding, such as `~/file.parquet` ([ARROW-6323](https://issues.apache.org/jira/browse/ARROW-6323))
 
 # arrow 0.14.1
diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R
index 6dab2d1ff76..ae3bd27a780 100644
--- a/r/R/RecordBatchReader.R
+++ b/r/R/RecordBatchReader.R
@@ -124,11 +124,6 @@ RecordBatchFileReader <- function(file) {
 #' @export
 `RecordBatchFileReader.character` <- function(file) {
   assert_that(length(file) == 1L)
-  RecordBatchFileReader(fs::path_abs(file))
-}
-
-#' @export
-`RecordBatchFileReader.fs_path` <- function(file) {
   RecordBatchFileReader(ReadableFile(file))
 }
diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R
index 59aa9847a1f..eb0a9c61d09 100644
--- a/r/R/RecordBatchWriter.R
+++ b/r/R/RecordBatchWriter.R
@@ -94,8 +94,7 @@
 #'
 #' @param sink Where to write. Can either be:
 #'
-#' - A string, meant as a file path, passed to [fs::path_abs()]
-#' - a [file path][fs::path_abs()]
+#' - A string file path
 #' - [arrow::io::OutputStream][arrow__io__OutputStream]
 #'
 #' @param schema The [arrow::Schema][arrow__Schema] for data to be written.
@@ -109,11 +108,6 @@ RecordBatchStreamWriter <- function(sink, schema) {
 
 #' @export
 RecordBatchStreamWriter.character <- function(sink, schema){
-  RecordBatchStreamWriter(fs::path_abs(sink), schema)
-}
-
-#' @export
-RecordBatchStreamWriter.fs_path <- function(sink, schema){
   RecordBatchStreamWriter(FileOutputStream(sink), schema)
 }
@@ -160,8 +154,7 @@ RecordBatchStreamWriter.fs_path <- function(sink, schema){
 #'
 #' @param sink Where to write. Can either be:
 #'
-#' - character vector of length one
-#' - a [file path][fs::path_abs()]
+#' - a string file path
 #' - [arrow::io::OutputStream][arrow__io__OutputStream]
 #'
 #' @param schema The [arrow::Schema][arrow__Schema] for data to be written.
@@ -175,11 +168,6 @@ RecordBatchFileWriter <- function(sink, schema) {
 
 #' @export
 RecordBatchFileWriter.character <- function(sink, schema){
-  RecordBatchFileWriter(fs::path_abs(sink), schema)
-}
-
-#' @export
-RecordBatchFileWriter.fs_path <- function(sink, schema){
   RecordBatchFileWriter(FileOutputStream(sink), schema)
 }
diff --git a/r/R/compression.R b/r/R/compression.R
index e10fef1bd2e..399fcb81db2 100644
--- a/r/R/compression.R
+++ b/r/R/compression.R
@@ -50,11 +50,6 @@ CompressedOutputStream <- function(stream, codec = compression_codec("GZIP")){
 
 #' @export
 CompressedOutputStream.character <- function(stream, codec = compression_codec("GZIP")){
-  CompressedOutputStream(fs::path_abs(stream), codec = codec)
-}
-
-#' @export
-CompressedOutputStream.fs_path <- function(stream, codec = compression_codec("GZIP")){
   CompressedOutputStream(FileOutputStream(stream), codec = codec)
 }
@@ -75,11 +70,6 @@ CompressedInputStream <- function(stream, codec = codec("GZIP")){
 
 #' @export
 CompressedInputStream.character <- function(stream, codec = compression_codec("GZIP")){
-  CompressedInputStream(fs::path_abs(stream), codec = codec)
-}
-
-#' @export
-CompressedInputStream.fs_path <- function(stream, codec = compression_codec("GZIP")){
   CompressedInputStream(ReadableFile(stream), codec = codec)
 }
diff --git a/r/R/csv.R b/r/R/csv.R
index 5b5d36cbe0b..3c5e5b7e8e4 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -347,21 +347,6 @@ csv_table_reader.default <- function(file,
                                      parse_options = csv_parse_options(),
                                      convert_options = csv_convert_options(),
                                      ...
-){
-  csv_table_reader(fs::path_abs(file),
-    read_options = read_options,
-    parse_options = parse_options,
-    convert_options = convert_options,
-    ...
-  )
-}
-
-#' @export
-`csv_table_reader.fs_path` <- function(file,
-                                       read_options = csv_read_options(),
-                                       parse_options = csv_parse_options(),
-                                       convert_options = csv_convert_options(),
-                                       ...
 ){
   csv_table_reader(mmap_open(file),
     read_options = read_options,
diff --git a/r/R/feather.R b/r/R/feather.R
index 8bcbe2b80e0..48123f7cdfd 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -108,13 +108,7 @@ write_feather_RecordBatch <- function(data, stream) {
 
 #' @export
 #' @method write_feather_RecordBatch character
-`write_feather_RecordBatch.character` <- function(data, stream) {
-  `write_feather_RecordBatch.fs_path`(data, fs::path_abs(stream))
-}
-
-#' @export
-#' @method write_feather_RecordBatch fs_path
-`write_feather_RecordBatch.fs_path` <- function(data, stream) {
+write_feather_RecordBatch.character <- function(data, stream) {
   file_stream <- FileOutputStream(stream)
   on.exit(file_stream$close())
   `write_feather_RecordBatch.arrow::io::OutputStream`(data, file_stream)
@@ -129,7 +123,7 @@ write_feather_RecordBatch <- function(data, stream) {
 #' A `arrow::ipc::feather::TableReader` to read from a file
 #'
 #' @param file A file path or `arrow::io::RandomAccessFile`
-#' @param mmap Is the file memory mapped (applicable to the `character` and `fs_path` methods)
+#' @param mmap Is the file memory mapped (applicable to the `character` method)
 #' @param ... extra parameters
 #'
 #' @export
@@ -139,11 +133,6 @@ FeatherTableReader <- function(file, mmap = TRUE, ...){
 
 #' @export
 FeatherTableReader.character <- function(file, mmap = TRUE, ...) {
-  FeatherTableReader(fs::path_abs(file), mmap = mmap, ...)
-}
-
-#' @export
-FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) {
   if (isTRUE(mmap)) {
     stream <- mmap_open(file, ...)
   } else {
diff --git a/r/R/io.R b/r/R/io.R
index 5d7d99cb5e8..3169a180eb9 100644
--- a/r/R/io.R
+++ b/r/R/io.R
@@ -253,7 +253,7 @@
 #'
 #' @export
 mmap_create <- function(path, size) {
-  shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(fs::path_abs(path), size))
+  shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(normalizePath(path, mustWork = FALSE), size))
 }
 
 #' Open a memory mapped file
@@ -264,7 +264,7 @@ mmap_create <- function(path, size) {
 #' @export
 mmap_open <- function(path, mode = c("read", "write", "readwrite")) {
   mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L
-  shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(fs::path_abs(path), mode))
+  shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(normalizePath(path), mode))
 }
 
 #' open a [arrow::io::ReadableFile][arrow__io__ReadableFile]
@@ -275,7 +275,7 @@ mmap_open <- function(path, mode = c("read", "write", "readwrite")) {
 #'
 #' @export
 ReadableFile <- function(path) {
-  shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(fs::path_abs(path)))
+  shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(normalizePath(path)))
 }
 
 #' Open a [arrow::io::FileOutputStream][arrow__io__FileOutputStream]
@@ -286,7 +286,7 @@ ReadableFile <- function(path) {
 #'
 #' @export
 FileOutputStream <- function(path) {
-  shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(path))
+  shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(normalizePath(path, mustWork = FALSE)))
 }
 
 #' Open a [arrow::io::MockOutputStream][arrow__io__MockOutputStream]
diff --git a/r/R/json.R b/r/R/json.R
index dce130e61a1..9573ff547b3 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -86,19 +86,6 @@ json_table_reader.default <- function(file,
                                       read_options = json_read_options(),
                                       parse_options = json_parse_options(),
                                       ...
-){
-  json_table_reader(fs::path_abs(file),
-    read_options = read_options,
-    parse_options = parse_options,
-    ...
-  )
-}
-
-#' @export
-`json_table_reader.fs_path` <- function(file,
-                                        read_options = json_read_options(),
-                                        parse_options = json_parse_options(),
-                                        ...
 ){
   json_table_reader(ReadableFile(file),
     read_options = read_options,
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 4fcff6b7b1b..c76619c4597 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -85,7 +85,11 @@ parquet_file_reader <- function(file, props = parquet_arrow_reader_properties(),
 }
 
 #' @export
-parquet_file_reader.fs_path <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) {
+parquet_file_reader.character <- function(file,
+                                          props = parquet_arrow_reader_properties(),
+                                          memory_map = TRUE,
+                                          ...) {
+  file <- normalizePath(file)
   if (isTRUE(memory_map)) {
     parquet_file_reader(mmap_open(file), props = props, ...)
   } else {
@@ -94,13 +98,8 @@ parquet_file_reader.fs_path <- function(file, props = parquet_arrow_reader_prope
 }
 
 #' @export
-parquet_file_reader.character <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) {
-  parquet_file_reader(fs::path_abs(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...)
-}
-
-#' @export
-parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) {
-  parquet_file_reader(BufferReader(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...)
+parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), ...) {
+  parquet_file_reader(BufferReader(file), props = props, ...)
 }
 
 #' Read a Parquet file
diff --git a/r/R/read_table.R b/r/R/read_table.R
index ff2c5dd8c17..a05d15dff56 100644
--- a/r/R/read_table.R
+++ b/r/R/read_table.R
@@ -27,7 +27,7 @@
 #' read an [arrow::Table][arrow__Table] from the remaining record batches
 #' in the reader
 #'
-#' - a string or [file path][fs::path_abs()]: interpret the file as an arrow
+#' - a string file path: interpret the file as an arrow
 #'   binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader]
 #'   to process it.
 #'
@@ -60,13 +60,8 @@ read_table <- function(stream){
 }
 
 #' @export
-read_table.character <- function(stream){
+read_table.character <- function(stream) {
   assert_that(length(stream) == 1L)
-  read_table(fs::path_abs(stream))
-}
-
-#' @export
-read_table.fs_path <- function(stream) {
   stream <- ReadableFile(stream)
   on.exit(stream$close())
   batch_reader <- RecordBatchFileReader(stream)
diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R
index 435fa82a40f..f57eff36c57 100644
--- a/r/R/write_arrow.R
+++ b/r/R/write_arrow.R
@@ -36,7 +36,7 @@ to_arrow <- function(x) {
 #' of `x` is used. The stream is left open. This uses the streaming format
 #' or the binary file format depending on the type of the writer.
 #'
-#' - A string or [file path][fs::path_abs()]: `x` is serialized with
+#' - A string file path: `x` is serialized with
 #'   a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e.
 #'   using the binary file format.
 #'
@@ -61,11 +61,6 @@ write_arrow <- function(x, stream, ...) {
 
 #' @export
 `write_arrow.character` <- function(x, stream, ...) {
-  write_arrow(x, fs::path_abs(stream), ...)
-}
-
-#' @export
-`write_arrow.fs_path` <- function(x, stream, ...) {
   assert_that(length(stream) == 1L)
   x <- to_arrow(x)
   file_stream <- FileOutputStream(stream)
diff --git a/r/man/FeatherTableReader.Rd b/r/man/FeatherTableReader.Rd
index 452291e7e61..3276628d50e 100644
--- a/r/man/FeatherTableReader.Rd
+++ b/r/man/FeatherTableReader.Rd
@@ -9,7 +9,7 @@ FeatherTableReader(file, mmap = TRUE, ...)
 \arguments{
 \item{file}{A file path or \code{arrow::io::RandomAccessFile}}
 
-\item{mmap}{Is the file memory mapped (applicable to the \code{character} and \code{fs_path} methods)}
+\item{mmap}{Is the file memory mapped (applicable to the \code{character} method)}
 
 \item{...}{extra parameters}
 }
diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd
index 90858304b0b..d89578f97be 100644
--- a/r/man/RecordBatchFileWriter.Rd
+++ b/r/man/RecordBatchFileWriter.Rd
@@ -9,8 +9,7 @@ RecordBatchFileWriter(sink, schema)
 \arguments{
 \item{sink}{Where to write. Can either be:
 \itemize{
-\item character vector of length one
-\item a \link[fs:path_abs]{file path}
+\item a string file path
 \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream}
 }}
diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd
index b9183a80719..9d9bbc9ceb0 100644
--- a/r/man/RecordBatchStreamWriter.Rd
+++ b/r/man/RecordBatchStreamWriter.Rd
@@ -9,8 +9,7 @@ RecordBatchStreamWriter(sink, schema)
 \arguments{
 \item{sink}{Where to write. Can either be:
 \itemize{
-\item A string, meant as a file path, passed to \code{\link[fs:path_abs]{fs::path_abs()}}
-\item a \link[fs:path_abs]{file path}
+\item A string file path
 \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream}
 }}
diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd
index c5863c1d43e..e556b8b0773 100644
--- a/r/man/read_table.Rd
+++ b/r/man/read_table.Rd
@@ -18,7 +18,7 @@ from all the record batches in the reader
 \item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}:
 read an \link[=arrow__Table]{arrow::Table} from the remaining record batches
 in the reader
-\item a string or \link[fs:path_abs]{file path}: interpret the file as an arrow
+\item a string file path: interpret the file as an arrow
 binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader}
 to process it.
 \item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}
diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd
index 7169ece6e77..9ba65cb18f3 100644
--- a/r/man/write_arrow.Rd
+++ b/r/man/write_arrow.Rd
@@ -14,7 +14,7 @@ write_arrow(x, stream, ...)
 \item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()}
 of \code{x} is used. The stream is left open. This uses the streaming format
 or the binary file format depending on the type of the writer.
-\item A string or \link[fs:path_abs]{file path}: \code{x} is serialized with
+\item A string file path: \code{x} is serialized with
 a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e.
 using the binary file format.
 \item A raw vector: typically of length zero (its data is ignored, and only used for
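
Note (not part of the patch): the behavioral fix tracked by ARROW-6323 is that `fs::path_abs()` does not expand a leading `~`, so the literal tilde reached the Arrow C++ file APIs, which perform no tilde expansion of their own; `base::normalizePath()` does expand it. A minimal sketch of the difference, assuming the `fs` package is installed (outputs are illustrative, not captured from a real session):

```r
# fs::path_abs() only prepends the working directory to relative paths;
# a "~/..." path passes through unexpanded.
fs::path_abs("~/data.parquet")
#> ~/data.parquet

# base::normalizePath() expands "~" via path.expand(). mustWork = FALSE
# avoids an error/warning when the file does not exist yet, e.g. a file
# that is about to be written.
normalizePath("~/data.parquet", mustWork = FALSE)
#> "/home/user/data.parquet"   # illustrative output
```

This is also why the diff passes `mustWork = FALSE` in `FileOutputStream()` and `mmap_create()`, where the target file need not exist yet, but keeps the stricter default in `ReadableFile()` and `mmap_open()`, where the file is expected to already exist.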