Skip to content

Commit

Permalink
read parquet files
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffwong-nflx committed Dec 19, 2018
1 parent 22d89dd commit 456c5d2
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
10 changes: 10 additions & 0 deletions r/R/read_parquet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#' Read one or more parquet files from disk
#'
#' Every entry of `files` is read with `read_parquet_file()`, wrapped as an
#' `arrow::Table`, and converted with `as_tibble()`. The per-file results are
#' then combined row-wise with `rbind`.
#'
#' @param files a vector of filenames
#' @return the result of `rbind`-ing the per-file tibbles together
#' @export
read_parquet = function(files) {
  # Load a single file and convert it to a tibble.
  load_one = function(path) {
    table_ptr = read_parquet_file(path)
    arrow_table = shared_ptr(`arrow::Table`, table_ptr)
    as_tibble(arrow_table)
  }

  per_file = lapply(files, load_one)
  do.call('rbind', per_file)
}
38 changes: 38 additions & 0 deletions r/src/parquetfilereader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <arrow/api.h>
#include <arrow/io/api.h>
#include <parquet/arrow/reader.h>
#include <parquet/arrow/writer.h>
#include <parquet/exception.h>

// [[Rcpp::export]]
std::shared_ptr<arrow::Table> read_parquet_file(std::string filename) {
std::shared_ptr<arrow::io::ReadableFile> infile;
PARQUET_THROW_NOT_OK(arrow::io::ReadableFile::Open(
filename, arrow::default_memory_pool(), &infile));

std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(reader->ReadTable(&table));

return table;
}

0 comments on commit 456c5d2

Please sign in to comment.