diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java b/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java index b16a8c4ffa..8e83bc9ba2 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java @@ -28,6 +28,7 @@ import java.util.Map; +import static org.apache.parquet.hadoop.ParquetInputFormat.ALLOCATOR_DIRECT_BYTEBUFFER_ENABLED; import static org.apache.parquet.hadoop.ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED; import static org.apache.parquet.hadoop.ParquetInputFormat.DICTIONARY_FILTERING_ENABLED; import static org.apache.parquet.hadoop.ParquetInputFormat.BLOOM_FILTERING_ENABLED; @@ -91,6 +92,7 @@ public Builder(Configuration conf) { useStatsFilter(conf.getBoolean(STATS_FILTERING_ENABLED, true)); useRecordFilter(conf.getBoolean(RECORD_FILTERING_ENABLED, true)); useColumnIndexFilter(conf.getBoolean(COLUMN_INDEX_FILTERING_ENABLED, true)); + useDirectByteBufferAllocator(conf.getBoolean(ALLOCATOR_DIRECT_BYTEBUFFER_ENABLED, false)); usePageChecksumVerification(conf.getBoolean(PAGE_VERIFY_CHECKSUM_ENABLED, usePageChecksumVerification)); useBloomFilter(conf.getBoolean(BLOOM_FILTERING_ENABLED, true)); diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java index 2fdca3be28..72fa7687dc 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java @@ -20,6 +20,7 @@ package org.apache.parquet; import org.apache.parquet.bytes.ByteBufferAllocator; +import org.apache.parquet.bytes.DirectByteBufferAllocator; import org.apache.parquet.bytes.HeapByteBufferAllocator; import org.apache.parquet.compression.CompressionCodecFactory; import org.apache.parquet.filter2.compat.FilterCompat; @@ -242,6 +243,13 @@ public Builder 
withRecordFilter(FilterCompat.Filter rowGroupFilter) { return this; } + /** Opts this builder into direct ByteBuffer allocation. @param useDirectByteBuffer if true, the builder's allocator is replaced with a new DirectByteBufferAllocator; if false, this call is a no-op and whatever allocator is currently set is kept (it does not switch back from a direct allocator) @return this builder, for call chaining */ public Builder useDirectByteBufferAllocator(boolean useDirectByteBuffer) { + if (useDirectByteBuffer) { + this.allocator = new DirectByteBufferAllocator(); + } + return this; + } + public Builder withRange(long start, long end) { this.metadataFilter = ParquetMetadataConverter.range(start, end); return this; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java index f46f18211a..c201e8d6c0 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java @@ -144,6 +144,11 @@ public class ParquetInputFormat extends FileInputFormat { */ public static final String BLOOM_FILTERING_ENABLED = "parquet.filter.bloom.enabled"; + /** + * key to configure whether the direct ByteBuffer allocator is enabled (default false when read by HadoopReadOptions.Builder) + * if true then the read options builder replaces its allocator with a DirectByteBufferAllocator + * if false then the builder keeps whatever allocator it already has + */ + public static final String ALLOCATOR_DIRECT_BYTEBUFFER_ENABLED = "parquet.allocator.direct.enabled"; + /** * key to turn on or off task side metadata loading (default true) * if true then metadata is read on the task side and some tasks may finish immediately.