Skip to content

Commit

Permalink
Add bloom filter benchmark (#3323)
Browse files (browse the repository at this point in the history)
  • Loading branch information
viirya authored Dec 10, 2022
1 parent 9e39f96 commit ad94368
Showing 1 changed file with 39 additions and 1 deletion.
40 changes: 39 additions & 1 deletion parquet/benches/arrow_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use std::sync::Arc;

use arrow::datatypes::*;
use arrow::{record_batch::RecordBatch, util::data_gen::*};
use parquet::file::properties::WriterProperties;
use parquet::{arrow::ArrowWriter, errors::Result};

fn create_primitive_bench_batch(
Expand Down Expand Up @@ -294,9 +295,26 @@ fn _create_nested_bench_batch(

/// Writes `batch` by delegating to `write_batch_with_option`, passing `None`
/// for the writer properties.
///
/// Returns whatever `write_batch_with_option` returns.
#[inline]
fn write_batch(batch: &RecordBatch) -> Result<()> {
    // No explicit writer properties are supplied on this path.
    let no_props = None;
    write_batch_with_option(batch, no_props)
}

/// Writes `batch` by delegating to `write_batch_with_option`, passing writer
/// properties built with `set_bloom_filter_enabled(true)`.
///
/// Returns whatever `write_batch_with_option` returns.
#[inline]
fn write_batch_enable_bloom_filter(batch: &RecordBatch) -> Result<()> {
    // Only the bloom filter setting is changed on the builder before building.
    let builder = WriterProperties::builder().set_bloom_filter_enabled(true);
    let bloom_props = builder.build();

    write_batch_with_option(batch, Some(bloom_props))
}

#[inline]
fn write_batch_with_option(
batch: &RecordBatch,
props: Option<WriterProperties>,
) -> Result<()> {
// Write batch to an in-memory writer
let buffer = vec![];
let mut writer = ArrowWriter::try_new(buffer, batch.schema(), None)?;
let mut writer = ArrowWriter::try_new(buffer, batch.schema(), props)?;

writer.write(batch)?;
writer.close()?;
Expand All @@ -317,6 +335,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
b.iter(|| write_batch(&batch).unwrap())
});

group.bench_function("4096 values primitive with bloom filter", |b| {
b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
});

let batch = create_primitive_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
group.throughput(Throughput::Bytes(
batch
Expand All @@ -329,6 +351,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
b.iter(|| write_batch(&batch).unwrap())
});

group.bench_function("4096 values primitive non-null with bloom filter", |b| {
b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
});

let batch = create_bool_bench_batch(4096, 0.25, 0.75).unwrap();
group.throughput(Throughput::Bytes(
batch
Expand Down Expand Up @@ -365,6 +391,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
b.iter(|| write_batch(&batch).unwrap())
});

group.bench_function("4096 values string with bloom filter", |b| {
b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
});

let batch = create_string_dictionary_bench_batch(4096, 0.25, 0.75).unwrap();
group.throughput(Throughput::Bytes(
batch
Expand All @@ -377,6 +407,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
b.iter(|| write_batch(&batch).unwrap())
});

group.bench_function("4096 values string dictionary with bloom filter", |b| {
b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
});

let batch = create_string_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
group.throughput(Throughput::Bytes(
batch
Expand All @@ -389,6 +423,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
b.iter(|| write_batch(&batch).unwrap())
});

group.bench_function("4096 values string non-null with bloom filter", |b| {
b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
});

group.finish();
}

Expand Down

0 comments on commit ad94368

Please sign in to comment.