Skip to content

Commit 89e9612

Browse files
authored
Add RecordBatch::project microbenchmark (#8592)
# Which issue does this PR close?

- Related to #8591.

# Rationale for this change

Add a microbenchmark for `RecordBatch::project` to measure the performance impact of #8583.

# What changes are included in this PR?

Adds an additional microbenchmark to `arrow-rs`.

# Are these changes tested?

Not applicable for benchmark code. The benchmark was tested manually.

# Are there any user-facing changes?

No.
1 parent d268635 commit 89e9612

File tree

2 files changed

+96
-0
lines changed

2 files changed

+96
-0
lines changed

arrow-array/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,7 @@ harness = false
8080
[[bench]]
8181
name = "union_array"
8282
harness = false
83+
84+
[[bench]]
85+
name = "record_batch"
86+
harness = false
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_array::{ArrayRef, Int64Array, RecordBatch, RecordBatchOptions};
19+
use arrow_schema::{DataType, Field, Schema, SchemaRef};
20+
use criterion::*;
21+
use num_integer::Integer;
22+
use std::sync::Arc;
23+
24+
fn make_record_batch(column_count: usize, row_count: usize) -> RecordBatch {
25+
let fields = (0..column_count)
26+
.map(|i| Field::new(format!("col_{}", i), DataType::Int64, i.is_even()))
27+
.collect::<Vec<_>>();
28+
29+
let columns = fields
30+
.iter()
31+
.map(|_| {
32+
let array_ref: ArrayRef = Arc::new(Int64Array::from_value(0, row_count));
33+
array_ref
34+
})
35+
.collect::<Vec<_>>();
36+
37+
let schema = Schema::new(fields);
38+
39+
let mut options = RecordBatchOptions::new();
40+
options.row_count = Some(row_count);
41+
42+
RecordBatch::try_new_with_options(SchemaRef::new(schema), columns, &options).unwrap()
43+
}
44+
45+
fn project_benchmark(
46+
c: &mut Criterion,
47+
column_count: usize,
48+
row_count: usize,
49+
projection_size: usize,
50+
) {
51+
let input = make_input(column_count, row_count, projection_size);
52+
53+
c.bench_with_input(
54+
BenchmarkId::new(
55+
"project",
56+
format!(
57+
"{:?}x{:?} -> {:?}x{:?}",
58+
input.0.num_columns(),
59+
input.0.num_rows(),
60+
input.1.len(),
61+
input.0.num_rows()
62+
),
63+
),
64+
&input,
65+
|b, (rb, projection)| {
66+
b.iter(|| black_box(rb.project(projection).unwrap()));
67+
},
68+
);
69+
}
70+
71+
/// Prepares the input for one benchmark case.
///
/// Returns a batch built by `make_record_batch(column_count, row_count)` together with
/// the projection indices `0..projection_size`, i.e. the first `projection_size`
/// column positions in ascending order.
fn make_input(
    column_count: usize,
    row_count: usize,
    projection_size: usize,
) -> (RecordBatch, Vec<usize>) {
    let projection: Vec<usize> = (0..projection_size).collect();
    (make_record_batch(column_count, row_count), projection)
}
80+
81+
fn criterion_benchmark(c: &mut Criterion) {
82+
[10, 100, 1000].iter().for_each(|&column_count| {
83+
[1, column_count / 2, column_count - 1]
84+
.iter()
85+
.for_each(|&projection_size| {
86+
project_benchmark(c, column_count, 8192, projection_size);
87+
})
88+
});
89+
}
90+
91+
criterion_group!(benches, criterion_benchmark);
92+
criterion_main!(benches);

0 commit comments

Comments
 (0)