Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance of SlicesIterator (15%) (#824)
Browse files: browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Feb 8, 2022
1 parent 7e25ef2 commit d8093bf
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 22 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,7 @@ harness = false
[[bench]]
name = "write_json"
harness = false

[[bench]]
name = "slices_iterator"
harness = false
25 changes: 25 additions & 0 deletions benches/slices_iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use criterion::{criterion_group, criterion_main, Criterion};

use arrow2::bitmap::{utils::SlicesIterator, Bitmap};

/// Benchmark body: walks every slice produced by `SlicesIterator` over `lhs`
/// and checks that the slice lengths add up to the expected total.
///
/// The expected total is `lhs.len() - lhs.null_count()`. Each item yielded by
/// the iterator is a `(usize, usize)` pair; only the second component is
/// summed. The final assertion also keeps the iteration observable, so the
/// work cannot be discarded by the optimizer.
fn bench_slices(lhs: &Bitmap) {
    let expected = lhs.len() - lhs.null_count();

    let total: usize = SlicesIterator::new(lhs)
        .map(|(_, length)| length)
        .sum();

    assert_eq!(total, expected);
}

/// Registers one `bench_slices` benchmark per bitmap size.
///
/// Sizes are powers of two from 2^10 to 2^20, stepping the exponent by 2.
/// Every bitmap follows the same pattern: bit `x` is set iff `x % 3 == 0`.
/// Each benchmark is named `"bitmap slices 2^<exponent>"`.
fn add_benchmark(c: &mut Criterion) {
    for log2_size in (10..=20).step_by(2) {
        let size = 2usize.pow(log2_size);

        // A range is already an iterator, so no `.into_iter()` is needed.
        let bitmap: Bitmap = (0..size).map(|x| x % 3 == 0).collect();

        // The bitmap is built once, outside `b.iter`, so only the slice
        // iteration is measured.
        c.bench_function(&format!("bitmap slices 2^{}", log2_size), |b| {
            b.iter(|| bench_slices(&bitmap))
        });
    }
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
35 changes: 13 additions & 22 deletions src/bitmap/utils/slice_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ impl<'a> SlicesIterator<'a> {
}
}

#[inline]
fn finish(&mut self) -> Option<(usize, usize)> {
self.state = State::Finished;
if self.on_region {
Expand All @@ -75,6 +76,7 @@ impl<'a> SlicesIterator<'a> {
impl<'a> Iterator for SlicesIterator<'a> {
type Item = (usize, usize);

#[inline]
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.state == State::Finished {
Expand All @@ -89,17 +91,15 @@ impl<'a> Iterator for SlicesIterator<'a> {
match (self.on_region, self.current_byte) {
(true, &255u8) => {
self.len = std::cmp::min(self.max_len - self.start, self.len + 8);
match self.values.next() {
Some(v) => self.current_byte = v,
None => return self.finish(),
if let Some(v) = self.values.next() {
self.current_byte = v;
};
continue;
}
(false, &0) => {
self.len = std::cmp::min(self.max_len - self.start, self.len + 8);
match self.values.next() {
Some(v) => self.current_byte = v,
None => return self.finish(),
if let Some(v) = self.values.next() {
self.current_byte = v;
};
continue;
}
Expand All @@ -114,26 +114,17 @@ impl<'a> Iterator for SlicesIterator<'a> {
(true, true) => self.len += 1,
(false, false) => self.len += 1,
(true, false) => {
self.on_region = false;
let result = (self.start, self.len);
self.start += self.len;
self.len = 1;
if self.mask == 1 {
// reached a new byte => try to fetch it from the iterator
match self.values.next() {
Some(v) => {
self.on_region = false;
let result = (self.start, self.len);
self.start += self.len;
self.len = 1;
self.current_byte = v;
return Some(result);
}
None => return self.finish(),
if let Some(v) = self.values.next() {
self.current_byte = v;
};
} else {
self.on_region = false;
let result = (self.start, self.len);
self.start += self.len;
self.len = 1;
return Some(result);
}
return Some(result);
}
(false, true) => {
self.start += self.len;
Expand Down

0 comments on commit d8093bf

Please sign in to comment.