-
Notifications
You must be signed in to change notification settings - Fork 875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add BooleanArray::from_unary and BooleanArray::from_binary #3258
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
2147b6f
Add BooleanArray::from_unary and BooleanArray::from_binary
tustvold 5e9a7bc
Add docs
tustvold 12ad98b
Tweak signatures
tustvold d6f6755
Remove fallibility from combine_option_bitmap
tustvold a2d6809
Remove unused compare_option_bitmap
tustvold b99f7d7
Remove fallibility
tustvold 86c295c
Fix doc
tustvold File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,8 +17,11 @@ | |
|
||
//! Utils for working with packed bit masks | ||
|
||
use crate::ArrayData; | ||
use arrow_buffer::bit_chunk_iterator::BitChunks; | ||
use arrow_buffer::bit_util::{ceil, get_bit, set_bit}; | ||
use arrow_buffer::buffer::buffer_bin_and; | ||
use arrow_buffer::Buffer; | ||
|
||
/// Sets all bits on `write_data` in the range `[offset_write..offset_write+len]` to be equal to the | ||
/// bits in `data` in the range `[offset_read..offset_read+len]` | ||
|
@@ -62,9 +65,41 @@ pub fn set_bits( | |
null_count as usize | ||
} | ||
|
||
/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation. | ||
/// | ||
/// This function is useful when implementing operations on higher level arrays. | ||
pub fn combine_option_bitmap( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously this would return an error if called with an empty In practice no codepaths could hit this, and so I removed it as it seemed unnecessary |
||
arrays: &[&ArrayData], | ||
len_in_bits: usize, | ||
) -> Option<Buffer> { | ||
let (buffer, offset) = arrays | ||
.iter() | ||
.map(|array| (array.null_buffer().cloned(), array.offset())) | ||
.reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) { | ||
((None, _), (None, _)) => (None, 0), | ||
((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => { | ||
(Some(buffer), offset) | ||
} | ||
((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => ( | ||
Some(buffer_bin_and( | ||
&buffer_left, | ||
offset_left, | ||
&buffer_right, | ||
offset_right, | ||
len_in_bits, | ||
)), | ||
0, | ||
), | ||
})?; | ||
|
||
Some(buffer?.bit_slice(offset, len_in_bits)) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
use arrow_schema::DataType; | ||
use std::sync::Arc; | ||
|
||
#[test] | ||
fn test_set_bits_aligned() { | ||
|
@@ -187,4 +222,110 @@ mod tests { | |
assert_eq!(destination, expected_data); | ||
assert_eq!(result, expected_null_count); | ||
} | ||
|
||
fn make_data_with_null_bit_buffer( | ||
len: usize, | ||
offset: usize, | ||
null_bit_buffer: Option<Buffer>, | ||
) -> Arc<ArrayData> { | ||
let buffer = Buffer::from(&vec![11; len + offset]); | ||
|
||
Arc::new( | ||
ArrayData::try_new( | ||
DataType::UInt8, | ||
len, | ||
null_bit_buffer, | ||
offset, | ||
vec![buffer], | ||
vec![], | ||
) | ||
.unwrap(), | ||
) | ||
} | ||
|
||
/// Exercises `combine_option_bitmap` on arrays with and without null buffers,
/// including an empty input and sliced arrays.
#[test]
fn test_combine_option_bitmap() {
    // Asserts that combining `arrays` over `len_in_bits` bits yields `expected`.
    fn check(expected: Option<Buffer>, arrays: &[&ArrayData], len_in_bits: usize) {
        assert_eq!(expected, combine_option_bitmap(arrays, len_in_bits));
    }

    // Wraps a single byte as an optional one-byte bitmap buffer.
    let bits = |byte: u8| Some(Buffer::from([byte]));

    let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
    let some_bitmap = make_data_with_null_bit_buffer(8, 0, bits(0b01001010));
    let inverse_bitmap = make_data_with_null_bit_buffer(8, 0, bits(0b10110101));
    let some_other_bitmap = make_data_with_null_bit_buffer(8, 0, bits(0b11010111));

    // Empty input produces no bitmap.
    check(None, &[], 8);

    // A single array's bitmap comes back unchanged.
    check(bits(0b01001010), &[&some_bitmap], 8);

    // Arrays without null buffers contribute nothing.
    check(None, &[&none_bitmap, &none_bitmap], 8);
    check(bits(0b01001010), &[&some_bitmap, &none_bitmap], 8);
    check(bits(0b11010111), &[&none_bitmap, &some_other_bitmap], 8);

    // Two present bitmaps are and-ed together.
    check(bits(0b01001010), &[&some_bitmap, &some_bitmap], 8);
    check(bits(0b0), &[&some_bitmap, &inverse_bitmap], 8);
    check(
        bits(0b01000010),
        &[&some_bitmap, &some_other_bitmap, &none_bitmap],
        8,
    );

    // Sliced arrays are combined starting from their respective offsets.
    check(
        bits(0b00001001),
        &[
            &some_bitmap.slice(3, 5),
            &inverse_bitmap.slice(2, 5),
            &some_other_bitmap.slice(1, 5),
        ],
        5,
    );
}
|
||
/// Checks that arrays built with different offsets all combine to the same
/// `0b10101010` bitmap.
#[test]
fn test_combine_option_bitmap_with_offsets() {
    // Every combination in this test is expected to produce `0b10101010` over 8 bits.
    fn expect_alternating(arrays: &[&ArrayData]) {
        assert_eq!(
            Some(Buffer::from([0b10101010])),
            combine_option_bitmap(arrays, 8)
        );
    }

    let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
    let bitmap0 = make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010])));
    let bitmap1 =
        make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1])));
    let bitmap2 =
        make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10])));

    // Single arrays with a non-zero offset.
    expect_alternating(&[&bitmap1]);
    expect_alternating(&[&bitmap2]);

    // Offset arrays paired with an array that has no null buffer.
    expect_alternating(&[&bitmap1, &none_bitmap]);
    expect_alternating(&[&none_bitmap, &bitmap2]);

    // Two null buffers with differing offsets.
    expect_alternating(&[&bitmap0, &bitmap1]);
    expect_alternating(&[&bitmap1, &bitmap2]);
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I specifically want this to be infallible, because I want a future `try_from_binary` to be able to return a user-provided error, instead of bundling it up in `ArrowError`.