Skip to content

Commit 79575aa

Browse files
authored
perf: zero-copy path in RowConverter::from_binary (#8686)
# Which issue does this PR close?

- Closes #8685.

# What changes are included in this PR?

In the implementation of `RowConverter::from_binary`, the `BinaryArray` is broken into its parts, and an attempt is made to convert the data buffer into a `Vec` without copying, using `Buffer::into_vec`. Only if this fails is the data copied into a newly allocated `Vec`.

# Are these changes tested?

The existing tests that use `RowConverter::from_binary` pass; they all convert a non-shared buffer and therefore take advantage of the optimization. Another test is added to cover the copying path.

# Are there any user-facing changes?

No.
1 parent 021090f commit 79575aa

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

arrow-row/src/lib.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -913,9 +913,13 @@ impl RowConverter {
913913
0,
914914
"can't construct Rows instance from array with nulls"
915915
);
916+
let (offsets, values, _) = array.into_parts();
917+
let offsets = offsets.iter().map(|&i| i.as_usize()).collect();
918+
// Try zero-copy, if it does not succeed, fall back to copying the values.
919+
let buffer = values.into_vec().unwrap_or_else(|values| values.to_vec());
916920
Rows {
917-
buffer: array.values().to_vec(),
918-
offsets: array.offsets().iter().map(|&i| i.as_usize()).collect(),
921+
buffer,
922+
offsets,
919923
config: RowConfig {
920924
fields: Arc::clone(&self.fields),
921925
validate_utf8: true,
@@ -2474,6 +2478,19 @@ mod tests {
24742478
assert!(rows.row(3) < rows.row(0));
24752479
}
24762480

2481+
#[test]
2482+
fn test_from_binary_shared_buffer() {
2483+
let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
2484+
let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
2485+
let rows = converter.convert_columns(&[array]).unwrap();
2486+
let binary_rows = rows.try_into_binary().expect("known-small rows");
2487+
let _binary_rows_shared_buffer = binary_rows.clone();
2488+
2489+
let parsed = converter.from_binary(binary_rows);
2490+
2491+
converter.convert_rows(parsed.iter()).unwrap();
2492+
}
2493+
24772494
#[test]
24782495
#[should_panic(expected = "Encountered non UTF-8 data")]
24792496
fn test_invalid_utf8() {

0 commit comments

Comments
 (0)