Skip to content

Commit 0185da6

Browse files
Perf: fast CursorValues compare for StringViewArray using inline_key_… (#16630)
* Perf: fast CursorValues compare for StringViewArray using inline_key_fast * fix * polish * polish * add test --------- Co-authored-by: Daniël Heres <danielheres@gmail.com>
1 parent 8c5d06d commit 0185da6

File tree

2 files changed

+129
-13
lines changed

2 files changed

+129
-13
lines changed

datafusion/physical-plan/src/sorts/cursor.rs

Lines changed: 127 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,64 @@ impl CursorArray for StringViewArray {
289289
}
290290
}
291291

292+
/// Todo use arrow-rs side api after: <https://github.com/apache/arrow-rs/pull/7748> released
293+
/// Builds a 128-bit composite key for an inline value:
294+
///
295+
/// - High 96 bits: the inline data in big-endian byte order (for correct lexicographical sorting).
296+
/// - Low 32 bits: the length in big-endian byte order, acting as a tiebreaker so shorter strings
297+
/// (or those with fewer meaningful bytes) always numerically sort before longer ones.
298+
///
299+
/// This function extracts the length and the 12-byte inline string data from the raw
300+
/// little-endian `u128` representation, converts them to big-endian ordering, and packs them
301+
/// into a single `u128` value suitable for fast, branchless comparisons.
302+
///
303+
/// ### Why include length?
304+
///
305+
/// A pure 96-bit content comparison can’t distinguish between two values whose inline bytes
306+
/// compare equal—either because one is a true prefix of the other or because zero-padding
307+
/// hides extra bytes. By tucking the 32-bit length into the lower bits, a single `u128` compare
308+
/// handles both content and length in one go.
309+
///
310+
/// Example: comparing "bar" (3 bytes) vs "bar\0" (4 bytes)
311+
///
312+
/// | String | Bytes 0–4 (length LE) | Bytes 4–16 (data + padding) |
313+
/// |------------|-----------------------|---------------------------------|
314+
/// | `"bar"` | `03 00 00 00` | `62 61 72` + 9 × `00` |
315+
/// | `"bar\0"`| `04 00 00 00` | `62 61 72 00` + 8 × `00` |
316+
///
317+
/// Both inline parts become `62 61 72 00…00`, so they tie on content. The length field
318+
/// then differentiates:
319+
///
320+
/// ```text
321+
/// key("bar") = 0x0000000000000000000062617200000003
322+
/// key("bar\0") = 0x0000000000000000000062617200000004
323+
/// ⇒ key("bar") < key("bar\0")
324+
/// ```
325+
#[inline(always)]
326+
pub fn inline_key_fast(raw: u128) -> u128 {
327+
// Convert the raw u128 (little-endian) into bytes for manipulation
328+
let raw_bytes = raw.to_le_bytes();
329+
330+
// Extract the length (first 4 bytes), convert to big-endian u32, and promote to u128
331+
let len_le = &raw_bytes[0..4];
332+
let len_be = u32::from_le_bytes(len_le.try_into().unwrap()).to_be() as u128;
333+
334+
// Extract the inline string bytes (next 12 bytes), place them into the lower 12 bytes of a 16-byte array,
335+
// padding the upper 4 bytes with zero to form a little-endian u128 value
336+
let mut inline_bytes = [0u8; 16];
337+
inline_bytes[4..16].copy_from_slice(&raw_bytes[4..16]);
338+
339+
// Convert to big-endian to ensure correct lexical ordering
340+
let inline_u128 = u128::from_le_bytes(inline_bytes).to_be();
341+
342+
// Shift right by 32 bits to discard the zero padding (upper 4 bytes),
343+
// so that the inline string occupies the high 96 bits
344+
let inline_part = inline_u128 >> 32;
345+
346+
// Combine the inline string part (high 96 bits) and length (low 32 bits) into the final key
347+
(inline_part << 32) | len_be
348+
}
349+
292350
impl CursorValues for StringViewArray {
293351
fn len(&self) -> usize {
294352
self.views().len()
@@ -303,7 +361,7 @@ impl CursorValues for StringViewArray {
303361
let r_view = unsafe { r.views().get_unchecked(r_idx) };
304362

305363
if l.data_buffers().is_empty() && r.data_buffers().is_empty() {
306-
return l_view.eq(r_view);
364+
return l_view == r_view;
307365
}
308366

309367
let l_len = *l_view as u32;
@@ -323,12 +381,12 @@ impl CursorValues for StringViewArray {
323381
let l_view = unsafe { cursor.views().get_unchecked(idx) };
324382
let r_view = unsafe { cursor.views().get_unchecked(idx - 1) };
325383
if cursor.data_buffers().is_empty() {
326-
return l_view.eq(r_view);
384+
return l_view == r_view;
327385
}
328386

329387
let l_len = *l_view as u32;
330-
331388
let r_len = *r_view as u32;
389+
332390
if l_len != r_len {
333391
return false;
334392
}
@@ -346,11 +404,7 @@ impl CursorValues for StringViewArray {
346404
if l.data_buffers().is_empty() && r.data_buffers().is_empty() {
347405
let l_view = unsafe { l.views().get_unchecked(l_idx) };
348406
let r_view = unsafe { r.views().get_unchecked(r_idx) };
349-
let l_len = *l_view as u32;
350-
let r_len = *r_view as u32;
351-
let l_data = unsafe { StringViewArray::inline_value(l_view, l_len as usize) };
352-
let r_data = unsafe { StringViewArray::inline_value(r_view, r_len as usize) };
353-
return l_data.cmp(r_data);
407+
return inline_key_fast(*l_view).cmp(&inline_key_fast(*r_view));
354408
}
355409

356410
unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx) }
@@ -445,11 +499,11 @@ impl<T: CursorValues> CursorValues for ArrayValues<T> {
445499

446500
#[cfg(test)]
447501
mod tests {
448-
use std::sync::Arc;
449-
502+
use arrow::array::GenericBinaryArray;
450503
use datafusion_execution::memory_pool::{
451504
GreedyMemoryPool, MemoryConsumer, MemoryPool,
452505
};
506+
use std::sync::Arc;
453507

454508
use super::*;
455509

@@ -610,4 +664,67 @@ mod tests {
610664
b.advance();
611665
assert_eq!(a.cmp(&b), Ordering::Less);
612666
}
667+
668+
/// Integration tests for `inline_key_fast` covering:
669+
///
670+
/// 1. Monotonic ordering across increasing lengths and lexical variations.
671+
/// 2. Cross-check against `GenericBinaryArray` comparison to ensure semantic equivalence.
672+
///
673+
/// This also includes a specific test for the “bar” vs. “bar\0” case, demonstrating why
674+
/// the length field is required even when all inline bytes fit in 12 bytes.
675+
#[test]
676+
fn test_inline_key_fast_various_lengths_and_lexical() {
677+
/// Helper to create a raw u128 value representing an inline ByteView
678+
/// - `length`: number of meaningful bytes (≤ 12)
679+
/// - `data`: the actual inline data
680+
fn make_raw_inline(length: u32, data: &[u8]) -> u128 {
681+
assert!(length as usize <= 12, "Inline length must be ≤ 12");
682+
assert!(data.len() == length as usize, "Data must match length");
683+
684+
let mut raw_bytes = [0u8; 16];
685+
raw_bytes[0..4].copy_from_slice(&length.to_le_bytes()); // little-endian length
686+
raw_bytes[4..(4 + data.len())].copy_from_slice(data); // inline data
687+
u128::from_le_bytes(raw_bytes)
688+
}
689+
690+
// Test inputs: include the specific "bar" vs "bar\0" case, plus length and lexical variations
691+
let test_inputs: Vec<&[u8]> = vec![
692+
b"a",
693+
b"aa",
694+
b"aaa",
695+
b"aab",
696+
b"abcd",
697+
b"abcde",
698+
b"abcdef",
699+
b"abcdefg",
700+
b"abcdefgh",
701+
b"abcdefghi",
702+
b"abcdefghij",
703+
b"abcdefghijk",
704+
b"abcdefghijkl", // 12 bytes, max inline
705+
b"bar",
706+
b"bar\0", // special case to test length tiebreaker
707+
b"xyy",
708+
b"xyz",
709+
];
710+
711+
// Monotonic key order: content then length,and cross-check against GenericBinaryArray comparison
712+
let array: GenericBinaryArray<i32> = GenericBinaryArray::from(
713+
test_inputs.iter().map(|s| Some(*s)).collect::<Vec<_>>(),
714+
);
715+
716+
for i in 0..array.len() - 1 {
717+
let v1 = array.value(i);
718+
let v2 = array.value(i + 1);
719+
// Ensure lexical ordering matches
720+
assert!(v1 < v2, "Array compare failed: {v1:?} !< {v2:?}");
721+
// Ensure fast key compare matches
722+
let key1 = inline_key_fast(make_raw_inline(v1.len() as u32, v1));
723+
let key2 = inline_key_fast(make_raw_inline(v2.len() as u32, v2));
724+
assert!(
725+
key1 < key2,
726+
"Key compare failed: key({v1:?})=0x{key1:032x} !< key({v2:?})=0x{key2:032x}",
727+
);
728+
}
729+
}
613730
}

datafusion/physical-plan/src/sorts/merge.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -493,13 +493,12 @@ impl<C: CursorValues> SortPreservingMergeStream<C> {
493493
if self.enable_round_robin_tie_breaker && cmp_node == 1 {
494494
match (&self.cursors[winner], &self.cursors[challenger]) {
495495
(Some(ac), Some(bc)) => {
496-
let ord = ac.cmp(bc);
497-
if ord.is_eq() {
496+
if ac == bc {
498497
self.handle_tie(cmp_node, &mut winner, challenger);
499498
} else {
500499
// Ends of tie breaker
501500
self.round_robin_tie_breaker_mode = false;
502-
if ord.is_gt() {
501+
if ac > bc {
503502
self.update_winner(cmp_node, &mut winner, challenger);
504503
}
505504
}

0 commit comments

Comments
 (0)