158 changes: 146 additions & 12 deletions arrow-array/src/array/byte_view_array.rs
@@ -27,6 +27,7 @@ use arrow_schema::{ArrowError, DataType};
 use core::str;
 use num::ToPrimitive;
 use std::any::Any;
+use std::cmp::Ordering;
 use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::sync::Arc;
@@ -539,25 +540,30 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         left_idx: usize,
         right: &GenericByteViewArray<T>,
         right_idx: usize,
-    ) -> std::cmp::Ordering {
+    ) -> Ordering {
         let l_view = left.views().get_unchecked(left_idx);
-        let l_len = *l_view as u32;
+        let l_byte_view = ByteView::from(*l_view);
 
         let r_view = right.views().get_unchecked(right_idx);
-        let r_len = *r_view as u32;
+        let r_byte_view = ByteView::from(*r_view);
 
-        if l_len <= MAX_INLINE_VIEW_LEN && r_len <= MAX_INLINE_VIEW_LEN {
-            let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) };
-            let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) };
-            return l_data.cmp(r_data);
+        let l_len = l_byte_view.length;
+        let r_len = r_byte_view.length;
+
+        if l_len <= 12 && r_len <= 12 {
+            return Self::inline_key_fast(*l_view).cmp(&Self::inline_key_fast(*r_view));
         }

-        // one of the string is larger than 12 bytes,
Contributor @Dandandan:

You can change this code below to use `(l_view >> 32) as u32` as well (or `ByteView`, if it generates the same code). It seems that is a bit faster for the prefix comparison:

lt scalar StringViewArray
                        time:   [34.533 ms 34.567 ms 34.601 ms]
                        change: [−11.030% −10.827% −10.620%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 4 outliers among 100 measurements (4.00%)

Contributor Author:

Good point @Dandandan, I will change to the ByteView prefix.
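
A quick illustration of the reviewer's point (a sketch, not part of the PR; it assumes only the `arrow-data` crate, and the view values below are hypothetical): bits 32..64 of the raw little-endian `u128` view hold the 4-byte prefix, so `(l_view >> 32) as u32` and `ByteView::from(l_view).prefix` decode the same value, and byte-swapping it to big-endian makes integer order agree with byte order:

```rust
use arrow_data::ByteView;

fn main() {
    // Hypothetical non-inline view: a 20-byte value whose first four bytes are "abcd".
    // Only the length and prefix fields matter for this demonstration.
    let v = ByteView {
        length: 20,
        prefix: u32::from_le_bytes(*b"abcd"),
        buffer_index: 0,
        offset: 0,
    };
    let raw = v.as_u128();

    // The shift and the ByteView field decode the same prefix bits...
    assert_eq!((raw >> 32) as u32, ByteView::from(raw).prefix);

    // ...and comparing big-endian prefixes matches comparing the raw bytes.
    let w = ByteView {
        prefix: u32::from_le_bytes(*b"abce"),
        ..v
    };
    assert!(v.prefix.swap_bytes() < w.prefix.swap_bytes()); // "abcd" < "abce"
}
```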

-        // we then try to compare the inlined data first
-        let l_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, 4) };
-        let r_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, 4) };
-        if r_inlined_data != l_inlined_data {
-            return l_inlined_data.cmp(r_inlined_data);
+        // Note: in theory, ByteView is only used for strings longer than 12 bytes,
+        // but we can still use it to get the inlined prefix for shorter strings.
+        // The prefix is always the first 4 bytes of the view, for both short and long strings.
+        let l_inlined_be = l_byte_view.prefix.swap_bytes();
+        let r_inlined_be = r_byte_view.prefix.swap_bytes();
+        if l_inlined_be != r_inlined_be {
+            return l_inlined_be.cmp(&r_inlined_be);
         }

         // unfortunately, we need to compare the full data
@@ -566,6 +572,63 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
 
         l_full_data.cmp(r_full_data)
     }

+    /// Builds a 128-bit composite key for an inline value:
Contributor:

Sorry -- I left comments for this function on the wrong PR: #7792 (comment)

Basically, I think it would be very helpful to explain what properties the resulting u128 has.

+    ///
+    /// - High 96 bits: the inline data in big-endian byte order (for correct lexicographical sorting).
+    /// - Low 32 bits: the length in big-endian byte order, acting as a tiebreaker so shorter strings
+    ///   (or those with fewer meaningful bytes) always numerically sort before longer ones.
+    ///
+    /// This function extracts the length and the 12-byte inline string data from the raw
+    /// little-endian `u128` representation, converts them to big-endian ordering, and packs them
+    /// into a single `u128` value suitable for fast, branchless comparisons.
+    ///
+    /// ### Why include length?
+    ///
+    /// A pure 96-bit content comparison can’t distinguish between two values whose inline bytes
+    /// compare equal—either because one is a true prefix of the other or because zero-padding
+    /// hides extra bytes. By tucking the 32-bit length into the lower bits, a single `u128` compare
+    /// handles both content and length in one go.
+    ///
+    /// Example: comparing "bar" (3 bytes) vs "bar\0" (4 bytes)
+    ///
+    /// | String     | Bytes 0–4 (length LE) | Bytes 4–16 (data + padding) |
+    /// |------------|-----------------------|-----------------------------|
+    /// | `"bar"`    | `03 00 00 00`         | `62 61 72` + 9 × `00`       |
+    /// | `"bar\0"`  | `04 00 00 00`         | `62 61 72 00` + 8 × `00`    |
+    ///
+    /// Both inline parts become `62 61 72 00…00`, so they tie on content. The length field
+    /// then differentiates:
+    ///
+    /// ```text
+    /// key("bar")   = 0x0000000000000000000062617200000003
+    /// key("bar\0") = 0x0000000000000000000062617200000004
+    /// ⇒ key("bar") < key("bar\0")
+    /// ```
+    #[inline(always)]
+    pub fn inline_key_fast(raw: u128) -> u128 {
+        // Convert the raw u128 (little-endian) into bytes for manipulation
+        let raw_bytes = raw.to_le_bytes();
+
+        // Extract the length (first 4 bytes), convert to big-endian u32, and promote to u128
+        let len_le = &raw_bytes[0..4];
+        let len_be = u32::from_le_bytes(len_le.try_into().unwrap()).to_be() as u128;
+
+        // Extract the inline string bytes (next 12 bytes), place them into the lower 12 bytes of a
+        // 16-byte array, padding the upper 4 bytes with zero to form a little-endian u128 value
+        let mut inline_bytes = [0u8; 16];
+        inline_bytes[4..16].copy_from_slice(&raw_bytes[4..16]);
+
+        // Convert to big-endian to ensure correct lexical ordering
+        let inline_u128 = u128::from_le_bytes(inline_bytes).to_be();
+
+        // Shift right by 32 bits to discard the zero padding (upper 4 bytes),
+        // so that the inline string occupies the high 96 bits
+        let inline_part = inline_u128 >> 32;
+
+        // Combine the inline string part (high 96 bits) and length (low 32 bits) into the final key
+        (inline_part << 32) | len_be
+    }
 }
 
 impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
@@ -874,7 +937,10 @@ impl From<Vec<Option<String>>> for StringViewArray {
 #[cfg(test)]
 mod tests {
     use crate::builder::{BinaryViewBuilder, StringViewBuilder};
-    use crate::{Array, BinaryViewArray, StringViewArray};
+    use crate::types::BinaryViewType;
+    use crate::{
+        Array, BinaryViewArray, GenericBinaryArray, GenericByteViewArray, StringViewArray,
+    };
     use arrow_buffer::{Buffer, ScalarBuffer};
     use arrow_data::ByteView;
 
@@ -1090,4 +1156,72 @@ mod tests {
         assert_eq!(array2, array2.clone());
         assert_eq!(array1, array2);
     }
+
+    /// Integration tests for `inline_key_fast` covering:
+    ///
+    /// 1. Monotonic ordering across increasing lengths and lexical variations.
+    /// 2. Cross-check against `GenericBinaryArray` comparison to ensure semantic equivalence.
+    ///
+    /// This also includes a specific test for the “bar” vs. “bar\0” case, demonstrating why
+    /// the length field is required even when all inline bytes fit in 12 bytes.
+    #[test]
+    fn test_inline_key_fast_various_lengths_and_lexical() {
+        /// Helper to create a raw u128 value representing an inline ByteView:
+        /// - `length`: number of meaningful bytes (≤ 12)
+        /// - `data`: the actual inline data
+        fn make_raw_inline(length: u32, data: &[u8]) -> u128 {
+            assert!(length as usize <= 12, "Inline length must be ≤ 12");
+            assert!(data.len() == length as usize, "Data must match length");
+
+            let mut raw_bytes = [0u8; 16];
+            raw_bytes[0..4].copy_from_slice(&length.to_le_bytes()); // little-endian length
+            raw_bytes[4..(4 + data.len())].copy_from_slice(data); // inline data
+            u128::from_le_bytes(raw_bytes)
+        }
+
+        // Test inputs: include the specific "bar" vs "bar\0" case, plus length and lexical variations
+        let test_inputs: Vec<&[u8]> = vec![
+            b"a",
+            b"aa",
+            b"aaa",
+            b"aab",
+            b"abcd",
+            b"abcde",
+            b"abcdef",
+            b"abcdefg",
+            b"abcdefgh",
+            b"abcdefghi",
+            b"abcdefghij",
+            b"abcdefghijk",
+            b"abcdefghijkl", // 12 bytes, max inline
+            b"bar",
+            b"bar\0", // special case to test the length tiebreaker
+            b"xyy",
+            b"xyz",
+        ];
+
+        // Monotonic key order: content first, then length; cross-checked against
+        // GenericBinaryArray comparison
+        let array: GenericBinaryArray<i32> =
+            GenericBinaryArray::from(test_inputs.iter().map(|s| Some(*s)).collect::<Vec<_>>());
+
+        for i in 0..array.len() - 1 {
+            let v1 = array.value(i);
+            let v2 = array.value(i + 1);
+            // Ensure lexical ordering matches
+            assert!(v1 < v2, "Array compare failed: {v1:?} !< {v2:?}");
+            // Ensure fast key compare matches
+            let key1 = GenericByteViewArray::<BinaryViewType>::inline_key_fast(make_raw_inline(
+                v1.len() as u32,
+                v1,
+            ));
+            let key2 = GenericByteViewArray::<BinaryViewType>::inline_key_fast(make_raw_inline(
+                v2.len() as u32,
+                v2,
+            ));
+            assert!(
+                key1 < key2,
+                "Key compare failed: key({v1:?})=0x{key1:032x} !< key({v2:?})=0x{key2:032x}",
+            );
+        }
+    }
 }
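
Since `inline_key_fast` is public, the property this test exercises can also be checked outside the test suite. A minimal sketch (assuming a binary crate with `arrow-array` as a dependency; `make_raw_inline` here is a hand-rolled helper mirroring the one in the test above, not a library API):

```rust
use arrow_array::types::BinaryViewType;
use arrow_array::GenericByteViewArray;

/// Pack `data` (≤ 12 bytes) into the raw little-endian u128 view layout:
/// length in bytes 0..4, inline data in bytes 4..16.
fn make_raw_inline(data: &[u8]) -> u128 {
    assert!(data.len() <= 12, "inline data must be ≤ 12 bytes");
    let mut raw = [0u8; 16];
    raw[0..4].copy_from_slice(&(data.len() as u32).to_le_bytes());
    raw[4..4 + data.len()].copy_from_slice(data);
    u128::from_le_bytes(raw)
}

fn main() {
    let key =
        |s: &[u8]| GenericByteViewArray::<BinaryViewType>::inline_key_fast(make_raw_inline(s));

    // Content decides first; the length stored in the low 32 bits breaks prefix ties.
    assert!(key(b"bar") < key(b"bar\0"));
    assert!(key(b"bar") < key(b"baz"));
    println!("key(\"bar\")   = {:#034x}", key(b"bar"));
    println!("key(\"bar\\0\") = {:#034x}", key(b"bar\0"));
}
```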
22 changes: 10 additions & 12 deletions arrow-ord/src/cmp.rs
@@ -33,6 +33,7 @@ use arrow_buffer::bit_util::ceil;
 use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
 use arrow_schema::ArrowError;
 use arrow_select::take::take;
+use std::cmp::Ordering;
 use std::ops::Not;
 
 #[derive(Debug, Copy, Clone)]
@@ -571,7 +572,7 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
         let r_view = unsafe { r.0.views().get_unchecked(r.1) };
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
             // For eq case, we can directly compare the inlined bytes
-            return l_view.cmp(r_view).is_eq();
+            return l_view == r_view;
         }
 
         let l_len = *l_view as u32;
@@ -592,15 +593,15 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {

     #[inline(always)]
     fn is_lt(l: Self::Item, r: Self::Item) -> bool {
+        // If both arrays use only the inline buffer
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
             let l_view = unsafe { l.0.views().get_unchecked(l.1) };
             let r_view = unsafe { r.0.views().get_unchecked(r.1) };
-            let l_len = *l_view as u32 as usize;
-            let r_len = *r_view as u32 as usize;
-            let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-            let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-            return l_bytes.cmp(r_bytes).is_lt();
+            return GenericByteViewArray::<T>::inline_key_fast(*l_view)
+                < GenericByteViewArray::<T>::inline_key_fast(*r_view);
         }
 
+        // Fallback to the generic, unchecked comparison for non-inline cases
         // # Safety
         // The index is within bounds as it is checked in value()
         unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_lt() }
@@ -642,17 +643,14 @@ pub fn compare_byte_view<T: ByteViewType>(
Expand Down Expand Up @@ -642,17 +643,14 @@ pub fn compare_byte_view<T: ByteViewType>(
     left_idx: usize,
     right: &GenericByteViewArray<T>,
     right_idx: usize,
-) -> std::cmp::Ordering {
+) -> Ordering {
     assert!(left_idx < left.len());
     assert!(right_idx < right.len());
     if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
         let l_view = unsafe { left.views().get_unchecked(left_idx) };
         let r_view = unsafe { right.views().get_unchecked(right_idx) };
-        let l_len = *l_view as u32 as usize;
-        let r_len = *r_view as u32 as usize;
-        let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-        let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-        return l_bytes.cmp(r_bytes);
+        return GenericByteViewArray::<T>::inline_key_fast(*l_view)
+            .cmp(&GenericByteViewArray::<T>::inline_key_fast(*r_view));
     }
     unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, right_idx) }
 }