Skip to content

Commit 15a8738

Browse files
authored
Perf: Optimize CursorValues compare performance for StringViewArray (1.4X faster for sort-tpch Q11) (#16509)
* Perf: Optimize CursorValues compare performance for StringViewArray
* fix
1 parent 1bfd888 commit 15a8738

File tree

1 file changed

+25
-3
lines changed

1 file changed

+25
-3
lines changed

datafusion/physical-plan/src/sorts/cursor.rs

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -293,14 +293,19 @@ impl CursorValues for StringViewArray {
293293
self.views().len()
294294
}
295295

296+
#[inline(always)]
296297
fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool {
297298
// SAFETY: Both l_idx and r_idx are guaranteed to be within bounds,
298299
// and any null-checks are handled in the outer layers.
299300
// Fast path: Compare the lengths before full byte comparison.
300-
301301
let l_view = unsafe { l.views().get_unchecked(l_idx) };
302-
let l_len = *l_view as u32;
303302
let r_view = unsafe { r.views().get_unchecked(r_idx) };
303+
304+
if l.data_buffers().is_empty() && r.data_buffers().is_empty() {
305+
return l_view.eq(r_view);
306+
}
307+
308+
let l_len = *l_view as u32;
304309
let r_len = *r_view as u32;
305310
if l_len != r_len {
306311
return false;
@@ -309,13 +314,19 @@ impl CursorValues for StringViewArray {
309314
unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx).is_eq() }
310315
}
311316

317+
#[inline(always)]
312318
fn eq_to_previous(cursor: &Self, idx: usize) -> bool {
313319
// SAFETY: The caller guarantees that idx > 0 and the indices are valid.
314320
// Already checked it in is_eq_to_prev_one function
315321
// Fast path: Compare the lengths of the current and previous views.
316322
let l_view = unsafe { cursor.views().get_unchecked(idx) };
317-
let l_len = *l_view as u32;
318323
let r_view = unsafe { cursor.views().get_unchecked(idx - 1) };
324+
if cursor.data_buffers().is_empty() {
325+
return l_view.eq(r_view);
326+
}
327+
328+
let l_len = *l_view as u32;
329+
319330
let r_len = *r_view as u32;
320331
if l_len != r_len {
321332
return false;
@@ -326,10 +337,21 @@ impl CursorValues for StringViewArray {
326337
}
327338
}
328339

340+
#[inline(always)]
329341
fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering {
330342
// SAFETY: Prior assertions guarantee that l_idx and r_idx are valid indices.
331343
// Null-checks are assumed to have been handled in the wrapper (e.g., ArrayValues).
332344
// And the bound is checked in is_finished, it is safe to call get_unchecked
345+
if l.data_buffers().is_empty() && r.data_buffers().is_empty() {
346+
let l_view = unsafe { l.views().get_unchecked(l_idx) };
347+
let r_view = unsafe { r.views().get_unchecked(r_idx) };
348+
let l_len = *l_view as u32;
349+
let r_len = *r_view as u32;
350+
let l_data = unsafe { StringViewArray::inline_value(l_view, l_len as usize) };
351+
let r_data = unsafe { StringViewArray::inline_value(r_view, r_len as usize) };
352+
return l_data.cmp(r_data);
353+
}
354+
333355
unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx) }
334356
}
335357
}

0 commit comments

Comments
 (0)