@@ -38,6 +38,9 @@ pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
3838 to_type : & DataType ,
3939 cast_options : & CastOptions ,
4040) -> Result < ArrayRef , ArrowError > {
 41+ // Dispatch on the physical layout of `array`: most primitive layouts can
 42+ // reuse `runs_for_primitive`, while the remaining layouts fall through to
 43+ // the specialized implementations below.
4144 match array. data_type ( ) {
4245 DataType :: RunEndEncoded ( _, _) => {
4346 let run_array = array
@@ -269,6 +272,7 @@ fn runs_for_boolean(array: &BooleanArray) -> (Vec<usize>, Vec<usize>) {
269272 let mut current_value = if current_valid { array. value ( 0 ) } else { false } ;
270273
271274 for idx in 1 ..len {
275+ // Treat a change in validity the same as a change in value so null boundaries are recorded.
272276 let valid = array. is_valid ( idx) ;
273277 let mut boundary = false ;
274278 if current_valid && valid {
@@ -309,6 +313,7 @@ fn runs_for_primitive<T: ArrowPrimitiveType>(
309313 let mut current = unsafe { * values. get_unchecked ( 0 ) } ;
310314 let mut idx = 1 ;
311315 while idx < len {
316+ // Attempt to advance in 16-byte chunks before falling back to scalar comparison.
312317 let boundary = scan_run_end :: < T > ( values, current, idx) ;
313318 if boundary == len {
314319 break ;
@@ -394,6 +399,7 @@ fn runs_for_binary_like<T: Copy>(
394399 for idx in 1 ..len {
395400 let start = to_usize ( offsets[ idx] ) ;
396401 let end = to_usize ( offsets[ idx + 1 ] ) ;
402+ // Any difference in byte length or payload means a new run.
397403 if ( end - start) != ( current_end - current_start)
398404 || values[ start..end] != values[ current_start..current_end]
399405 {
@@ -413,6 +419,7 @@ fn runs_for_binary_like<T: Copy>(
413419 let start = to_usize ( offsets[ idx] ) ;
414420 let end = to_usize ( offsets[ idx + 1 ] ) ;
415421 let ( current_start, current_end) = current_range;
422+ // Keep reusing the current byte-range as long as both validity and payload match.
416423 if ( end - start) != ( current_end - current_start)
417424 || values[ start..end] != values[ current_start..current_end]
418425 {
@@ -482,6 +489,7 @@ fn runs_for_fixed_size_binary(array: &FixedSizeBinaryArray) -> (Vec<usize>, Vec<
482489 for idx in 1 ..len {
483490 let start = idx * width;
484491 let slice = & values[ start..start + width] ;
492+ // Width is constant, so a simple byte slice comparison suffices.
485493 if slice != current_slice {
486494 ensure_capacity ( & mut run_boundaries, len) ;
487495 run_boundaries. push ( idx) ;
@@ -538,6 +546,7 @@ fn runs_generic(array: &dyn Array) -> (Vec<usize>, Vec<usize>) {
538546 let mut current_data = array. slice ( 0 , 1 ) . to_data ( ) ;
539547 for idx in 1 ..len {
540548 let next_data = array. slice ( idx, 1 ) . to_data ( ) ;
549+ // Fallback for exotic types: compare `ArrayData` views directly.
541550 if current_data != next_data {
542551 ensure_capacity ( & mut run_boundaries, len) ;
543552 run_boundaries. push ( idx) ;
@@ -566,6 +575,7 @@ fn ensure_capacity(vec: &mut Vec<usize>, total_len: usize) {
566575
567576fn finalize_runs ( mut run_boundaries : Vec < usize > , len : usize ) -> ( Vec < usize > , Vec < usize > ) {
568577 let mut values_indexes = Vec :: with_capacity ( run_boundaries. len ( ) + 1 ) ;
578+ // Values array always pulls the first element of each run; index 0 is by definition a run start.
569579 values_indexes. push ( 0 ) ;
570580 values_indexes. extend_from_slice ( & run_boundaries) ;
571581 run_boundaries. push ( len) ;
@@ -579,6 +589,7 @@ fn scan_run_end<T: ArrowPrimitiveType>(
579589 start : usize ,
580590) -> usize {
581591 let element_size = std:: mem:: size_of :: < T :: Native > ( ) ;
 592+ // Only attempt the 16-byte chunked search when the element fits in 8 bytes and its size divides 16 evenly.
582593 if element_size <= 8 && 16 % element_size == 0 {
583594 let elements_per_chunk = 16 / element_size;
584595 return scan_run_end_chunk :: < T > ( values, current, start, elements_per_chunk, element_size) ;
@@ -601,6 +612,9 @@ fn scan_run_end_chunk<T: ArrowPrimitiveType>(
601612 }
602613
603614 let mut pattern_bytes = [ 0u8 ; 16 ] ;
615+ // Safety: `T::Native` is guaranteed by `ArrowPrimitiveType` to have a plain-old-data layout,
616+ // allowing the value to be viewed as raw bytes. We copy exactly `element_size` bytes, so the
617+ // slice built from `current` stays within bounds.
604618 unsafe {
605619 let value_bytes =
606620 std:: slice:: from_raw_parts ( & current as * const T :: Native as * const u8 , element_size) ;
@@ -611,6 +625,7 @@ fn scan_run_end_chunk<T: ArrowPrimitiveType>(
611625 let pattern = u128:: from_ne_bytes ( pattern_bytes) ;
612626
613627 while idx + elements_per_chunk <= len {
 628+ // SAFETY: the loop guard `idx + elements_per_chunk <= len` keeps the 16-byte read inside `values`, and `read_unaligned` tolerates the potentially unaligned pointer.
614629 let chunk = unsafe { ( values. as_ptr ( ) . add ( idx) as * const u128 ) . read_unaligned ( ) } ;
615630 if chunk != pattern {
616631 for offset in 0 ..elements_per_chunk {
@@ -619,7 +634,6 @@ fn scan_run_end_chunk<T: ArrowPrimitiveType>(
619634 return idx + offset;
620635 }
621636 }
622- unreachable ! ( "chunk mismatch without locating differing element" ) ;
623637 }
624638 idx += elements_per_chunk;
625639 }
0 commit comments