@@ -419,6 +419,11 @@ impl RleDecoder {
419419 & mut buffer[ values_read..values_read + num_values] ,
420420 self . bit_width as usize ,
421421 ) ;
422+ if num_values == 0 {
423+ // Handle writers which truncate the final block
424+ self . bit_packed_left = 0 ;
425+ continue ;
426+ }
422427 self . bit_packed_left -= num_values as u32 ;
423428 values_read += num_values;
424429 } else if !self . reload ( ) {
@@ -467,6 +472,11 @@ impl RleDecoder {
467472 & mut index_buf[ ..num_values] ,
468473 self . bit_width as usize ,
469474 ) ;
475+ if num_values == 0 {
476+ // Handle writers which truncate the final block
477+ self . bit_packed_left = 0 ;
478+ break ;
479+ }
470480 for i in 0 ..num_values {
471481 buffer[ values_read + i] . clone_from ( & dict[ index_buf[ i] as usize ] )
472482 }
@@ -743,6 +753,42 @@ mod tests {
743753 }
744754 }
745755
#[test]
fn test_truncated_rle() {
    // A page's last bit-packed run is allowed to hold a value count that is
    // not a multiple of 8, yet the run header only records
    // `(bit-packed-run-len) / 8`. The true number of values is therefore
    // ambiguous, and some writers do not pad the run out to the declared
    // length. The decoder must stop cleanly when the bytes run out.

    // Header byte declaring a bit-packed run of 3 * 8 = 24 values ...
    let header: u8 = (3 << 1) | 1;
    let mut encoded: Vec<u8> = vec![header];
    // ... followed by only 20 values, each bit-packed with a bit width of 8.
    encoded.extend_from_slice(&[0xFF; 20]);
    let encoded = ByteBufferPtr::new(encoded);

    let mut decoder = RleDecoder::new(8);

    // Plain decode path.
    decoder.set_data(encoded.clone());
    let mut plain_out = vec![0_u16; 100];
    let plain_read = decoder.get_batch(&mut plain_out).unwrap();

    assert_eq!(plain_read, 20);
    assert!(plain_out[..20].iter().all(|&value| value == 255));

    // Reset decoder
    decoder.set_data(encoded);

    // Dictionary decode path, using a dictionary in which every index maps
    // to itself.
    let dict: Vec<u16> = (0..256).collect();
    let mut dict_out = vec![0_u16; 100];
    let dict_read = decoder
        .get_batch_with_dict(&dict, &mut dict_out, 100)
        .unwrap();

    assert_eq!(dict_read, 20);
    assert!(dict_out[..20].iter().all(|&value| value == 255));
}
791+
746792 #[ test]
747793 fn test_rle_specific_roundtrip ( ) {
748794 let bit_width = 1 ;
0 commit comments