From 05373503914c2728a4b81b07afe35286fe22d6e4 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 22 Oct 2025 14:47:04 -0700 Subject: [PATCH 1/3] Fix(parquet): check bit width to avoid panic in DeltaBitPackDecoder --- parquet/src/encodings/decoding.rs | 56 +++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index 91b31dbdfcd2..09f65eb356ba 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -631,6 +631,19 @@ where self.next_block() } } + + /// Verify the bit width is no larger than the integer type that it is trying to decode. + #[inline] + fn check_bit_width(&self, bit_width: usize) -> Result<()> { + if bit_width > std::mem::size_of::() * 8 { + return Err(general_err!( + "Invalid delta bit width {} which is larger than expected {} ", + bit_width, + std::mem::size_of::() * 8 + )); + } + Ok(()) + } } impl Decoder for DeltaBitPackDecoder @@ -726,6 +739,7 @@ where } let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize; + self.check_bit_width(bit_width)?; let batch_to_read = self.mini_block_remaining.min(to_read - read); let batch_read = self @@ -796,6 +810,7 @@ where } let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize; + self.check_bit_width(bit_width)?; let mini_block_to_skip = self.mini_block_remaining.min(to_skip - skip); let mini_block_should_skip = mini_block_to_skip; @@ -2091,4 +2106,45 @@ mod tests { v } } + + #[test] + fn test_delta_bit_packed_invalid_bit_width() { + // Manually craft a buffer with an invalid bit width + let mut buffer = vec![]; + // block_size = 128 + buffer.push(128); + buffer.push(1); + // mini_blocks_per_block = 4 + buffer.push(4); + // num_values = 32 + buffer.push(32); + // first_value = 0 + buffer.push(0); + // min_delta = 0 + buffer.push(0); + // bit_widths, one for each of the 4 mini blocks + buffer.push(33); // Invalid bit width + buffer.push(0); + 
buffer.push(0); + buffer.push(0); + + let corrupted_buffer = Bytes::from(buffer); + + let mut decoder = DeltaBitPackDecoder::::new(); + decoder.set_data(corrupted_buffer.clone(), 32).unwrap(); + let mut read_buffer = vec![0; 32]; + let err = decoder.get(&mut read_buffer).unwrap_err(); + assert!( + err.to_string().contains("Invalid delta bit width 33 which is larger than expected 32"), + "{}", err + ); + + let mut decoder = DeltaBitPackDecoder::::new(); + decoder.set_data(corrupted_buffer, 32).unwrap(); + let err = decoder.skip(32).unwrap_err(); + assert!( + err.to_string().contains("Invalid delta bit width 33 which is larger than expected 32"), + "{}", err + ); + } } From 6914219a972ff8007eca65eff88c26c5ca58ddc0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 24 Oct 2025 11:31:44 -0400 Subject: [PATCH 2/3] Fix clippy --- parquet/src/encodings/decoding.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index 09f65eb356ba..ab732ce1991f 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -2108,6 +2108,8 @@ mod tests { } #[test] + // Allow initializing a vector and pushing to it for clarity in this test + #[allow(clippy::vec_init_then_push)] fn test_delta_bit_packed_invalid_bit_width() { // Manually craft a buffer with an invalid bit width let mut buffer = vec![]; From 9ad5d4782e0a84be3e7292697a3efe0a5d79ca21 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 24 Oct 2025 11:32:58 -0400 Subject: [PATCH 3/3] fmt --- parquet/src/encodings/decoding.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index ab732ce1991f..de8738cf09f9 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -2137,16 +2137,20 @@ mod tests { let mut read_buffer = vec![0; 32]; let err = decoder.get(&mut read_buffer).unwrap_err(); assert!( - 
err.to_string().contains("Invalid delta bit width 33 which is larger than expected 32"), - "{}", err + err.to_string() + .contains("Invalid delta bit width 33 which is larger than expected 32"), + "{}", + err ); let mut decoder = DeltaBitPackDecoder::::new(); decoder.set_data(corrupted_buffer, 32).unwrap(); let err = decoder.skip(32).unwrap_err(); assert!( - err.to_string().contains("Invalid delta bit width 33 which is larger than expected 32"), - "{}", err + err.to_string() + .contains("Invalid delta bit width 33 which is larger than expected 32"), + "{}", + err ); } }