Skip to content

Commit 99811f8

Browse files
rambleraptor and alamb authored
check bit width to avoid panic in DeltaBitPackDecoder (#8688)
# Which issue does this PR close?

We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax.

- Part of #7806

# Rationale for this change

The `DeltaBitPackDecoder` can panic if it encounters a bit width in the encoded data that is larger than the bit width of the data type being decoded.

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent e9a7fe5 commit 99811f8

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

parquet/src/encodings/decoding.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,19 @@ where
631631
self.next_block()
632632
}
633633
}
634+
635+
/// Verify the bit width is smaller then the integer type that it is trying to decode.
636+
#[inline]
637+
fn check_bit_width(&self, bit_width: usize) -> Result<()> {
638+
if bit_width > std::mem::size_of::<T::T>() * 8 {
639+
return Err(general_err!(
640+
"Invalid delta bit width {} which is larger than expected {} ",
641+
bit_width,
642+
std::mem::size_of::<T::T>() * 8
643+
));
644+
}
645+
Ok(())
646+
}
634647
}
635648

636649
impl<T: DataType> Decoder<T> for DeltaBitPackDecoder<T>
@@ -726,6 +739,7 @@ where
726739
}
727740

728741
let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize;
742+
self.check_bit_width(bit_width)?;
729743
let batch_to_read = self.mini_block_remaining.min(to_read - read);
730744

731745
let batch_read = self
@@ -796,6 +810,7 @@ where
796810
}
797811

798812
let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize;
813+
self.check_bit_width(bit_width)?;
799814
let mini_block_to_skip = self.mini_block_remaining.min(to_skip - skip);
800815
let mini_block_should_skip = mini_block_to_skip;
801816

@@ -2091,4 +2106,51 @@ mod tests {
20912106
v
20922107
}
20932108
}
2109+
2110+
#[test]
fn test_delta_bit_packed_invalid_bit_width() {
    // Manually crafted buffer whose first mini block declares a bit width (33)
    // that does not fit the 32-bit integer type being decoded.
    let corrupted_buffer = Bytes::from(vec![
        // block_size = 128, spread over two bytes (presumably a varint — confirm)
        128u8, 1,
        // mini_blocks_per_block = 4
        4,
        // num_values = 32
        32,
        // first_value = 0
        0,
        // min_delta = 0
        0,
        // bit_widths, one for each of the 4 mini blocks; the first is invalid
        33, 0, 0, 0,
    ]);
    let expected = "Invalid delta bit width 33 which is larger than expected 32";

    // Reading values must surface the invalid bit width as an error.
    let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
    decoder.set_data(corrupted_buffer.clone(), 32).unwrap();
    let mut read_buffer = vec![0; 32];
    let err = decoder.get(&mut read_buffer).unwrap_err();
    assert!(err.to_string().contains(expected), "{}", err);

    // Skipping values must surface the same error.
    let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
    decoder.set_data(corrupted_buffer, 32).unwrap();
    let err = decoder.skip(32).unwrap_err();
    assert!(err.to_string().contains(expected), "{}", err);
}
20942156
}

0 commit comments

Comments
 (0)