Skip to content

Commit 2788762

Browse files
authored
fix JSON decoder error checking for UTF16 / surrogate parsing panic (#7721)
# Which issue does this PR close? Closes #7712. # Rationale for this change: The decoder should not panic on malformed input, especially in a fallible function that can return an error instead. # What changes are included in this PR? Validate that the high and low surrogates are in their expected ranges (high: 0xD800–0xDBFF, low: 0xDC00–0xDFFF) before combining them, which guarantees that the subtractions won't overflow. # Are there any user-facing changes? No (inputs that used to cause a panic now return an error instead, but that arguably does not count as a user-facing change).
1 parent e54b72b commit 2788762

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

arrow-json/src/reader/tape.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -705,9 +705,16 @@ fn err(b: u8, ctx: &str) -> ArrowError {
705705

706706
/// Creates a character from an UTF-16 surrogate pair
707707
fn char_from_surrogate_pair(low: u16, high: u16) -> Result<char, ArrowError> {
708-
let n = (((high - 0xD800) as u32) << 10) | ((low - 0xDC00) as u32 + 0x1_0000);
709-
char::from_u32(n)
710-
.ok_or_else(|| ArrowError::JsonError(format!("Invalid UTF-16 surrogate pair {n}")))
708+
match (low, high) {
709+
(0xDC00..=0xDFFF, 0xD800..=0xDBFF) => {
710+
let n = (((high - 0xD800) as u32) << 10) | ((low - 0xDC00) as u32 + 0x1_0000);
711+
char::from_u32(n)
712+
.ok_or_else(|| ArrowError::JsonError(format!("Invalid UTF-16 surrogate pair {n}")))
713+
}
714+
_ => Err(ArrowError::JsonError(format!(
715+
"Invalid UTF-16 surrogate pair. High: {high:#02X}, Low: {low:#02X}"
716+
))),
717+
}
711718
}
712719

713720
/// Writes `c` as UTF-8 to `out`
@@ -951,4 +958,15 @@ mod tests {
951958
let err = decoder.finish().unwrap_err().to_string();
952959
assert_eq!(err, "Json error: Encountered truncated UTF-8 sequence");
953960
}
961+
962+
/// Malformed surrogate escapes must surface as a decode error, not a panic.
#[test]
fn test_invalid_surrogates() {
    // Every case runs against a fresh decoder so no state carries over.
    let rejects = |json: &[u8]| TapeDecoder::new(16, 2).decode(json).is_err();

    // Two high surrogates back to back: the second is not a valid low half.
    assert!(rejects(b"{\"test\": \"\\ud800\\ud801\"}"));

    // Two low surrogates back to back: the first is not a valid high half.
    assert!(rejects(b"{\"test\": \"\\udc00\\udc01\"}"));
}
954972
}

0 commit comments

Comments
 (0)