From 5b20f3cd87d4a11429bca22c22df675577167300 Mon Sep 17 00:00:00 2001 From: Ekleog-NEAR <96595974+Ekleog-NEAR@users.noreply.github.com> Date: Fri, 26 May 2023 13:23:28 +0200 Subject: [PATCH] Make <&str as Arbitrary>::arbitrary_take_rest less likely to fail Before the changes, if any character in the remaining data were to be invalid utf8, it would fail to generate a string. After the change, it takes all it can up to the first invalid utf8 byte. --- src/lib.rs | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dfaebd0..c665a4f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -834,29 +834,33 @@ where } } +fn arbitrary_str<'a>(u: &mut Unstructured<'a>, size: usize) -> Result<&'a str> { + match str::from_utf8(u.peek_bytes(size).unwrap()) { + Ok(s) => { + u.bytes(size).unwrap(); + Ok(s) + } + Err(e) => { + let i = e.valid_up_to(); + let valid = u.bytes(i).unwrap(); + let s = unsafe { + debug_assert!(str::from_utf8(valid).is_ok()); + str::from_utf8_unchecked(valid) + }; + Ok(s) + } + } +} + impl<'a> Arbitrary<'a> for &'a str { fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> { let size = u.arbitrary_len::<u8>()?; - match str::from_utf8(u.peek_bytes(size).unwrap()) { - Ok(s) => { - u.bytes(size).unwrap(); - Ok(s) - } - Err(e) => { - let i = e.valid_up_to(); - let valid = u.bytes(i).unwrap(); - let s = unsafe { - debug_assert!(str::from_utf8(valid).is_ok()); - str::from_utf8_unchecked(valid) - }; - Ok(s) - } - } + arbitrary_str(u, size) } - fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> { - let bytes = u.take_rest(); - str::from_utf8(bytes).map_err(|_| Error::IncorrectFormat) + fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result<Self> { + let size = u.len(); + arbitrary_str(&mut u, size) } #[inline] @@ -1255,6 +1259,7 @@ mod test { #[test] fn arbitrary_take_rest() { + // Basic examples let x = [1, 2, 3, 4]; assert_eq!( 
checked_arbitrary_take_rest::<&[u8]>(Unstructured::new(&x)).unwrap(), @@ -1273,6 +1278,7 @@ mod test { "\x01\x02\x03\x04" ); + // Empty remainder assert_eq!( checked_arbitrary_take_rest::<&[u8]>(Unstructured::new(&[])).unwrap(), &[] @@ -1281,6 +1287,12 @@ mod test { checked_arbitrary_take_rest::<Vec<u8>>(Unstructured::new(&[])).unwrap(), &[] ); + + // Cannot consume all but can consume part of the input + assert_eq!( + checked_arbitrary_take_rest::<String>(Unstructured::new(&[1, 0xFF, 2])).unwrap(), + "\x01" + ); } #[test]