Skip to content

Commit

Permalink
implement from_utf16_lossy
Browse files Browse the repository at this point in the history
* add proptests
* add doc tests
* add to fuzzing harness
  • Loading branch information
ParkMyCar committed Aug 21, 2022
1 parent 2dd09b0 commit f831227
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 0 deletions.
30 changes: 30 additions & 0 deletions compact_str/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,36 @@ impl CompactString {
Ok(ret)
}

/// Builds a `CompactString` from the UTF-16 code units in `buf`. This conversion does not
/// fail: any data that cannot be decoded is swapped for the replacement character
/// (`U+FFFD`), �.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// // 𝄞mus<invalid>ic<invalid>
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
///           0x0073, 0xDD1E, 0x0069, 0x0063,
///           0xD834];
///
/// assert_eq!(CompactString::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
///            CompactString::from_utf16_lossy(v));
/// ```
#[inline]
pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
    let units = buf.as_ref();
    // Seed the capacity with the number of code units in the input.
    let mut decoded = CompactString::with_capacity(units.len());
    std::char::decode_utf16(units.iter().copied()).for_each(|unit| {
        if let Ok(ch) = unit {
            decoded.push(ch);
        } else {
            // Undecodable input: emit the replacement character instead of failing.
            decoded.push_str("�");
        }
    });
    decoded
}

/// Returns the length of the [`CompactString`] in `bytes`, not [`char`]s or graphemes.
///
/// When using `UTF-8` encoding (which all strings in Rust do) a single character will be 1 to 4
Expand Down
17 changes: 17 additions & 0 deletions compact_str/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,23 @@ fn proptest_from_utf16_random(#[strategy(rand_u16s())] buf: Vec<u16>) {
}
}

#[proptest]
#[cfg_attr(miri, ignore)]
fn proptest_from_utf16_lossy_roundtrips(#[strategy(rand_unicode())] control: String) {
    // `control` is a valid `String`, so its UTF-16 encoding should decode back to the
    // same text without any replacement characters being inserted.
    let encoded = control.encode_utf16().collect::<Vec<u16>>();

    assert_eq!(CompactString::from_utf16_lossy(&encoded), control);
}

#[proptest]
#[cfg_attr(miri, ignore)]
fn proptest_from_utf16_lossy_random(#[strategy(rand_u16s())] buf: Vec<u16>) {
    // Arbitrary code units: the standard library's lossy decoder is the reference, and
    // `CompactString` must produce exactly the same text.
    let expected = String::from_utf16_lossy(&buf);
    let actual = CompactString::from_utf16_lossy(&buf);
    assert_eq!(
        actual, expected,
        "CompactString and String disagree on lossy UTF-16 decoding"
    );
}

#[proptest]
#[cfg_attr(miri, ignore)]
fn proptest_remove(#[strategy(rand_unicode_with_range(1..80))] mut control: String, val: u8) {
Expand Down
9 changes: 9 additions & 0 deletions fuzz/src/creation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ pub enum Creation<'a> {
BytesUnchecked(&'a [u8]),
/// Create using [`CompactString::from_utf16`]
BytesUtf16(Vec<u16>),
/// Create using [`CompactString::from_utf16_lossy`]
BytesUtf16Lossy(Vec<u16>),
/// Create using [`CompactString::from_utf8_buf`]
Buf(&'a [u8]),
/// Create using [`CompactString::from_utf8_buf_unchecked`]
Expand Down Expand Up @@ -342,6 +344,13 @@ impl Creation<'_> {
_ => panic!("CompactString and String read UTF-16 differently?"),
}
}
BytesUtf16Lossy(data) => {
let compact = CompactString::from_utf16_lossy(&data);
let std_str = String::from_utf16_lossy(&data);

assert_eq!(compact, std_str);
Some((compact, std_str))
}
Buf(data) => {
let mut buffer = Cursor::new(data);

Expand Down

0 comments on commit f831227

Please sign in to comment.