diff --git a/library/core/src/char/decode.rs b/library/core/src/char/decode.rs index f3fef85ef1de5..81d49ffe06e6d 100644 --- a/library/core/src/char/decode.rs +++ b/library/core/src/char/decode.rs @@ -121,10 +121,15 @@ impl> Iterator for DecodeUtf16 { fn size_hint(&self) -> (usize, Option) { let (low, high) = self.iter.size_hint(); - // `self.buf` will never contain the first part of a surrogate, - // so the presence of `buf == Some(...)` always means +1 - // on lower and upper bound. - let addition_from_buf = self.buf.is_some() as usize; + // If + // - `self.buf` contains a non surrogate (`u < 0xD800 || 0xDFFF < u`), or + // - `high == Some(0)` (and `self.buf` contains a leading surrogate since + // it can never contain a trailing surrogate) + // + // then buf contains an additional character or error that doesn't + // need a pair from `self.iter`, so it's +1 additional element. + let addition_from_buf = + self.buf.map_or(false, |u| u < 0xD800 || 0xDFFF < u || high == Some(0)) as usize; // `self.iter` could contain entirely valid surrogates (2 elements per // char), or entirely non-surrogates (1 element per char). diff --git a/library/core/tests/char.rs b/library/core/tests/char.rs index d776a08323c03..347ac04feb31c 100644 --- a/library/core/tests/char.rs +++ b/library/core/tests/char.rs @@ -319,7 +319,7 @@ fn test_decode_utf16_size_hint() { assert!( lower <= count && count <= upper.unwrap(), - "lower = {lower}, upper = {upper:?}" + "lower = {lower}, count = {count}, upper = {upper:?}" ); if let None = iter.next() { @@ -328,6 +328,7 @@ fn test_decode_utf16_size_hint() { } } + check(&[0xD800, 0xD800, 0xDC00]); check(&[0xD800, 0x41, 0x42]); check(&[0xD800, 0]); check(&[0xD834, 0x006d]);