Skip to content

Commit 66195d8

Browse files
committed
optimize continuation byte validation of strings containing multibyte chars
```
old, -O2, x86-64
test str::str_validate_emoji ... bench: 4,606 ns/iter (+/- 64)

new, -O2, x86-64
test str::str_validate_emoji ... bench: 3,837 ns/iter (+/- 60)
```
1 parent b627866 commit 66195d8

File tree

1 file changed

+4
-6
lines changed

1 file changed

+4
-6
lines changed

library/core/src/str/validations.rs

+4-6
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
163163
// %xF4 %x80-8F 2( UTF8-tail )
164164
match w {
165165
2 => {
166-
if next!() & !CONT_MASK != TAG_CONT_U8 {
166+
if !utf8_is_cont_byte(next!()) {
167167
err!(Some(1))
168168
}
169169
}
@@ -175,7 +175,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
175175
| (0xEE..=0xEF, 0x80..=0xBF) => {}
176176
_ => err!(Some(1)),
177177
}
178-
if next!() & !CONT_MASK != TAG_CONT_U8 {
178+
if !utf8_is_cont_byte(next!()) {
179179
err!(Some(2))
180180
}
181181
}
@@ -184,10 +184,10 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
184184
(0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
185185
_ => err!(Some(1)),
186186
}
187-
if next!() & !CONT_MASK != TAG_CONT_U8 {
187+
if !utf8_is_cont_byte(next!()) {
188188
err!(Some(2))
189189
}
190-
if next!() & !CONT_MASK != TAG_CONT_U8 {
190+
if !utf8_is_cont_byte(next!()) {
191191
err!(Some(3))
192192
}
193193
}
@@ -258,8 +258,6 @@ pub fn utf8_char_width(b: u8) -> usize {
258258

259259
/// Mask of the value bits of a continuation byte.
260260
const CONT_MASK: u8 = 0b0011_1111;
261-
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
262-
const TAG_CONT_U8: u8 = 0b1000_0000;
263261

264262
// truncate `&str` to length at most equal to `max`
265263
// return `true` if it were truncated, and the new str.

0 commit comments

Comments
 (0)