From c9ce45cb1e11408c2f0c7cdc89f22bf54406eb20 Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Sat, 16 Aug 2025 23:45:45 +0100 Subject: [PATCH] Optimize `char::encode_utf8` Save a few instructions in `encode_utf8_raw_unchecked` by performing manual CSE. --- library/core/src/char/methods.rs | 47 ++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 7ee0962721f5b..985e669c92d79 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1872,28 +1872,33 @@ pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) { // SAFETY: The caller must guarantee that the buffer pointed to by `dst` // is at least `len` bytes long. unsafe { - match len { - 1 => { - *dst = code as u8; - } - 2 => { - *dst = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - *dst.add(1) = (code & 0x3F) as u8 | TAG_CONT; - } - 3 => { - *dst = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - *dst.add(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.add(2) = (code & 0x3F) as u8 | TAG_CONT; - } - 4 => { - *dst = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - *dst.add(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; - *dst.add(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.add(3) = (code & 0x3F) as u8 | TAG_CONT; - } - // SAFETY: `char` always takes between 1 and 4 bytes to encode in UTF-8. - _ => crate::hint::unreachable_unchecked(), + if len == 1 { + *dst = code as u8; + return; + } + + let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT; + let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT; + let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT; + let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B; + + if len == 2 { + *dst = last2 | TAG_TWO_B; + *dst.add(1) = last1; + return; } + + if len == 3 { + *dst = last3 | TAG_THREE_B; + *dst.add(1) = last2; + *dst.add(2) = last1; + return; + } + + *dst = last4; + *dst.add(1) = last3; + *dst.add(2) = last2; + *dst.add(3) = last1; } }