refactor(codegen): use SliceIterExt in string printer (#12296)

overlookmotel · overlookmotel · commit bb047a96361b · 2025-07-16T01:31:51.000Z
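Use the `SliceIterExt` trait in the string printer. This allows removing the const generic parameters from `consume_bytes_unchecked` and `flush_and_consume_bytes`, which now take the byte count as a runtime argument instead. The runtime-count implementations in `SliceIterExt` are just as efficient (or possibly more so).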
diff --git a/crates/oxc_codegen/Cargo.toml b/crates/oxc_codegen/Cargo.toml
@@ -22,7 +22,7 @@ doctest = true
 [dependencies]
 oxc_allocator = { workspace = true }
 oxc_ast = { workspace = true }
-oxc_data_structures = { workspace = true, features = ["code_buffer", "pointer_ext", "stack"] }
+oxc_data_structures = { workspace = true, features = ["code_buffer", "pointer_ext", "slice_iter_ext", "stack"] }
 oxc_index = { workspace = true }
 oxc_semantic = { workspace = true }
 oxc_sourcemap = { workspace = true }
diff --git a/crates/oxc_codegen/src/str.rs b/crates/oxc_codegen/src/str.rs
@@ -1,7 +1,9 @@
 use std::slice;
 
 use oxc_ast::ast::StringLiteral;
-use oxc_data_structures::{assert_unchecked, pointer_ext::PointerExt};
+use oxc_data_structures::{
+    assert_unchecked, pointer_ext::PointerExt, slice_iter_ext::SliceIterExt,
+};
 use oxc_syntax::identifier::{LS, NBSP, PS};
 
 use crate::Codegen;
@@ -118,34 +120,22 @@ impl PrintStringState<'_> {
     ///   before calling other methods e.g. `flush`.
     #[inline]
     unsafe fn consume_byte_unchecked(&mut self) {
-        // `assert_unchecked!` produces less instructions than `self.bytes.next().unwrap_unchecked()`
-        // https://godbolt.org/z/TWzfK1eKj
-
         // SAFETY: Caller guarantees there is a byte to consume in `bytes` iterator,
-        // and that consuming it leaves the iterator on a UTF-8 char boundary.
-        unsafe { assert_unchecked!(!self.bytes.as_slice().is_empty()) };
-        self.bytes.next().unwrap();
+        // and that consuming it leaves the iterator on a UTF-8 char boundary
+        unsafe { self.bytes.next_unchecked() };
     }
 
-    /// Advance the `bytes` iterator by `N` bytes.
+    /// Advance the `bytes` iterator by `count` bytes.
     ///
     /// # SAFETY
     ///
-    /// * There must be at least `N` more bytes in the `bytes` iterator.
+    /// * There must be at least `count` more bytes in the `bytes` iterator.
     /// * After this call, `bytes` iterator must be left on a UTF-8 character boundary.
     #[inline]
-    unsafe fn consume_bytes_unchecked<const N: usize>(&mut self) {
-        // `assert_unchecked!` produces many less instructions than
-        // `for _i in 0..N { self.bytes.next().unwrap_unchecked(); }`.
-        // The `unwrap` in loop below is required for compact assembly.
-        // https://godbolt.org/z/TWzfK1eKj
-
-        // SAFETY: Caller guarantees there are `N` bytes to consume in `bytes` iterator,
+    unsafe fn consume_bytes_unchecked(&mut self, count: usize) {
+        // SAFETY: Caller guarantees there are `count` bytes to consume in `bytes` iterator,
         // and that consuming them leaves the iterator on a UTF-8 char boundary.
-        unsafe { assert_unchecked!(self.bytes.as_slice().len() >= N) };
-        for _i in 0..N {
-            self.bytes.next().unwrap();
-        }
+        unsafe { self.bytes.advance_unchecked(count) };
     }
 
     /// Set the start of next chunk to be current position of `bytes` iterator.
@@ -164,23 +154,22 @@ impl PrintStringState<'_> {
     #[inline]
     unsafe fn flush_and_consume_byte(&mut self, codegen: &mut Codegen) {
         // SAFETY: Caller guarantees `flush_and_consume_bytes`'s requirements are met
-        unsafe { self.flush_and_consume_bytes::<1>(codegen) };
+        unsafe { self.flush_and_consume_bytes(codegen, 1) };
     }
 
-    /// Flush current chunk to buffer, consume `N` bytes, and start next chunk after those bytes.
+    /// Flush current chunk to buffer, consume `count` bytes, and start next chunk after those bytes.
     ///
     /// # SAFETY
     ///
-    /// * There must be at least `N` more bytes in the `bytes` iterator.
+    /// * There must be at least `count` more bytes in the `bytes` iterator.
     /// * After this call, `bytes` iterator must be left on a UTF-8 character boundary.
     #[inline]
-    unsafe fn flush_and_consume_bytes<const N: usize>(&mut self, codegen: &mut Codegen) {
+    unsafe fn flush_and_consume_bytes(&mut self, codegen: &mut Codegen, count: usize) {
         self.flush(codegen);
 
-        debug_assert!(self.bytes.as_slice().len() >= N);
-        // SAFETY: Caller guarantees there are `N` bytes to consume in `bytes` iterator,
+        // SAFETY: Caller guarantees there are `count` bytes to consume in `bytes` iterator,
         // and that consuming them leaves the iterator on a UTF-8 char boundary
-        unsafe { self.consume_bytes_unchecked::<N>() };
+        unsafe { self.consume_bytes_unchecked(count) };
 
         self.start_chunk();
     }
@@ -601,7 +590,7 @@ unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
         // SAFETY: The check above ensures there are 6 bytes left, after consuming 2 already.
         // `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator
         // positioned on UTF-8 char boundary.
-        unsafe { state.consume_bytes_unchecked::<6>() };
+        unsafe { state.consume_bytes_unchecked(6) };
     }
 }
 
@@ -622,13 +611,13 @@ unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
         _ => {
             // Some other character starting with 0xE2. Advance past it.
             // SAFETY: 0xE2 is always the start of a 3-byte Unicode character
-            unsafe { state.consume_bytes_unchecked::<3>() };
+            unsafe { state.consume_bytes_unchecked(3) };
             return;
         }
     };
 
     // SAFETY: 0xE2 is always the start of a 3-byte Unicode character
-    unsafe { state.flush_and_consume_bytes::<3>(codegen) };
+    unsafe { state.flush_and_consume_bytes(codegen, 3) };
     codegen.print_str(replacement);
 }
 
@@ -642,12 +631,12 @@ unsafe fn print_non_breaking_space(codegen: &mut Codegen, state: &mut PrintStrin
     if next == NBSP_LAST_BYTE {
         // Character is NBSP.
         // SAFETY: 0xC2 is always the start of a 2-byte Unicode character.
-        unsafe { state.flush_and_consume_bytes::<2>(codegen) };
+        unsafe { state.flush_and_consume_bytes(codegen, 2) };
         codegen.print_str("\\xA0");
     } else {
         // Some other character starting with 0xC2. Advance past it.
         // SAFETY: 0xC2 is always the start of a 2-byte Unicode character.
-        unsafe { state.consume_bytes_unchecked::<2>() };
+        unsafe { state.consume_bytes_unchecked(2) };
     }
 }
 
@@ -675,12 +664,12 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
                 // Actual lossy replacement character.
                 // Flush up to and including the lossy replacement character, then skip the 4 hex bytes.
                 // SAFETY: 0xEF is always the start of a 3-byte Unicode character
-                unsafe { state.consume_bytes_unchecked::<3>() };
+                unsafe { state.consume_bytes_unchecked(3) };
                 state.flush(codegen);
                 // SAFETY: 0xEF is always the start of a 3-byte Unicode character.
                 // `bytes.as_slice()[3..7]` would have panicked if there weren't 4 more bytes after it.
                 // All those bytes are ASCII, so this leaves `bytes` on a UTF-8 char boundary.
-                unsafe { state.consume_bytes_unchecked::<4>() };
+                unsafe { state.consume_bytes_unchecked(4) };
                 // Start next chunk after the 4 hex bytes
                 state.start_chunk();
                 return;
@@ -695,7 +684,7 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
             // SAFETY: `bytes.as_slice()[3..7]` would have panicked if there weren't at least 7 bytes
             // remaining. First 3 bytes are lossy replacement character, and we just checked that
             // next 4 bytes are ASCII, so this leaves `bytes` on a UTF-8 char boundary.
-            unsafe { state.consume_bytes_unchecked::<7>() };
+            unsafe { state.consume_bytes_unchecked(7) };
 
             // Start next chunk after the 4 hex bytes
             state.start_chunk();
@@ -711,7 +700,7 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
     // `lone_surrogates` is `false` or character is some other character starting with 0xEF.
     // Advance past the character.
     // SAFETY: 0xEF is always the start of a 3-byte Unicode character
-    unsafe { state.consume_bytes_unchecked::<3>() };
+    unsafe { state.consume_bytes_unchecked(3) };
 }
 
 /// Call a closure while hinting to compiler that this branch is rarely taken.