perf(codegen): optimize printing strings (#12040)

overlookmotel · overlookmotel · commit 5b99cad7c5a8 · 2025-07-03T07:24:28.000Z
Follow-on after #11782. That PR fixed escaping `</script` in strings, but it regressed some codegen benchmarks by 2%. Optimize the string search to win some of that perf back by: 1. Doing a preliminary search for `<` first, and only the more expensive search for `</script` once a `<` is found. 2. Searching longer strings for `<` in chunks of 16 bytes, using SIMD.
diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs
@@ -2,8 +2,13 @@
 //!
 //! Code adapted from
 //! * [esbuild](https://github.com/evanw/esbuild/blob/v0.24.0/internal/js_printer/js_printer.go)
+
 #![warn(missing_docs)]
 
+use std::{cmp, slice};
+
+use oxc_data_structures::pointer_ext::PointerExt;
+
 mod binary_expr_visitor;
 mod comment;
 mod context;
@@ -30,7 +35,7 @@ use crate::{
     comment::CommentsMap,
     operator::Operator,
     sourcemap_builder::SourcemapBuilder,
-    str::{Quote, is_script_close_tag},
+    str::{Quote, cold_branch, is_script_close_tag},
 };
 pub use crate::{
     context::Context,
@@ -236,35 +241,113 @@ impl<'a> Codegen<'a> {
     /// Push str into the buffer, escaping `</script` to `<\/script`.
     #[inline]
     pub fn print_str_escaping_script_close_tag(&mut self, s: &str) {
-        let slice = s.as_bytes();
+        // `</script` will be very rare. So we try to make the search as quick as possible by:
+        // 1. Searching for `<` first, and only checking if followed by `/script` once `<` is found.
+        // 2. Searching longer strings for `<` in chunks of 16 bytes using SIMD, and only doing the
+        //    more expensive byte-by-byte search once a `<` is found.
+
+        let bytes = s.as_bytes();
         let mut consumed = 0;
-        let mut i = 0;
-
-        // Only check when remaining string has length larger than 8
-        while i + 8 <= slice.len() {
-            if is_script_close_tag(&slice[i..i + 8]) {
-                // Push str up to and including `<`. Skip `/`. Write `\/` instead.
-                // Skip over `script` - it'll be written in next chunk.
-                // SAFETY:
-                // The slice is guaranteed to be a valid UTF-8 string.
-                // `consumed` is always on a UTF-8 char boundary.
-                // `i` is on `<`, so `i + 1` is a UTF-8 char boundary.
-                unsafe {
-                    self.code.print_bytes_unchecked(&slice[consumed..=i]);
+
+        #[expect(clippy::unnecessary_safety_comment)]
+        // Search range of bytes for `</script`, byte by byte.
+        //
+        // Bytes between `ptr` and `last_ptr` (inclusive) are searched for `<`.
+        // If `<` is found, the following 7 bytes are checked to see if they're `/script`.
+        //
+        // SAFETY:
+        // * `ptr` and `last_ptr` must be within bounds of `bytes`.
+        // * `last_ptr` must be greater or equal to `ptr`.
+        // * `last_ptr` must be no later than 8 bytes before end of string.
+        //   i.e. safe to read 8 bytes at `end_ptr`.
+        let mut search_bytes = |mut ptr: *const u8, last_ptr| {
+            loop {
+                // SAFETY: `ptr` is always less than or equal to `last_ptr`.
+                // `last_ptr` is within bounds of `bytes`, so safe to read a byte at `ptr`.
+                let byte = unsafe { *ptr.as_ref().unwrap_unchecked() };
+                if byte == b'<' {
+                    // SAFETY: `ptr <= last_ptr`, and `last_ptr` points to no later than
+                    // 8 bytes before end of string, so safe to read 8 bytes from `ptr`
+                    let slice = unsafe { slice::from_raw_parts(ptr, 8) };
+                    if is_script_close_tag(slice) {
+                        // Push str up to and including `<`. Skip `/`. Write `\/` instead.
+                        // SAFETY:
+                        // `consumed` is initially 0, and only updated below to be after `/`,
+                        // so in bounds, and on a UTF-8 char boundary.
+                        // `index` is on `<`, so `index + 1` is in bounds and a UTF-8 char boundary.
+                        // `consumed` is always less than `index + 1` as it's set on a previous round.
+                        unsafe {
+                            let index = ptr.offset_from_usize(bytes.as_ptr());
+                            let before = bytes.get_unchecked(consumed..=index);
+                            self.code.print_bytes_unchecked(before);
+
+                            // Set `consumed` to after `/`
+                            consumed = index + 2;
+                        }
+                        self.print_str("\\/");
+                        // Note: We could advance `ptr` by 8 bytes here to skip over `</script`,
+                        // but this branch will be very rarely taken, so it's better to keep it simple
+                    }
                 }
-                self.code.print_str("\\/");
-                consumed = i + 2;
-                i += 8;
-            } else {
-                i += 1;
+
+                if ptr == last_ptr {
+                    break;
+                }
+                // SAFETY: `ptr` is less than `last_ptr`, which is in bounds, so safe to increment `ptr`
+                ptr = unsafe { ptr.add(1) };
+            }
+        };
+
+        // Search string in chunks of 16 bytes
+        let mut chunks = bytes.chunks_exact(16);
+        for (chunk_index, chunk) in chunks.by_ref().enumerate() {
+            #[expect(clippy::missing_panics_doc, reason = "infallible")]
+            let chunk: &[u8; 16] = chunk.try_into().unwrap();
+
+            // Compiler vectorizes this loop to a few SIMD ops
+            let mut contains_lt = false;
+            for &byte in chunk {
+                if byte == b'<' {
+                    contains_lt = true;
+                }
+            }
+
+            if contains_lt {
+                // Chunk contains at least one `<`.
+                // Find them, and check if they're the start of `</script`.
+                //
+                // SAFETY: `index` is byte index of start of chunk.
+                // We search bytes starting with first byte of chunk, and ending with last byte of chunk.
+                // i.e. `index` to `index + 15` (inclusive).
+                // If this chunk is towards the end of the string, reduce the range of bytes searched
+                // so the last byte searched has at least 7 further bytes after it.
+                // i.e. safe to read 8 bytes at `last_ptr`.
+                cold_branch(|| unsafe {
+                    let index = chunk_index * 16;
+                    let remaining_bytes = bytes.len() - index;
+                    let last_offset = cmp::min(remaining_bytes - 8, 15);
+                    let ptr = bytes.as_ptr().add(index);
+                    let last_ptr = ptr.add(last_offset);
+                    search_bytes(ptr, last_ptr);
+                });
             }
         }
 
-        // SAFETY:
-        // The slice guarantees to be a valid UTF-8 string.
-        // The consumed index is always pointed to a UTF-8 char boundary.
+        // Search last chunk byte-by-byte.
+        // Skip this if less than 8 bytes remaining, because less than 8 bytes can't contain `</script`.
+        let last_chunk = chunks.remainder();
+        if last_chunk.len() >= 8 {
+            let ptr = last_chunk.as_ptr();
+            // SAFETY: `last_chunk.len() >= 8`, so `- 8` cannot wrap.
+            // `last_chunk.as_ptr().add(last_chunk.len() - 8)` is in bounds of `last_chunk`.
+            let last_ptr = unsafe { ptr.add(last_chunk.len() - 8) };
+            search_bytes(ptr, last_ptr);
+        }
+
+        // SAFETY: `consumed` is either 0, or after `/`, so on a UTF-8 char boundary, and in bounds
         unsafe {
-            self.code.print_bytes_unchecked(&slice[consumed..]);
+            let remaining = bytes.get_unchecked(consumed..);
+            self.code.print_bytes_unchecked(remaining);
         }
     }