Skip to content

Commit 5b99cad

Browse files
committed
perf(codegen): optimize printing strings (#12040)
Follow-on after #11782. That PR fixed escaping `</script` in strings, but it regressed some codegen benchmarks by 2%. Optimize the string search to win some of that perf back by: 1. Doing a preliminary search for `<` first, and only the more expensive search for `</script` once a `<` is found. 2. Searching longer strings for `<` in chunks of 16 bytes, using SIMD.
1 parent 6d865af commit 5b99cad

File tree

1 file changed

+107
-24
lines changed

1 file changed

+107
-24
lines changed

crates/oxc_codegen/src/lib.rs

Lines changed: 107 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@
22
//!
33
//! Code adapted from
44
//! * [esbuild](https://github.com/evanw/esbuild/blob/v0.24.0/internal/js_printer/js_printer.go)
5+
56
#![warn(missing_docs)]
67

8+
use std::{cmp, slice};
9+
10+
use oxc_data_structures::pointer_ext::PointerExt;
11+
712
mod binary_expr_visitor;
813
mod comment;
914
mod context;
@@ -30,7 +35,7 @@ use crate::{
3035
comment::CommentsMap,
3136
operator::Operator,
3237
sourcemap_builder::SourcemapBuilder,
33-
str::{Quote, is_script_close_tag},
38+
str::{Quote, cold_branch, is_script_close_tag},
3439
};
3540
pub use crate::{
3641
context::Context,
@@ -236,35 +241,113 @@ impl<'a> Codegen<'a> {
236241
/// Push str into the buffer, escaping `</script` to `<\/script`.
237242
#[inline]
238243
pub fn print_str_escaping_script_close_tag(&mut self, s: &str) {
239-
let slice = s.as_bytes();
244+
// `</script` will be very rare. So we try to make the search as quick as possible by:
245+
// 1. Searching for `<` first, and only checking if followed by `/script` once `<` is found.
246+
// 2. Searching longer strings for `<` in chunks of 16 bytes using SIMD, and only doing the
247+
// more expensive byte-by-byte search once a `<` is found.
248+
249+
let bytes = s.as_bytes();
240250
let mut consumed = 0;
241-
let mut i = 0;
242-
243-
// Only check when remaining string has length larger than 8
244-
while i + 8 <= slice.len() {
245-
if is_script_close_tag(&slice[i..i + 8]) {
246-
// Push str up to and including `<`. Skip `/`. Write `\/` instead.
247-
// Skip over `script` - it'll be written in next chunk.
248-
// SAFETY:
249-
// The slice is guaranteed to be a valid UTF-8 string.
250-
// `consumed` is always on a UTF-8 char boundary.
251-
// `i` is on `<`, so `i + 1` is a UTF-8 char boundary.
252-
unsafe {
253-
self.code.print_bytes_unchecked(&slice[consumed..=i]);
251+
252+
#[expect(clippy::unnecessary_safety_comment)]
253+
// Search range of bytes for `</script`, byte by byte.
254+
//
255+
// Bytes between `ptr` and `last_ptr` (inclusive) are searched for `<`.
256+
// If `<` is found, the following 7 bytes are checked to see if they're `/script`.
257+
//
258+
// SAFETY:
259+
// * `ptr` and `last_ptr` must be within bounds of `bytes`.
260+
// * `last_ptr` must be greater than or equal to `ptr`.
261+
// * `last_ptr` must be no later than 8 bytes before end of string.
262+
// i.e. safe to read 8 bytes at `last_ptr`.
263+
let mut search_bytes = |mut ptr: *const u8, last_ptr| {
264+
loop {
265+
// SAFETY: `ptr` is always less than or equal to `last_ptr`.
266+
// `last_ptr` is within bounds of `bytes`, so safe to read a byte at `ptr`.
267+
let byte = unsafe { *ptr.as_ref().unwrap_unchecked() };
268+
if byte == b'<' {
269+
// SAFETY: `ptr <= last_ptr`, and `last_ptr` points to no later than
270+
// 8 bytes before end of string, so safe to read 8 bytes from `ptr`
271+
let slice = unsafe { slice::from_raw_parts(ptr, 8) };
272+
if is_script_close_tag(slice) {
273+
// Push str up to and including `<`. Skip `/`. Write `\/` instead.
274+
// SAFETY:
275+
// `consumed` is initially 0, and only updated below to be after `/`,
276+
// so in bounds, and on a UTF-8 char boundary.
277+
// `index` is on `<`, so `index + 1` is in bounds and a UTF-8 char boundary.
278+
// `consumed` is always less than `index + 1`, as it's either 0 or was set on a previous round to just after the `/` of an earlier `</script`.
279+
unsafe {
280+
let index = ptr.offset_from_usize(bytes.as_ptr());
281+
let before = bytes.get_unchecked(consumed..=index);
282+
self.code.print_bytes_unchecked(before);
283+
284+
// Set `consumed` to after `/`
285+
consumed = index + 2;
286+
}
287+
self.print_str("\\/");
288+
// Note: We could advance `ptr` by 8 bytes here to skip over `</script`,
289+
// but this branch will be very rarely taken, so it's better to keep it simple
290+
}
254291
}
255-
self.code.print_str("\\/");
256-
consumed = i + 2;
257-
i += 8;
258-
} else {
259-
i += 1;
292+
293+
if ptr == last_ptr {
294+
break;
295+
}
296+
// SAFETY: `ptr` is less than `last_ptr`, which is in bounds, so safe to increment `ptr`
297+
ptr = unsafe { ptr.add(1) };
298+
}
299+
};
300+
301+
// Search string in chunks of 16 bytes
302+
let mut chunks = bytes.chunks_exact(16);
303+
for (chunk_index, chunk) in chunks.by_ref().enumerate() {
304+
#[expect(clippy::missing_panics_doc, reason = "infallible")]
305+
let chunk: &[u8; 16] = chunk.try_into().unwrap();
306+
307+
// Compiler vectorizes this loop to a few SIMD ops
308+
let mut contains_lt = false;
309+
for &byte in chunk {
310+
if byte == b'<' {
311+
contains_lt = true;
312+
}
313+
}
314+
315+
if contains_lt {
316+
// Chunk contains at least one `<`.
317+
// Find them, and check if they're the start of `</script`.
318+
//
319+
// SAFETY: `index` is byte index of start of chunk.
320+
// We search bytes starting with first byte of chunk, and ending with last byte of chunk.
321+
// i.e. `index` to `index + 15` (inclusive).
322+
// If this chunk is towards the end of the string, reduce the range of bytes searched
323+
// so the last byte searched has at least 7 further bytes after it.
324+
// i.e. safe to read 8 bytes at `last_ptr`.
325+
cold_branch(|| unsafe {
326+
let index = chunk_index * 16;
327+
let remaining_bytes = bytes.len() - index;
328+
let last_offset = cmp::min(remaining_bytes - 8, 15);
329+
let ptr = bytes.as_ptr().add(index);
330+
let last_ptr = ptr.add(last_offset);
331+
search_bytes(ptr, last_ptr);
332+
});
260333
}
261334
}
262335

263-
// SAFETY:
264-
// The slice guarantees to be a valid UTF-8 string.
265-
// The consumed index is always pointed to a UTF-8 char boundary.
336+
// Search last chunk byte-by-byte.
337+
// Skip this if fewer than 8 bytes remain, because fewer than 8 bytes can't contain `</script`.
338+
let last_chunk = chunks.remainder();
339+
if last_chunk.len() >= 8 {
340+
let ptr = last_chunk.as_ptr();
341+
// SAFETY: `last_chunk.len() >= 8`, so `- 8` cannot wrap.
342+
// `last_chunk.as_ptr().add(last_chunk.len() - 8)` is in bounds of `last_chunk`.
343+
let last_ptr = unsafe { ptr.add(last_chunk.len() - 8) };
344+
search_bytes(ptr, last_ptr);
345+
}
346+
347+
// SAFETY: `consumed` is either 0, or after `/`, so on a UTF-8 char boundary, and in bounds
266348
unsafe {
267-
self.code.print_bytes_unchecked(&slice[consumed..]);
349+
let remaining = bytes.get_unchecked(consumed..);
350+
self.code.print_bytes_unchecked(remaining);
268351
}
269352
}
270353

0 commit comments

Comments
 (0)