diff --git a/crates/oxc_codegen/src/str.rs b/crates/oxc_codegen/src/str.rs index af68886a2fbb8..ee84822d7e45d 100644 --- a/crates/oxc_codegen/src/str.rs +++ b/crates/oxc_codegen/src/str.rs @@ -335,9 +335,10 @@ enum Escape { DQ = 11, // " - Double quote BQ = 12, // ` - Backtick quote DO = 13, // $ - Dollar sign - LS = 14, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte) - NB = 15, // NBSP - Non-breaking space (first byte) - LO = 16, // � - U+FFFD lossy replacement character (first byte) + LT = 14, // < - Less-than sign + LS = 15, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte) + NB = 16, // NBSP - Non-breaking space (first byte) + LO = 17, // � - U+FFFD lossy replacement character (first byte) } /// Struct which ensures content is aligned on 128. @@ -357,7 +358,7 @@ static ESCAPES: Aligned128<[Escape; 256]> = { NU, __, __, __, __, __, __, BE, BK, __, NL, VT, FF, CR, __, __, // 0 __, __, __, __, __, __, __, __, __, __, __, ES, __, __, __, __, // 1 __, __, DQ, __, DO, __, __, SQ, __, __, __, __, __, __, __, __, // 2 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3 + __, __, __, __, __, __, __, __, __, __, __, __, LT, __, __, __, // 3 __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4 __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5 BQ, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6 @@ -380,9 +381,10 @@ type ByteHandler = unsafe fn(&mut Codegen, &mut PrintStringState); /// Indexed by `escape as usize - 1` (where `escape` is not `Escape::__`). /// Must be in same order as discriminants in `Escape`. /// -/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 128 bytes in total. -/// Aligned on 128, so occupies a pair of L1 cache lines. -static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([ +/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 136 bytes in total. +/// Aligned on 128, so first 16 occupy a pair of L1 cache lines. +/// The last will be in separate cache line, but it should be vanishingly rare that it's accessed. +static BYTE_HANDLERS: Aligned128<[ByteHandler; 17]> = Aligned128([ print_null, print_bell, print_backspace, @@ -396,6 +398,7 @@ static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([ print_double_quote, print_backtick, print_dollar, + print_less_than, print_ls_or_ps, print_non_breaking_space, print_lossy_replacement, @@ -574,6 +577,42 @@ unsafe fn print_dollar(codegen: &mut Codegen, state: &mut PrintStringState) { } } +// < +unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) { + debug_assert_eq!(state.peek(), Some(b'<')); + + // Get slice of remaining bytes, including leading `<` + let slice = state.bytes.as_slice(); + + // SAFETY: Next byte is `<`, which is ASCII + unsafe { state.consume_byte_unchecked() }; + + // We have to check 2nd byte separately as `next8_lower_case == *b"= 8 && slice[1] == b'/' { + // Compiler condenses these operations to an 8-byte read, u64 AND, and u64 compare. + // https://godbolt.org/z/9ndYnbj53 + let next8: [u8; 8] = slice[0..8].try_into().unwrap(); + let mut next8_lower_case = [0; 8]; + for i in 0..8 { + // `| 32` converts ASCII upper case letters to lower case. `<` and `/` are unaffected. + next8_lower_case[i] = next8[i] | 32; + } + + if next8_lower_case == *b"= 8` check above ensures there are 6 bytes left, after consuming 2 already. + // `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator + // positioned on UTF-8 char boundary. + unsafe { state.consume_bytes_unchecked::<6>() }; + codegen.print_str("\\/"); + } + } +} + // 0xE2 - first byte of or unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) { debug_assert_eq!(state.peek(), Some(0xE2)); diff --git a/crates/oxc_codegen/tests/integration/esbuild.rs b/crates/oxc_codegen/tests/integration/esbuild.rs index 6e75a376d4939..3e00a70cef7be 100644 --- a/crates/oxc_codegen/tests/integration/esbuild.rs +++ b/crates/oxc_codegen/tests/integration/esbuild.rs @@ -1011,6 +1011,10 @@ fn test_jsx_single_line() { fn test_avoid_slash_script() { // Positive cases test("x = '