Skip to content

Commit b52ac61

Browse files
committed
fix(codegen): escape "</script"
1 parent 0370363 commit b52ac61

File tree

2 files changed

+43
-7
lines changed

2 files changed

+43
-7
lines changed

crates/oxc_codegen/src/str.rs

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,10 @@ enum Escape {
335335
DQ = 11, // " - Double quote
336336
BQ = 12, // ` - Backtick quote
337337
DO = 13, // $ - Dollar sign
338-
LS = 14, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
339-
NB = 15, // NBSP - Non-breaking space (first byte)
340-
LO = 16, // � - U+FFFD lossy replacement character (first byte)
338+
LT = 14, // < - Less-than sign
339+
LS = 15, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
340+
NB = 16, // NBSP - Non-breaking space (first byte)
341+
LO = 17, // � - U+FFFD lossy replacement character (first byte)
341342
}
342343

343344
/// Struct which ensures content is aligned on 128.
@@ -357,7 +358,7 @@ static ESCAPES: Aligned128<[Escape; 256]> = {
357358
NU, __, __, __, __, __, __, BE, BK, __, NL, VT, FF, CR, __, __, // 0
358359
__, __, __, __, __, __, __, __, __, __, __, ES, __, __, __, __, // 1
359360
__, __, DQ, __, DO, __, __, SQ, __, __, __, __, __, __, __, __, // 2
360-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
361+
__, __, __, __, __, __, __, __, __, __, __, __, LT, __, __, __, // 3
361362
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
362363
__, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
363364
BQ, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
@@ -380,9 +381,10 @@ type ByteHandler = unsafe fn(&mut Codegen, &mut PrintStringState);
380381
/// Indexed by `escape as usize - 1` (where `escape` is not `Escape::__`).
381382
/// Must be in same order as discriminants in `Escape`.
382383
///
383-
/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 128 bytes in total.
384-
/// Aligned on 128, so occupies a pair of L1 cache lines.
385-
static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
384+
/// Function pointers are 8 bytes each, so the 17 entries of `BYTE_HANDLERS` take 136 bytes in total.
385+
/// Aligned on 128, so the first 16 entries occupy a pair of L1 cache lines.
386+
/// The last entry will be in a separate cache line, but it should be vanishingly rare that it's accessed.
387+
static BYTE_HANDLERS: Aligned128<[ByteHandler; 17]> = Aligned128([
386388
print_null,
387389
print_bell,
388390
print_backspace,
@@ -396,6 +398,7 @@ static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
396398
print_double_quote,
397399
print_backtick,
398400
print_dollar,
401+
print_less_than,
399402
print_ls_or_ps,
400403
print_non_breaking_space,
401404
print_lossy_replacement,
@@ -574,6 +577,37 @@ unsafe fn print_dollar(codegen: &mut Codegen, state: &mut PrintStringState) {
574577
}
575578
}
576579

580+
// <
581+
unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
582+
debug_assert_eq!(state.peek(), Some(b'<'));
583+
584+
// Get slice of remaining bytes, including leading `<`
585+
let slice = state.bytes.as_slice();
586+
587+
// SAFETY: Next byte is `<`, which is ASCII
588+
unsafe { state.consume_byte_unchecked() };
589+
590+
// We have to check 2nd byte separately as `next8_lower_case == *b"</script"`
591+
// would also match `<\x0Fscript` (0xF | 32 == b'/').
592+
if slice.len() >= 8 && slice[1] == b'/' {
593+
// Compiler condenses these operations to an 8-byte read, u64 AND, and u64 compare.
594+
// https://godbolt.org/z/9ndYnbj53
595+
let next8: [u8; 8] = slice[0..8].try_into().unwrap();
596+
let mut next8_lower_case = [0; 8];
597+
for i in 0..8 {
598+
// `| 32` converts ASCII upper case letters to lower case. `<` and `/` are unaffected.
599+
next8_lower_case[i] = next8[i] | 32;
600+
}
601+
602+
if next8_lower_case == *b"</script" {
603+
// Flush up to and including `<`. Skip `/`. Write `\/` instead.
604+
// SAFETY: We already consumed `<`. Next byte is `/`, which is ASCII.
605+
unsafe { state.flush_and_consume_byte(codegen) };
606+
codegen.print_str("\\/");
607+
}
608+
}
609+
}
610+
577611
// 0xE2 - first byte of <LS> or <PS>
578612
unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
579613
debug_assert_eq!(state.peek(), Some(0xE2));

crates/oxc_codegen/tests/integration/esbuild.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,8 @@ fn test_jsx_single_line() {
10111011
fn test_avoid_slash_script() {
10121012
// Positive cases
10131013
test("x = '</script'", "x = \"<\\/script\";\n");
1014+
test("x = '</SCRIPT'", "x = \"<\\/SCRIPT\";\n");
1015+
test("x = '</ScRiPt'", "x = \"<\\/ScRiPt\";\n");
10141016
test("x = `</script`", "x = `<\\/script`;\n");
10151017
test("x = `</SCRIPT`", "x = `<\\/SCRIPT`;\n");
10161018
test("x = `</ScRiPt`", "x = `<\\/ScRiPt`;\n");

0 commit comments

Comments
 (0)