diff --git a/crates/oxc_codegen/src/comment.rs b/crates/oxc_codegen/src/comment.rs index 746d60edc2a05..5ef6fed220d64 100644 --- a/crates/oxc_codegen/src/comment.rs +++ b/crates/oxc_codegen/src/comment.rs @@ -128,7 +128,7 @@ impl Codegen<'_> { let comment_source = comment.span.source_text(source_text); match comment.kind { CommentKind::Line => { - self.print_str(comment_source); + self.print_str_escaping_script_close_tag(comment_source); } CommentKind::Block => { // Print block comments with our own indentation. @@ -136,7 +136,7 @@ impl Codegen<'_> { if !line.starts_with("/*") { self.print_indent(); } - self.print_str(line.trim_start()); + self.print_str_escaping_script_close_tag(line.trim_start()); if !line.ends_with("*/") { self.print_hard_newline(); } diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index 010df8b8815ca..c36137311f5c9 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -2083,7 +2083,7 @@ impl Gen for TemplateLiteral<'_> { for quasi in &self.quasis { p.add_source_mapping(quasi.span); - p.print_str(quasi.value.raw.as_str()); + p.print_str_escaping_script_close_tag(quasi.value.raw.as_str()); p.add_source_mapping_end(quasi.span); if let Some(expr) = expressions.next() { diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs index f71bd0a420b68..78e17e978bfa0 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -26,8 +26,11 @@ use oxc_syntax::{ }; use crate::{ - binary_expr_visitor::BinaryExpressionVisitor, comment::CommentsMap, operator::Operator, - sourcemap_builder::SourcemapBuilder, str::Quote, + binary_expr_visitor::BinaryExpressionVisitor, + comment::CommentsMap, + operator::Operator, + sourcemap_builder::SourcemapBuilder, + str::{Quote, is_script_close_tag}, }; pub use crate::{ context::Context, @@ -230,6 +233,40 @@ impl<'a> Codegen<'a> { self.code.print_str(s); } + /// Push str into the buffer, escaping ` = { NU, __, __, __, __, __, __, BE, BK, __, NL, VT, FF, CR, __, __, // 0 __, __, __, __, __, __, __, __, __, __, __, ES, __, __, __, __, // 1 __, __, DQ, __, DO, __, __, SQ, __, __, __, __, __, __, __, __, // 2 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3 + __, __, __, __, __, __, __, __, __, __, __, __, LT, __, __, __, // 3 __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4 __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5 BQ, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6 @@ -385,9 +386,10 @@ type ByteHandler = unsafe fn(&mut Codegen, &mut PrintStringState); /// Indexed by `escape as usize - 1` (where `escape` is not `Escape::__`). /// Must be in same order as discriminants in `Escape`. /// -/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 128 bytes in total. -/// Aligned on 128, so occupies a pair of L1 cache lines. -static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([ +/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 136 bytes in total. +/// Aligned on 128, so first 16 occupy a pair of L1 cache lines. +/// The last will be in separate cache line, but it should be vanishingly rare that it's accessed. +static BYTE_HANDLERS: Aligned128<[ByteHandler; 17]> = Aligned128([ print_null, print_bell, print_backspace, @@ -401,6 +403,7 @@ static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([ print_double_quote, print_backtick, print_dollar, + print_less_than, print_ls_or_ps, print_non_breaking_space, print_lossy_replacement, @@ -579,6 +582,29 @@ unsafe fn print_dollar(codegen: &mut Codegen, state: &mut PrintStringState) { } } +// < +unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) { + debug_assert_eq!(state.peek(), Some(b'<')); + + // Get slice of remaining bytes, including leading `<` + let slice = state.bytes.as_slice(); + + // SAFETY: Next byte is `<`, which is ASCII + unsafe { state.consume_byte_unchecked() }; + + if slice.len() >= 8 && is_script_close_tag(&slice[0..8]) { + // Flush up to and including `<`. Skip `/`. Write `\/` instead. Then skip over `script`. + // Next chunk starts with `script`. + // SAFETY: We already consumed `<`. Next byte is `/`, which is ASCII. + unsafe { state.flush_and_consume_byte(codegen) }; + codegen.print_str("\\/"); + // SAFETY: The check above ensures there are 6 bytes left, after consuming 2 already. + // `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator + // positioned on UTF-8 char boundary. + unsafe { state.consume_bytes_unchecked::<6>() }; + } +} + // 0xE2 - first byte of or unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) { debug_assert_eq!(state.peek(), Some(0xE2)); @@ -696,3 +722,20 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString pub fn cold_branch T, T>(f: F) -> T { f() } + +/// Check if the slice is ` bool { + if slice.len() == 8 { + // Compiler condenses these operations to an 8-byte read, u64 AND, and u64 compare. + // https://godbolt.org/z/oGG16fK6v + let mut slice: [u8; 8] = slice.try_into().unwrap(); + for b in slice.iter_mut().skip(2) { + // `| 32` converts ASCII upper case letters to lower case. + *b |= 32; + } + + slice == *b"/script\n//! /script", "//! <\\/script\n//! >/script\n//! /script\n"); test("//! /SCRIPT\n//! /SCRIPT", "//! <\\/SCRIPT\n//! >/SCRIPT\n//! /SCRIPT\n"); test("//! /ScRiPt\n//! /ScRiPt", "//! <\\/ScRiPt\n//! >/ScRiPt\n//! /ScRiPt\n"); - test("/*! \";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\"])));\n", - ); - test( - "String.raw`\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\", \"\"])), a);\n", - ); - test( - "String.raw`${a}\";\nvar _a;\nString.raw(_a || (_a = __template([\"\", \"<\\/script\"])), a);\n", - ); - test( - "String.raw`\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/SCRIPT\"])));\n", - ); - test( - "String.raw`\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/ScRiPt\"])));\n", - ); + test("/*!