Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crates/oxc_codegen/src/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,15 @@ impl Codegen<'_> {
let comment_source = comment.span.source_text(source_text);
match comment.kind {
CommentKind::Line => {
self.print_str(comment_source);
self.print_str_escaping_script_close_tag(comment_source);
}
CommentKind::Block => {
// Print block comments with our own indentation.
for line in comment_source.split(is_line_terminator) {
if !line.starts_with("/*") {
self.print_indent();
}
self.print_str(line.trim_start());
self.print_str_escaping_script_close_tag(line.trim_start());
if !line.ends_with("*/") {
self.print_hard_newline();
}
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_codegen/src/gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2083,7 +2083,7 @@ impl Gen for TemplateLiteral<'_> {

for quasi in &self.quasis {
p.add_source_mapping(quasi.span);
p.print_str(quasi.value.raw.as_str());
p.print_str_escaping_script_close_tag(quasi.value.raw.as_str());
p.add_source_mapping_end(quasi.span);

if let Some(expr) = expressions.next() {
Expand Down
41 changes: 39 additions & 2 deletions crates/oxc_codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ use oxc_syntax::{
};

use crate::{
binary_expr_visitor::BinaryExpressionVisitor, comment::CommentsMap, operator::Operator,
sourcemap_builder::SourcemapBuilder, str::Quote,
binary_expr_visitor::BinaryExpressionVisitor,
comment::CommentsMap,
operator::Operator,
sourcemap_builder::SourcemapBuilder,
str::{Quote, is_script_close_tag},
};
pub use crate::{
context::Context,
Expand Down Expand Up @@ -230,6 +233,40 @@ impl<'a> Codegen<'a> {
self.code.print_str(s);
}

/// Write `s` to the output buffer, rewriting every `</script` (matched
/// case-insensitively on the tag name) as `<\/script`.
///
/// Only the `/` is replaced, by `\/`. The `<` and the tag name are emitted
/// unchanged, so the original casing of `script` is kept.
#[inline]
pub fn print_str_escaping_script_close_tag(&mut self, s: &str) {
    // Byte length of `</script`.
    const TAG_LEN: usize = 8;

    let bytes = s.as_bytes();
    // Index of the first byte of `s` that has not been written out yet.
    let mut flushed = 0;
    // Index of the byte currently being examined as a possible `<`.
    let mut pos = 0;

    // A match needs `TAG_LEN` bytes, so the scan stops as soon as fewer than
    // that remain (`get` returns `None` for an out-of-range window).
    while let Some(window) = bytes.get(pos..pos + TAG_LEN) {
        if !is_script_close_tag(window) {
            pos += 1;
            continue;
        }

        // Emit the pending bytes up to and including `<`, then `\/` in place of `/`.
        // SAFETY:
        // `bytes` comes from a `&str`, so it is valid UTF-8.
        // `flushed` is either 0 or the index just past an ASCII `/`, so it is on
        // a UTF-8 char boundary.
        // The byte at `pos` is ASCII `<`, so the range also ends on a char boundary.
        unsafe {
            self.code.print_bytes_unchecked(&bytes[flushed..=pos]);
        }
        self.code.print_str("\\/");

        // Resume output after the `/`; resume scanning after the whole tag.
        flushed = pos + 2;
        pos += TAG_LEN;
    }

    // Write whatever is left after the last replacement (or all of `s` if none).
    // SAFETY:
    // `bytes` comes from a `&str`, so it is valid UTF-8.
    // `flushed` is either 0 or the index just past an ASCII `/`, so it is on
    // a UTF-8 char boundary.
    unsafe {
        self.code.print_bytes_unchecked(&bytes[flushed..]);
    }
}

/// Print a single [`Expression`], adding it to the code generator's
/// internal buffer. Unlike [`Codegen::build`], this does not consume `self`.
#[inline]
Expand Down
57 changes: 50 additions & 7 deletions crates/oxc_codegen/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,10 @@ enum Escape {
DQ = 11, // " - Double quote
BQ = 12, // ` - Backtick quote
DO = 13, // $ - Dollar sign
LS = 14, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
NB = 15, // NBSP - Non-breaking space (first byte)
LO = 16, // � - U+FFFD lossy replacement character (first byte)
LT = 14, // < - Less-than sign
LS = 15, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
NB = 16, // NBSP - Non-breaking space (first byte)
LO = 17, // � - U+FFFD lossy replacement character (first byte)
}

/// Struct which ensures content is aligned on 128.
Expand All @@ -362,7 +363,7 @@ static ESCAPES: Aligned128<[Escape; 256]> = {
NU, __, __, __, __, __, __, BE, BK, __, NL, VT, FF, CR, __, __, // 0
__, __, __, __, __, __, __, __, __, __, __, ES, __, __, __, __, // 1
__, __, DQ, __, DO, __, __, SQ, __, __, __, __, __, __, __, __, // 2
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
__, __, __, __, __, __, __, __, __, __, __, __, LT, __, __, __, // 3
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
__, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
BQ, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
Expand All @@ -385,9 +386,10 @@ type ByteHandler = unsafe fn(&mut Codegen, &mut PrintStringState);
/// Indexed by `escape as usize - 1` (where `escape` is not `Escape::__`).
/// Must be in same order as discriminants in `Escape`.
///
/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 128 bytes in total.
/// Aligned on 128, so occupies a pair of L1 cache lines.
static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 136 bytes in total.
/// Aligned on 128, so first 16 occupy a pair of L1 cache lines.
/// The last will be in a separate cache line, but it should be vanishingly rare that it's accessed.
static BYTE_HANDLERS: Aligned128<[ByteHandler; 17]> = Aligned128([
print_null,
print_bell,
print_backspace,
Expand All @@ -401,6 +403,7 @@ static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
print_double_quote,
print_backtick,
print_dollar,
print_less_than,
print_ls_or_ps,
print_non_breaking_space,
print_lossy_replacement,
Expand Down Expand Up @@ -579,6 +582,29 @@ unsafe fn print_dollar(codegen: &mut Codegen, state: &mut PrintStringState) {
}
}

// <
//
// Consumes the `<`. If it starts `</script` (tag name in any ASCII case),
// the `/` is replaced with `\/` in the output and the tag name is skipped over
// so that it becomes the start of the next chunk.
// Caller must ensure the next byte in `state` is `<`.
unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
    debug_assert_eq!(state.peek(), Some(b'<'));

    // Inspect the remaining bytes while they still begin with the leading `<`.
    let remaining = state.bytes.as_slice();
    let starts_close_tag = remaining.len() >= 8 && is_script_close_tag(&remaining[0..8]);

    // SAFETY: Next byte is `<`, which is ASCII
    unsafe { state.consume_byte_unchecked() };

    if !starts_close_tag {
        // A plain `<` needs no escaping.
        return;
    }

    // Flush up to and including `<`. Skip `/`. Write `\/` instead.
    // SAFETY: `<` has already been consumed. Next byte is `/`, which is ASCII.
    unsafe { state.flush_and_consume_byte(codegen) };
    codegen.print_str("\\/");

    // Skip over `script`, so the next chunk starts with it.
    // SAFETY: The 8-byte check above guarantees 6 bytes remain after the 2 consumed so far.
    // `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves the `bytes` iterator
    // positioned on a UTF-8 char boundary.
    unsafe { state.consume_bytes_unchecked::<6>() };
}

// 0xE2 - first byte of <LS> or <PS>
unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
debug_assert_eq!(state.peek(), Some(0xE2));
Expand Down Expand Up @@ -696,3 +722,20 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
pub fn cold_branch<F: FnOnce() -> T, T>(f: F) -> T {
f()
}

/// Check if the slice is `</script` regardless of case.
///
/// Returns `true` only when `slice` is exactly 8 bytes long, its first two bytes
/// are `<` and `/`, and the remaining six bytes spell `script` in any mix of
/// ASCII upper and lower case. Any other length returns `false`.
pub fn is_script_close_tag(slice: &[u8]) -> bool {
    if slice.len() == 8 {
        // Compiler condenses these operations to an 8-byte read, u64 AND, and u64 compare.
        // https://godbolt.org/z/oGG16fK6v
        let mut bytes: [u8; 8] = slice.try_into().expect("slice length was checked to be 8");
        // Leave `</` untouched; only the tag name is compared case-insensitively.
        for b in bytes.iter_mut().skip(2) {
            // `| 32` converts ASCII upper case letters to lower case.
            // The only bytes that become a given lower case letter are that letter
            // and its upper case form, so this cannot produce a false match.
            *b |= 32;
        }

        bytes == *b"</script"
    } else {
        false
    }
}
39 changes: 15 additions & 24 deletions crates/oxc_codegen/tests/integration/esbuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1014,45 +1014,36 @@ fn test_jsx_single_line() {
}

#[test]
#[ignore]
fn test_avoid_slash_script() {
// Positive cases
test("x = '</script'", "x = \"<\\/script\";\n");
test("x = '</SCRIPT'", "x = \"<\\/SCRIPT\";\n");
test("x = '</ScRiPt'", "x = \"<\\/ScRiPt\";\n");
test("x = 'abc </script def'", "x = \"abc <\\/script def\";\n");
test("x = 'abc </ScRiPt def'", "x = \"abc <\\/ScRiPt def\";\n");
test("x = `</script`", "x = `<\\/script`;\n");
test("x = `</SCRIPT`", "x = `<\\/SCRIPT`;\n");
test("x = `</ScRiPt`", "x = `<\\/ScRiPt`;\n");
test("x = `</script${y}`", "x = `<\\/script${y}`;\n");
test("x = `${y}</script`", "x = `${y}<\\/script`;\n");
test("x = `<</script`", "x = `<<\\/script`;\n");
test("x = `</</script`", "x = `</<\\/script`;\n");
test("x = `</script</script`", "x = `<\\/script<\\/script`;\n");
test_minify("x = 1 < /script/.exec(y).length", "x=1< /script/.exec(y).length;");
test_minify("x = 1 < /SCRIPT/.exec(y).length", "x=1< /SCRIPT/.exec(y).length;");
test_minify("x = 1 < /ScRiPt/.exec(y).length", "x=1< /ScRiPt/.exec(y).length;");
test_minify("x = 1 << /script/.exec(y).length", "x=1<< /script/.exec(y).length;");
test("//! </script\n//! >/script\n//! /script", "//! <\\/script\n//! >/script\n//! /script\n");
test("//! </SCRIPT\n//! >/SCRIPT\n//! /SCRIPT", "//! <\\/SCRIPT\n//! >/SCRIPT\n//! /SCRIPT\n");
test("//! </ScRiPt\n//! >/ScRiPt\n//! /ScRiPt", "//! <\\/ScRiPt\n//! >/ScRiPt\n//! /ScRiPt\n");
test("/*! </script \n </script */", "/*! <\\/script \n <\\/script */\n");
test("/*! </SCRIPT \n </SCRIPT */", "/*! <\\/SCRIPT \n <\\/SCRIPT */\n");
test("/*! </ScRiPt \n </ScRiPt */", "/*! <\\/ScRiPt \n <\\/ScRiPt */\n");
test(
"String.raw`</script`",
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\"])));\n",
);
test(
"String.raw`</script${a}`",
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\", \"\"])), a);\n",
);
test(
"String.raw`${a}</script`",
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"\", \"<\\/script\"])), a);\n",
);
test(
"String.raw`</SCRIPT`",
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/SCRIPT\"])));\n",
);
test(
"String.raw`</ScRiPt`",
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/ScRiPt\"])));\n",
);
test("/*! </script \n</script */", "/*! <\\/script \n<\\/script */");
test("/*! </SCRIPT \n</SCRIPT */", "/*! <\\/SCRIPT \n<\\/SCRIPT */");
test("/*! </ScRiPt \n</ScRiPt */", "/*! <\\/ScRiPt \n<\\/ScRiPt */");
test("String.raw`</script`", "String.raw`<\\/script`;\n");
test("String.raw`</script${a}`", "String.raw`<\\/script${a}`;\n");
test("String.raw`${a}</script`", "String.raw`${a}<\\/script`;\n");
test("String.raw`</SCRIPT`", "String.raw`<\\/SCRIPT`;\n");
test("String.raw`</ScRiPt`", "String.raw`<\\/ScRiPt`;\n");

// Negative cases
test("x = '</'", "x = \"</\";\n");
Expand Down
Loading