Skip to content

Commit 7865eef

Browse files
committed
fix(codegen): escape </script for template literals and comments
1 parent 27427a3 commit 7865eef

File tree

5 files changed

+78
-52
lines changed

5 files changed

+78
-52
lines changed

crates/oxc_codegen/src/comment.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,15 +128,15 @@ impl Codegen<'_> {
128128
let comment_source = comment.span.source_text(source_text);
129129
match comment.kind {
130130
CommentKind::Line => {
131-
self.print_str(comment_source);
131+
self.print_str_escaping_script_close_tag(comment_source);
132132
}
133133
CommentKind::Block => {
134134
// Print block comments with our own indentation.
135135
for line in comment_source.split(is_line_terminator) {
136136
if !line.starts_with("/*") {
137137
self.print_indent();
138138
}
139-
self.print_str(line.trim_start());
139+
self.print_str_escaping_script_close_tag(line.trim_start());
140140
if !line.ends_with("*/") {
141141
self.print_hard_newline();
142142
}

crates/oxc_codegen/src/gen.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2082,7 +2082,7 @@ impl Gen for TemplateLiteral<'_> {
20822082

20832083
for quasi in &self.quasis {
20842084
p.add_source_mapping(quasi.span);
2085-
p.print_str(quasi.value.raw.as_str());
2085+
p.print_str_escaping_script_close_tag(quasi.value.raw.as_str());
20862086

20872087
if let Some(expr) = expressions.next() {
20882088
p.print_str("${");

crates/oxc_codegen/src/lib.rs

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@ use oxc_syntax::{
2626
};
2727

2828
use crate::{
29-
binary_expr_visitor::BinaryExpressionVisitor, comment::CommentsMap, operator::Operator,
30-
sourcemap_builder::SourcemapBuilder, str::Quote,
29+
binary_expr_visitor::BinaryExpressionVisitor,
30+
comment::CommentsMap,
31+
operator::Operator,
32+
sourcemap_builder::SourcemapBuilder,
33+
str::{Quote, is_script_close_tag},
3134
};
3235
pub use crate::{
3336
context::Context,
@@ -230,6 +233,39 @@ impl<'a> Codegen<'a> {
230233
self.code.print_str(s);
231234
}
232235

236+
/// Push str into the buffer, escaping `</script` to `<\/script`.
237+
#[inline]
238+
pub fn print_str_escaping_script_close_tag(&mut self, s: &str) {
239+
let slice = s.as_bytes();
240+
let mut consumed = 0;
241+
let mut i = 0;
242+
243+
// Only check when remaining string has length larger than 8.
244+
while i + 8 <= slice.len() {
245+
if is_script_close_tag(&slice[i..i+8]) {
246+
// Push up to and including `<`. Skip `/`. Write `\/` instead.
247+
// SAFETY:
248+
// The slice guarantees to be a valid UTF-8 string.
249+
// The consumed index is always pointed to a UTF-8 char boundary.
250+
// Current byte is `<`, thus i - 1 is also at a UTF-8 char boundary.
251+
unsafe {
252+
self.code.print_bytes_unchecked(&slice[consumed..=i]);
253+
}
254+
self.code.print_str("\\/");
255+
consumed = i + 2;
256+
i += 8;
257+
}
258+
i += 1;
259+
}
260+
261+
// SAFETY:
262+
// The slice guarantees to be a valid UTF-8 string.
263+
// The consumed index is always pointed to a UTF-8 char boundary.
264+
unsafe {
265+
self.code.print_bytes_unchecked(&slice[consumed..]);
266+
}
267+
}
268+
233269
/// Print a single [`Expression`], adding it to the code generator's
234270
/// internal buffer. Unlike [`Codegen::build`], this does not consume `self`.
235271
#[inline]

crates/oxc_codegen/src/str.rs

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -592,29 +592,16 @@ unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
592592
// SAFETY: Next byte is `<`, which is ASCII
593593
unsafe { state.consume_byte_unchecked() };
594594

595-
// We have to check 2nd byte separately as `next8_lower_case == *b"</script"`
596-
// would also match `<\x0Fscript` (0xF | 32 == b'/').
597-
if slice.len() >= 8 && slice[1] == b'/' {
598-
// Compiler condenses these operations to an 8-byte read, u64 AND, and u64 compare.
599-
// https://godbolt.org/z/9ndYnbj53
600-
let next8: [u8; 8] = slice[0..8].try_into().unwrap();
601-
let mut next8_lower_case = [0; 8];
602-
for i in 0..8 {
603-
// `| 32` converts ASCII upper case letters to lower case. `<` and `/` are unaffected.
604-
next8_lower_case[i] = next8[i] | 32;
605-
}
606-
607-
if next8_lower_case == *b"</script" {
608-
// Flush up to and including `<`. Skip `/`. Write `\/` instead. Then skip over `script`.
609-
// Next chunk starts with `script`.
610-
// SAFETY: We already consumed `<`. Next byte is `/`, which is ASCII.
611-
unsafe { state.flush_and_consume_byte(codegen) };
612-
// SAFETY: `slice.len() >= 8` check above ensures there are 6 bytes left, after consuming 2 already.
613-
// `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator
614-
// positioned on UTF-8 char boundary.
615-
unsafe { state.consume_bytes_unchecked::<6>() };
616-
codegen.print_str("\\/");
617-
}
595+
if slice.len() >= 8 && is_script_close_tag(&slice[0..8]) {
596+
// Flush up to and including `<`. Skip `/`. Write `\/` instead. Then skip over `script`.
597+
// Next chunk starts with `script`.
598+
// SAFETY: We already consumed `<`. Next byte is `/`, which is ASCII.
599+
unsafe { state.flush_and_consume_byte(codegen) };
600+
codegen.print_str("\\/");
601+
// SAFETY: The check above ensures there are 6 bytes left, after consuming 2 already.
602+
// `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator
603+
// positioned on UTF-8 char boundary.
604+
unsafe { state.consume_bytes_unchecked::<6>() };
618605
}
619606
}
620607

@@ -735,3 +722,20 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
735722
pub fn cold_branch<F: FnOnce() -> T, T>(f: F) -> T {
736723
f()
737724
}
725+
726+
/// Report whether `slice` is exactly the 8 bytes `</script`, ignoring the
/// ASCII case of the letters in `script`.
///
/// Returns `false` for any slice whose length is not 8.
pub fn is_script_close_tag(slice: &[u8]) -> bool {
    // Copy into a fixed-size array; this only succeeds for 8-byte input.
    // The fixed size is intended to let the compiler lower the work below
    // to word-sized operations instead of a byte-by-byte loop.
    let Ok(mut bytes) = <[u8; 8]>::try_from(slice) else {
        return false;
    };

    // Leave `<` and `/` untouched so they must match exactly;
    // setting bit 5 (`| 32`) folds ASCII upper case letters to lower case.
    for byte in &mut bytes[2..] {
        *byte |= 32;
    }

    bytes == *b"</script"
}

crates/oxc_codegen/tests/integration/esbuild.rs

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,6 @@ fn test_jsx_single_line() {
10141014
}
10151015

10161016
#[test]
1017-
#[ignore]
10181017
fn test_avoid_slash_script() {
10191018
// Positive cases
10201019
test("x = '</script'", "x = \"<\\/script\";\n");
@@ -1027,36 +1026,23 @@ fn test_avoid_slash_script() {
10271026
test("x = `</ScRiPt`", "x = `<\\/ScRiPt`;\n");
10281027
test("x = `</script${y}`", "x = `<\\/script${y}`;\n");
10291028
test("x = `${y}</script`", "x = `${y}<\\/script`;\n");
1029+
test("x = `<</script`", "x = `<<\\/script`;\n");
1030+
test("x = `</</script`", "x = `</<\\/script`;\n");
10301031
test_minify("x = 1 < /script/.exec(y).length", "x=1< /script/.exec(y).length;");
10311032
test_minify("x = 1 < /SCRIPT/.exec(y).length", "x=1< /SCRIPT/.exec(y).length;");
10321033
test_minify("x = 1 < /ScRiPt/.exec(y).length", "x=1< /ScRiPt/.exec(y).length;");
10331034
test_minify("x = 1 << /script/.exec(y).length", "x=1<< /script/.exec(y).length;");
10341035
test("//! </script\n//! >/script\n//! /script", "//! <\\/script\n//! >/script\n//! /script\n");
10351036
test("//! </SCRIPT\n//! >/SCRIPT\n//! /SCRIPT", "//! <\\/SCRIPT\n//! >/SCRIPT\n//! /SCRIPT\n");
10361037
test("//! </ScRiPt\n//! >/ScRiPt\n//! /ScRiPt", "//! <\\/ScRiPt\n//! >/ScRiPt\n//! /ScRiPt\n");
1037-
test("/*! </script \n </script */", "/*! <\\/script \n <\\/script */\n");
1038-
test("/*! </SCRIPT \n </SCRIPT */", "/*! <\\/SCRIPT \n <\\/SCRIPT */\n");
1039-
test("/*! </ScRiPt \n </ScRiPt */", "/*! <\\/ScRiPt \n <\\/ScRiPt */\n");
1040-
test(
1041-
"String.raw`</script`",
1042-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\"])));\n",
1043-
);
1044-
test(
1045-
"String.raw`</script${a}`",
1046-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\", \"\"])), a);\n",
1047-
);
1048-
test(
1049-
"String.raw`${a}</script`",
1050-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"\", \"<\\/script\"])), a);\n",
1051-
);
1052-
test(
1053-
"String.raw`</SCRIPT`",
1054-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/SCRIPT\"])));\n",
1055-
);
1056-
test(
1057-
"String.raw`</ScRiPt`",
1058-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/ScRiPt\"])));\n",
1059-
);
1038+
test("/*! </script \n</script */", "/*! <\\/script \n<\\/script */");
1039+
test("/*! </SCRIPT \n</SCRIPT */", "/*! <\\/SCRIPT \n<\\/SCRIPT */");
1040+
test("/*! </ScRiPt \n</ScRiPt */", "/*! <\\/ScRiPt \n<\\/ScRiPt */");
1041+
test("String.raw`</script`", "String.raw`<\\/script`;\n");
1042+
test("String.raw`</script${a}`", "String.raw`<\\/script${a}`;\n");
1043+
test("String.raw`${a}</script`", "String.raw`${a}<\\/script`;\n");
1044+
test("String.raw`</SCRIPT`", "String.raw`<\\/SCRIPT`;\n");
1045+
test("String.raw`</ScRiPt`", "String.raw`<\\/ScRiPt`;\n");
10601046

10611047
// Negative cases
10621048
test("x = '</'", "x = \"</\";\n");

0 commit comments

Comments
 (0)