Skip to content

codegen: escape </script #10334

@Boshen

Description

@Boshen

We should pass these tests

/// Printed output must not contain a literal `</script` in any ASCII case.
///
/// The expectations below require the `/` to be printed as `\/` inside string
/// literals, template literals and `//!` / `/*!` comments, and require a space
/// between `<` and a regex literal whose pattern starts with `script`.
/// Still `#[ignore]`d: these are the cases the codegen is expected to pass.
#[test]
#[ignore]
fn test_avoid_slash_script() {
    /// Runs `test` on every `(source, expected)` pair, in order.
    fn check_all(cases: &[(&str, &str)]) {
        for &(source, expected) in cases {
            test(source, expected);
        }
    }

    /// Runs `test_minify` on every `(source, expected)` pair, in order.
    fn check_all_minified(cases: &[(&str, &str)]) {
        for &(source, expected) in cases {
            test_minify(source, expected);
        }
    }

    // Positive cases: string and template literals.
    check_all(&[
        ("x = '</script'", "x = \"<\\/script\";\n"),
        ("x = `</script`", "x = `<\\/script`;\n"),
        ("x = `</SCRIPT`", "x = `<\\/SCRIPT`;\n"),
        ("x = `</ScRiPt`", "x = `<\\/ScRiPt`;\n"),
        ("x = `</script${y}`", "x = `<\\/script${y}`;\n"),
        ("x = `${y}</script`", "x = `${y}<\\/script`;\n"),
    ]);

    // Positive cases: `<` / `<<` followed by a regex literal, minified.
    check_all_minified(&[
        ("x = 1 < /script/.exec(y).length", "x=1< /script/.exec(y).length;"),
        ("x = 1 < /SCRIPT/.exec(y).length", "x=1< /SCRIPT/.exec(y).length;"),
        ("x = 1 < /ScRiPt/.exec(y).length", "x=1< /ScRiPt/.exec(y).length;"),
        ("x = 1 << /script/.exec(y).length", "x=1<< /script/.exec(y).length;"),
    ]);

    // Positive cases: `//!` and `/*!` comments.
    check_all(&[
        ("//! </script\n//! >/script\n//! /script", "//! <\\/script\n//! >/script\n//! /script\n"),
        ("//! </SCRIPT\n//! >/SCRIPT\n//! /SCRIPT", "//! <\\/SCRIPT\n//! >/SCRIPT\n//! /SCRIPT\n"),
        ("//! </ScRiPt\n//! >/ScRiPt\n//! /ScRiPt", "//! <\\/ScRiPt\n//! >/ScRiPt\n//! /ScRiPt\n"),
        ("/*! </script \n </script */", "/*! <\\/script \n <\\/script */\n"),
        ("/*! </SCRIPT \n </SCRIPT */", "/*! <\\/SCRIPT \n <\\/SCRIPT */\n"),
        ("/*! </ScRiPt \n </ScRiPt */", "/*! <\\/ScRiPt \n <\\/ScRiPt */\n"),
    ]);

    // Positive cases: `String.raw` tagged templates.
    check_all(&[
        (
            "String.raw`</script`",
            "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\"])));\n",
        ),
        (
            "String.raw`</script${a}`",
            "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\", \"\"])), a);\n",
        ),
        (
            "String.raw`${a}</script`",
            "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"\", \"<\\/script\"])), a);\n",
        ),
        (
            "String.raw`</SCRIPT`",
            "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/SCRIPT\"])));\n",
        ),
        (
            "String.raw`</ScRiPt`",
            "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/ScRiPt\"])));\n",
        ),
    ]);

    // Negative cases: near-misses in strings must be printed unchanged.
    check_all(&[
        ("x = '</'", "x = \"</\";\n"),
        ("x = '</ script'", "x = \"</ script\";\n"),
        ("x = '< /script'", "x = \"< /script\";\n"),
        ("x = '/script>'", "x = \"/script>\";\n"),
        ("x = '<script>'", "x = \"<script>\";\n"),
    ]);

    // Negative cases: regex pattern starts with a space, so no separator is needed.
    check_all_minified(&[
        ("x = 1 < / script/.exec(y).length", "x=1</ script/.exec(y).length;"),
        ("x = 1 << / script/.exec(y).length", "x=1<</ script/.exec(y).length;"),
    ]);
}

esbuild commits:

I thought this would be easy before looking at the test cases, but we need to handle strings, templates, comments, and regexes, as well as support disabling this feature.

This is what I have written so far:

diff --git a/crates/oxc_codegen/src/str.rs b/crates/oxc_codegen/src/str.rs
index af68886a2..4c24b5841 100644
--- a/crates/oxc_codegen/src/str.rs
+++ b/crates/oxc_codegen/src/str.rs
@@ -338,6 +338,7 @@ enum Escape {
     LS = 14, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
     NB = 15, // NBSP  - Non-breaking space (first byte)
     LO = 16, // �     - U+FFFD lossy replacement character (first byte)
+    LT = 17, // <     - Less-than sign
 }
 
 /// Struct which ensures content is aligned on 128.
@@ -357,7 +358,7 @@ static ESCAPES: Aligned128<[Escape; 256]> = {
         NU, __, __, __, __, __, __, BE, BK, __, NL, VT, FF, CR, __, __, // 0
         __, __, __, __, __, __, __, __, __, __, __, ES, __, __, __, __, // 1
         __, __, DQ, __, DO, __, __, SQ, __, __, __, __, __, __, __, __, // 2
-        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
+        __, __, __, __, __, __, __, __, __, __, __, __, LT, __, __, __, // 3
         __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
         __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
         BQ, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
@@ -382,7 +383,7 @@ type ByteHandler = unsafe fn(&mut Codegen, &mut PrintStringState);
 ///
 /// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 128 bytes in total.
 /// Aligned on 128, so occupies a pair of L1 cache lines.
-static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
+static BYTE_HANDLERS: Aligned128<[ByteHandler; 17]> = Aligned128([
     print_null,
     print_bell,
     print_backspace,
@@ -399,6 +400,7 @@ static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
     print_ls_or_ps,
     print_non_breaking_space,
     print_lossy_replacement,
+    print_less_than,
 ]);
 
 /// Call byte handler for byte which needs escaping.
@@ -574,6 +576,29 @@ unsafe fn print_dollar(codegen: &mut Codegen, state: &mut PrintStringState) {
     }
 }
 
+// `<` - when followed by `/script` (any ASCII case), print the `/` as `\/`,
+// so that `</script` is emitted as `<\/script`. Any other `<` is left untouched.
+unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
+    debug_assert_eq!(state.peek(), Some(b'<'));
+    // SAFETY: Next byte is `<`, which is ASCII.
+    unsafe { state.consume_byte_unchecked() };
+
+    // Bytes remaining after `<`. Need `/` plus the 6 bytes of `script`.
+    let rest = state.bytes.as_slice();
+    let is_script_close = rest.len() >= 7
+        && rest[0] == b'/'
+        && rest[1..7].eq_ignore_ascii_case(b"script");
+    if !is_script_close {
+        // Not `</script`: nothing to escape, `<` has only been consumed.
+        return;
+    }
+
+    // Consume the `/` and print `\/` in its place. `script` is left for the caller.
+    // SAFETY: Next byte is `/`, which is ASCII.
+    unsafe { state.flush_and_consume_byte(codegen) };
+    codegen.print_str("\\/");
+}
+
 // 0xE2 - first byte of <LS> or <PS>
 unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
     debug_assert_eq!(state.peek(), Some(0xE2));

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-codegen (Area - Code Generation), E-Help Wanted (Experience level - For the experienced collaborators)

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions