Merge pull request #452 from phoenixframework/sd-css-escape

add Phoenix.HTML.css_escape/1
phoenixframework · Oct 21, 2024 · c1ad602 · c1ad602
2 parents 6b67a08 + cdbc402
commit c1ad602
Show file tree

Hide file tree

Showing 2 changed files with 148 additions and 1 deletion.
diff --git a/lib/phoenix_html.ex b/lib/phoenix_html.ex
@@ -119,7 +119,7 @@ defmodule Phoenix.HTML do
       iex> html_escape("<hello>")
       {:safe, [[[] | "&lt;"], "hello" | "&gt;"]}
 
-      iex> html_escape('<hello>')
+      iex> html_escape(~c"<hello>")
       {:safe, ["&lt;", 104, 101, 108, 108, 111, "&gt;"]}
 
       iex> html_escape(1)
@@ -337,4 +337,75 @@ defmodule Phoenix.HTML do
     do: javascript_escape(t, <<acc::binary, h>>)
 
   defp javascript_escape(<<>>, acc), do: acc
+
+  @doc """
+  Escapes a binary so that it is safe to use as a CSS identifier.
+
+  ## Examples
+
+      iex> css_escape("hello world")
+      "hello\\\\ world"
+
+      iex> css_escape("-123")
+      "-\\\\31 23"
+
+  """
+  @spec css_escape(String.t()) :: String.t()
+  def css_escape(value) when is_binary(value) do
+    # Direct Elixir translation of the JavaScript implementation found at
+    # https://github.com/mathiasbynens/CSS.escape/blob/master/css.escape.js
+    # The string is split into code points, each one is escaped on its own,
+    # and the resulting iodata is flattened back into a single binary.
+    code_points = String.to_charlist(value)
+    escaped_iodata = escape_css_chars(code_points)
+    IO.iodata_to_binary(escaped_iodata)
+  end
+
+  # Starts the escaping walk over a list of code points.
+  #
+  # If the character is the first character and is a `-` (U+002D), and
+  # there is no second character, it is escaped right away.
+  defp escape_css_chars([?-]), do: ["\\-"]
+
+  # Any other input is processed starting at index 0 with no output yet.
+  defp escape_css_chars(chars), do: escape_css_chars(chars, 0, [])
+
+  # Input exhausted: output was built newest-first, so put it back in order.
+  defp escape_css_chars([], _position, escaped), do: Enum.reverse(escaped)
+
+  # If the character is NULL (U+0000), then the REPLACEMENT CHARACTER
+  # (U+FFFD).
+  defp escape_css_chars([0 | tail], position, escaped),
+    do: escape_css_chars(tail, position + 1, [<<0xFFFD::utf8>> | escaped])
+
+  # Escaped as a code point (backslash, upper-case hex, trailing space):
+  #   * any character in the range U+0001 to U+001F, or U+007F,
+  #   * a digit (U+0030 to U+0039) that is the first character,
+  #   * a digit that is the second character when the first character was
+  #     a `-` (U+002D), which was emitted unchanged as "-".
+  # https://drafts.csswg.org/cssom/#escape-a-character-as-code-point
+  defp escape_css_chars([code_point | tail], position, escaped)
+       when code_point in 0x0001..0x001F or code_point == 0x007F or
+              (position == 0 and code_point in ?0..?9) or
+              (position == 1 and code_point in ?0..?9 and hd(escaped) == "-") do
+    hex_escape = ["\\", Integer.to_string(code_point, 16), " "]
+    escape_css_chars(tail, position + 1, [hex_escape | escaped])
+  end
+
+  # Kept as is when not handled by one of the above rules: characters
+  # greater than or equal to U+0080, `-` (U+002D), `_` (U+005F), digits
+  # [0-9], and the letters [A-Z] and [a-z].
+  defp escape_css_chars([code_point | tail], position, escaped)
+       when code_point >= 0x0080 or code_point in [?-, ?_] or code_point in ?0..?9 or
+              code_point in ?A..?Z or code_point in ?a..?z do
+    escape_css_chars(tail, position + 1, [<<code_point::utf8>> | escaped])
+  end
+
+  # Otherwise, the escaped character: a backslash followed by the
+  # character itself.
+  # https://drafts.csswg.org/cssom/#escape-a-character
+  defp escape_css_chars([code_point | tail], position, escaped) do
+    backslashed = ["\\", <<code_point::utf8>>]
+    escape_css_chars(tail, position + 1, [backslashed | escaped])
+  end
 end
diff --git a/test/phoenix_html_test.exs b/test/phoenix_html_test.exs
@@ -142,4 +142,80 @@ defmodule Phoenix.HTMLTest do
       assert attributes_escape([{"selected", true}]) |> safe_to_string() == ~s( selected)
     end
   end
+
+  describe "css_escape" do
+    test "null character" do
+      # NULL (U+0000) becomes U+FFFD wherever it appears in the input.
+      for {prefix, suffix} <- [{"", ""}, {"a", ""}, {"", "b"}, {"a", "b"}] do
+        assert css_escape(prefix <> <<0>> <> suffix) == prefix <> "\ufffd" <> suffix
+      end
+    end
+
+    test "replacement character" do
+      # U+FFFD itself is passed through unchanged.
+      for unchanged <- [<<0xFFFD::utf8>>, "a\ufffd", "\ufffdb", "a\ufffdb"] do
+        assert css_escape(unchanged) == unchanged
+      end
+    end
+
+    test "invalid input" do
+      assert_raise(FunctionClauseError, fn -> css_escape(nil) end)
+    end
+
+    test "control characters" do
+      assert css_escape("\u0001\u0002\u001e\u001f") == "\\1 \\2 \\1E \\1F "
+    end
+
+    test "leading digit" do
+      for {hex, digit} <- Enum.with_index(~w(30 31 32 33 34 35 36 37 38 39)) do
+        assert css_escape("#{digit}a") == "\\#{hex} a"
+      end
+    end
+
+    test "non-leading digit" do
+      Enum.each(0..9, fn digit ->
+        assert css_escape("a#{digit}b") == "a#{digit}b"
+      end)
+    end
+
+    test "leading hyphen and digit" do
+      for {hex, digit} <- Enum.with_index(~w(30 31 32 33 34 35 36 37 38 39)) do
+        assert css_escape("-#{digit}a") == "-\\#{hex} a"
+      end
+    end
+
+    test "hyphens" do
+      assert css_escape("-") == "\\-"
+      for unchanged <- ["-a", "--", "--a"] do
+        assert css_escape(unchanged) == unchanged
+      end
+    end
+
+    test "non-ASCII and special characters" do
+      symbols = "🤷🏻‍♂️-_©"
+      assert css_escape(symbols) == symbols
+
+      # U+007F is hex-escaped, while U+0080..U+009F are emitted unchanged.
+      c1_range = for code_point <- 0x80..0x9F, into: "", do: <<code_point::utf8>>
+      assert css_escape(<<0x7F>> <> c1_range) == "\\7F " <> c1_range
+
+      latin1 = "\u00a0\u00a1\u00a2"
+      assert css_escape(latin1) == latin1
+    end
+
+    test "alphanumeric characters" do
+      for unchanged <- ~w(a0123456789b abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ) do
+        assert css_escape(unchanged) == unchanged
+      end
+    end
+
+    test "space and exclamation mark" do
+      assert css_escape(" !xy") == "\\ \\!xy"
+    end
+
+    test "Unicode characters" do
+      astral = <<0x1D306::utf8>> # U+1D306 TETRAGRAM FOR CENTRE, outside the BMP
+      assert css_escape(astral) == astral
+    end
+  end
 end