From cdbc402aa48bbc1e914a770bb2e7e47833342669 Mon Sep 17 00:00:00 2001
From: Steffen Deusch
Date: Mon, 21 Oct 2024 14:04:32 +0200
Subject: [PATCH] add Phoenix.HTML.css_escape/1

---
 lib/phoenix_html.ex        | 73 +++++++++++++++++++++++++++++++++++-
 test/phoenix_html_test.exs | 76 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/lib/phoenix_html.ex b/lib/phoenix_html.ex
index 0494ea5..218b6c0 100644
--- a/lib/phoenix_html.ex
+++ b/lib/phoenix_html.ex
@@ -119,7 +119,7 @@ defmodule Phoenix.HTML do
       iex> html_escape("<hello>")
       {:safe, [[[] | "&lt;"], "hello" | "&gt;"]}
 
-      iex> html_escape('<hello>')
+      iex> html_escape(~c"<hello>")
       {:safe, ["&lt;", 104, 101, 108, 108, 111, "&gt;"]}
 
       iex> html_escape(1)
@@ -337,4 +337,75 @@ defmodule Phoenix.HTML do
     do: javascript_escape(t, <<acc::binary, h>>)
 
   defp javascript_escape(<<>>, acc), do: acc
+
+  @doc """
+  Escapes a string for use as a CSS identifier.
+
+  ## Examples
+
+      iex> css_escape("hello world")
+      "hello\\\\ world"
+
+      iex> css_escape("-123")
+      "-\\\\31 23"
+
+  """
+  @spec css_escape(String.t()) :: String.t()
+  def css_escape(value) when is_binary(value) do
+    # This is a direct translation of
+    # https://github.com/mathiasbynens/CSS.escape/blob/master/css.escape.js
+    # into Elixir.
+    value
+    |> String.to_charlist()
+    |> escape_css_chars()
+    |> IO.iodata_to_binary()
+  end
+
+  defp escape_css_chars(chars) do
+    case chars do
+      # If the character is the first character and is a `-` (U+002D), and
+      # there is no second character, […]
+      [?- | []] -> ["\\-"]
+      _ -> escape_css_chars(chars, 0, [])
+    end
+  end
+
+  defp escape_css_chars([], _, acc), do: Enum.reverse(acc)
+
+  defp escape_css_chars([char | rest], index, acc) do
+    escaped =
+      cond do
+        # If the character is NULL (U+0000), then the REPLACEMENT CHARACTER
+        # (U+FFFD).
+        char == 0 ->
+          <<0xFFFD::utf8>>
+
+        # If the character is in the range [\1-\1F] (U+0001 to U+001F) or is
+        # U+007F,
+        # if the character is the first character and is in the range [0-9]
+        # (U+0030 to U+0039),
+        # if the character is the second character and is in the range [0-9]
+        # (U+0030 to U+0039) and the first character is a `-` (U+002D),
+        char in 0x0001..0x001F or char == 0x007F or
+          (index == 0 and char in ?0..?9) or
+            (index == 1 and char in ?0..?9 and hd(acc) == "-") ->
+          # https://drafts.csswg.org/cssom/#escape-a-character-as-code-point
+          ["\\", Integer.to_string(char, 16), " "]
+
+        # If the character is not handled by one of the above rules and is
+        # greater than or equal to U+0080, is `-` (U+002D) or `_` (U+005F), or
+        # is in one of the ranges [0-9] (U+0030 to U+0039), [A-Z] (U+0041 to
+        # U+005A), or [a-z] (U+0061 to U+007A), […]
+        char >= 0x0080 or char in [?-, ?_] or char in ?0..?9 or char in ?A..?Z or char in ?a..?z ->
+          # the character itself
+          <<char::utf8>>
+
+        true ->
+          # Otherwise, the escaped character.
+          # https://drafts.csswg.org/cssom/#escape-a-character
+          ["\\", <<char::utf8>>]
+      end
+
+    escape_css_chars(rest, index + 1, [escaped | acc])
+  end
 end
diff --git a/test/phoenix_html_test.exs b/test/phoenix_html_test.exs
index 32f3153..30b5c86 100644
--- a/test/phoenix_html_test.exs
+++ b/test/phoenix_html_test.exs
@@ -142,4 +142,80 @@ defmodule Phoenix.HTMLTest do
       assert attributes_escape([{"selected", true}]) |> safe_to_string() == ~s( selected)
     end
   end
+
+  describe "css_escape" do
+    test "null character" do
+      assert css_escape(<<0>>) == <<0xFFFD::utf8>>
+      assert css_escape("a\u0000") == "a\ufffd"
+      assert css_escape("\u0000b") == "\ufffdb"
+      assert css_escape("a\u0000b") == "a\ufffdb"
+    end
+
+    test "replacement character" do
+      assert css_escape(<<0xFFFD::utf8>>) == <<0xFFFD::utf8>>
+      assert css_escape("a\ufffd") == "a\ufffd"
+      assert css_escape("\ufffdb") == "\ufffdb"
+      assert css_escape("a\ufffdb") == "a\ufffdb"
+    end
+
+    test "invalid input" do
+      assert_raise FunctionClauseError, fn -> css_escape(nil) end
+    end
+
+    test "control characters" do
+      assert css_escape(<<0x01, 0x02, 0x1E, 0x1F>>) == "\\1 \\2 \\1E \\1F "
+    end
+
+    test "leading digit" do
+      for {digit, expected} <- Enum.zip(0..9, ~w(30 31 32 33 34 35 36 37 38 39)) do
+        assert css_escape("#{digit}a") == "\\#{expected} a"
+      end
+    end
+
+    test "non-leading digit" do
+      for digit <- 0..9 do
+        assert css_escape("a#{digit}b") == "a#{digit}b"
+      end
+    end
+
+    test "leading hyphen and digit" do
+      for {digit, expected} <- Enum.zip(0..9, ~w(30 31 32 33 34 35 36 37 38 39)) do
+        assert css_escape("-#{digit}a") == "-\\#{expected} a"
+      end
+    end
+
+    test "hyphens" do
+      assert css_escape("-") == "\\-"
+      assert css_escape("-a") == "-a"
+      assert css_escape("--") == "--"
+      assert css_escape("--a") == "--a"
+    end
+
+    test "non-ASCII and special characters" do
+      assert css_escape("🤷🏻‍♂️-_©") == "🤷🏻‍♂️-_©"
+
+      assert css_escape(
+               <<0x7F,
+                 "\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f">>
+             ) ==
+               "\\7F \u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f"
+
+      assert css_escape("\u00a0\u00a1\u00a2") == "\u00a0\u00a1\u00a2"
+    end
+
+    test "alphanumeric characters" do
+      assert css_escape("a0123456789b") == "a0123456789b"
+      assert css_escape("abcdefghijklmnopqrstuvwxyz") == "abcdefghijklmnopqrstuvwxyz"
+      assert css_escape("ABCDEFGHIJKLMNOPQRSTUVWXYZ") == "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    end
+
+    test "space and exclamation mark" do
+      assert css_escape(<<0x20, 0x21, 0x78, 0x79>>) == "\\ \\!xy"
+    end
+
+    test "Unicode characters" do
+      # astral symbol (U+1D306 TETRAGRAM FOR CENTRE)
+      assert css_escape(<<0x1D306::utf8>>) == <<0x1D306::utf8>>
+    end
+  end
 end