Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add Phoenix.HTML.css_escape/1 #452

Merged
merged 1 commit into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 72 additions & 1 deletion lib/phoenix_html.ex
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ defmodule Phoenix.HTML do
iex> html_escape("<hello>")
{:safe, [[[] | "&lt;"], "hello" | "&gt;"]}

iex> html_escape('<hello>')
iex> html_escape(~c"<hello>")
{:safe, ["&lt;", 104, 101, 108, 108, 111, "&gt;"]}

iex> html_escape(1)
Expand Down Expand Up @@ -337,4 +337,75 @@ defmodule Phoenix.HTML do
do: javascript_escape(t, <<acc::binary, h>>)

defp javascript_escape(<<>>, acc), do: acc

@doc """
Escapes a string so that it can be used as a CSS identifier.

The following rules are applied to the characters of `value`:

  * the NULL character (U+0000) is replaced by U+FFFD;
  * control characters (U+0001 to U+001F, and U+007F), a digit in first
    position, and a digit in second position right after a leading `-`
    are written as a backslash, the code point in hexadecimal and a
    trailing space;
  * a `-` that makes up the whole string is prefixed with a backslash;
  * ASCII letters and digits, `-`, `_` and any character at or above
    U+0080 are kept as they are;
  * every other character is prefixed with a backslash.

Raises `FunctionClauseError` when `value` is not a binary.

## Examples

    iex> css_escape("hello world")
    "hello\\\\ world"

    iex> css_escape("-123")
    "-\\\\31 23"

"""
@spec css_escape(String.t()) :: String.t()
def css_escape(value) when is_binary(value) do
  # The escaping rules are a direct Elixir translation of
  # https://github.com/mathiasbynens/CSS.escape/blob/master/css.escape.js

  # Work on code points rather than bytes so multi-byte characters are
  # classified as a single unit.
  codepoints = String.to_charlist(value)

  # The helper returns iodata; flatten it once at the very end.
  escaped = escape_css_chars(codepoints)
  IO.iodata_to_binary(escaped)
end

# Entry point of the escaping walk over a charlist; returns iodata.
#
# A string made of a single `-` (U+002D) with no second character is a
# special case: the hyphen itself has to be escaped.
defp escape_css_chars([?-]), do: ["\\-"]

# Anything else is processed one code point at a time, starting at
# position 0 with an empty accumulator.
defp escape_css_chars(chars), do: escape_css_chars(chars, 0, [])

# Walks the charlist, escaping one code point per step.
#
#   * first argument - the code points still to be processed
#   * `index`        - zero-based position of the current code point
#   * `acc`          - already escaped pieces, most recent first
#
# Returns the escaped pieces, in input order, as iodata.

# Input exhausted: the pieces were prepended, so restore their order.
defp escape_css_chars([], _index, acc), do: Enum.reverse(acc)

# NULL (U+0000) is replaced by the REPLACEMENT CHARACTER (U+FFFD).
defp escape_css_chars([0 | rest], index, acc),
  do: escape_css_chars(rest, index + 1, [<<0xFFFD::utf8>> | acc])

# Control characters (U+0001 to U+001F, U+007F) and a digit in first
# position are escaped as a code point.
# https://drafts.csswg.org/cssom/#escape-a-character-as-code-point
defp escape_css_chars([char | rest], index, acc)
     when char in 0x0001..0x001F or char == 0x007F or (index == 0 and char in ?0..?9),
     do: escape_css_chars(rest, index + 1, [["\\", Integer.to_string(char, 16), " "] | acc])

# A digit in second position is also escaped as a code point when the
# first character was a `-` (U+002D). The head of the accumulator is the
# piece emitted for the first character, which is "-" for an unescaped
# hyphen.
defp escape_css_chars([char | rest], 1, ["-" | _] = acc) when char in ?0..?9,
  do: escape_css_chars(rest, 2, [["\\", Integer.to_string(char, 16), " "] | acc])

# Characters at or above U+0080, `-`, `_`, digits and ASCII letters that
# were not caught by the clauses above are emitted as they are.
defp escape_css_chars([char | rest], index, acc)
     when char >= 0x0080 or char in [?-, ?_] or char in ?0..?9 or char in ?A..?Z or
            char in ?a..?z,
     do: escape_css_chars(rest, index + 1, [<<char::utf8>> | acc])

# Everything else is emitted as a backslash followed by the character.
# https://drafts.csswg.org/cssom/#escape-a-character
defp escape_css_chars([char | rest], index, acc),
  do: escape_css_chars(rest, index + 1, [["\\", <<char::utf8>>] | acc])
end
76 changes: 76 additions & 0 deletions test/phoenix_html_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,80 @@ defmodule Phoenix.HTMLTest do
assert attributes_escape([{"selected", true}]) |> safe_to_string() == ~s( selected)
end
end

describe "css_escape" do
  # U+0000 is swapped for U+FFFD wherever it appears in the input.
  test "null character" do
    cases = [
      {<<0>>, <<0xFFFD::utf8>>},
      {"a\u0000", "a\ufffd"},
      {"\u0000b", "\ufffdb"},
      {"a\u0000b", "a\ufffdb"}
    ]

    for {input, expected} <- cases do
      assert css_escape(input) == expected
    end
  end

  # U+FFFD itself is an ordinary non-ASCII character and passes through.
  test "replacement character" do
    for input <- [<<0xFFFD::utf8>>, "a\ufffd", "\ufffdb", "a\ufffdb"] do
      assert css_escape(input) == input
    end
  end

  # Only binaries are accepted by the function head.
  test "invalid input" do
    assert_raise(FunctionClauseError, fn -> css_escape(nil) end)
  end

  # Control characters are written as hexadecimal code points.
  test "control characters" do
    assert css_escape("\x01\x02\x1E\x1F") == "\\1 \\2 \\1E \\1F "
  end

  # A digit in first position is written as its code point (U+0030..U+0039).
  test "leading digit" do
    Enum.each(0..9, fn digit ->
      assert css_escape("#{digit}a") == "\\3#{digit} a"
    end)
  end

  # Digits that are not in first position are left alone.
  test "non-leading digit" do
    Enum.each(0..9, fn digit ->
      identifier = "a#{digit}b"
      assert css_escape(identifier) == identifier
    end)
  end

  # A digit right after a leading hyphen is written as its code point.
  test "leading hyphen and digit" do
    Enum.each(0..9, fn digit ->
      assert css_escape("-#{digit}a") == "-\\3#{digit} a"
    end)
  end

  # Only a hyphen that makes up the whole string is escaped.
  test "hyphens" do
    cases = [
      {"-", "\\-"},
      {"-a", "-a"},
      {"--", "--"},
      {"--a", "--a"}
    ]

    for {input, expected} <- cases do
      assert css_escape(input) == expected
    end
  end

  test "non-ASCII and special characters" do
    symbols = "🤷🏻‍♂️-_©"
    assert css_escape(symbols) == symbols

    # U+007F is escaped as a code point, whereas U+0080..U+009F are
    # expected to come back unchanged.
    c1_controls = for codepoint <- 0x80..0x9F, into: "", do: <<codepoint::utf8>>
    assert css_escape(<<0x7F>> <> c1_controls) == "\\7F " <> c1_controls

    latin1 = "\u00a0\u00a1\u00a2"
    assert css_escape(latin1) == latin1
  end

  # ASCII letters and non-leading digits need no escaping.
  test "alphanumeric characters" do
    identifiers = [
      "a0123456789b",
      "abcdefghijklmnopqrstuvwxyz",
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    ]

    for identifier <- identifiers do
      assert css_escape(identifier) == identifier
    end
  end

  # Printable ASCII punctuation is prefixed with a backslash.
  test "space and exclamation mark" do
    assert css_escape(" !xy") == "\\ \\!xy"
  end

  test "Unicode characters" do
    # astral symbol (U+1D306 TETRAGRAM FOR CENTRE)
    tetragram = <<0x1D306::utf8>>
    assert css_escape(tetragram) == tetragram
  end
end
end
Loading