diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index ab52f8d91d1..d46d7ce9594 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -77,7 +77,13 @@ extract_char(Rest, Buffer, Output, Line, Column, Scope, Interpol, Last) -> ?bidi(Char) -> Token = io_lib:format("\\u~4.16.0B", [Char]), Pre = "invalid bidirectional formatting character in string: ", - Pos = io_lib:format(". If you want to use such character, use it in its escaped ~ts form instead", [Token]), + Pos = io_lib:format(". If you want to use such a character, use it in its escaped ~ts form instead", [Token]), + {error, {Line, Column, {Pre, Pos}, Token}}; + + ?confusable(Char) -> + Token = io_lib:format("\\u~4.16.0B", [Char]), + Pre = "invalid confusable character in string: ", + Pos = io_lib:format(". If you want to use such a character, use it in its escaped ~ts form instead", [Token]), {error, {Line, Column, {Pre, Pos}, Token}}; true -> diff --git a/lib/elixir/src/elixir_tokenizer.hrl b/lib/elixir/src/elixir_tokenizer.hrl index 6857db4b834..7024a8f740c 100644 --- a/lib/elixir/src/elixir_tokenizer.hrl +++ b/lib/elixir/src/elixir_tokenizer.hrl @@ -29,3 +29,54 @@ C =:= 16#2068; C =:= 16#202C; C =:= 16#2069). 
+ +%% Confusables +%% Processed from http://www.unicode.org/reports/tr39 +%% Guard test used in extract_char (elixir_interpolation.erl) to reject these code points when unescaped in strings +-define(confusable(C), C =:= 16#3164; + C =:= 16#7FA; + C =:= 16#30FC; + C =:= 16#A7F7; + C =:= 16#1173; + C =:= 16#3161; + C =:= 16#A4F9; + C =:= 16#2D0; + C =:= 16#A4FD; + C =:= 16#1C3; + C =:= 16#2D51; + C =:= 16#294; + C =:= 16#97D; + C =:= 16#A6EB; + C =:= 16#A4F8; + C =:= 16#A78F; + C =:= 16#1427; + C =:= 16#2B9; + C =:= 16#2C8; + C =:= 16#2CA; + C =:= 16#2CB; + C =:= 16#2BB; + C =:= 16#2BD; + C =:= 16#2BC; + C =:= 16#2BE; + C =:= 16#A78C; + C =:= 16#7F4; + C =:= 16#7F5; + C =:= 16#144A; + C =:= 16#16CC; + C =:= 16#2BA; + C =:= 16#2EE; + C =:= 16#5F2; + C =:= 16#1031F; + C =:= 16#3033; + C =:= 16#30CE; + C =:= 16#4E3F; + C =:= 16#4E36; + C =:= 16#30FD; + C =:= 16#A778; + C =:= 16#1029B; + C =:= 16#1438; + C =:= 16#16B2; + C =:= 16#304F; + C =:= 16#21FE8; + C =:= 16#1433; + C =:= 16#16F3F). diff --git a/lib/elixir/test/elixir/kernel/errors_test.exs b/lib/elixir/test/elixir/kernel/errors_test.exs index 94aa18c38af..aef2fe6a7eb 100644 --- a/lib/elixir/test/elixir/kernel/errors_test.exs +++ b/lib/elixir/test/elixir/kernel/errors_test.exs @@ -816,7 +816,7 @@ defmodule Kernel.ErrorsTest do 'x = 8; <> = <>' end - test "invalid bidi in source" do + test "invalid bidi or confusable in source" do assert_eval_raise SyntaxError, ~r"nofile:1:1: invalid bidirectional formatting character in comment: \\u202A", '# This is a \u202A' @@ -826,12 +826,18 @@ defmodule Kernel.ErrorsTest do 'foo. # This is a \u202A' assert_eval_raise SyntaxError, - ~r"nofile:1:12: invalid bidirectional formatting character in string: \\u202A. If you want to use such character, use it in its escaped \\u202A form instead", + ~r"nofile:1:12: invalid bidirectional formatting character in string: \\u202A. 
If you want to use such a character, use it in its escaped \\u202A form instead", '"this is a \u202A"' assert_eval_raise SyntaxError, - ~r"nofile:1:13: invalid bidirectional formatting character in string: \\u202A. If you want to use such character, use it in its escaped \\u202A form instead", + ~r"nofile:1:13: invalid bidirectional formatting character in string: \\u202A. If you want to use such a character, use it in its escaped \\u202A form instead", '"this is a \\\u202A"' + + assert_eval_raise SyntaxError, + ~r"nofile:1:12: invalid confusable character in string: \\u3164. If you want to use such a character, use it in its escaped \\u3164 form instead", + '"this is a \u3164"' + + Code.eval_string("# confusables are 𝖿𝗂ոе in comments, even hangul filler: #{"\u3164"}") end test "function head with guard" do