diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index 4774c4bf19ee..3fdc7385d455 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2019. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -249,7 +249,7 @@ string_thing(_) -> "string".
-define(WHITE_SPACE(C),
is_integer(C) andalso
(C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)).
--define(DIGIT(C), C >= $0, C =< $9).
+-define(DIGIT(C), C >= $0 andalso C =< $9).
-define(CHAR(C), is_integer(C), C >= 0).
-define(UNICODE(C),
is_integer(C) andalso
@@ -379,7 +379,7 @@ scan1([$\%|Cs], St, Line, Col, Toks) when not St#erl_scan.comment ->
scan1([$\%=C|Cs], St, Line, Col, Toks) ->
scan_comment(Cs, St, Line, Col, Toks, [C]);
scan1([C|Cs], St, Line, Col, Toks) when ?DIGIT(C) ->
- scan_number(Cs, St, Line, Col, Toks, [C]);
+ scan_number(Cs, St, Line, Col, Toks, [C], no_underscore);
scan1("..."++Cs, St, Line, Col, Toks) ->
tok2(Cs, St, Line, Col, Toks, "...", '...', 3);
scan1(".."=Cs, _St, Line, Col, Toks) ->
@@ -938,27 +938,35 @@ escape_char($s) -> $\s; % \s = SPC
escape_char($d) -> $\d; % \d = DEL
escape_char(C) -> C.
-scan_number([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) ->
- scan_number(Cs, St, Line, Col, Toks, [C|Ncs]);
-scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) ->
- scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs]);
-scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs) ->
- {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}};
-scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0) ->
+scan_number(Cs, St, Line, Col, Toks, {Ncs, Us}) ->
+ scan_number(Cs, St, Line, Col, Toks, Ncs, Us).
+
+scan_number([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
+ scan_number(Cs, St, Line, Col, Toks, [C|Ncs], Us);
+scan_number([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when
+ ?DIGIT(Next) andalso ?DIGIT(Prev) ->
+ scan_number(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore);
+scan_number([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
+scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
+ scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs], Us);
+scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
+scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) ->
Ncs = lists:reverse(Ncs0),
- case catch list_to_integer(Ncs) of
+ case catch list_to_integer(remove_digit_separators(Ncs, Us)) of
B when B >= 2, B =< 1+$Z-$A+10 ->
Bcs = Ncs++[$#],
- scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs});
+ scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
B ->
Len = length(Ncs),
scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0)
end;
-scan_number([]=Cs, _St, Line, Col, Toks, Ncs) ->
- {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}};
-scan_number(Cs, St, Line, Col, Toks, Ncs0) ->
+scan_number([]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
+scan_number(Cs, St, Line, Col, Toks, Ncs0, Us) ->
Ncs = lists:reverse(Ncs0),
- case catch list_to_integer(Ncs) of
+ case catch list_to_integer(remove_digit_separators(Ncs, Us)) of
N when is_integer(N) ->
tok3(Cs, St, Line, Col, Toks, integer, Ncs, N);
_ ->
@@ -966,20 +974,33 @@ scan_number(Cs, St, Line, Col, Toks, Ncs0) ->
scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs)
end.
-scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs})
- when ?DIGIT(C), C < $0+B ->
- scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs});
-scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs})
- when C >= $A, B > 10, C < $A+B-10 ->
- scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs});
-scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs})
- when C >= $a, B > 10, C < $a+B-10 ->
- scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs});
-scan_based_int([]=Cs, _St, Line, Col, Toks, State) ->
- {more,{Cs,Col,Toks,Line,State,fun scan_based_int/6}};
-scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) ->
+remove_digit_separators(Number, no_underscore) ->
+ Number;
+remove_digit_separators(Number, with_underscore) ->
+ [C || C <- Number, C =/= $_].
+
+-define(BASED_DIGIT(C, B),
+ ((?DIGIT(C) andalso C < $0 + B)
+ orelse (C >= $A andalso B > 10 andalso C < $A + B - 10)
+ orelse (C >= $a andalso B > 10 andalso C < $a + B - 10))).
+
+scan_based_int(Cs, St, Line, Col, Toks, {B,NCs,BCs,Us}) ->
+ scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).
+
+scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
+ ?BASED_DIGIT(C, B) ->
+ scan_based_int(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us);
+scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
+ when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) ->
+ scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs,
+ with_underscore);
+scan_based_int([$_]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) ->
+ {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
+scan_based_int([]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) ->
+ {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
+scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, Bcs, Us) ->
Ncs = lists:reverse(Ncs0),
- case catch erlang:list_to_integer(Ncs, B) of
+ case catch erlang:list_to_integer(remove_digit_separators(Ncs, Us), B) of
N when is_integer(N) ->
tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N);
_ ->
@@ -988,32 +1009,52 @@ scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) ->
scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs)
end.
-scan_fraction([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) ->
- scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs]);
-scan_fraction([E|Cs], St, Line, Col, Toks, Ncs) when E =:= $e; E =:= $E ->
- scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs]);
-scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs) ->
- {more,{Cs,Col,Toks,Line,Ncs,fun scan_fraction/6}};
-scan_fraction(Cs, St, Line, Col, Toks, Ncs) ->
- float_end(Cs, St, Line, Col, Toks, Ncs).
-
-scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs) when C =:= $+; C =:= $- ->
- scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]);
-scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs) ->
- {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent_sign/6}};
-scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs) ->
- scan_exponent(Cs, St, Line, Col, Toks, Ncs).
-
-scan_exponent([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) ->
- scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]);
-scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs) ->
- {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent/6}};
-scan_exponent(Cs, St, Line, Col, Toks, Ncs) ->
- float_end(Cs, St, Line, Col, Toks, Ncs).
-
-float_end(Cs, St, Line, Col, Toks, Ncs0) ->
+scan_fraction(Cs, St, Line, Col, Toks, {Ncs,Us}) ->
+ scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us).
+
+scan_fraction([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
+ scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs], Us);
+scan_fraction([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when
+ ?DIGIT(Next) andalso ?DIGIT(Prev) ->
+ scan_fraction(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore);
+scan_fraction([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}};
+scan_fraction([E|Cs], St, Line, Col, Toks, Ncs, Us) when E =:= $e; E =:= $E ->
+ scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs], Us);
+scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}};
+scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us) ->
+ float_end(Cs, St, Line, Col, Toks, Ncs, Us).
+
+scan_exponent_sign(Cs, St, Line, Col, Toks, {Ncs, Us}) ->
+ scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us).
+
+scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs, Us) when
+ C =:= $+; C =:= $- ->
+ scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs], Us);
+scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent_sign/6}};
+scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us) ->
+ scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us).
+
+scan_exponent(Cs, St, Line, Col, Toks, {Ncs, Us}) ->
+ scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us).
+
+scan_exponent([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
+ scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs], Us);
+scan_exponent([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _) when
+ ?DIGIT(Next) andalso ?DIGIT(Prev) ->
+ scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore);
+scan_exponent([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}};
+scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs, Us) ->
+ {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}};
+scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) ->
+ float_end(Cs, St, Line, Col, Toks, Ncs, Us).
+
+float_end(Cs, St, Line, Col, Toks, Ncs0, Us) ->
Ncs = lists:reverse(Ncs0),
- case catch list_to_float(Ncs) of
+ case catch list_to_float(remove_digit_separators(Ncs, Us)) of
F when is_float(F) ->
tok3(Cs, St, Line, Col, Toks, float, Ncs, F);
_ ->
diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
index aca5b1e54f84..4ae3301ca065 100644
--- a/lib/stdlib/test/erl_scan_SUITE.erl
+++ b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1998-2017. All Rights Reserved.
+%% Copyright Ericsson AB 1998-2019. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -300,6 +300,32 @@ integers() ->
Ts = [{integer,{1,1},I}],
test_string(S, Ts)
end || S <- [[N] || N <- lists:seq($0, $9)] ++ ["2323","000"] ],
+ UnderscoreSamples =
+ [{"123_456", 123456},
+ {"123_456_789", 123456789},
+ {"1_2", 12}],
+ lists:foreach(
+ fun({S, I}) ->
+ test_string(S, [{integer, {1, 1}, I}])
+ end, UnderscoreSamples),
+ UnderscoreErrors =
+ ["123_",
+ "123__",
+ "123_456_",
+ "123__456",
+ "_123",
+ "__123"],
+ lists:foreach(
+ fun(S) ->
+ case erl_scan:string(S) of
+ {ok, [{integer, _, _}], _} ->
+ error({unexpected_integer, S});
+ _ ->
+ ok
+ end
+ end, UnderscoreErrors),
+ test_string("_123", [{var,{1,1},'_123'}]),
+ test_string("123_", [{integer,{1,1},123},{var,{1,4},'_'}]),
ok.
base_integers() ->
@@ -315,13 +341,19 @@ base_integers() ->
{error,{{1,1},erl_scan,{base,1}},{1,2}} =
erl_scan:string("1#000", {1,1}, []),
+ {error,{1,erl_scan,{base,1}},1} = erl_scan:string("1#000"),
+ {error,{{1,1},erl_scan,{base,1000}},{1,6}} =
+ erl_scan:string("1_000#000", {1,1}, []),
+
test_string("12#bc", [{integer,{1,1},11},{atom,{1,5},c}]),
[begin
Str = BS ++ "#" ++ S,
- {error,{1,erl_scan,{illegal,integer}},1} =
- erl_scan:string(Str)
- end || {BS,S} <- [{"3","3"},{"15","f"}, {"12","c"}] ],
+ E = 2 + length(BS),
+ {error,{{1,1},erl_scan,{illegal,integer}},{1,E}} =
+ erl_scan:string(Str, {1,1}, [])
+ end || {BS,S} <- [{"3","3"},{"15","f"},{"12","c"},
+ {"1_5","f"},{"1_2","c"}] ],
{ok,[{integer,1,239},{'@',1}],1} = erl_scan_string("16#ef@"),
{ok,[{integer,{1,1},239},{'@',{1,6}}],{1,7}} =
@@ -329,6 +361,36 @@ base_integers() ->
{ok,[{integer,{1,1},14},{atom,{1,5},g@}],{1,7}} =
erl_scan_string("16#eg@", {1,1}, []),
+ UnderscoreSamples =
+ [{"16#1234_ABCD_EF56", 16#1234abcdef56},
+ {"2#0011_0101_0011", 2#001101010011},
+ {"1_6#123ABC", 16#123abc},
+ {"1_6#123_ABC", 16#123abc},
+ {"16#abcdef", 16#ABCDEF}],
+ lists:foreach(
+ fun({S, I}) ->
+ test_string(S, [{integer, {1, 1}, I}])
+ end, UnderscoreSamples),
+ UnderscoreErrors =
+ ["16_#123ABC",
+ "16#123_",
+ "16#_123",
+ "16#ABC_",
+ "16#_ABC",
+ "2#_0101",
+ "1__6#ABC",
+ "16#AB__CD"],
+ lists:foreach(
+ fun(S) ->
+ case erl_scan:string(S) of
+ {ok, [{integer, _, _}], _} ->
+ error({unexpected_integer, S});
+ _ ->
+ ok
+ end
+ end, UnderscoreErrors),
+ test_string("16#123_", [{integer,{1,1},291},{var,{1,7},'_'}]),
+ test_string("_16#ABC", [{var,{1,1},'_16'},{'#',{1,4}},{var,{1,5},'ABC'}]),
ok.
floats() ->
@@ -344,12 +406,44 @@ floats() ->
erl_scan:string("1.0e400"),
{error,{{1,1},erl_scan,{illegal,float}},{1,8}} =
erl_scan:string("1.0e400", {1,1}, []),
+ {error,{{1,1},erl_scan,{illegal,float}},{1,9}} =
+ erl_scan:string("1.0e4_00", {1,1}, []),
[begin
{error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(S),
{error,{{1,1},erl_scan,{illegal,float}},{1,_}} =
erl_scan:string(S, {1,1}, [])
end || S <- ["1.14Ea"]],
+ UnderscoreSamples =
+ [{"123_456.789", 123456.789},
+ {"123.456_789", 123.456789},
+ {"1.2_345e10", 1.2345e10},
+ {"1.234e1_06", 1.234e106},
+ {"12_34.56_78e1_6", 1234.5678e16},
+ {"12_34.56_78e-1_8", 1234.5678e-18}],
+ lists:foreach(
+ fun({S, I}) ->
+ test_string(S, [{float, {1, 1}, I}])
+ end, UnderscoreSamples),
+ UnderscoreErrors =
+ ["123_.456",
+ "123._456",
+ "123.456_",
+ "123._",
+ "1._23e10",
+ "1.23e_10",
+ "1.23e10_"],
+ lists:foreach(
+ fun(S) ->
+ case erl_scan:string(S) of
+ {ok, [{float, _, _}], _} ->
+ error({unexpected_float, S});
+ _ ->
+ ok
+ end
+ end, UnderscoreErrors),
+ test_string("123._", [{integer,{1,1},123},{'.',{1,4}},{var,{1,5},'_'}]),
+ test_string("1.23_e10", [{float,{1,1},1.23},{var,{1,5},'_e10'}]),
ok.
dots() ->
diff --git a/lib/tools/emacs/erlang.el b/lib/tools/emacs/erlang.el
index 0b3a2319e2e9..7e16377d294b 100644
--- a/lib/tools/emacs/erlang.el
+++ b/lib/tools/emacs/erlang.el
@@ -1224,8 +1224,8 @@ This must be placed in front of `erlang-font-lock-keywords-vars'.")
1 'font-lock-type-face)
;; Don't highlight numerical constants.
(list (if erlang-regexp-modern-p
- "\\_<[0-9]+#\\([0-9a-zA-Z]+\\)"
- "\\<[0-9]+#\\([0-9a-zA-Z]+\\)")
+ "\\_<\\([0-9]+\\(_[0-9]+\\)*#[0-9a-zA-Z]+\\(_[0-9a-zA-Z]+\\)*\\)"
+ "\\<\\([0-9]+\\(_[0-9]+\\)*#[0-9a-zA-Z]+\\(_[0-9a-zA-Z]+\\)*\\)")
1 nil t)
(list (concat "^-record\\s-*(\\s-*" erlang-atom-regexp)
1 'font-lock-type-face))
diff --git a/system/doc/reference_manual/data_types.xml b/system/doc/reference_manual/data_types.xml
index 93c679357b36..8e3e181303c3 100644
--- a/system/doc/reference_manual/data_types.xml
+++ b/system/doc/reference_manual/data_types.xml
@@ -52,24 +52,33 @@
Integer with the base
Leading zeroes are ignored. Single underscore
Examples:
1> 42. 42 -2> $A. +2> -1_234_567_890. +-1234567890 +3> $A. 65 -3> $\n. +4> $\n. 10 -4> 2#101. +5> 2#101. 5 -5> 16#1f. +6> 16#1f. 31 -6> 2.3. +7> 16#4865_316F_774F_6C64. +5216630098191412324 +8> 2.3. 2.3 -7> 2.3e3. +9> 2.3e3. 2.3e3 -8> 2.3e-3. -0.0023+10> 2.3e-3. +0.0023 +11> 1_234.333_333 +1234.333333 +