diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 4774c4bf19ee..3fdc7385d455 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2017. All Rights Reserved. +%% Copyright Ericsson AB 1996-2019. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -249,7 +249,7 @@ string_thing(_) -> "string". -define(WHITE_SPACE(C), is_integer(C) andalso (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)). --define(DIGIT(C), C >= $0, C =< $9). +-define(DIGIT(C), C >= $0 andalso C =< $9). -define(CHAR(C), is_integer(C), C >= 0). -define(UNICODE(C), is_integer(C) andalso @@ -379,7 +379,7 @@ scan1([$\%|Cs], St, Line, Col, Toks) when not St#erl_scan.comment -> scan1([$\%=C|Cs], St, Line, Col, Toks) -> scan_comment(Cs, St, Line, Col, Toks, [C]); scan1([C|Cs], St, Line, Col, Toks) when ?DIGIT(C) -> - scan_number(Cs, St, Line, Col, Toks, [C]); + scan_number(Cs, St, Line, Col, Toks, [C], no_underscore); scan1("..."++Cs, St, Line, Col, Toks) -> tok2(Cs, St, Line, Col, Toks, "...", '...', 3); scan1(".."=Cs, _St, Line, Col, Toks) -> @@ -938,27 +938,35 @@ escape_char($s) -> $\s; % \s = SPC escape_char($d) -> $\d; % \d = DEL escape_char(C) -> C. -scan_number([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_number(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs]); -scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; -scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0) -> +scan_number(Cs, St, Line, Col, Toks, {Ncs, Us}) -> + scan_number(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_number([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_number(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_number([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when + ?DIGIT(Next) andalso ?DIGIT(Prev) -> + scan_number(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_number([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; +scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs], Us); +scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; +scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(Ncs) of + case catch list_to_integer(remove_digit_separators(Ncs, Us)) of B when B >= 2, B =< 1+$Z-$A+10 -> Bcs = Ncs++[$#], - scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs}); + scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore); B -> Len = length(Ncs), scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0) end; -scan_number([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; -scan_number(Cs, St, Line, Col, Toks, Ncs0) -> +scan_number([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; +scan_number(Cs, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(Ncs) of + case catch list_to_integer(remove_digit_separators(Ncs, Us)) of N when is_integer(N) -> tok3(Cs, St, Line, Col, Toks, integer, Ncs, N); _ -> @@ -966,20 +974,33 @@ scan_number(Cs, St, Line, Col, Toks, Ncs0) -> scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. -scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) - when ?DIGIT(C), C < $0+B -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); -scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) - when C >= $A, B > 10, C < $A+B-10 -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); -scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) - when C >= $a, B > 10, C < $a+B-10 -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); -scan_based_int([]=Cs, _St, Line, Col, Toks, State) -> - {more,{Cs,Col,Toks,Line,State,fun scan_based_int/6}}; -scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> +remove_digit_separators(Number, no_underscore) -> + Number; +remove_digit_separators(Number, with_underscore) -> + [C || C <- Number, C =/= $_]. + +-define(BASED_DIGIT(C, B), + ((?DIGIT(C) andalso C < $0 + B) + orelse (C >= $A andalso B > 10 andalso C < $A + B - 10) + orelse (C >= $a andalso B > 10 andalso C < $a + B - 10))). + +scan_based_int(Cs, St, Line, Col, Toks, {B,NCs,BCs,Us}) -> + scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us). + +scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when + ?BASED_DIGIT(C, B) -> + scan_based_int(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us); +scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us) + when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> + scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs, + with_underscore); +scan_based_int([$_]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) -> + {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; +scan_based_int([]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) -> + {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; +scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, Bcs, Us) -> Ncs = lists:reverse(Ncs0), - case catch erlang:list_to_integer(Ncs, B) of + case catch erlang:list_to_integer(remove_digit_separators(Ncs, Us), B) of N when is_integer(N) -> tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N); _ -> @@ -988,32 +1009,52 @@ scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. -scan_fraction([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_fraction([E|Cs], St, Line, Col, Toks, Ncs) when E =:= $e; E =:= $E -> - scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs]); -scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_fraction/6}}; -scan_fraction(Cs, St, Line, Col, Toks, Ncs) -> - float_end(Cs, St, Line, Col, Toks, Ncs). - -scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs) when C =:= $+; C =:= $- -> - scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent_sign/6}}; -scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs) -> - scan_exponent(Cs, St, Line, Col, Toks, Ncs). - -scan_exponent([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent/6}}; -scan_exponent(Cs, St, Line, Col, Toks, Ncs) -> - float_end(Cs, St, Line, Col, Toks, Ncs). - -float_end(Cs, St, Line, Col, Toks, Ncs0) -> +scan_fraction(Cs, St, Line, Col, Toks, {Ncs,Us}) -> + scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_fraction([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_fraction([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when + ?DIGIT(Next) andalso ?DIGIT(Prev) -> + scan_fraction(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_fraction([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}}; +scan_fraction([E|Cs], St, Line, Col, Toks, Ncs, Us) when E =:= $e; E =:= $E -> + scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs], Us); +scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}}; +scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us) -> + float_end(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_exponent_sign(Cs, St, Line, Col, Toks, {Ncs, Us}) -> + scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs, Us) when + C =:= $+; C =:= $- -> + scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent_sign/6}}; +scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us) -> + scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_exponent(Cs, St, Line, Col, Toks, {Ncs, Us}) -> + scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_exponent([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_exponent([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _) when + ?DIGIT(Next) andalso ?DIGIT(Prev) -> + scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_exponent([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}}; +scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}}; +scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) -> + float_end(Cs, St, Line, Col, Toks, Ncs, Us). + +float_end(Cs, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_float(Ncs) of + case catch list_to_float(remove_digit_separators(Ncs, Us)) of F when is_float(F) -> tok3(Cs, St, Line, Col, Toks, float, Ncs, F); _ -> diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index aca5b1e54f84..4ae3301ca065 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2017. All Rights Reserved. +%% Copyright Ericsson AB 1998-2019. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -300,6 +300,32 @@ integers() -> Ts = [{integer,{1,1},I}], test_string(S, Ts) end || S <- [[N] || N <- lists:seq($0, $9)] ++ ["2323","000"] ], + UnderscoreSamples = + [{"123_456", 123456}, + {"123_456_789", 123456789}, + {"1_2", 12}], + lists:foreach( + fun({S, I}) -> + test_string(S, [{integer, {1, 1}, I}]) + end, UnderscoreSamples), + UnderscoreErrors = + ["123_", + "123__", + "123_456_", + "123__456", + "_123", + "__123"], + lists:foreach( + fun(S) -> + case erl_scan:string(S) of + {ok, [{integer, _, _}], _} -> + error({unexpected_integer, S}); + _ -> + ok + end + end, UnderscoreErrors), + test_string("_123", [{var,{1,1},'_123'}]), + test_string("123_", [{integer,{1,1},123},{var,{1,4},'_'}]), ok. base_integers() -> @@ -315,13 +341,19 @@ base_integers() -> {error,{{1,1},erl_scan,{base,1}},{1,2}} = erl_scan:string("1#000", {1,1}, []), + {error,{1,erl_scan,{base,1}},1} = erl_scan:string("1#000"), + {error,{{1,1},erl_scan,{base,1000}},{1,6}} = + erl_scan:string("1_000#000", {1,1}, []), + test_string("12#bc", [{integer,{1,1},11},{atom,{1,5},c}]), [begin Str = BS ++ "#" ++ S, - {error,{1,erl_scan,{illegal,integer}},1} = - erl_scan:string(Str) - end || {BS,S} <- [{"3","3"},{"15","f"}, {"12","c"}] ], + E = 2 + length(BS), + {error,{{1,1},erl_scan,{illegal,integer}},{1,E}} = + erl_scan:string(Str, {1,1}, []) + end || {BS,S} <- [{"3","3"},{"15","f"},{"12","c"}, + {"1_5","f"},{"1_2","c"}] ], {ok,[{integer,1,239},{'@',1}],1} = erl_scan_string("16#ef@"), {ok,[{integer,{1,1},239},{'@',{1,6}}],{1,7}} = @@ -329,6 +361,36 @@ base_integers() -> {ok,[{integer,{1,1},14},{atom,{1,5},g@}],{1,7}} = erl_scan_string("16#eg@", {1,1}, []), + UnderscoreSamples = + [{"16#1234_ABCD_EF56", 16#1234abcdef56}, + {"2#0011_0101_0011", 2#001101010011}, + {"1_6#123ABC", 16#123abc}, + {"1_6#123_ABC", 16#123abc}, + {"16#abcdef", 16#ABCDEF}], + lists:foreach( + fun({S, I}) -> + test_string(S, [{integer, {1, 1}, I}]) + end, UnderscoreSamples), + UnderscoreErrors = + ["16_#123ABC", + "16#123_", + "16#_123", + "16#ABC_", + "16#_ABC", + "2#_0101", + "1__6#ABC", + "16#AB__CD"], + lists:foreach( + fun(S) -> + case erl_scan:string(S) of + {ok, [{integer, _, _}], _} -> + error({unexpected_integer, S}); + _ -> + ok + end + end, UnderscoreErrors), + test_string("16#123_", [{integer,{1,1},291},{var,{1,7},'_'}]), + test_string("_16#ABC", [{var,{1,1},'_16'},{'#',{1,4}},{var,{1,5},'ABC'}]), ok. floats() -> @@ -344,12 +406,44 @@ floats() -> erl_scan:string("1.0e400"), {error,{{1,1},erl_scan,{illegal,float}},{1,8}} = erl_scan:string("1.0e400", {1,1}, []), + {error,{{1,1},erl_scan,{illegal,float}},{1,9}} = + erl_scan:string("1.0e4_00", {1,1}, []), [begin {error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(S), {error,{{1,1},erl_scan,{illegal,float}},{1,_}} = erl_scan:string(S, {1,1}, []) end || S <- ["1.14Ea"]], + UnderscoreSamples = + [{"123_456.789", 123456.789}, + {"123.456_789", 123.456789}, + {"1.2_345e10", 1.2345e10}, + {"1.234e1_06", 1.234e106}, + {"12_34.56_78e1_6", 1234.5678e16}, + {"12_34.56_78e-1_8", 1234.5678e-18}], + lists:foreach( + fun({S, I}) -> + test_string(S, [{float, {1, 1}, I}]) + end, UnderscoreSamples), + UnderscoreErrors = + ["123_.456", + "123._456", + "123.456_", + "123._", + "1._23e10", + "1.23e_10", + "1.23e10_"], + lists:foreach( + fun(S) -> + case erl_scan:string(S) of + {ok, [{float, _, _}], _} -> + error({unexpected_float, S}); + _ -> + ok + end + end, UnderscoreErrors), + test_string("123._", [{integer,{1,1},123},{'.',{1,4}},{var,{1,5},'_'}]), + test_string("1.23_e10", [{float,{1,1},1.23},{var,{1,5},'_e10'}]), ok. dots() -> diff --git a/lib/tools/emacs/erlang.el b/lib/tools/emacs/erlang.el index 0b3a2319e2e9..7e16377d294b 100644 --- a/lib/tools/emacs/erlang.el +++ b/lib/tools/emacs/erlang.el @@ -1224,8 +1224,8 @@ This must be placed in front of `erlang-font-lock-keywords-vars'.") 1 'font-lock-type-face) ;; Don't highlight numerical constants. (list (if erlang-regexp-modern-p - "\\_<[0-9]+#\\([0-9a-zA-Z]+\\)" - "\\<[0-9]+#\\([0-9a-zA-Z]+\\)") + "\\_<\\([0-9]+\\(_[0-9]+\\)*#[0-9a-zA-Z]+\\(_[0-9a-zA-Z]+\\)*\\)" + "\\<\\([0-9]+\\(_[0-9]+\\)*#[0-9a-zA-Z]+\\(_[0-9a-zA-Z]+\\)*\\)") 1 nil t) (list (concat "^-record\\s-*(\\s-*" erlang-atom-regexp) 1 'font-lock-type-face)) diff --git a/system/doc/reference_manual/data_types.xml b/system/doc/reference_manual/data_types.xml index 93c679357b36..8e3e181303c3 100644 --- a/system/doc/reference_manual/data_types.xml +++ b/system/doc/reference_manual/data_types.xml @@ -52,24 +52,33 @@ Integer with the base base, that must be an integer in the range 2..36. +

Leading zeroes are ignored. Single underscore _ can be inserted + between digits as a visual separator.

Examples:

 1> 42.
 42
-2> $A.
+2> -1_234_567_890.
+-1234567890
+3> $A.
 65
-3> $\n.
+4> $\n.
 10
-4> 2#101.
+5> 2#101.
 5
-5> 16#1f.
+6> 16#1f.
 31
-6> 2.3.
+7> 16#4865_316F_774F_6C64.
+5216630098191412324
+8> 2.3.
 2.3
-7> 2.3e3.
+9> 2.3e3.
 2.3e3
-8> 2.3e-3.
-0.0023
+10> 2.3e-3. +0.0023 +11> 1_234.333_333 +1234.333333 +