From 23bc42a53bd46984bdca6962b368ce703ce1e45b Mon Sep 17 00:00:00 2001 From: Sergey Prokhorov Date: Thu, 18 Jul 2019 00:22:03 +0200 Subject: [PATCH 1/8] Allow underscores in numeric literals To make long integer literals more readable. Examples: * 123_456_789 * 123_456.789_123 * 16#123_ABC --- lib/stdlib/src/erl_scan.erl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 4774c4bf19ee..4e5a51ae0709 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -940,6 +940,8 @@ escape_char(C) -> C. scan_number([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_number(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_number([$_|Cs], St, Line, Col, Toks, Ncs) -> + scan_number(Cs, St, Line, Col, Toks, Ncs); scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs]); scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs) -> @@ -975,6 +977,8 @@ scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) when C >= $a, B > 10, C < $a+B-10 -> scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); +scan_based_int([$_|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) -> + scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs,Bcs}); scan_based_int([]=Cs, _St, Line, Col, Toks, State) -> {more,{Cs,Col,Toks,Line,State,fun scan_based_int/6}}; scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> @@ -990,6 +994,8 @@ scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> scan_fraction([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_fraction([$_|Cs], St, Line, Col, Toks, Ncs) -> + scan_fraction(Cs, St, Line, Col, Toks, Ncs); scan_fraction([E|Cs], St, Line, Col, Toks, Ncs) when E =:= $e; E =:= $E -> scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs]); scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs) -> @@ -1006,6 +1012,8 @@ scan_exponent_sign(Cs, St, 
Line, Col, Toks, Ncs) -> scan_exponent([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); +scan_exponent([$_|Cs], St, Line, Col, Toks, Ncs) -> + scan_exponent(Cs, St, Line, Col, Toks, Ncs); scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs) -> {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent/6}}; scan_exponent(Cs, St, Line, Col, Toks, Ncs) -> From 3fd74ac49fad11ed9e4f78932b36aec44d2d6514 Mon Sep 17 00:00:00 2001 From: Sergey Prokhorov Date: Fri, 19 Jul 2019 01:52:15 +0200 Subject: [PATCH 2/8] Only allow (single) separator between digits --- lib/stdlib/src/erl_scan.erl | 48 +++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 4e5a51ae0709..35ac83eb1355 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -249,7 +249,7 @@ string_thing(_) -> "string". -define(WHITE_SPACE(C), is_integer(C) andalso (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)). --define(DIGIT(C), C >= $0, C =< $9). +-define(DIGIT(C), C >= $0 andalso C =< $9). -define(CHAR(C), is_integer(C), C >= 0). -define(UNICODE(C), is_integer(C) andalso @@ -940,15 +940,16 @@ escape_char(C) -> C. 
scan_number([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_number(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_number([$_|Cs], St, Line, Col, Toks, Ncs) -> - scan_number(Cs, St, Line, Col, Toks, Ncs); +scan_number([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs) when + ?DIGIT(Next) andalso ?DIGIT(Prev) -> + scan_number(Cs, St, Line, Col, Toks, [Next,$_|Ncs]); scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs]); scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs) -> {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(Ncs) of + case catch list_to_integer(remove_digit_separators(Ncs)) of B when B >= 2, B =< 1+$Z-$A+10 -> Bcs = Ncs++[$#], scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs}); @@ -960,7 +961,7 @@ scan_number([]=Cs, _St, Line, Col, Toks, Ncs) -> {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; scan_number(Cs, St, Line, Col, Toks, Ncs0) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(Ncs) of + case catch list_to_integer(remove_digit_separators(Ncs)) of N when is_integer(N) -> tok3(Cs, St, Line, Col, Toks, integer, Ncs, N); _ -> @@ -968,22 +969,25 @@ scan_number(Cs, St, Line, Col, Toks, Ncs0) -> scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. -scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) - when ?DIGIT(C), C < $0+B -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); -scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) - when C >= $A, B > 10, C < $A+B-10 -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); -scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) - when C >= $a, B > 10, C < $a+B-10 -> +remove_digit_separators(Number) -> + [C || C <- Number, C =/= $_]. 
+ +-define(BASED_DIGIT(C, B), + ((?DIGIT(C) andalso C < $0 + B) + orelse (C >= $A andalso B > 10 andalso C < $A + B - 10) + orelse (C >= $a andalso B > 10 andalso C < $a + B - 10))). + +scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) when + ?BASED_DIGIT(C, B) -> scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); -scan_based_int([$_|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) -> - scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs,Bcs}); +scan_based_int([$_,Next|Cs], St, Line, Col, Toks, {B,[Prev|_]=Ncs,Bcs}) when + ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> + scan_based_int(Cs, St, Line, Col, Toks, {B,[Next,$_|Ncs],Bcs}); scan_based_int([]=Cs, _St, Line, Col, Toks, State) -> {more,{Cs,Col,Toks,Line,State,fun scan_based_int/6}}; scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> Ncs = lists:reverse(Ncs0), - case catch erlang:list_to_integer(Ncs, B) of + case catch erlang:list_to_integer(remove_digit_separators(Ncs), B) of N when is_integer(N) -> tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N); _ -> @@ -994,8 +998,9 @@ scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> scan_fraction([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_fraction([$_|Cs], St, Line, Col, Toks, Ncs) -> - scan_fraction(Cs, St, Line, Col, Toks, Ncs); +scan_fraction([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs) when + ?DIGIT(Next) andalso ?DIGIT(Prev) -> + scan_fraction(Cs, St, Line, Col, Toks, [Next,$_|Ncs]); scan_fraction([E|Cs], St, Line, Col, Toks, Ncs) when E =:= $e; E =:= $E -> scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs]); scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs) -> @@ -1012,8 +1017,9 @@ scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs) -> scan_exponent([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_exponent([$_|Cs], St, Line, Col, Toks, Ncs) -> - scan_exponent(Cs, St, Line, Col, Toks, Ncs); +scan_exponent([$_,Next|Cs], 
St, Line, Col, Toks, [Prev|_]=Ncs) when + ?DIGIT(Next) andalso ?DIGIT(Prev) -> + scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs]); scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs) -> {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent/6}}; scan_exponent(Cs, St, Line, Col, Toks, Ncs) -> @@ -1021,7 +1027,7 @@ scan_exponent(Cs, St, Line, Col, Toks, Ncs) -> float_end(Cs, St, Line, Col, Toks, Ncs0) -> Ncs = lists:reverse(Ncs0), - case catch list_to_float(Ncs) of + case catch list_to_float(remove_digit_separators(Ncs)) of F when is_float(F) -> tok3(Cs, St, Line, Col, Toks, float, Ncs, F); _ -> From 8a63b9ba5002c8a6fa2f21c60c24cd0324b99a36 Mon Sep 17 00:00:00 2001 From: Sergey Prokhorov Date: Mon, 2 Sep 2019 23:30:05 +0200 Subject: [PATCH 3/8] Add tests --- lib/stdlib/test/erl_scan_SUITE.erl | 80 ++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index aca5b1e54f84..1eb6656051d1 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -300,6 +300,30 @@ integers() -> Ts = [{integer,{1,1},I}], test_string(S, Ts) end || S <- [[N] || N <- lists:seq($0, $9)] ++ ["2323","000"] ], + UnderscoreSamples = + [{"123_456", 123456}, + {"123_456_789", 123456789}, + {"1_2", 12}], + lists:foreach( + fun({S, I}) -> + {ok, [{integer, 1, I}], _} = erl_scan_string(S) + end, UnderscoreSamples), + UnderscoreErrors = + ["123_", + "123__", + "123_456_", + "123__456", + "_123", + "__123"], + lists:foreach( + fun(S) -> + case erl_scan:string(S) of + {ok, [{integer, _, _}], _} -> + error({unexpected_integer, S}); + _ -> + ok + end + end, UnderscoreErrors), ok. 
base_integers() -> @@ -329,6 +353,34 @@ base_integers() -> {ok,[{integer,{1,1},14},{atom,{1,5},g@}],{1,7}} = erl_scan_string("16#eg@", {1,1}, []), + UnderscoreSamples = + [{"16#1234_ABCD_EF56", 16#1234abcdef56}, + {"2#0011_0101_0011", 2#001101010011}, + {"1_6#123ABC", 16#123abc}, + {"1_6#123_ABC", 16#123abc}, + {"16#abcdef", 16#ABCDEF}], + lists:foreach( + fun({S, I}) -> + {ok, [{integer, 1, I}], _} = erl_scan_string(S) + end, UnderscoreSamples), + UnderscoreErrors = + ["16_#123ABC", + "16#123_", + "16#_123", + "16#ABC_", + "16#_ABC", + "2#_0101", + "1__6#ABC", + "16#AB__CD"], + lists:foreach( + fun(S) -> + case erl_scan:string(S) of + {ok, [{integer, _, _}], _} -> + error({unexpected_integer, S}); + _ -> + ok + end + end, UnderscoreErrors), ok. floats() -> @@ -350,6 +402,34 @@ floats() -> erl_scan:string(S, {1,1}, []) end || S <- ["1.14Ea"]], + UnderscoreSamples = + [{"123_456.789", 123456.789}, + {"123.456_789", 123.456789}, + {"1.2_345e10", 1.2345e10}, + {"1.234e1_06", 1.234e106}, + {"12_34.56_78e1_6", 1234.5678e16}, + {"12_34.56_78e-1_8", 1234.5678e-18}], + lists:foreach( + fun({S, I}) -> + {ok, [{float, 1, I}], _} = erl_scan_string(S) + end, UnderscoreSamples), + UnderscoreErrors = + ["123_.456", + "123._456", + "123.456_", + "123._", + "1._23e10", + "1.23e_10", + "1.23e10_"], + lists:foreach( + fun(S) -> + case erl_scan:string(S) of + {ok, [{float, _, _}], _} -> + error({unexpected_float, S}); + _ -> + ok + end + end, UnderscoreErrors), ok. dots() -> From d76eec9ac43ba86f7d26748ab12e3afeb40339c2 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 25 Sep 2019 10:43:03 +0200 Subject: [PATCH 4/8] stdlib: Minor optimization of the Erlang scanner Remove underscores from numbers only when necessary. 
--- lib/stdlib/src/erl_scan.erl | 126 ++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 54 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 35ac83eb1355..4779f4c50845 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2017. All Rights Reserved. +%% Copyright Ericsson AB 1996-2019. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -379,7 +379,7 @@ scan1([$\%|Cs], St, Line, Col, Toks) when not St#erl_scan.comment -> scan1([$\%=C|Cs], St, Line, Col, Toks) -> scan_comment(Cs, St, Line, Col, Toks, [C]); scan1([C|Cs], St, Line, Col, Toks) when ?DIGIT(C) -> - scan_number(Cs, St, Line, Col, Toks, [C]); + scan_number(Cs, St, Line, Col, Toks, [C], no_underscore); scan1("..."++Cs, St, Line, Col, Toks) -> tok2(Cs, St, Line, Col, Toks, "...", '...', 3); scan1(".."=Cs, _St, Line, Col, Toks) -> @@ -938,30 +938,33 @@ escape_char($s) -> $\s; % \s = SPC escape_char($d) -> $\d; % \d = DEL escape_char(C) -> C. -scan_number([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_number(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_number([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs) when +scan_number(Cs, St, Line, Col, Toks, {Ncs, Us}) -> + scan_number(Cs, St, Line, Col, Toks, Ncs, Us). 
+ +scan_number([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_number(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_number([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when ?DIGIT(Next) andalso ?DIGIT(Prev) -> - scan_number(Cs, St, Line, Col, Toks, [Next,$_|Ncs]); -scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs]); -scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; -scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0) -> + scan_number(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs], Us); +scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; +scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(remove_digit_separators(Ncs)) of + case catch list_to_integer(remove_digit_separators(Ncs, Us)) of B when B >= 2, B =< 1+$Z-$A+10 -> Bcs = Ncs++[$#], - scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs}); + scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs}, no_underscore); B -> Len = length(Ncs), scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0) end; -scan_number([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_number/6}}; -scan_number(Cs, St, Line, Col, Toks, Ncs0) -> +scan_number([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; +scan_number(Cs, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_integer(remove_digit_separators(Ncs)) of + case catch list_to_integer(remove_digit_separators(Ncs, Us)) of N when is_integer(N) -> tok3(Cs, St, Line, Col, Toks, integer, Ncs, N); _ -> @@ -969,7 +972,9 @@ scan_number(Cs, St, Line, Col, Toks, Ncs0) -> 
scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. -remove_digit_separators(Number) -> +remove_digit_separators(Number, no_underscore) -> + Number; +remove_digit_separators(Number, with_underscore) -> [C || C <- Number, C =/= $_]. -define(BASED_DIGIT(C, B), @@ -977,17 +982,20 @@ remove_digit_separators(Number) -> orelse (C >= $A andalso B > 10 andalso C < $A + B - 10) orelse (C >= $a andalso B > 10 andalso C < $a + B - 10))). -scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}) when +scan_based_int(Cs, St, Line, Col, Toks, {State,Us}) -> + scan_based_int(Cs, St, Line, Col, Toks, State, Us). + +scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}, Us) when ?BASED_DIGIT(C, B) -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}); -scan_based_int([$_,Next|Cs], St, Line, Col, Toks, {B,[Prev|_]=Ncs,Bcs}) when + scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}, Us); +scan_based_int([$_,Next|Cs], St, Line, Col, Toks, {B,[Prev|_]=Ncs,Bcs}, _Us) when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[Next,$_|Ncs],Bcs}); -scan_based_int([]=Cs, _St, Line, Col, Toks, State) -> - {more,{Cs,Col,Toks,Line,State,fun scan_based_int/6}}; -scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> + scan_based_int(Cs, St, Line, Col, Toks, {B,[Next,$_|Ncs],Bcs}, with_underscore); +scan_based_int([]=Cs, _St, Line, Col, Toks, State, Us) -> + {more,{Cs,Col,Toks,Line,{State,Us},fun scan_based_int/6}}; +scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}, Us) -> Ncs = lists:reverse(Ncs0), - case catch erlang:list_to_integer(remove_digit_separators(Ncs), B) of + case catch erlang:list_to_integer(remove_digit_separators(Ncs, Us), B) of N when is_integer(N) -> tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N); _ -> @@ -996,38 +1004,48 @@ scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) -> scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. 
-scan_fraction([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_fraction([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs) when +scan_fraction(Cs, St, Line, Col, Toks, {Ncs,Us}) -> + scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_fraction([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_fraction(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_fraction([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when ?DIGIT(Next) andalso ?DIGIT(Prev) -> - scan_fraction(Cs, St, Line, Col, Toks, [Next,$_|Ncs]); -scan_fraction([E|Cs], St, Line, Col, Toks, Ncs) when E =:= $e; E =:= $E -> - scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs]); -scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_fraction/6}}; -scan_fraction(Cs, St, Line, Col, Toks, Ncs) -> - float_end(Cs, St, Line, Col, Toks, Ncs). - -scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs) when C =:= $+; C =:= $- -> - scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent_sign/6}}; -scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs) -> - scan_exponent(Cs, St, Line, Col, Toks, Ncs). - -scan_exponent([C|Cs], St, Line, Col, Toks, Ncs) when ?DIGIT(C) -> - scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs]); -scan_exponent([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs) when + scan_fraction(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_fraction([E|Cs], St, Line, Col, Toks, Ncs, Us) when E =:= $e; E =:= $E -> + scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs], Us); +scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}}; +scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us) -> + float_end(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_exponent_sign(Cs, St, Line, Col, Toks, {Ncs, Us}) -> + scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us). 
+ +scan_exponent_sign([C|Cs], St, Line, Col, Toks, Ncs, Us) when + C =:= $+; C =:= $- -> + scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_exponent_sign([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent_sign/6}}; +scan_exponent_sign(Cs, St, Line, Col, Toks, Ncs, Us) -> + scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_exponent(Cs, St, Line, Col, Toks, {Ncs, Us}) -> + scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us). + +scan_exponent([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> + scan_exponent(Cs, St, Line, Col, Toks, [C|Ncs], Us); +scan_exponent([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _) when ?DIGIT(Next) andalso ?DIGIT(Prev) -> - scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs]); -scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs) -> - {more,{Cs,Col,Toks,Line,Ncs,fun scan_exponent/6}}; -scan_exponent(Cs, St, Line, Col, Toks, Ncs) -> - float_end(Cs, St, Line, Col, Toks, Ncs). + scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}}; +scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) -> + float_end(Cs, St, Line, Col, Toks, Ncs, Us). -float_end(Cs, St, Line, Col, Toks, Ncs0) -> +float_end(Cs, St, Line, Col, Toks, Ncs0, Us) -> Ncs = lists:reverse(Ncs0), - case catch list_to_float(remove_digit_separators(Ncs)) of + case catch list_to_float(remove_digit_separators(Ncs, Us)) of F when is_float(F) -> tok3(Cs, St, Line, Col, Toks, float, Ncs, F); _ -> From 943829171d651e0883c2291c1f954e4332ca5df7 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 25 Sep 2019 14:46:26 +0200 Subject: [PATCH 5/8] stdlib: Minor optimization of the Erlang scanner Avoid some memory allocation when scanning based integers. (This has nothing to do with the new underscore-in-integers feature.) 
--- lib/stdlib/src/erl_scan.erl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 4779f4c50845..48fd1fb463ad 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -955,7 +955,7 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) -> case catch list_to_integer(remove_digit_separators(Ncs, Us)) of B when B >= 2, B =< 1+$Z-$A+10 -> Bcs = Ncs++[$#], - scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs}, no_underscore); + scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore); B -> Len = length(Ncs), scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0) @@ -982,18 +982,19 @@ remove_digit_separators(Number, with_underscore) -> orelse (C >= $A andalso B > 10 andalso C < $A + B - 10) orelse (C >= $a andalso B > 10 andalso C < $a + B - 10))). -scan_based_int(Cs, St, Line, Col, Toks, {State,Us}) -> - scan_based_int(Cs, St, Line, Col, Toks, State, Us). +scan_based_int(Cs, St, Line, Col, Toks, {B,NCs,BCs,Us}) -> + scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us). 
-scan_based_int([C|Cs], St, Line, Col, Toks, {B,Ncs,Bcs}, Us) when +scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when ?BASED_DIGIT(C, B) -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[C|Ncs],Bcs}, Us); -scan_based_int([$_,Next|Cs], St, Line, Col, Toks, {B,[Prev|_]=Ncs,Bcs}, _Us) when - ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> - scan_based_int(Cs, St, Line, Col, Toks, {B,[Next,$_|Ncs],Bcs}, with_underscore); -scan_based_int([]=Cs, _St, Line, Col, Toks, State, Us) -> - {more,{Cs,Col,Toks,Line,{State,Us},fun scan_based_int/6}}; -scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}, Us) -> + scan_based_int(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us); +scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us) + when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> + scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs, + with_underscore); +scan_based_int([]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) -> + {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; +scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, Bcs, Us) -> Ncs = lists:reverse(Ncs0), case catch erlang:list_to_integer(remove_digit_separators(Ncs, Us), B) of N when is_integer(N) -> From 569b72ffbdbbeb8190a0192aa10625c22aa805ad Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Thu, 26 Sep 2019 13:16:12 +0200 Subject: [PATCH 6/8] stdlib: Correct handling of scanner continuation And add a few more tests. 
--- lib/stdlib/src/erl_scan.erl | 8 ++++++++ lib/stdlib/test/erl_scan_SUITE.erl | 28 +++++++++++++++++++++------- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 48fd1fb463ad..3fdc7385d455 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -946,6 +946,8 @@ scan_number([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> scan_number([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when ?DIGIT(Next) andalso ?DIGIT(Prev) -> scan_number(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_number([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs], Us); scan_number([$.]=Cs, _St, Line, Col, Toks, Ncs, Us) -> @@ -992,6 +994,8 @@ scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us) when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs, with_underscore); +scan_based_int([$_]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) -> + {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; scan_based_int([]=Cs, _St, Line, Col, Toks, B, NCs, BCs, Us) -> {more,{Cs,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, Bcs, Us) -> @@ -1013,6 +1017,8 @@ scan_fraction([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> scan_fraction([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _Us) when ?DIGIT(Next) andalso ?DIGIT(Prev) -> scan_fraction(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_fraction([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}}; scan_fraction([E|Cs], St, Line, Col, Toks, Ncs, Us) when E =:= $e; E =:= $E -> scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs], Us); 
scan_fraction([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> @@ -1039,6 +1045,8 @@ scan_exponent([C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> scan_exponent([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _) when ?DIGIT(Next) andalso ?DIGIT(Prev) -> scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); +scan_exponent([$_]=Cs, _St, Line, Col, Toks, Ncs, Us) -> + {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}}; scan_exponent([]=Cs, _St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}}; scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) -> diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index 1eb6656051d1..4ae3301ca065 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2017. All Rights Reserved. +%% Copyright Ericsson AB 1998-2019. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -306,7 +306,7 @@ integers() -> {"1_2", 12}], lists:foreach( fun({S, I}) -> - {ok, [{integer, 1, I}], _} = erl_scan_string(S) + test_string(S, [{integer, {1, 1}, I}]) end, UnderscoreSamples), UnderscoreErrors = ["123_", @@ -324,6 +324,8 @@ integers() -> ok end end, UnderscoreErrors), + test_string("_123", [{var,{1,1},'_123'}]), + test_string("123_", [{integer,{1,1},123},{var,{1,4},'_'}]), ok. 
base_integers() -> @@ -339,13 +341,19 @@ base_integers() -> {error,{{1,1},erl_scan,{base,1}},{1,2}} = erl_scan:string("1#000", {1,1}, []), + {error,{1,erl_scan,{base,1}},1} = erl_scan:string("1#000"), + {error,{{1,1},erl_scan,{base,1000}},{1,6}} = + erl_scan:string("1_000#000", {1,1}, []), + test_string("12#bc", [{integer,{1,1},11},{atom,{1,5},c}]), [begin Str = BS ++ "#" ++ S, - {error,{1,erl_scan,{illegal,integer}},1} = - erl_scan:string(Str) - end || {BS,S} <- [{"3","3"},{"15","f"}, {"12","c"}] ], + E = 2 + length(BS), + {error,{{1,1},erl_scan,{illegal,integer}},{1,E}} = + erl_scan:string(Str, {1,1}, []) + end || {BS,S} <- [{"3","3"},{"15","f"},{"12","c"}, + {"1_5","f"},{"1_2","c"}] ], {ok,[{integer,1,239},{'@',1}],1} = erl_scan_string("16#ef@"), {ok,[{integer,{1,1},239},{'@',{1,6}}],{1,7}} = @@ -361,7 +369,7 @@ base_integers() -> {"16#abcdef", 16#ABCDEF}], lists:foreach( fun({S, I}) -> - {ok, [{integer, 1, I}], _} = erl_scan_string(S) + test_string(S, [{integer, {1, 1}, I}]) end, UnderscoreSamples), UnderscoreErrors = ["16_#123ABC", @@ -381,6 +389,8 @@ base_integers() -> ok end end, UnderscoreErrors), + test_string("16#123_", [{integer,{1,1},291},{var,{1,7},'_'}]), + test_string("_16#ABC", [{var,{1,1},'_16'},{'#',{1,4}},{var,{1,5},'ABC'}]), ok. 
floats() -> @@ -396,6 +406,8 @@ floats() -> erl_scan:string("1.0e400"), {error,{{1,1},erl_scan,{illegal,float}},{1,8}} = erl_scan:string("1.0e400", {1,1}, []), + {error,{{1,1},erl_scan,{illegal,float}},{1,9}} = + erl_scan:string("1.0e4_00", {1,1}, []), [begin {error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(S), {error,{{1,1},erl_scan,{illegal,float}},{1,_}} = @@ -411,7 +423,7 @@ floats() -> {"12_34.56_78e-1_8", 1234.5678e-18}], lists:foreach( fun({S, I}) -> - {ok, [{float, 1, I}], _} = erl_scan_string(S) + test_string(S, [{float, {1, 1}, I}]) end, UnderscoreSamples), UnderscoreErrors = ["123_.456", @@ -430,6 +442,8 @@ floats() -> ok end end, UnderscoreErrors), + test_string("123._", [{integer,{1,1},123},{'.',{1,4}},{var,{1,5},'_'}]), + test_string("1.23_e10", [{float,{1,1},1.23},{var,{1,5},'_e10'}]), ok. dots() -> From 2a129be8c151f2ae04f46d000d1e0556d918cae3 Mon Sep 17 00:00:00 2001 From: Sergey Prokhorov Date: Thu, 17 Oct 2019 00:21:35 +0200 Subject: [PATCH 7/8] Add underscore separator description to numeric literal docs --- system/doc/reference_manual/data_types.xml | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/system/doc/reference_manual/data_types.xml b/system/doc/reference_manual/data_types.xml index 93c679357b36..8e3e181303c3 100644 --- a/system/doc/reference_manual/data_types.xml +++ b/system/doc/reference_manual/data_types.xml @@ -52,24 +52,33 @@ Integer with the base base, that must be an integer in the range 2..36. +

Leading zeroes are ignored. A single underscore _ can be inserted + between digits as a visual separator.

Examples:

 1> 42.
 42
-2> $A.
+2> -1_234_567_890.
+-1234567890
+3> $A.
 65
-3> $\n.
+4> $\n.
 10
-4> 2#101.
+5> 2#101.
 5
-5> 16#1f.
+6> 16#1f.
 31
-6> 2.3.
+7> 16#4865_316F_774F_6C64.
+5216630098191412324
+8> 2.3.
 2.3
-7> 2.3e3.
+9> 2.3e3.
 2.3e3
-8> 2.3e-3.
-0.0023
+10> 2.3e-3. +0.0023 +11> 1_234.333_333. +1234.333333 +
From 9d545d4a78d887b662448950cc260674228071ae Mon Sep 17 00:00:00 2001 From: Sergey Prokhorov Date: Thu, 17 Oct 2019 02:18:47 +0200 Subject: [PATCH 8/8] Add underscores in numeric literals support to emacs mode --- lib/tools/emacs/erlang.el | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tools/emacs/erlang.el b/lib/tools/emacs/erlang.el index 0b3a2319e2e9..7e16377d294b 100644 --- a/lib/tools/emacs/erlang.el +++ b/lib/tools/emacs/erlang.el @@ -1224,8 +1224,8 @@ This must be placed in front of `erlang-font-lock-keywords-vars'.") 1 'font-lock-type-face) ;; Don't highlight numerical constants. (list (if erlang-regexp-modern-p - "\\_<[0-9]+#\\([0-9a-zA-Z]+\\)" - "\\<[0-9]+#\\([0-9a-zA-Z]+\\)") + "\\_<\\([0-9]+\\(_[0-9]+\\)*#[0-9a-zA-Z]+\\(_[0-9a-zA-Z]+\\)*\\)" + "\\<\\([0-9]+\\(_[0-9]+\\)*#[0-9a-zA-Z]+\\(_[0-9a-zA-Z]+\\)*\\)") 1 nil t) (list (concat "^-record\\s-*(\\s-*" erlang-atom-regexp) 1 'font-lock-type-face))