Skip to content

Commit 8b41315

Browse files
committed
handle arbitrary length _* in e _* suffix
1 parent 73e94f8 commit 8b41315

File tree

1 file changed

+67
-28
lines changed
  • compiler/rustc_lexer/src

1 file changed

+67
-28
lines changed

compiler/rustc_lexer/src/lib.rs

+67-28
Original file line numberDiff line numberDiff line change
@@ -361,17 +361,6 @@ pub fn is_ident(string: &str) -> bool {
361361
}
362362
}
363363

364-
/// Is the character after the 'e' in a number valid for an exponent?
365-
///
366-
/// If not, the number will be passed to the parser with a suffix beginning with 'e' rather
367-
/// than an exponent (and will be rejected there).
368-
///
369-
/// The way this function is written means that `1e_` is considered an invalid exponent
370-
/// rather than a number with suffix.
371-
fn is_exponent_second(ch: char) -> bool {
372-
matches!(ch, '0'..='9' | '_' | '+' | '-')
373-
}
374-
375364
impl Cursor<'_> {
376365
/// Parses a token from the input string.
377366
pub fn advance_token(&mut self) -> Token {
@@ -425,9 +414,7 @@ impl Cursor<'_> {
425414

426415
// Numeric literal.
427416
c @ '0'..='9' => {
428-
let literal_kind = self.number(c);
429-
let suffix_start = self.pos_within_token();
430-
self.eat_literal_suffix();
417+
let (literal_kind, suffix_start) = self.number(c);
431418
TokenKind::Literal { kind: literal_kind, suffix_start }
432419
}
433420

@@ -624,7 +611,7 @@ impl Cursor<'_> {
624611
}
625612
}
626613

627-
fn number(&mut self, first_digit: char) -> LiteralKind {
614+
fn number(&mut self, first_digit: char) -> (LiteralKind, u32) {
628615
debug_assert!('0' <= self.prev() && self.prev() <= '9');
629616
let mut base = Base::Decimal;
630617
if first_digit == '0' {
@@ -634,21 +621,27 @@ impl Cursor<'_> {
634621
base = Base::Binary;
635622
self.bump();
636623
if !self.eat_decimal_digits() {
637-
return Int { base, empty_int: true };
624+
let suffix_start = self.pos_within_token();
625+
self.eat_literal_suffix();
626+
return (Int { base, empty_int: true }, suffix_start);
638627
}
639628
}
640629
'o' => {
641630
base = Base::Octal;
642631
self.bump();
643632
if !self.eat_decimal_digits() {
644-
return Int { base, empty_int: true };
633+
let suffix_start = self.pos_within_token();
634+
self.eat_literal_suffix();
635+
return (Int { base, empty_int: true }, suffix_start);
645636
}
646637
}
647638
'x' => {
648639
base = Base::Hexadecimal;
649640
self.bump();
650641
if !self.eat_hexadecimal_digits() {
651-
return Int { base, empty_int: true };
642+
let suffix_start = self.pos_within_token();
643+
self.eat_literal_suffix();
644+
return (Int { base, empty_int: true }, suffix_start);
652645
}
653646
}
654647
// Not a base prefix; consume additional digits.
@@ -660,40 +653,85 @@ impl Cursor<'_> {
660653
'.' | 'e' | 'E' => {}
661654

662655
// Just a 0.
663-
_ => return Int { base, empty_int: false },
656+
_ => {
657+
let suffix_start = self.pos_within_token();
658+
self.eat_literal_suffix();
659+
return (Int { base, empty_int: false }, suffix_start);
660+
}
664661
}
665662
} else {
666663
// No base prefix, parse number in the usual way.
667664
self.eat_decimal_digits();
668665
};
669666

670-
match self.first() {
667+
match (self.first(), self.second()) {
671668
// Don't be greedy if this is actually an
672669
// integer literal followed by field/method access or a range pattern
673670
// (`0..2` and `12.foo()`)
674-
'.' if self.second() != '.' && !is_id_start(self.second()) => {
671+
('.', second) if second != '.' && !is_id_start(second) => {
675672
// might have stuff after the ., and if it does, it needs to start
676673
// with a number
677674
self.bump();
678675
let mut empty_exponent = false;
676+
let mut suffix_start = self.pos_within_token();
679677
if self.first().is_ascii_digit() {
680678
self.eat_decimal_digits();
681-
match self.first() {
682-
'e' | 'E' if is_exponent_second(self.second()) => {
679+
// This will be the start of the suffix if there is no exponent
680+
suffix_start = self.pos_within_token();
681+
match (self.first(), self.second()) {
682+
('e' | 'E', '_') => {
683+
// check if series of `_` is ended by a digit. If yes
684+
// include it in the number as exponent. If no include
685+
// it in suffix.
686+
while matches!(self.first(), '_') {
687+
self.bump();
688+
}
689+
if self.first().is_ascii_digit() {
690+
self.eat_decimal_digits();
691+
suffix_start = self.pos_within_token();
692+
}
693+
}
694+
('e' | 'E', '0'..'9' | '+' | '-') => {
695+
// definitely an exponent
683696
self.bump();
684697
empty_exponent = !self.eat_float_exponent();
698+
suffix_start = self.pos_within_token();
685699
}
686700
_ => (),
687701
}
688702
}
689-
Float { base, empty_exponent }
703+
self.eat_literal_suffix();
704+
(Float { base, empty_exponent }, suffix_start)
705+
}
706+
('e' | 'E', '_') => {
707+
// see above block for similar approach
708+
let non_exponent_suffix_start = self.pos_within_token();
709+
while matches!(self.first(), '_') {
710+
self.bump();
711+
}
712+
if self.first().is_ascii_digit() {
713+
self.eat_decimal_digits();
714+
let suffix_start = self.pos_within_token();
715+
self.eat_literal_suffix();
716+
(Float { base, empty_exponent: false }, suffix_start)
717+
} else {
718+
// No digit means suffix, and therefore int
719+
(Int { base, empty_int: false }, non_exponent_suffix_start)
720+
}
690721
}
691-
'e' | 'E' if is_exponent_second(self.second()) => {
722+
('e' | 'E', '0'..='9' | '+' | '-') => {
723+
// definitely an exponent
692724
self.bump();
693725
let empty_exponent = !self.eat_float_exponent();
694-
Float { base, empty_exponent }
726+
let suffix_start = self.pos_within_token();
727+
self.eat_literal_suffix();
728+
(Float { base, empty_exponent }, suffix_start)
729+
}
730+
_ => {
731+
let suffix_start = self.pos_within_token();
732+
self.eat_literal_suffix();
733+
(Int { base, empty_int: false }, suffix_start)
695734
}
696-
_ => Int { base, empty_int: false },
697735
}
698736
}
699737

@@ -942,6 +980,7 @@ impl Cursor<'_> {
942980
}
943981
}
944982

983+
/// Returns `true` if a digit was consumed (rather than just '_')
945984
fn eat_decimal_digits(&mut self) -> bool {
946985
let mut has_digits = false;
947986
loop {
@@ -979,7 +1018,7 @@ impl Cursor<'_> {
9791018
/// Eats the float exponent. Returns true if at least one digit was met,
9801019
/// and returns false otherwise.
9811020
fn eat_float_exponent(&mut self) -> bool {
982-
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
1021+
debug_assert!(matches!(self.prev(), 'e' | 'E'));
9831022
if self.first() == '-' || self.first() == '+' {
9841023
self.bump();
9851024
}

0 commit comments

Comments
 (0)