Skip to content

Commit e276417

Browse files
move some invalid exponent detection into rustc_session
This PR allows integer and float suffixes that begin with 'e' (or 'E') to pass the lexer. Before, an 'e' that wasn't followed by a valid exponent was rejected by the lexer. This rejection still happens in the parser, but now proc macro authors have the opportunity to interpret the suffix, enabling tokens like `1em` to be used in macros. Diagnostics are also marginally improved. Exponents that contain arbitrarily long suffixes are handled without read-ahead by tracking the exponent start in case of an invalid exponent, so the suffix start is correct. Also adds tests for various edge cases. Co-authored-by: Vadim Petrochenkov <vadim.petrochenkov@gmail.com>
1 parent cb08599 commit e276417

9 files changed

+208
-77
lines changed

compiler/rustc_lexer/src/lib.rs

+71-29
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ pub enum DocStyle {
194194
pub enum LiteralKind {
195195
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
196196
Int { base: Base, empty_int: bool },
197-
/// `12.34f32`, `1e3`, but not `1f32`.
197+
/// `12.34f32`, `1e3` and `1e+`, but not `1f32` or `1em`.
198198
Float { base: Base, empty_exponent: bool },
199199
/// `'a'`, `'\\'`, `'''`, `';`
200200
Char { terminated: bool },
@@ -409,8 +409,8 @@ impl Cursor<'_> {
409409

410410
// Numeric literal.
411411
c @ '0'..='9' => {
412-
let literal_kind = self.number(c);
413-
let suffix_start = self.pos_within_token();
412+
let (literal_kind, suffix_start) = self.number(c);
413+
let suffix_start = suffix_start.unwrap_or(self.pos_within_token());
414414
self.eat_literal_suffix();
415415
TokenKind::Literal { kind: literal_kind, suffix_start }
416416
}
@@ -606,7 +606,9 @@ impl Cursor<'_> {
606606
}
607607
}
608608

609-
fn number(&mut self, first_digit: char) -> LiteralKind {
609+
/// Parses a number and in `.1` returns the offset of the literal suffix if
610+
/// different from the current position on return.
611+
fn number(&mut self, first_digit: char) -> (LiteralKind, Option<u32>) {
610612
debug_assert!('0' <= self.prev() && self.prev() <= '9');
611613
let mut base = Base::Decimal;
612614
if first_digit == '0' {
@@ -616,21 +618,21 @@ impl Cursor<'_> {
616618
base = Base::Binary;
617619
self.bump();
618620
if !self.eat_decimal_digits() {
619-
return Int { base, empty_int: true };
621+
return (Int { base, empty_int: true }, None);
620622
}
621623
}
622624
'o' => {
623625
base = Base::Octal;
624626
self.bump();
625627
if !self.eat_decimal_digits() {
626-
return Int { base, empty_int: true };
628+
return (Int { base, empty_int: true }, None);
627629
}
628630
}
629631
'x' => {
630632
base = Base::Hexadecimal;
631633
self.bump();
632634
if !self.eat_hexadecimal_digits() {
633-
return Int { base, empty_int: true };
635+
return (Int { base, empty_int: true }, None);
634636
}
635637
}
636638
// Not a base prefix; consume additional digits.
@@ -642,40 +644,79 @@ impl Cursor<'_> {
642644
'.' | 'e' | 'E' => {}
643645

644646
// Just a 0.
645-
_ => return Int { base, empty_int: false },
647+
_ => return (Int { base, empty_int: false }, None),
646648
}
647649
} else {
648650
// No base prefix, parse number in the usual way.
649651
self.eat_decimal_digits();
650652
};
651653

652-
match self.first() {
654+
match (self.first(), self.second()) {
653655
// Don't be greedy if this is actually an
654656
// integer literal followed by field/method access or a range pattern
655657
// (`0..2` and `12.foo()`)
656-
'.' if self.second() != '.' && !is_id_start(self.second()) => {
657-
// might have stuff after the ., and if it does, it needs to start
658-
// with a number
658+
('.', second) if second != '.' && !is_id_start(second) => {
659659
self.bump();
660+
self.eat_decimal_digits();
661+
660662
let mut empty_exponent = false;
661-
if self.first().is_ascii_digit() {
662-
self.eat_decimal_digits();
663-
match self.first() {
664-
'e' | 'E' => {
665-
self.bump();
666-
empty_exponent = !self.eat_float_exponent();
667-
}
668-
_ => (),
663+
let suffix_start = match (self.first(), self.second()) {
664+
('e' | 'E', '_') => self.eat_underscore_exponent(),
665+
('e' | 'E', '0'..='9' | '+' | '-') => {
666+
// Definitely an exponent (which still can be empty).
667+
self.bump();
668+
empty_exponent = !self.eat_float_exponent();
669+
None
669670
}
671+
_ => None,
672+
};
673+
(Float { base, empty_exponent }, suffix_start)
674+
}
675+
('e' | 'E', '_') => {
676+
match self.eat_underscore_exponent() {
677+
Some(suffix_start) => {
678+
// The suffix begins at `e`, meaning the number is an integer.
679+
(Int { base, empty_int: false }, Some(suffix_start))
680+
}
681+
None => (Float { base, empty_exponent: false }, None),
670682
}
671-
Float { base, empty_exponent }
672683
}
673-
'e' | 'E' => {
684+
('e' | 'E', '0'..='9' | '+' | '-') => {
685+
// Definitely an exponent (which still can be empty).
674686
self.bump();
675687
let empty_exponent = !self.eat_float_exponent();
676-
Float { base, empty_exponent }
688+
(Float { base, empty_exponent }, None)
677689
}
678-
_ => Int { base, empty_int: false },
690+
_ => (Int { base, empty_int: false }, None),
691+
}
692+
}
693+
694+
/// Try to find and eat an exponent
695+
///
696+
/// Assumes the first character is `e`/`E` and second is `_`, and consumes
697+
/// `e`/`E` followed by all consecutive `_`s.
698+
///
699+
/// Returns `Some` if no exponent was found. In this case, the suffix is partially
700+
/// consumed, and began at the return value.
701+
fn eat_underscore_exponent(&mut self) -> Option<u32> {
702+
debug_assert!(matches!(self.first(), 'e' | 'E'));
703+
debug_assert!(matches!(self.second(), '_'));
704+
let suffix_start = self.pos_within_token();
705+
706+
// check if series of `_` is ended by a digit. If yes
707+
// include it in the number as exponent. If no include
708+
// it in suffix.
709+
self.bump();
710+
while matches!(self.first(), '_') {
711+
self.bump();
712+
}
713+
// If we find a digit, then the exponential was valid
714+
// so the suffix will start at the cursor as usual.
715+
if self.first().is_ascii_digit() {
716+
self.eat_decimal_digits();
717+
None
718+
} else {
719+
Some(suffix_start)
679720
}
680721
}
681722

@@ -924,6 +965,7 @@ impl Cursor<'_> {
924965
}
925966
}
926967

968+
/// Returns `true` if a digit was consumed (rather than just '_'s).
927969
fn eat_decimal_digits(&mut self) -> bool {
928970
let mut has_digits = false;
929971
loop {
@@ -961,20 +1003,20 @@ impl Cursor<'_> {
9611003
/// Eats the float exponent. Returns true if at least one digit was met,
9621004
/// and returns false otherwise.
9631005
fn eat_float_exponent(&mut self) -> bool {
964-
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
1006+
debug_assert!(matches!(self.prev(), 'e' | 'E'));
9651007
if self.first() == '-' || self.first() == '+' {
9661008
self.bump();
9671009
}
9681010
self.eat_decimal_digits()
9691011
}
9701012

971-
// Eats the suffix of the literal, e.g. "u8".
1013+
/// Eats the suffix of the literal, e.g. "u8".
9721014
fn eat_literal_suffix(&mut self) {
973-
self.eat_identifier();
1015+
self.eat_identifier()
9741016
}
9751017

976-
// Eats the identifier. Note: succeeds on `_`, which isn't a valid
977-
// identifier.
1018+
/// Eats the identifier. Note: succeeds on `_`, which isn't a valid
1019+
/// identifier.
9781020
fn eat_identifier(&mut self) {
9791021
if !is_id_start(self.first()) {
9801022
return;

compiler/rustc_session/messages.ftl

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at
1414
1515
session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled
1616
17+
session_empty_float_exponent = expected at least one digit in exponent
18+
1719
session_expr_parentheses_needed = parentheses are required to parse this as an expression
1820
1921
session_failed_to_create_profiler = failed to create profiler: {$err}

compiler/rustc_session/src/errors.rs

+15
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,10 @@ pub fn report_lit_error(
377377
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
378378
}
379379

380+
fn looks_like_empty_exponent(s: &str) -> bool {
381+
s.len() == 1 && matches!(s.chars().next(), Some('e' | 'E'))
382+
}
383+
380384
// Try to lowercase the prefix if the prefix and suffix are valid.
381385
fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option<String> {
382386
let mut chars = suffix.chars();
@@ -409,6 +413,8 @@ pub fn report_lit_error(
409413
if looks_like_width_suffix(&['i', 'u'], suf) {
410414
// If it looks like a width, try to be helpful.
411415
dcx.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() })
416+
} else if looks_like_empty_exponent(suf) {
417+
dcx.emit_err(EmptyFloatExponent { span })
412418
} else if let Some(fixed) = fix_base_capitalisation(lit.symbol.as_str(), suf) {
413419
dcx.emit_err(InvalidNumLiteralBasePrefix { span, fixed })
414420
} else {
@@ -420,6 +426,8 @@ pub fn report_lit_error(
420426
if looks_like_width_suffix(&['f'], suf) {
421427
// If it looks like a width, try to be helpful.
422428
dcx.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() })
429+
} else if looks_like_empty_exponent(suf) {
430+
dcx.emit_err(EmptyFloatExponent { span })
423431
} else {
424432
dcx.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() })
425433
}
@@ -489,3 +497,10 @@ pub(crate) struct SoftFloatIgnored;
489497
#[note]
490498
#[note(session_soft_float_deprecated_issue)]
491499
pub(crate) struct SoftFloatDeprecated;
500+
501+
#[derive(Diagnostic)]
502+
#[diag(session_empty_float_exponent)]
503+
pub(crate) struct EmptyFloatExponent {
504+
#[primary_span]
505+
pub span: Span,
506+
}
+36-36
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,3 @@
1-
error: expected at least one digit in exponent
2-
--> $DIR/issue-104390.rs:1:27
3-
|
4-
LL | fn f1() -> impl Sized { & 2E }
5-
| ^^
6-
7-
error: expected at least one digit in exponent
8-
--> $DIR/issue-104390.rs:2:28
9-
|
10-
LL | fn f2() -> impl Sized { && 2E }
11-
| ^^
12-
13-
error: expected at least one digit in exponent
14-
--> $DIR/issue-104390.rs:3:29
15-
|
16-
LL | fn f3() -> impl Sized { &'a 2E }
17-
| ^^
18-
19-
error: expected at least one digit in exponent
20-
--> $DIR/issue-104390.rs:5:34
21-
|
22-
LL | fn f4() -> impl Sized { &'static 2E }
23-
| ^^
24-
25-
error: expected at least one digit in exponent
26-
--> $DIR/issue-104390.rs:7:28
27-
|
28-
LL | fn f5() -> impl Sized { *& 2E }
29-
| ^^
30-
31-
error: expected at least one digit in exponent
32-
--> $DIR/issue-104390.rs:8:29
33-
|
34-
LL | fn f6() -> impl Sized { &'_ 2E }
35-
| ^^
36-
371
error: borrow expressions cannot be annotated with lifetimes
382
--> $DIR/issue-104390.rs:3:25
393
|
@@ -76,5 +40,41 @@ LL - fn f6() -> impl Sized { &'_ 2E }
7640
LL + fn f6() -> impl Sized { &2E }
7741
|
7842

43+
error: expected at least one digit in exponent
44+
--> $DIR/issue-104390.rs:1:27
45+
|
46+
LL | fn f1() -> impl Sized { & 2E }
47+
| ^^
48+
49+
error: expected at least one digit in exponent
50+
--> $DIR/issue-104390.rs:2:28
51+
|
52+
LL | fn f2() -> impl Sized { && 2E }
53+
| ^^
54+
55+
error: expected at least one digit in exponent
56+
--> $DIR/issue-104390.rs:3:29
57+
|
58+
LL | fn f3() -> impl Sized { &'a 2E }
59+
| ^^
60+
61+
error: expected at least one digit in exponent
62+
--> $DIR/issue-104390.rs:5:34
63+
|
64+
LL | fn f4() -> impl Sized { &'static 2E }
65+
| ^^
66+
67+
error: expected at least one digit in exponent
68+
--> $DIR/issue-104390.rs:7:28
69+
|
70+
LL | fn f5() -> impl Sized { *& 2E }
71+
| ^^
72+
73+
error: expected at least one digit in exponent
74+
--> $DIR/issue-104390.rs:8:29
75+
|
76+
LL | fn f6() -> impl Sized { &'_ 2E }
77+
| ^^
78+
7979
error: aborting due to 9 previous errors
8080

tests/ui/consts/issue-91434.stderr

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
error: expected at least one digit in exponent
2-
--> $DIR/issue-91434.rs:2:11
3-
|
4-
LL | [9; [[9E; h]]];
5-
| ^^
6-
71
error[E0425]: cannot find value `h` in this scope
82
--> $DIR/issue-91434.rs:2:15
93
|
104
LL | [9; [[9E; h]]];
115
| ^ not found in this scope
126

7+
error: expected at least one digit in exponent
8+
--> $DIR/issue-91434.rs:2:11
9+
|
10+
LL | [9; [[9E; h]]];
11+
| ^^
12+
1313
error: aborting due to 2 previous errors
1414

1515
For more information about this error, try `rustc --explain E0425`.
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
error: expected at least one digit in exponent
2-
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
3-
|
4-
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
5-
| ^^^^^^
6-
71
error: unknown start of token: \u{2212}
82
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:53
93
|
@@ -16,5 +10,11 @@ LL - const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹
1610
LL + const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e-11; // m³⋅kg⁻¹⋅s⁻²
1711
|
1812

13+
error: expected at least one digit in exponent
14+
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
15+
|
16+
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
17+
| ^^^^^^
18+
1919
error: aborting due to 2 previous errors
2020

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
const _A: f64 = 1em;
2+
//~^ ERROR invalid suffix `em` for number literal
3+
const _B: f64 = 1e0m;
4+
//~^ ERROR invalid suffix `m` for float literal
5+
const _C: f64 = 1e_______________0m;
6+
//~^ ERROR invalid suffix `m` for float literal
7+
const _D: f64 = 1e_______________m;
8+
//~^ ERROR invalid suffix `e_______________m` for number literal
9+
10+
// All the above patterns should not generate an error when used in a macro
11+
macro_rules! do_nothing {
12+
($($toks:tt)*) => {};
13+
}
14+
do_nothing!(1em 1e0m 1e_______________0m 1e_______________m);
15+
16+
fn main() {}

0 commit comments

Comments
 (0)