Skip to content

Commit 3828fa2

Browse files
authored
Rollup merge of #69384 - petrochenkov:nounnorm, r=Centril
parser: `token` -> `normalized_token`, `nonnormalized_token` -> `token` So, after #69006, its follow-ups and an attempt to remove `Parser::prev_span` I came to the conclusion that the unnormalized token and its span is what you want in most cases, so it should be default. Normalization only makes difference in few cases where we are checking against `token::Ident` or `token::Lifetime` specifically. This PR uses `normalized_token` for those cases. Using normalization explicitly means that people writing code should remember about `NtIdent` and `NtLifetime` in general. (That is alleviated by the fact that `token.ident()` and `fn parse_ident_*` are already written.) Remembering about `NtIdent` was, however, already the case, kind of, because the implicit normalization was performed only for the current/previous token, but not for things like `look_ahead`. As a result, most of token classification methods in `token.rs` already take `NtIdent` into account (this PR fixes a few pre-existing minor mistakes though). The next step is removing `normalized(_prev)_token` entirely and replacing it with `token.ident()` (mostly) and `token.normalize()` (occasionally). I want to make it a separate PR for that and run it through perf. `normalized_token` filled on every bump has both a potential to avoid repeated normalization, and to do unnecessary work in advance (it probably doesn't matter anyway, the normalization is very cheap). r? @Centril
2 parents 76fe449 + b2605c1 commit 3828fa2

File tree

16 files changed

+132
-125
lines changed

16 files changed

+132
-125
lines changed

src/librustc_builtin_macros/format.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ fn parse_args<'a>(
158158
} // accept trailing commas
159159
if p.token.is_ident() && p.look_ahead(1, |t| *t == token::Eq) {
160160
named = true;
161-
let name = if let token::Ident(name, _) = p.token.kind {
161+
let name = if let token::Ident(name, _) = p.normalized_token.kind {
162162
p.bump();
163163
name
164164
} else {

src/librustc_expand/mbe/macro_parser.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,12 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na
753753
fn get_macro_name(token: &Token) -> Option<(Name, bool)> {
754754
match token.kind {
755755
token::Ident(name, is_raw) if name != kw::Underscore => Some((name, is_raw)),
756+
token::Interpolated(ref nt) => match **nt {
757+
token::NtIdent(ident, is_raw) if ident.name != kw::Underscore => {
758+
Some((ident.name, is_raw))
759+
}
760+
_ => None,
761+
},
756762
_ => None,
757763
}
758764
}
@@ -883,9 +889,8 @@ fn parse_nt_inner<'a>(p: &mut Parser<'a>, sp: Span, name: Symbol) -> PResult<'a,
883889
// this could be handled like a token, since it is one
884890
sym::ident => {
885891
if let Some((name, is_raw)) = get_macro_name(&p.token) {
886-
let span = p.token.span;
887892
p.bump();
888-
token::NtIdent(Ident::new(name, span), is_raw)
893+
token::NtIdent(Ident::new(name, p.normalized_prev_token.span), is_raw)
889894
} else {
890895
let token_str = pprust::token_to_string(&p.token);
891896
let msg = &format!("expected ident, found {}", &token_str);

src/librustc_parse/parser/diagnostics.rs

+6-7
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use syntax::ast::{
1313
};
1414
use syntax::ast::{AttrVec, ItemKind, Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind};
1515
use syntax::ptr::P;
16-
use syntax::token::{self, token_can_begin_expr, TokenKind};
16+
use syntax::token::{self, TokenKind};
1717
use syntax::util::parser::AssocOp;
1818

1919
use log::{debug, trace};
@@ -192,12 +192,12 @@ impl<'a> Parser<'a> {
192192
TokenKind::CloseDelim(token::DelimToken::Brace),
193193
TokenKind::CloseDelim(token::DelimToken::Paren),
194194
];
195-
if let token::Ident(name, false) = self.token.kind {
196-
if Ident::new(name, self.token.span).is_raw_guess()
195+
if let token::Ident(name, false) = self.normalized_token.kind {
196+
if Ident::new(name, self.normalized_token.span).is_raw_guess()
197197
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind))
198198
{
199199
err.span_suggestion(
200-
self.token.span,
200+
self.normalized_token.span,
201201
"you can escape reserved keywords to use them as identifiers",
202202
format!("r#{}", name),
203203
Applicability::MaybeIncorrect,
@@ -900,8 +900,7 @@ impl<'a> Parser<'a> {
900900
} else if !sm.is_multiline(self.prev_span.until(self.token.span)) {
901901
// The current token is in the same line as the prior token, not recoverable.
902902
} else if self.look_ahead(1, |t| {
903-
t == &token::CloseDelim(token::Brace)
904-
|| token_can_begin_expr(t) && t.kind != token::Colon
903+
t == &token::CloseDelim(token::Brace) || t.can_begin_expr() && t.kind != token::Colon
905904
}) && [token::Comma, token::Colon].contains(&self.token.kind)
906905
{
907906
// Likely typo: `,` → `;` or `:` → `;`. This is triggered if the current token is
@@ -919,7 +918,7 @@ impl<'a> Parser<'a> {
919918
} else if self.look_ahead(0, |t| {
920919
t == &token::CloseDelim(token::Brace)
921920
|| (
922-
token_can_begin_expr(t) && t != &token::Semi && t != &token::Pound
921+
t.can_begin_expr() && t != &token::Semi && t != &token::Pound
923922
// Avoid triggering with too many trailing `#` in raw string.
924923
)
925924
}) {

src/librustc_parse/parser/expr.rs

+18-21
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,14 @@ impl<'a> Parser<'a> {
9797
fn parse_expr_catch_underscore(&mut self) -> PResult<'a, P<Expr>> {
9898
match self.parse_expr() {
9999
Ok(expr) => Ok(expr),
100-
Err(mut err) => match self.token.kind {
100+
Err(mut err) => match self.normalized_token.kind {
101101
token::Ident(name, false)
102102
if name == kw::Underscore && self.look_ahead(1, |t| t == &token::Comma) =>
103103
{
104104
// Special-case handling of `foo(_, _, _)`
105105
err.emit();
106-
let sp = self.token.span;
107106
self.bump();
108-
Ok(self.mk_expr(sp, ExprKind::Err, AttrVec::new()))
107+
Ok(self.mk_expr(self.prev_token.span, ExprKind::Err, AttrVec::new()))
109108
}
110109
_ => Err(err),
111110
},
@@ -166,7 +165,7 @@ impl<'a> Parser<'a> {
166165
while let Some(op) = self.check_assoc_op() {
167166
// Adjust the span for interpolated LHS to point to the `$lhs` token
168167
// and not to what it refers to.
169-
let lhs_span = match self.unnormalized_prev_token.kind {
168+
let lhs_span = match self.prev_token.kind {
170169
TokenKind::Interpolated(..) => self.prev_span,
171170
_ => lhs.span,
172171
};
@@ -333,7 +332,7 @@ impl<'a> Parser<'a> {
333332
/// Also performs recovery for `and` / `or` which are mistaken for `&&` and `||` respectively.
334333
fn check_assoc_op(&self) -> Option<Spanned<AssocOp>> {
335334
Some(Spanned {
336-
node: match (AssocOp::from_token(&self.token), &self.token.kind) {
335+
node: match (AssocOp::from_token(&self.token), &self.normalized_token.kind) {
337336
(Some(op), _) => op,
338337
(None, token::Ident(sym::and, false)) => {
339338
self.error_bad_logical_op("and", "&&", "conjunction");
@@ -345,7 +344,7 @@ impl<'a> Parser<'a> {
345344
}
346345
_ => return None,
347346
},
348-
span: self.token.span,
347+
span: self.normalized_token.span,
349348
})
350349
}
351350

@@ -437,7 +436,7 @@ impl<'a> Parser<'a> {
437436
let attrs = self.parse_or_use_outer_attributes(attrs)?;
438437
let lo = self.token.span;
439438
// Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
440-
let (hi, ex) = match self.token.kind {
439+
let (hi, ex) = match self.normalized_token.kind {
441440
token::Not => self.parse_unary_expr(lo, UnOp::Not), // `!expr`
442441
token::Tilde => self.recover_tilde_expr(lo), // `~expr`
443442
token::BinOp(token::Minus) => self.parse_unary_expr(lo, UnOp::Neg), // `-expr`
@@ -523,7 +522,7 @@ impl<'a> Parser<'a> {
523522
) -> PResult<'a, (Span, P<Expr>)> {
524523
expr.map(|e| {
525524
(
526-
match self.unnormalized_prev_token.kind {
525+
match self.prev_token.kind {
527526
TokenKind::Interpolated(..) => self.prev_span,
528527
_ => e.span,
529528
},
@@ -704,7 +703,7 @@ impl<'a> Parser<'a> {
704703
}
705704

706705
fn parse_dot_suffix_expr(&mut self, lo: Span, base: P<Expr>) -> PResult<'a, P<Expr>> {
707-
match self.token.kind {
706+
match self.normalized_token.kind {
708707
token::Ident(..) => self.parse_dot_suffix(base, lo),
709708
token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => {
710709
Ok(self.parse_tuple_field_access_expr(lo, base, symbol, suffix))
@@ -773,8 +772,8 @@ impl<'a> Parser<'a> {
773772
field: Symbol,
774773
suffix: Option<Symbol>,
775774
) -> P<Expr> {
776-
let span = self.token.span;
777775
self.bump();
776+
let span = self.prev_token.span;
778777
let field = ExprKind::Field(base, Ident::new(field, span));
779778
self.expect_no_suffix(span, "a tuple index", suffix);
780779
self.mk_expr(lo.to(span), field, AttrVec::new())
@@ -798,7 +797,7 @@ impl<'a> Parser<'a> {
798797

799798
/// Assuming we have just parsed `.`, continue parsing into an expression.
800799
fn parse_dot_suffix(&mut self, self_arg: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> {
801-
if self.token.span.rust_2018() && self.eat_keyword(kw::Await) {
800+
if self.normalized_token.span.rust_2018() && self.eat_keyword(kw::Await) {
802801
return self.mk_await_expr(self_arg, lo);
803802
}
804803

@@ -912,7 +911,7 @@ impl<'a> Parser<'a> {
912911
// | ^ expected expression
913912
self.bump();
914913
Ok(self.mk_expr_err(self.token.span))
915-
} else if self.token.span.rust_2018() {
914+
} else if self.normalized_token.span.rust_2018() {
916915
// `Span::rust_2018()` is somewhat expensive; don't get it repeatedly.
917916
if self.check_keyword(kw::Async) {
918917
if self.is_async_block() {
@@ -1342,7 +1341,7 @@ impl<'a> Parser<'a> {
13421341
if self.eat_keyword(kw::Static) { Movability::Static } else { Movability::Movable };
13431342

13441343
let asyncness =
1345-
if self.token.span.rust_2018() { self.parse_asyncness() } else { Async::No };
1344+
if self.normalized_token.span.rust_2018() { self.parse_asyncness() } else { Async::No };
13461345
if asyncness.is_async() {
13471346
// Feature-gate `async ||` closures.
13481347
self.sess.gated_spans.gate(sym::async_closure, self.prev_span);
@@ -1556,9 +1555,8 @@ impl<'a> Parser<'a> {
15561555

15571556
fn eat_label(&mut self) -> Option<Label> {
15581557
self.token.lifetime().map(|ident| {
1559-
let span = self.token.span;
15601558
self.bump();
1561-
Label { ident: Ident::new(ident.name, span) }
1559+
Label { ident }
15621560
})
15631561
}
15641562

@@ -1700,7 +1698,7 @@ impl<'a> Parser<'a> {
17001698
fn is_try_block(&self) -> bool {
17011699
self.token.is_keyword(kw::Try) &&
17021700
self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) &&
1703-
self.token.span.rust_2018() &&
1701+
self.normalized_token.span.rust_2018() &&
17041702
// Prevent `while try {} {}`, `if try {} {} else {}`, etc.
17051703
!self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL)
17061704
}
@@ -1850,13 +1848,12 @@ impl<'a> Parser<'a> {
18501848

18511849
/// Use in case of error after field-looking code: `S { foo: () with a }`.
18521850
fn find_struct_error_after_field_looking_code(&self) -> Option<Field> {
1853-
if let token::Ident(name, _) = self.token.kind {
1851+
if let token::Ident(name, _) = self.normalized_token.kind {
18541852
if !self.token.is_reserved_ident() && self.look_ahead(1, |t| *t == token::Colon) {
1855-
let span = self.token.span;
18561853
return Some(ast::Field {
1857-
ident: Ident::new(name, span),
1858-
span,
1859-
expr: self.mk_expr_err(span),
1854+
ident: Ident::new(name, self.normalized_token.span),
1855+
span: self.token.span,
1856+
expr: self.mk_expr_err(self.token.span),
18601857
is_shorthand: false,
18611858
attrs: AttrVec::new(),
18621859
id: DUMMY_NODE_ID,

src/librustc_parse/parser/item.rs

+6-8
Original file line numberDiff line numberDiff line change
@@ -741,11 +741,10 @@ impl<'a> Parser<'a> {
741741
}
742742

743743
fn parse_ident_or_underscore(&mut self) -> PResult<'a, ast::Ident> {
744-
match self.token.kind {
744+
match self.normalized_token.kind {
745745
token::Ident(name @ kw::Underscore, false) => {
746-
let span = self.token.span;
747746
self.bump();
748-
Ok(Ident::new(name, span))
747+
Ok(Ident::new(name, self.normalized_prev_token.span))
749748
}
750749
_ => self.parse_ident(),
751750
}
@@ -1537,7 +1536,7 @@ impl<'a> Parser<'a> {
15371536

15381537
let is_name_required = match self.token.kind {
15391538
token::DotDotDot => false,
1540-
_ => req_name(&self.token),
1539+
_ => req_name(&self.normalized_token),
15411540
};
15421541
let (pat, ty) = if is_name_required || self.is_named_param() {
15431542
debug!("parse_param_general parse_pat (is_name_required:{})", is_name_required);
@@ -1603,12 +1602,11 @@ impl<'a> Parser<'a> {
16031602
fn parse_self_param(&mut self) -> PResult<'a, Option<Param>> {
16041603
// Extract an identifier *after* having confirmed that the token is one.
16051604
let expect_self_ident = |this: &mut Self| {
1606-
match this.token.kind {
1605+
match this.normalized_token.kind {
16071606
// Preserve hygienic context.
16081607
token::Ident(name, _) => {
1609-
let span = this.token.span;
16101608
this.bump();
1611-
Ident::new(name, span)
1609+
Ident::new(name, this.normalized_prev_token.span)
16121610
}
16131611
_ => unreachable!(),
16141612
}
@@ -1645,7 +1643,7 @@ impl<'a> Parser<'a> {
16451643
// Only a limited set of initial token sequences is considered `self` parameters; anything
16461644
// else is parsed as a normal function parameter list, so some lookahead is required.
16471645
let eself_lo = self.token.span;
1648-
let (eself, eself_ident, eself_hi) = match self.token.kind {
1646+
let (eself, eself_ident, eself_hi) = match self.normalized_token.kind {
16491647
token::BinOp(token::And) => {
16501648
let eself = if is_isolated_self(self, 1) {
16511649
// `&self`

src/librustc_parse/parser/mod.rs

+23-25
Original file line numberDiff line numberDiff line change
@@ -86,23 +86,22 @@ macro_rules! maybe_recover_from_interpolated_ty_qpath {
8686
#[derive(Clone)]
8787
pub struct Parser<'a> {
8888
pub sess: &'a ParseSess,
89+
/// The current non-normalized token.
90+
pub token: Token,
8991
/// The current normalized token.
9092
/// "Normalized" means that some interpolated tokens
9193
/// (`$i: ident` and `$l: lifetime` meta-variables) are replaced
9294
/// with non-interpolated identifier and lifetime tokens they refer to.
93-
/// Use span from this token if you need an isolated span.
94-
pub token: Token,
95-
/// The current non-normalized token if it's different from `token`.
96-
/// Use span from this token if you need to concatenate it with some neighbouring spans.
97-
unnormalized_token: Token,
95+
/// Use this if you need to check for `token::Ident` or `token::Lifetime` specifically,
96+
/// this also includes edition checks for edition-specific keyword identifiers.
97+
pub normalized_token: Token,
98+
/// The previous non-normalized token.
99+
pub prev_token: Token,
98100
/// The previous normalized token.
99-
/// Use span from this token if you need an isolated span.
100-
prev_token: Token,
101-
/// The previous non-normalized token if it's different from `prev_token`.
102-
/// Use span from this token if you need to concatenate it with some neighbouring spans.
103-
unnormalized_prev_token: Token,
104-
/// Equivalent to `unnormalized_prev_token.span`.
105-
/// FIXME: Remove in favor of `(unnormalized_)prev_token.span`.
101+
/// Use this if you need to check for `token::Ident` or `token::Lifetime` specifically,
102+
/// this also includes edition checks for edition-specific keyword identifiers.
103+
pub normalized_prev_token: Token,
104+
/// FIXME: Remove in favor of the equivalent `prev_token.span`.
106105
pub prev_span: Span,
107106
restrictions: Restrictions,
108107
/// Used to determine the path to externally loaded source files.
@@ -375,9 +374,9 @@ impl<'a> Parser<'a> {
375374
let mut parser = Parser {
376375
sess,
377376
token: Token::dummy(),
378-
unnormalized_token: Token::dummy(),
377+
normalized_token: Token::dummy(),
379378
prev_token: Token::dummy(),
380-
unnormalized_prev_token: Token::dummy(),
379+
normalized_prev_token: Token::dummy(),
381380
prev_span: DUMMY_SP,
382381
restrictions: Restrictions::empty(),
383382
recurse_into_file_modules,
@@ -482,7 +481,7 @@ impl<'a> Parser<'a> {
482481
}
483482

484483
fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, ast::Ident> {
485-
match self.token.kind {
484+
match self.normalized_token.kind {
486485
token::Ident(name, _) => {
487486
if self.token.is_reserved_ident() {
488487
let mut err = self.expected_ident_found();
@@ -492,9 +491,8 @@ impl<'a> Parser<'a> {
492491
return Err(err);
493492
}
494493
}
495-
let span = self.token.span;
496494
self.bump();
497-
Ok(Ident::new(name, span))
495+
Ok(Ident::new(name, self.normalized_prev_token.span))
498496
}
499497
_ => Err(match self.prev_token.kind {
500498
TokenKind::DocComment(..) => {
@@ -824,16 +822,16 @@ impl<'a> Parser<'a> {
824822
// tokens are replaced with usual identifier and lifetime tokens,
825823
// so the former are never encountered during normal parsing.
826824
crate fn set_token(&mut self, token: Token) {
827-
self.unnormalized_token = token;
828-
self.token = match &self.unnormalized_token.kind {
825+
self.token = token;
826+
self.normalized_token = match &self.token.kind {
829827
token::Interpolated(nt) => match **nt {
830828
token::NtIdent(ident, is_raw) => {
831829
Token::new(token::Ident(ident.name, is_raw), ident.span)
832830
}
833831
token::NtLifetime(ident) => Token::new(token::Lifetime(ident.name), ident.span),
834-
_ => self.unnormalized_token.clone(),
832+
_ => self.token.clone(),
835833
},
836-
_ => self.unnormalized_token.clone(),
834+
_ => self.token.clone(),
837835
}
838836
}
839837

@@ -847,19 +845,19 @@ impl<'a> Parser<'a> {
847845

848846
// Update the current and previous tokens.
849847
self.prev_token = self.token.take();
850-
self.unnormalized_prev_token = self.unnormalized_token.take();
848+
self.normalized_prev_token = self.normalized_token.take();
851849
self.set_token(next_token);
852850

853851
// Update fields derived from the previous token.
854-
self.prev_span = self.unnormalized_prev_token.span;
852+
self.prev_span = self.prev_token.span;
855853

856854
// Diagnostics.
857855
self.expected_tokens.clear();
858856
}
859857

860858
/// Advance the parser by one token.
861859
pub fn bump(&mut self) {
862-
let next_token = self.next_tok(self.unnormalized_token.span);
860+
let next_token = self.next_tok(self.token.span);
863861
self.bump_with(next_token);
864862
}
865863

@@ -890,7 +888,7 @@ impl<'a> Parser<'a> {
890888
/// Parses asyncness: `async` or nothing.
891889
fn parse_asyncness(&mut self) -> Async {
892890
if self.eat_keyword(kw::Async) {
893-
let span = self.prev_span;
891+
let span = self.normalized_prev_token.span;
894892
Async::Yes { span, closure_id: DUMMY_NODE_ID, return_impl_trait_id: DUMMY_NODE_ID }
895893
} else {
896894
Async::No

0 commit comments

Comments
 (0)