WIP

rust-lang · Dec 31, 2023 · 3b0619b · 3b0619b
1 parent fcfe05a
commit 3b0619b
Show file tree

Hide file tree

Showing 65 changed files with 527 additions and 365 deletions.
diff --git a/compiler/rustc_ast/src/ast_traits.rs b/compiler/rustc_ast/src/ast_traits.rs
@@ -240,7 +240,7 @@ impl HasTokens for Nonterminal {
             Nonterminal::NtPath(path) => path.tokens(),
             Nonterminal::NtVis(vis) => vis.tokens(),
             Nonterminal::NtBlock(block) => block.tokens(),
-            Nonterminal::NtIdent(..) | Nonterminal::NtLifetime(..) => None,
+            Nonterminal::NtLifetime(..) => None,
         }
     }
     fn tokens_mut(&mut self) -> Option<&mut Option<LazyAttrTokenStream>> {
@@ -254,7 +254,7 @@ impl HasTokens for Nonterminal {
             Nonterminal::NtPath(path) => path.tokens_mut(),
             Nonterminal::NtVis(vis) => vis.tokens_mut(),
             Nonterminal::NtBlock(block) => block.tokens_mut(),
-            Nonterminal::NtIdent(..) | Nonterminal::NtLifetime(..) => None,
+            Nonterminal::NtLifetime(..) => None,
         }
     }
 }

diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs
@@ -816,7 +816,6 @@ pub fn visit_nonterminal<T: MutVisitor>(nt: &mut token::Nonterminal, vis: &mut T
         token::NtPat(pat) => vis.visit_pat(pat),
         token::NtExpr(expr) => vis.visit_expr(expr),
         token::NtTy(ty) => vis.visit_ty(ty),
-        token::NtIdent(ident, _is_raw) => vis.visit_ident(ident),
         token::NtLifetime(ident) => vis.visit_ident(ident),
         token::NtLiteral(expr) => vis.visit_expr(expr),
         token::NtMeta(item) => {

diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs
@@ -295,9 +295,6 @@ pub enum TokenKind {
     Literal(Lit),
 
     /// Identifier token.
-    /// Do not forget about `NtIdent` when you want to match on identifiers.
-    /// It's recommended to use `Token::(ident,uninterpolate,uninterpolated_span)` to
-    /// treat regular and interpolated identifiers in the same way.
     Ident(Symbol, /* is_raw */ bool),
     /// Lifetime identifier token.
     /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers.
@@ -590,9 +587,6 @@ impl Token {
     pub fn uninterpolate(&self) -> Cow<'_, Token> {
         match &self.kind {
             Interpolated(nt) => match &nt.0 {
-                NtIdent(ident, is_raw) => {
-                    Cow::Owned(Token::new(Ident(ident.name, *is_raw), ident.span))
-                }
                 NtLifetime(ident) => Cow::Owned(Token::new(Lifetime(ident.name), ident.span)),
                 _ => Cow::Borrowed(self),
             },
@@ -606,10 +600,6 @@ impl Token {
         // We avoid using `Token::uninterpolate` here because it's slow.
         match &self.kind {
             &Ident(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
-            Interpolated(nt) => match &nt.0 {
-                NtIdent(ident, is_raw) => Some((*ident, *is_raw)),
-                _ => None,
-            },
             _ => None,
         }
     }
@@ -836,7 +826,6 @@ pub enum Nonterminal {
     NtPat(P<ast::Pat>),
     NtExpr(P<ast::Expr>),
     NtTy(P<ast::Ty>),
-    NtIdent(Ident, /* is_raw */ bool),
     NtLifetime(Ident),
     NtLiteral(P<ast::Expr>),
     /// Stuff inside brackets for attributes
@@ -932,7 +921,7 @@ impl Nonterminal {
             NtPat(pat) => pat.span,
             NtExpr(expr) | NtLiteral(expr) => expr.span,
             NtTy(ty) => ty.span,
-            NtIdent(ident, _) | NtLifetime(ident) => ident.span,
+            NtLifetime(ident) => ident.span,
             NtMeta(attr_item) => attr_item.span(),
             NtPath(path) => path.span,
             NtVis(vis) => vis.span,
@@ -948,7 +937,6 @@ impl Nonterminal {
             NtExpr(..) => "expression",
             NtLiteral(..) => "literal",
             NtTy(..) => "type",
-            NtIdent(..) => "identifier",
             NtLifetime(..) => "lifetime",
             NtMeta(..) => "attribute",
             NtPath(..) => "path",
@@ -960,9 +948,6 @@ impl Nonterminal {
 impl PartialEq for Nonterminal {
     fn eq(&self, rhs: &Self) -> bool {
         match (self, rhs) {
-            (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) => {
-                ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs
-            }
             (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
             // FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them
             // correctly based on data from AST. This will prevent them from matching each other
@@ -982,7 +967,6 @@ impl fmt::Debug for Nonterminal {
             NtPat(..) => f.pad("NtPat(..)"),
             NtExpr(..) => f.pad("NtExpr(..)"),
             NtTy(..) => f.pad("NtTy(..)"),
-            NtIdent(..) => f.pad("NtIdent(..)"),
             NtLiteral(..) => f.pad("NtLiteral(..)"),
             NtMeta(..) => f.pad("NtMeta(..)"),
             NtPath(..) => f.pad("NtPath(..)"),

diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs
@@ -26,7 +26,7 @@ use rustc_span::{sym, Span, Symbol, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 
 use std::borrow::Cow;
-use std::{cmp, fmt, iter, mem};
+use std::{cmp, fmt, iter};
 
 /// When the main Rust parser encounters a syntax-extension invocation, it
 /// parses the arguments to the invocation as a token tree. This is a very
@@ -81,14 +81,6 @@ impl TokenTree {
         }
     }
 
-    /// Modify the `TokenTree`'s span in-place.
-    pub fn set_span(&mut self, span: Span) {
-        match self {
-            TokenTree::Token(token, _) => token.span = span,
-            TokenTree::Delimited(dspan, ..) => *dspan = DelimSpan::from_single(span),
-        }
-    }
-
     /// Create a `TokenTree::Token` with alone spacing.
     pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
         TokenTree::Token(Token::new(kind, span), Spacing::Alone)
@@ -461,19 +453,6 @@ impl TokenStream {
         t1.next().is_none() && t2.next().is_none()
     }
 
-    /// Applies the supplied function to each `TokenTree` and its index in `self`, returning a new `TokenStream`
-    ///
-    /// It is equivalent to `TokenStream::new(self.trees().cloned().enumerate().map(|(i, tt)| f(i, tt)).collect())`.
-    pub fn map_enumerated_owned(
-        mut self,
-        mut f: impl FnMut(usize, TokenTree) -> TokenTree,
-    ) -> TokenStream {
-        let owned = Lrc::make_mut(&mut self.0); // clone if necessary
-        // rely on vec's in-place optimizations to avoid another allocation
-        *owned = mem::take(owned).into_iter().enumerate().map(|(i, tree)| f(i, tree)).collect();
-        self
-    }
-
     /// Create a token stream containing a single token with alone spacing. The
     /// spacing used for the final token in a constructed stream doesn't matter
     /// because it's never used. In practice we arbitrarily use
@@ -499,9 +478,6 @@ impl TokenStream {
 
     pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
         match nt {
-            Nonterminal::NtIdent(ident, is_raw) => {
-                TokenStream::token_alone(token::Ident(ident.name, *is_raw), ident.span)
-            }
             Nonterminal::NtLifetime(ident) => {
                 TokenStream::token_alone(token::Lifetime(ident.name), ident.span)
             }
@@ -523,9 +499,6 @@ impl TokenStream {
 
     fn flatten_token(token: &Token, spacing: Spacing) -> TokenTree {
         match &token.kind {
-            token::Interpolated(nt) if let token::NtIdent(ident, is_raw) = nt.0 => {
-                TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), spacing)
-            }
             token::Interpolated(nt) => TokenTree::Delimited(
                 DelimSpan::from_single(token.span),
                 DelimSpacing::new(Spacing::JointHidden, spacing),

diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -715,7 +715,6 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
             token::NtBlock(e) => self.block_to_string(e),
             token::NtStmt(e) => self.stmt_to_string(e),
             token::NtPat(e) => self.pat_to_string(e),
-            token::NtIdent(e, is_raw) => IdentPrinter::for_ast_ident(*e, *is_raw).to_string(),
             token::NtLifetime(e) => e.to_string(),
             token::NtLiteral(e) => self.expr_to_string(e),
             token::NtVis(e) => self.vis_to_string(e),

diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -10,7 +10,7 @@ use crate::mbe::transcribe::transcribe;
 
 use rustc_ast as ast;
 use rustc_ast::token::{self, Delimiter, NonterminalKind, Token, TokenKind, TokenKind::*};
-use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{DelimSpan, TokenStream};
 use rustc_ast::{NodeId, DUMMY_NODE_ID};
 use rustc_ast_pretty::pprust;
 use rustc_attr::{self as attr, TransparencyError};
@@ -213,45 +213,14 @@ fn expand_macro<'cx>(
             let arm_span = rhses[i].span();
 
             // rhs has holes ( `$id` and `$(...)` that need filled)
-            let mut tts = match transcribe(cx, &named_matches, rhs, rhs_span, transparency) {
+            let tts = match transcribe(cx, &named_matches, rhs, rhs_span, transparency) {
                 Ok(tts) => tts,
                 Err(mut err) => {
                     err.emit();
                     return DummyResult::any(arm_span);
                 }
             };
 
-            // Replace all the tokens for the corresponding positions in the macro, to maintain
-            // proper positions in error reporting, while maintaining the macro_backtrace.
-            if tts.len() == rhs.tts.len() {
-                tts = tts.map_enumerated_owned(|i, mut tt| {
-                    let rhs_tt = &rhs.tts[i];
-                    let ctxt = tt.span().ctxt();
-                    match (&mut tt, rhs_tt) {
-                        // preserve the delim spans if able
-                        (
-                            TokenTree::Delimited(target_sp, ..),
-                            mbe::TokenTree::Delimited(source_sp, ..),
-                        ) => {
-                            target_sp.open = source_sp.open.with_ctxt(ctxt);
-                            target_sp.close = source_sp.close.with_ctxt(ctxt);
-                        }
-                        (
-                            TokenTree::Delimited(target_sp, ..),
-                            mbe::TokenTree::MetaVar(source_sp, ..),
-                        ) => {
-                            target_sp.open = source_sp.with_ctxt(ctxt);
-                            target_sp.close = source_sp.with_ctxt(ctxt).shrink_to_hi();
-                        }
-                        _ => {
-                            let sp = rhs_tt.span().with_ctxt(ctxt);
-                            tt.set_span(sp);
-                        }
-                    }
-                    tt
-                });
-            }
-
             if cx.trace_macros() {
                 let msg = format!("to `{}`", pprust::tts_to_string(&tts));
                 trace_macros_note(&mut cx.expansions, sp, msg);

diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs
@@ -4,7 +4,7 @@ use crate::errors::{
     NoSyntaxVarsExprRepeat, VarStillRepeating,
 };
 use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, MatchedTokenTree, NamedMatch};
-use crate::mbe::{self, MetaVarExpr};
+use crate::mbe::{self, KleeneOp, MetaVarExpr};
 use rustc_ast::mut_visit::{self, MutVisitor};
 use rustc_ast::token::{self, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
@@ -13,7 +13,7 @@ use rustc_errors::DiagnosticBuilder;
 use rustc_errors::{pluralize, PResult};
 use rustc_span::hygiene::{LocalExpnId, Transparency};
 use rustc_span::symbol::{sym, Ident, MacroRulesNormalizedIdent};
-use rustc_span::Span;
+use rustc_span::{try_insert_metavar_span, Span};
 
 use smallvec::{smallvec, SmallVec};
 use std::mem;
@@ -42,6 +42,7 @@ enum Frame<'a> {
         tts: &'a [mbe::TokenTree],
         idx: usize,
         sep: Option<Token>,
+        kleene_op: KleeneOp,
     },
 }
 
@@ -207,7 +208,7 @@ pub(super) fn transcribe<'a>(
 
                         // Is the repetition empty?
                         if len == 0 {
-                            if seq.kleene.op == mbe::KleeneOp::OneOrMore {
+                            if seq.kleene.op == KleeneOp::OneOrMore {
                                 // FIXME: this really ought to be caught at macro definition
                                 // time... It happens when the Kleene operator in the matcher and
                                 // the body for the same meta-variable do not match.
@@ -227,6 +228,7 @@ pub(super) fn transcribe<'a>(
                                 idx: 0,
                                 sep: seq.separator.clone(),
                                 tts: &delimited.tts,
+                                kleene_op: seq.kleene.op,
                             });
                         }
                     }
@@ -243,7 +245,8 @@ pub(super) fn transcribe<'a>(
                         MatchedTokenTree(tt) => {
                             // `tt`s are emitted into the output stream directly as "raw tokens",
                             // without wrapping them into groups.
-                            result.push(tt.clone());
+                            marker.visit_span(&mut sp);
+                            result.push(maybe_use_metavar_location(cx, &stack, sp, tt));
                         }
                         MatchedNonterminal(nt) => {
                             // Other variables are emitted into the output stream as groups with
@@ -308,6 +311,82 @@ pub(super) fn transcribe<'a>(
     }
 }
 
+/// Store the metavariable span for this original span into a side table.
+/// FIXME: Try to put the metavariable span into `SpanData` instead of a side table (#118517).
+/// An optimal encoding for inlined spans will need to be selected to minimize regressions.
+/// The side table approach is relatively good, but not perfect due to collisions.
+/// The old heuristic below is used to improve spans in case of collisions, but diagnostics are
+/// still degraded sometimes in those cases.
+///
+/// The old heuristic:
+///
+/// Usually metavariables `$var` produce interpolated tokens, which have an additional place for
+/// keeping both the original span and the metavariable span. For `tt` metavariables that's not the
+/// case however, and there's no place for keeping a second span. So we try to give the single
+/// produced span a location that would be most useful in practice (the hygiene part of the span
+/// must not be changed).
+///
+/// Different locations are useful for different purposes:
+/// - The original location is useful when we need to report a diagnostic for the original token in
+///   isolation, without combining it with any surrounding tokens. This case occurs, but it is not
+///   very common in practice.
+/// - The metavariable location is useful when we need to somehow combine the token span with spans
+///   of its surrounding tokens. This is the most common way to use token spans.
+///
+/// So this function replaces the original location with the metavariable location in all cases
+/// except these two:
+/// - The metavariable is an element of undelimited sequence `$($tt)*`.
+///   These are typically used for passing larger amounts of code, and tokens in that code usually
+///   combine with each other and not with tokens outside of the sequence.
+/// - The metavariable span comes from a different crate, then we prefer the more local span.
+fn maybe_use_metavar_location(
+    cx: &ExtCtxt<'_>,
+    stack: &[Frame<'_>],
+    metavar_span: Span,
+    orig_tt: &TokenTree,
+) -> TokenTree {
+    let no_collision = match orig_tt {
+        TokenTree::Token(token, ..) => try_insert_metavar_span(token.span, metavar_span),
+        TokenTree::Delimited(dspan, ..) => {
+            try_insert_metavar_span(dspan.open, metavar_span)
+                && try_insert_metavar_span(dspan.close, metavar_span)
+                && try_insert_metavar_span(dspan.entire(), metavar_span)
+        }
+    };
+    let undelimited_seq = || {
+        matches!(
+            stack.last(),
+            Some(Frame::Sequence {
+                tts: [_],
+                sep: None,
+                kleene_op: KleeneOp::ZeroOrMore | KleeneOp::OneOrMore,
+                ..
+            })
+        )
+    };
+    if no_collision || undelimited_seq() || cx.source_map().is_imported(metavar_span) {
+        return orig_tt.clone();
+    }
+
+    // Setting metavar spans for the heuristic spans gives better opportunities for combining them
+    // with neighboring spans even despite their different syntactic contexts.
+    match orig_tt {
+        TokenTree::Token(Token { kind, span }, spacing) => {
+            let span = metavar_span.with_ctxt(span.ctxt());
+            let _ = try_insert_metavar_span(span, metavar_span);
+            TokenTree::Token(Token { kind: kind.clone(), span }, *spacing)
+        }
+        TokenTree::Delimited(dspan, dspacing, delimiter, tts) => {
+            let open = metavar_span.with_ctxt(dspan.open.ctxt());
+            let close = metavar_span.with_ctxt(dspan.close.ctxt());
+            let _ = try_insert_metavar_span(open, metavar_span);
+            let _ = try_insert_metavar_span(close, metavar_span);
+            let dspan = DelimSpan::from_pair(open, close);
+            TokenTree::Delimited(dspan, *dspacing, *delimiter, tts.clone())
+        }
+    }
+}
+
 /// Lookup the meta-var named `ident` and return the matched token tree from the invocation using
 /// the set of matches `interpolations`.
 ///

diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs
@@ -242,14 +242,6 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre
                     }));
                 }
 
-                Interpolated(ref nt) if let NtIdent(ident, is_raw) = &nt.0 => {
-                    trees.push(TokenTree::Ident(Ident {
-                        sym: ident.name,
-                        is_raw: *is_raw,
-                        span: ident.span,
-                    }))
-                }
-
                 Interpolated(nt) => {
                     let stream = TokenStream::from_nonterminal_ast(&nt.0);
                     // A hack used to pass AST fragments to attribute and derive

diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -226,8 +226,7 @@ impl<'a> Parser<'a> {
         let (mut ret, trailing) = ret?;
 
         // When we're not in `capture-cfg` mode, then bail out early if:
-        // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
-        //    so there's nothing for us to do.
+        // 1. Our target doesn't support tokens at all, so there's nothing for us to do.
         // 2. Our target already has tokens set (e.g. we've parsed something
         // like `#[my_attr] $item`. The actual parsing code takes care of prepending
         // any attributes to the nonterminal, so we don't need to modify the