diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index 545b98a9135af..f02fe4cf0a728 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -681,22 +681,40 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere } } + // The easiest way to implement token stream pretty printing would be to + // print each token followed by a single space. But that would produce ugly + // output, so we go to some effort to do better. + // + // First, we track whether each token that appears in source code is + // followed by a space, with `Spacing`, and reproduce that in the output. + // This works well in a lot of cases. E.g. `stringify!(x + y)` produces + // "x + y" and `stringify!(x+y)` produces "x+y". + // + // But this doesn't work for code produced by proc macros (which have no + // original source text representation) nor for code produced by decl + // macros (which are tricky because the whitespace after tokens appearing + // in macro rules isn't always what you want in the produced output). For + // these we mostly use `Spacing::Alone`, which is the conservative choice. + // + // So we have a backup mechanism for when `Spacing::Alone` occurs between a + // pair of tokens: we check if that pair of tokens can obviously go + // together without a space between them. E.g. token `x` followed by token + // `,` is better printed as `x,` than `x ,`. (Even if the original source + // code was `x ,`.) + // + // Finally, we must be careful about changing the output. Token pretty + // printing is used by `stringify!` and `impl Display for + // proc_macro::TokenStream`, and some programs rely on the output having a + // particular form, even though they shouldn't. In particular, some proc + // macros do `format!({stream})` on a token stream and then "parse" the + // output with simple string matching that can't handle whitespace changes. + // E.g. we have seen cases where a proc macro can handle `a :: b` but not + // `a::b`. See #117433 for some examples. fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) { let mut iter = tts.trees().peekable(); while let Some(tt) = iter.next() { let spacing = self.print_tt(tt, convert_dollar_crate); if let Some(next) = iter.peek() { - // Should we print a space after `tt`? There are two guiding - // factors. - // - `spacing` is the more important and accurate one. Most - // tokens have good spacing information, and - // `Joint`/`JointHidden` get used a lot. - // - `space_between` is the backup. Code produced by proc - // macros has worse spacing information, with no - // `JointHidden` usage and too much `Alone` usage, which - // would result in over-spaced output such as - // `( x () , y . z )`. `space_between` avoids some of the - // excess whitespace. if spacing == Spacing::Alone && space_between(tt, next) { self.space(); } diff --git a/compiler/rustc_expand/src/mbe.rs b/compiler/rustc_expand/src/mbe.rs index 32d12c4fd0cc9..08d4a03945489 100644 --- a/compiler/rustc_expand/src/mbe.rs +++ b/compiler/rustc_expand/src/mbe.rs @@ -68,6 +68,8 @@ pub(crate) enum KleeneOp { /// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros. #[derive(Debug, PartialEq, Encodable, Decodable)] enum TokenTree { + /// A token. Unlike `tokenstream::TokenTree::Token` this lacks a `Spacing`. + /// See the comments about `Spacing` in the `transcribe` function. Token(Token), /// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS). Delimited(DelimSpan, DelimSpacing, Delimited), diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index 3901b82eb52ec..8a084dcb4fe3a 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -253,8 +253,23 @@ pub(super) fn transcribe<'a>( mbe::TokenTree::MetaVar(mut sp, mut original_ident) => { // Find the matched nonterminal from the macro invocation, and use it to replace // the meta-var. + // + // We use `Spacing::Alone` everywhere here, because that's the conservative choice + // and spacing of declarative macros is tricky. E.g. in this macro: + // ``` + // macro_rules! idents { + // ($($a:ident,)*) => { stringify!($($a)*) } + // } + // ``` + // `$a` has no whitespace after it and will be marked `JointHidden`. If you then + // call `idents!(x,y,z,)`, each of `x`, `y`, and `z` will be marked as `Joint`. So + // if you choose to use `$x`'s spacing or the identifier's spacing, you'll end up + // producing "xyz", which is bad because it effectively merges tokens. + // `Spacing::Alone` is the safer option. Fortunately, `space_between` will avoid + // some of the unnecessary whitespace. let ident = MacroRulesNormalizedIdent::new(original_ident); if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) { + // njn: explain the use of alone here let tt = match cur_matched { MatchedSingle(ParseNtResult::Tt(tt)) => { // `tt`s are emitted into the output stream directly as "raw tokens",