Skip to content

Commit fb848a6

Browse files
authored
Rollup merge of #72388 - Aaron1011:fix/deep-tokenstream-equality, r=petrochenkov
Recursively expand `TokenKind::Interpolated` in `probably_equal_for_proc_macro` Fixes #68430 When comparing the captured and re-parsed `TokenStream` for a `TokenKind::Interpolated`, we currently treat any nested `TokenKind::Interpolated` tokens as unequal. If a `TokenKind::Interpolated` token shows up in the captured `TokenStream` due to a `macro_rules!` expansion, we will throw away the captured `TokenStream`, losing span information. This PR recursively invokes `nt_to_tokenstream` on nested `TokenKind::Interpolated` tokens, effectively flattening the stream into a sequence of non-interpolated tokens. This allows it to compare equal with the re-parsed stream, allowing us to keep the original captured `TokenStream` (with span information). This requires all of the `probably_equal_for_proc_macro` methods to be moved from `librustc_ast` to `librustc_parse` so that they can call `nt_to_tokenstream`.
2 parents 3137f8e + 5685e4d commit fb848a6

File tree

7 files changed

+241
-186
lines changed

7 files changed

+241
-186
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -4178,6 +4178,7 @@ dependencies = [
41784178
"rustc_lexer",
41794179
"rustc_session",
41804180
"rustc_span",
4181+
"smallvec 1.4.0",
41814182
"unicode-normalization",
41824183
]
41834184

src/librustc_ast/token.rs

-56
Original file line numberDiff line numberDiff line change
@@ -673,62 +673,6 @@ impl Token {
673673

674674
Some(Token::new(kind, self.span.to(joint.span)))
675675
}
676-
677-
// See comments in `Nonterminal::to_tokenstream` for why we care about
678-
// *probably* equal here rather than actual equality
679-
crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
680-
if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) {
681-
return false;
682-
}
683-
match (&self.kind, &other.kind) {
684-
(&Eq, &Eq)
685-
| (&Lt, &Lt)
686-
| (&Le, &Le)
687-
| (&EqEq, &EqEq)
688-
| (&Ne, &Ne)
689-
| (&Ge, &Ge)
690-
| (&Gt, &Gt)
691-
| (&AndAnd, &AndAnd)
692-
| (&OrOr, &OrOr)
693-
| (&Not, &Not)
694-
| (&Tilde, &Tilde)
695-
| (&At, &At)
696-
| (&Dot, &Dot)
697-
| (&DotDot, &DotDot)
698-
| (&DotDotDot, &DotDotDot)
699-
| (&DotDotEq, &DotDotEq)
700-
| (&Comma, &Comma)
701-
| (&Semi, &Semi)
702-
| (&Colon, &Colon)
703-
| (&ModSep, &ModSep)
704-
| (&RArrow, &RArrow)
705-
| (&LArrow, &LArrow)
706-
| (&FatArrow, &FatArrow)
707-
| (&Pound, &Pound)
708-
| (&Dollar, &Dollar)
709-
| (&Question, &Question)
710-
| (&Whitespace, &Whitespace)
711-
| (&Comment, &Comment)
712-
| (&Eof, &Eof) => true,
713-
714-
(&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
715-
716-
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
717-
718-
(&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
719-
720-
(&Literal(a), &Literal(b)) => a == b,
721-
722-
(&Lifetime(a), &Lifetime(b)) => a == b,
723-
(&Ident(a, b), &Ident(c, d)) => {
724-
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
725-
}
726-
727-
(&Interpolated(_), &Interpolated(_)) => false,
728-
729-
_ => panic!("forgot to add a token?"),
730-
}
731-
}
732676
}
733677

734678
impl PartialEq<TokenKind> for Token {

src/librustc_ast/tokenstream.rs

-125
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ use rustc_macros::HashStable_Generic;
2121
use rustc_span::{Span, DUMMY_SP};
2222
use smallvec::{smallvec, SmallVec};
2323

24-
use log::debug;
25-
2624
use std::{iter, mem};
2725

2826
/// When the main rust parser encounters a syntax-extension invocation, it
@@ -68,23 +66,6 @@ impl TokenTree {
6866
}
6967
}
7068

71-
// See comments in `Nonterminal::to_tokenstream` for why we care about
72-
// *probably* equal here rather than actual equality
73-
//
74-
// This is otherwise the same as `eq_unspanned`, only recursing with a
75-
// different method.
76-
pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
77-
match (self, other) {
78-
(TokenTree::Token(token), TokenTree::Token(token2)) => {
79-
token.probably_equal_for_proc_macro(token2)
80-
}
81-
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
82-
delim == delim2 && tts.probably_equal_for_proc_macro(&tts2)
83-
}
84-
_ => false,
85-
}
86-
}
87-
8869
/// Retrieves the TokenTree's span.
8970
pub fn span(&self) -> Span {
9071
match self {
@@ -307,112 +288,6 @@ impl TokenStream {
307288
t1.next().is_none() && t2.next().is_none()
308289
}
309290

310-
// See comments in `Nonterminal::to_tokenstream` for why we care about
311-
// *probably* equal here rather than actual equality
312-
//
313-
// This is otherwise the same as `eq_unspanned`, only recursing with a
314-
// different method.
315-
pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
316-
// When checking for `probably_eq`, we ignore certain tokens that aren't
317-
// preserved in the AST. Because they are not preserved, the pretty
318-
// printer arbitrarily adds or removes them when printing as token
319-
// streams, making a comparison between a token stream generated from an
320-
// AST and a token stream which was parsed into an AST more reliable.
321-
fn semantic_tree(tree: &TokenTree) -> bool {
322-
if let TokenTree::Token(token) = tree {
323-
if let
324-
// The pretty printer tends to add trailing commas to
325-
// everything, and in particular, after struct fields.
326-
| token::Comma
327-
// The pretty printer emits `NoDelim` as whitespace.
328-
| token::OpenDelim(DelimToken::NoDelim)
329-
| token::CloseDelim(DelimToken::NoDelim)
330-
// The pretty printer collapses many semicolons into one.
331-
| token::Semi
332-
// The pretty printer collapses whitespace arbitrarily and can
333-
// introduce whitespace from `NoDelim`.
334-
| token::Whitespace
335-
// The pretty printer can turn `$crate` into `::crate_name`
336-
| token::ModSep = token.kind {
337-
return false;
338-
}
339-
}
340-
true
341-
}
342-
343-
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
344-
//
345-
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
346-
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
347-
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
348-
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
349-
// when determining if two `TokenStream`s are 'probably equal'.
350-
//
351-
// Therefore, we use `break_two_token_op` to convert all tokens
352-
// to the 'unglued' form (if it exists). This ensures that two
353-
// `TokenStream`s which differ only in how their tokens are glued
354-
// will be considered 'probably equal', which allows us to keep spans.
355-
//
356-
// This is important when the original `TokenStream` contained
357-
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
358-
// will be omitted when we pretty-print, which can cause the original
359-
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
360-
// leading to some tokens being 'glued' together in one stream but not
361-
// the other. See #68489 for more details.
362-
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
363-
// In almost all cases, we should have either zero or one levels
364-
// of 'unglueing'. However, in some unusual cases, we may need
365-
// to iterate breaking tokens multiple times. For example:
366-
// '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
367-
let mut token_trees: SmallVec<[_; 2]>;
368-
if let TokenTree::Token(token) = &tree {
369-
let mut out = SmallVec::<[_; 2]>::new();
370-
out.push(token.clone());
371-
// Iterate to fixpoint:
372-
// * We start off with 'out' containing our initial token, and `temp` empty
373-
// * If we are able to break any tokens in `out`, then `out` will have
374-
// at least one more element than 'temp', so we will try to break tokens
375-
// again.
376-
// * If we cannot break any tokens in 'out', we are done
377-
loop {
378-
let mut temp = SmallVec::<[_; 2]>::new();
379-
let mut changed = false;
380-
381-
for token in out.into_iter() {
382-
if let Some((first, second)) = token.kind.break_two_token_op() {
383-
temp.push(Token::new(first, DUMMY_SP));
384-
temp.push(Token::new(second, DUMMY_SP));
385-
changed = true;
386-
} else {
387-
temp.push(token);
388-
}
389-
}
390-
out = temp;
391-
if !changed {
392-
break;
393-
}
394-
}
395-
token_trees = out.into_iter().map(|t| TokenTree::Token(t)).collect();
396-
if token_trees.len() != 1 {
397-
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
398-
}
399-
} else {
400-
token_trees = SmallVec::new();
401-
token_trees.push(tree);
402-
}
403-
token_trees.into_iter()
404-
}
405-
406-
let mut t1 = self.trees().filter(semantic_tree).flat_map(break_tokens);
407-
let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
408-
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
409-
if !t1.probably_equal_for_proc_macro(&t2) {
410-
return false;
411-
}
412-
}
413-
t1.next().is_none() && t2.next().is_none()
414-
}
415-
416291
pub fn map_enumerated<F: FnMut(usize, TokenTree) -> TokenTree>(self, mut f: F) -> TokenStream {
417292
TokenStream(Lrc::new(
418293
self.0

src/librustc_parse/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ doctest = false
1212
[dependencies]
1313
bitflags = "1.0"
1414
log = "0.4"
15+
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
1516
rustc_ast_pretty = { path = "../librustc_ast_pretty" }
1617
rustc_data_structures = { path = "../librustc_data_structures" }
1718
rustc_feature = { path = "../librustc_feature" }

0 commit comments

Comments
 (0)