Skip to content

Commit

Permalink
proc_macro: Stay on the "use the cache" path more
Browse files Browse the repository at this point in the history
Discovered in rust-lang#50061 we're falling off the "happy path" of using a stringified
token stream more often than we should. This was due to the fact that a
user-written token like `0xf` is equality-different from the stringified token
of `15` (despite being semantically equivalent).

This patch updates the call to `eq_unspanned` with an even more awful solution,
`probably_equal_for_proc_macro`, which ignores the value of each token and
basically only compares the structure of the token stream, assuming that the AST
doesn't change just one token at a time.

While this is a step towards fixing rust-lang#50061 there is still one regression
from rust-lang#49154 which needs to be fixed.
  • Loading branch information
alexcrichton committed Apr 19, 2018
1 parent ac3c228 commit e934873
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 9 deletions.
100 changes: 91 additions & 9 deletions src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use tokenstream::{TokenStream, TokenTree};
use tokenstream;

use std::{cmp, fmt};
use std::mem;
use rustc_data_structures::sync::{Lrc, Lock};

#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
Expand Down Expand Up @@ -88,6 +89,12 @@ impl Lit {
ByteStr(_) | ByteStrRaw(..) => "byte string"
}
}

// See comments in `interpolated_to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
mem::discriminant(self) == mem::discriminant(other)
}
}

pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
Expand Down Expand Up @@ -530,14 +537,6 @@ impl Token {
// stream they came from. Here we attempt to extract these
// lossless token streams before we fall back to the
// stringification.
//
// During early phases of the compiler, though, the AST could
// get modified directly (e.g. attributes added or removed) and
// the internal cache of tokens my not be invalidated or
// updated. Consequently if the "lossless" token stream
// disagrees with our actuall stringification (which has
// historically been much more battle-tested) then we go with
// the lossy stream anyway (losing span information).
let mut tokens = None;

match nt.0 {
Expand Down Expand Up @@ -569,13 +568,96 @@ impl Token {
let source = pprust::token_to_string(self);
parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
});

// During early phases of the compiler the AST could get modified
// directly (e.g. attributes added or removed) and the internal cache
// of tokens my not be invalidated or updated. Consequently if the
// "lossless" token stream disagrees with our actual stringification
// (which has historically been much more battle-tested) then we go
// with the lossy stream anyway (losing span information).
//
// Note that the comparison isn't `==` here to avoid comparing spans,
// but it *also* is a "probable" equality which is a pretty weird
// definition. We mostly want to catch actual changes to the AST
// like a `#[cfg]` being processed or some weird `macro_rules!`
// expansion.
//
// What we *don't* want to catch is the fact that a user-defined
// literal like `0xf` is stringified as `15`, causing the cached token
// stream to not be literal `==` token-wise (ignoring spans) to the
// token stream we got from stringification.
//
// Instead the "probably equal" check here is "does each token
// recursively have the same discriminant?" We basically don't look at
// the token values here and assume that such fine grained modifications
// of token streams doesn't happen.
if let Some(tokens) = tokens {
if tokens.eq_unspanned(&tokens_for_real) {
if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
return tokens
}
}
return tokens_for_real
}

// See comments in `interpolated_to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
pub fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
if mem::discriminant(self) != mem::discriminant(other) {
return false
}
match (self, other) {
(&Eq, &Eq) |
(&Lt, &Lt) |
(&Le, &Le) |
(&EqEq, &EqEq) |
(&Ne, &Ne) |
(&Ge, &Ge) |
(&Gt, &Gt) |
(&AndAnd, &AndAnd) |
(&OrOr, &OrOr) |
(&Not, &Not) |
(&Tilde, &Tilde) |
(&At, &At) |
(&Dot, &Dot) |
(&DotDot, &DotDot) |
(&DotDotDot, &DotDotDot) |
(&DotDotEq, &DotDotEq) |
(&DotEq, &DotEq) |
(&Comma, &Comma) |
(&Semi, &Semi) |
(&Colon, &Colon) |
(&ModSep, &ModSep) |
(&RArrow, &RArrow) |
(&LArrow, &LArrow) |
(&FatArrow, &FatArrow) |
(&Pound, &Pound) |
(&Dollar, &Dollar) |
(&Question, &Question) |
(&Whitespace, &Whitespace) |
(&Comment, &Comment) |
(&Eof, &Eof) => true,

(&BinOp(a), &BinOp(b)) |
(&BinOpEq(a), &BinOpEq(b)) => a == b,

(&OpenDelim(a), &OpenDelim(b)) |
(&CloseDelim(a), &CloseDelim(b)) => a == b,

(&DocComment(a), &DocComment(b)) |
(&Shebang(a), &Shebang(b)) => a == b,

(&Lifetime(a), &Lifetime(b)) => a.name == b.name,
(&Ident(a, b), &Ident(c, d)) => a.name == c.name && b == d,

(&Literal(ref a, b), &Literal(ref c, d)) => {
b == d && a.probably_equal_for_proc_macro(c)
}

(&Interpolated(_), &Interpolated(_)) => false,

_ => panic!("forgot to add a token?"),
}
}
}

#[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
Expand Down
34 changes: 34 additions & 0 deletions src/libsyntax/tokenstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,24 @@ impl TokenTree {
}
}

// See comments in `interpolated_to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
match (self, other) {
(&TokenTree::Token(_, ref tk), &TokenTree::Token(_, ref tk2)) => {
tk.probably_equal_for_proc_macro(tk2)
}
(&TokenTree::Delimited(_, ref dl), &TokenTree::Delimited(_, ref dl2)) => {
dl.delim == dl2.delim &&
dl.stream().probably_equal_for_proc_macro(&dl2.stream())
}
(_, _) => false,
}
}

/// Retrieve the TokenTree's span.
pub fn span(&self) -> Span {
match *self {
Expand Down Expand Up @@ -250,6 +268,22 @@ impl TokenStream {
t1.next().is_none() && t2.next().is_none()
}

// See comments in `interpolated_to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
let mut t1 = self.trees();
let mut t2 = other.trees();
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
if !t1.probably_equal_for_proc_macro(&t2) {
return false;
}
}
t1.next().is_none() && t2.next().is_none()
}

/// Precondition: `self` consists of a single token tree.
/// Returns true if the token tree is a joint operation w.r.t. `proc_macro::TokenNode`.
pub fn as_tree(self) -> (TokenTree, bool /* joint? */) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ use attribute_with_error::foo;
fn test1() {
let a: i32 = "foo";
//~^ ERROR: mismatched types
let b: i32 = "f'oo";
//~^ ERROR: mismatched types
}

fn test2() {
Expand Down

0 comments on commit e934873

Please sign in to comment.