@@ -10,12 +10,14 @@ use rustc_ast as ast;
10
10
use rustc_ast:: token:: { self , DelimToken , Nonterminal , Token , TokenKind } ;
11
11
use rustc_ast:: tokenstream:: { self , LazyTokenStream , TokenStream , TokenTree } ;
12
12
use rustc_ast_pretty:: pprust;
13
+ use rustc_data_structures:: fx:: FxHashSet ;
13
14
use rustc_data_structures:: sync:: Lrc ;
14
15
use rustc_errors:: { Diagnostic , FatalError , Level , PResult } ;
15
16
use rustc_session:: parse:: ParseSess ;
16
17
use rustc_span:: { symbol:: kw, FileName , SourceFile , Span , DUMMY_SP } ;
17
18
18
19
use smallvec:: SmallVec ;
20
+ use std:: cell:: RefCell ;
19
21
use std:: mem;
20
22
use std:: path:: Path ;
21
23
use std:: str;
@@ -282,14 +284,33 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
282
284
}
283
285
} ;
284
286
287
+ // Caches the stringification of 'good' `TokenStreams` which passed
288
+ // `tokenstream_probably_equal_for_proc_macro`. This allows us to avoid
289
+ // repeatedly stringifying and comparing the same `TokenStream` for deeply
290
+ // nested nonterminals.
291
+ //
292
+ // We cache by the stringification instead of the `TokenStream` to avoid
293
+ // needing to implement `Hash` for `TokenStream`. Note that it's possible to
294
+ // have two distinct `TokenStream`s that stringify to the same result
295
+ // (e.g. if they differ only in hygiene information). However, any
296
+ // information lost during the stringification process is also intentionally
297
+ // ignored by `tokenstream_probably_equal_for_proc_macro`, so it's fine
298
+ // that a single cache entry may 'map' to multiple distinct `TokenStream`s.
299
+ //
300
+ // This is a temporary hack to prevent compilation blowup on certain inputs.
301
+ // The entire pretty-print/retokenize process will be removed soon.
302
+ thread_local ! {
303
+ static GOOD_TOKEN_CACHE : RefCell <FxHashSet <String >> = Default :: default ( ) ;
304
+ }
305
+
285
306
// FIXME(#43081): Avoid this pretty-print + reparse hack
286
307
// Pretty-print the AST struct without inserting any parenthesis
287
308
// beyond those explicitly written by the user (e.g. `ExpnKind::Paren`).
288
309
// The resulting stream may have incorrect precedence, but it's only
289
310
// ever used for a comparison against the capture tokenstream.
290
311
let source = pprust:: nonterminal_to_string_no_extra_parens ( nt) ;
291
312
let filename = FileName :: macro_expansion_source_code ( & source) ;
292
- let reparsed_tokens = parse_stream_from_source_str ( filename, source, sess, Some ( span) ) ;
313
+ let reparsed_tokens = parse_stream_from_source_str ( filename, source. clone ( ) , sess, Some ( span) ) ;
293
314
294
315
// During early phases of the compiler the AST could get modified
295
316
// directly (e.g., attributes added or removed) and the internal cache
@@ -315,8 +336,13 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
315
336
// modifications, including adding/removing typically non-semantic
316
337
// tokens such as extra braces and commas, don't happen.
317
338
if let Some ( tokens) = tokens {
339
+ if GOOD_TOKEN_CACHE . with ( |cache| cache. borrow ( ) . contains ( & source) ) {
340
+ return tokens;
341
+ }
342
+
318
343
// Compare with a non-relaxed delim match to start.
319
344
if tokenstream_probably_equal_for_proc_macro ( & tokens, & reparsed_tokens, sess, false ) {
345
+ GOOD_TOKEN_CACHE . with ( |cache| cache. borrow_mut ( ) . insert ( source. clone ( ) ) ) ;
320
346
return tokens;
321
347
}
322
348
@@ -325,6 +351,11 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
325
351
// token stream to match up with inserted parenthesis in the reparsed stream.
326
352
let source_with_parens = pprust:: nonterminal_to_string ( nt) ;
327
353
let filename_with_parens = FileName :: macro_expansion_source_code ( & source_with_parens) ;
354
+
355
+ if GOOD_TOKEN_CACHE . with ( |cache| cache. borrow ( ) . contains ( & source_with_parens) ) {
356
+ return tokens;
357
+ }
358
+
328
359
let reparsed_tokens_with_parens = parse_stream_from_source_str (
329
360
filename_with_parens,
330
361
source_with_parens,
@@ -340,6 +371,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
340
371
sess,
341
372
true ,
342
373
) {
374
+ GOOD_TOKEN_CACHE . with ( |cache| cache. borrow_mut ( ) . insert ( source. clone ( ) ) ) ;
343
375
return tokens;
344
376
}
345
377
@@ -419,9 +451,9 @@ pub fn tokenstream_probably_equal_for_proc_macro(
419
451
// to iterate breaking tokens multiple times. For example:
420
452
// '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
421
453
let mut token_trees: SmallVec < [ _ ; 2 ] > ;
422
- if let TokenTree :: Token ( token) = & tree {
454
+ if let TokenTree :: Token ( token) = tree {
423
455
let mut out = SmallVec :: < [ _ ; 2 ] > :: new ( ) ;
424
- out. push ( token. clone ( ) ) ;
456
+ out. push ( token) ;
425
457
// Iterate to fixpoint:
426
458
// * We start off with 'out' containing our initial token, and `temp` empty
427
459
// * If we are able to break any tokens in `out`, then `out` will have
0 commit comments