Skip to content

Commit 238d907

Browse files
committed
Call compute_locs once per rule.
Currently it's called in `parse_tt` every time a match rule is invoked. This commit moves it so it's instead called once per match rule, in `compile_declarative_macro`. This is a performance win. The commit also moves `compute_locs` out of `TtParser`, because there's no longer any reason for it to be in there.
1 parent 7300bd6 commit 238d907

File tree

2 files changed

+123
-114
lines changed

2 files changed

+123
-114
lines changed

compiler/rustc_expand/src/mbe/macro_parser.rs

+89-97
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
106106
///
107107
/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
108108
/// simply incrementing the current matcher position index by one.
109-
enum MatcherLoc {
109+
pub(super) enum MatcherLoc {
110110
Token {
111111
token: Token,
112112
},
@@ -138,6 +138,78 @@ enum MatcherLoc {
138138
Eof,
139139
}
140140

141+
pub(super) fn compute_locs(sess: &ParseSess, matcher: &[TokenTree]) -> Vec<MatcherLoc> {
142+
fn inner(
143+
sess: &ParseSess,
144+
tts: &[TokenTree],
145+
locs: &mut Vec<MatcherLoc>,
146+
next_metavar: &mut usize,
147+
seq_depth: usize,
148+
) {
149+
for tt in tts {
150+
match tt {
151+
TokenTree::Token(token) => {
152+
locs.push(MatcherLoc::Token { token: token.clone() });
153+
}
154+
TokenTree::Delimited(_, delimited) => {
155+
locs.push(MatcherLoc::Delimited);
156+
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth);
157+
}
158+
TokenTree::Sequence(_, seq) => {
159+
// We can't determine `idx_first_after` and construct the final
160+
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
161+
// pieces are processed. So we push a dummy value (`Eof` is cheapest to
162+
// construct) now, and overwrite it with the proper value below.
163+
let dummy = MatcherLoc::Eof;
164+
locs.push(dummy);
165+
166+
let next_metavar_orig = *next_metavar;
167+
let op = seq.kleene.op;
168+
let idx_first = locs.len();
169+
let idx_seq = idx_first - 1;
170+
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1);
171+
172+
if let Some(separator) = &seq.separator {
173+
locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
174+
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
175+
} else {
176+
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
177+
}
178+
179+
// Overwrite the dummy value pushed above with the proper value.
180+
locs[idx_seq] = MatcherLoc::Sequence {
181+
op,
182+
num_metavar_decls: seq.num_captures,
183+
idx_first_after: locs.len(),
184+
next_metavar: next_metavar_orig,
185+
seq_depth,
186+
};
187+
}
188+
&TokenTree::MetaVarDecl(span, bind, kind) => {
189+
locs.push(MatcherLoc::MetaVarDecl {
190+
span,
191+
bind,
192+
kind,
193+
next_metavar: *next_metavar,
194+
seq_depth,
195+
});
196+
*next_metavar += 1;
197+
}
198+
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
199+
}
200+
}
201+
}
202+
203+
let mut locs = vec![];
204+
let mut next_metavar = 0;
205+
inner(sess, matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
206+
207+
// A final entry is needed for eof.
208+
locs.push(MatcherLoc::Eof);
209+
210+
locs
211+
}
212+
141213
/// A single matcher position, representing the state of matching.
142214
struct MatcherPos {
143215
/// The index into the matcher's `&[MatcherLoc]` slice, which represents the "dot".
@@ -301,9 +373,6 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
301373
pub struct TtParser {
302374
macro_name: Ident,
303375

304-
/// The matcher of the current rule.
305-
locs: Vec<MatcherLoc>,
306-
307376
/// The set of current mps to be processed. This should be empty by the end of a successful
308377
/// execution of `parse_tt_inner`.
309378
cur_mps: Vec<MatcherPos>,
@@ -324,92 +393,13 @@ impl TtParser {
324393
pub(super) fn new(macro_name: Ident) -> TtParser {
325394
TtParser {
326395
macro_name,
327-
locs: vec![],
328396
cur_mps: vec![],
329397
next_mps: vec![],
330398
bb_mps: vec![],
331399
empty_matches: Lrc::new(smallvec![]),
332400
}
333401
}
334402

335-
/// Convert a `&[TokenTree]` to a `&[MatcherLoc]`. Note: this conversion happens every time the
336-
/// macro is called, which may be many times if there are many call sites or if it is
337-
/// recursive. This conversion is fairly cheap and the representation is sufficiently better
338-
/// for matching than `&[TokenTree]` that it's a clear performance win even with the overhead.
339-
/// But it might be possible to move the conversion outwards so it only occurs once per macro.
340-
fn compute_locs(&mut self, sess: &ParseSess, matcher: &[TokenTree]) -> usize {
341-
fn inner(
342-
sess: &ParseSess,
343-
tts: &[TokenTree],
344-
locs: &mut Vec<MatcherLoc>,
345-
next_metavar: &mut usize,
346-
seq_depth: usize,
347-
) {
348-
for tt in tts {
349-
match tt {
350-
TokenTree::Token(token) => {
351-
locs.push(MatcherLoc::Token { token: token.clone() });
352-
}
353-
TokenTree::Delimited(_, delimited) => {
354-
locs.push(MatcherLoc::Delimited);
355-
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth);
356-
}
357-
TokenTree::Sequence(_, seq) => {
358-
// We can't determine `idx_first_after` and construct the final
359-
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence
360-
// end pieces are processed. So we push a dummy value (`Eof` is cheapest to
361-
// construct) now, and overwrite it with the proper value below.
362-
let dummy = MatcherLoc::Eof;
363-
locs.push(dummy);
364-
365-
let next_metavar_orig = *next_metavar;
366-
let op = seq.kleene.op;
367-
let idx_first = locs.len();
368-
let idx_seq = idx_first - 1;
369-
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1);
370-
371-
if let Some(separator) = &seq.separator {
372-
locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
373-
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
374-
} else {
375-
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
376-
}
377-
378-
// Overwrite the dummy value pushed above with the proper value.
379-
locs[idx_seq] = MatcherLoc::Sequence {
380-
op,
381-
num_metavar_decls: seq.num_captures,
382-
idx_first_after: locs.len(),
383-
next_metavar: next_metavar_orig,
384-
seq_depth,
385-
};
386-
}
387-
&TokenTree::MetaVarDecl(span, bind, kind) => {
388-
locs.push(MatcherLoc::MetaVarDecl {
389-
span,
390-
bind,
391-
kind,
392-
next_metavar: *next_metavar,
393-
seq_depth,
394-
});
395-
*next_metavar += 1;
396-
}
397-
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
398-
}
399-
}
400-
}
401-
402-
self.locs.clear();
403-
let mut next_metavar = 0;
404-
inner(sess, matcher, &mut self.locs, &mut next_metavar, /* seq_depth */ 0);
405-
406-
// A final entry is needed for eof.
407-
self.locs.push(MatcherLoc::Eof);
408-
409-
// This is the number of metavar decls.
410-
next_metavar
411-
}
412-
413403
/// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
414404
/// produce more mps in `next_mps` and `bb_mps`.
415405
///
@@ -420,15 +410,15 @@ impl TtParser {
420410
fn parse_tt_inner(
421411
&mut self,
422412
sess: &ParseSess,
423-
num_metavar_decls: usize,
413+
matcher: &[MatcherLoc],
424414
token: &Token,
425415
) -> Option<NamedParseResult> {
426416
// Matcher positions that would be valid if the macro invocation was over now. Only
427417
// modified if `token == Eof`.
428418
let mut eof_mps = EofMatcherPositions::None;
429419

430420
while let Some(mut mp) = self.cur_mps.pop() {
431-
match &self.locs[mp.idx] {
421+
match &matcher[mp.idx] {
432422
MatcherLoc::Token { token: t } => {
433423
// If it's a doc comment, we just ignore it and move on to the next tt in the
434424
// matcher. This is a bug, but #95267 showed that existing programs rely on
@@ -536,7 +526,7 @@ impl TtParser {
536526
}
537527
MatcherLoc::Eof => {
538528
// We are past the matcher's end, and not in a sequence. Try to end things.
539-
debug_assert_eq!(mp.idx, self.locs.len() - 1);
529+
debug_assert_eq!(mp.idx, matcher.len() - 1);
540530
if *token == token::Eof {
541531
eof_mps = match eof_mps {
542532
EofMatcherPositions::None => EofMatcherPositions::One(mp),
@@ -554,11 +544,10 @@ impl TtParser {
554544
if *token == token::Eof {
555545
Some(match eof_mps {
556546
EofMatcherPositions::One(mut eof_mp) => {
557-
assert_eq!(eof_mp.matches.len(), num_metavar_decls);
558547
// Need to take ownership of the matches from within the `Lrc`.
559548
Lrc::make_mut(&mut eof_mp.matches);
560549
let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
561-
self.nameize(sess, matches)
550+
self.nameize(sess, matcher, matches)
562551
}
563552
EofMatcherPositions::Multiple => {
564553
Error(token.span, "ambiguity: multiple successful parses".to_string())
@@ -580,10 +569,8 @@ impl TtParser {
580569
pub(super) fn parse_tt(
581570
&mut self,
582571
parser: &mut Cow<'_, Parser<'_>>,
583-
matcher: &[TokenTree],
572+
matcher: &[MatcherLoc],
584573
) -> NamedParseResult {
585-
let num_metavar_decls = self.compute_locs(parser.sess, matcher);
586-
587574
// A queue of possible matcher positions. We initialize it with the matcher position in
588575
// which the "dot" is before the first token of the first token tree in `matcher`.
589576
// `parse_tt_inner` then processes all of these possible matcher positions and produces
@@ -598,7 +585,7 @@ impl TtParser {
598585

599586
// Process `cur_mps` until either we have finished the input or we need to get some
600587
// parsing from the black-box parser done.
601-
if let Some(res) = self.parse_tt_inner(&parser.sess, num_metavar_decls, &parser.token) {
588+
if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) {
602589
return res;
603590
}
604591

@@ -626,7 +613,7 @@ impl TtParser {
626613
(0, 1) => {
627614
// We need to call the black-box parser to get some nonterminal.
628615
let mut mp = self.bb_mps.pop().unwrap();
629-
let loc = &self.locs[mp.idx];
616+
let loc = &matcher[mp.idx];
630617
if let &MatcherLoc::MetaVarDecl {
631618
span,
632619
kind: Some(kind),
@@ -664,19 +651,23 @@ impl TtParser {
664651

665652
(_, _) => {
666653
// Too many possibilities!
667-
return self.ambiguity_error(parser.token.span);
654+
return self.ambiguity_error(matcher, parser.token.span);
668655
}
669656
}
670657

671658
assert!(!self.cur_mps.is_empty());
672659
}
673660
}
674661

675-
fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult {
662+
fn ambiguity_error(
663+
&self,
664+
matcher: &[MatcherLoc],
665+
token_span: rustc_span::Span,
666+
) -> NamedParseResult {
676667
let nts = self
677668
.bb_mps
678669
.iter()
679-
.map(|mp| match &self.locs[mp.idx] {
670+
.map(|mp| match &matcher[mp.idx] {
680671
MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
681672
format!("{} ('{}')", kind, bind)
682673
}
@@ -702,12 +693,13 @@ impl TtParser {
702693
fn nameize<I: Iterator<Item = NamedMatch>>(
703694
&self,
704695
sess: &ParseSess,
696+
matcher: &[MatcherLoc],
705697
mut res: I,
706698
) -> NamedParseResult {
707699
// Make sure that each metavar has _exactly one_ binding. If so, insert the binding into the
708700
// `NamedParseResult`. Otherwise, it's an error.
709701
let mut ret_val = FxHashMap::default();
710-
for loc in self.locs.iter() {
702+
for loc in matcher {
711703
if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
712704
if kind.is_some() {
713705
match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {

0 commit comments

Comments
 (0)