Skip to content

Commit 16c6594

Browse files
authored
Rollup merge of #94547 - nnethercote:parse_tt-cleanups, r=petrochenkov
`parse_tt` cleanups. I've been looking closely at this code, and saw some opportunities to improve its readability. r? @petrochenkov
2 parents fec7a79 + 97eb1b4 commit 16c6594

File tree

1 file changed

+115
-101
lines changed

1 file changed

+115
-101
lines changed

compiler/rustc_expand/src/mbe/macro_parser.rs

+115-101
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
154154
/// lifetime. By separating `'tt` from `'root`, we can show that.
155155
#[derive(Clone)]
156156
struct MatcherPos<'root, 'tt> {
157-
/// The token or sequence of tokens that make up the matcher
157+
/// The token or sequence of tokens that make up the matcher. `elts` is short for "elements".
158158
top_elts: TokenTreeOrTokenTreeSlice<'tt>,
159159

160160
/// The position of the "dot" in this matcher
@@ -184,17 +184,8 @@ struct MatcherPos<'root, 'tt> {
184184
/// in this matcher.
185185
match_hi: usize,
186186

187-
// The following fields are used if we are matching a repetition. If we aren't, they should be
188-
// `None`.
189-
/// The KleeneOp of this sequence if we are in a repetition.
190-
seq_op: Option<mbe::KleeneOp>,
191-
192-
/// The separator if we are in a repetition.
193-
sep: Option<Token>,
194-
195-
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
196-
/// before we enter the sequence.
197-
up: Option<MatcherPosHandle<'root, 'tt>>,
187+
/// This field is only used if we are matching a repetition.
188+
repetition: Option<MatcherPosRepetition<'root, 'tt>>,
198189

199190
/// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
200191
/// a delimited token tree (e.g., something wrapped in `(` `)`) or to get the contents of a doc
@@ -207,14 +198,58 @@ struct MatcherPos<'root, 'tt> {
207198
stack: SmallVec<[MatcherTtFrame<'tt>; 1]>,
208199
}
209200

201+
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
202+
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
203+
rustc_data_structures::static_assert_size!(MatcherPos<'_, '_>, 192);
204+
210205
impl<'root, 'tt> MatcherPos<'root, 'tt> {
206+
/// Generates the top-level matcher position in which the "dot" is before the first token of
207+
/// the matcher `ms`.
208+
fn new(ms: &'tt [TokenTree]) -> Self {
209+
let match_idx_hi = count_names(ms);
210+
MatcherPos {
211+
// Start with the top level matcher given to us.
212+
top_elts: TtSeq(ms),
213+
214+
// The "dot" is before the first token of the matcher.
215+
idx: 0,
216+
217+
// Initialize `matches` to a bunch of empty `Vec`s -- one for each metavar in
218+
// `top_elts`. `match_lo` for `top_elts` is 0 and `match_hi` is `match_idx_hi`.
219+
// `match_cur` is 0 since we haven't actually matched anything yet.
220+
matches: create_matches(match_idx_hi),
221+
match_lo: 0,
222+
match_cur: 0,
223+
match_hi: match_idx_hi,
224+
225+
// Haven't descended into any delimiters, so this is empty.
226+
stack: smallvec![],
227+
228+
// Haven't descended into any sequences, so this is `None`.
229+
repetition: None,
230+
}
231+
}
232+
211233
/// Adds `m` as a named match for the `idx`-th metavar.
212234
fn push_match(&mut self, idx: usize, m: NamedMatch) {
213235
let matches = Lrc::make_mut(&mut self.matches[idx]);
214236
matches.push(m);
215237
}
216238
}
217239

240+
#[derive(Clone)]
241+
struct MatcherPosRepetition<'root, 'tt> {
242+
/// The KleeneOp of this sequence.
243+
seq_op: mbe::KleeneOp,
244+
245+
/// The separator.
246+
sep: Option<Token>,
247+
248+
/// The "parent" matcher position. That is, the matcher position just before we enter the
249+
/// sequence.
250+
up: MatcherPosHandle<'root, 'tt>,
251+
}
252+
218253
// Lots of MatcherPos instances are created at runtime. Allocating them on the
219254
// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
220255
// on the stack is also slow, because MatcherPos is quite a large type and
@@ -258,6 +293,12 @@ impl<'root, 'tt> DerefMut for MatcherPosHandle<'root, 'tt> {
258293
}
259294
}
260295

296+
enum EofItems<'root, 'tt> {
297+
None,
298+
One(MatcherPosHandle<'root, 'tt>),
299+
Multiple,
300+
}
301+
261302
/// Represents the possible results of an attempted parse.
262303
crate enum ParseResult<T> {
263304
/// Parsed successfully.
@@ -300,35 +341,6 @@ fn create_matches(len: usize) -> Box<[Lrc<NamedMatchVec>]> {
300341
.into_boxed_slice()
301342
}
302343

303-
/// Generates the top-level matcher position in which the "dot" is before the first token of the
304-
/// matcher `ms`.
305-
fn initial_matcher_pos<'root, 'tt>(ms: &'tt [TokenTree]) -> MatcherPos<'root, 'tt> {
306-
let match_idx_hi = count_names(ms);
307-
let matches = create_matches(match_idx_hi);
308-
MatcherPos {
309-
// Start with the top level matcher given to us
310-
top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
311-
// The "dot" is before the first token of the matcher
312-
idx: 0,
313-
314-
// Initialize `matches` to a bunch of empty `Vec`s -- one for each metavar in `top_elts`.
315-
// `match_lo` for `top_elts` is 0 and `match_hi` is `matches.len()`. `match_cur` is 0 since
316-
// we haven't actually matched anything yet.
317-
matches,
318-
match_lo: 0,
319-
match_cur: 0,
320-
match_hi: match_idx_hi,
321-
322-
// Haven't descended into any delimiters, so empty stack
323-
stack: smallvec![],
324-
325-
// Haven't descended into any sequences, so both of these are `None`.
326-
seq_op: None,
327-
sep: None,
328-
up: None,
329-
}
330-
}
331-
332344
/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
333345
/// so it is associated with a single ident in a parse, and all
334346
/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
@@ -475,10 +487,10 @@ fn inner_parse_loop<'root, 'tt>(
475487
sess: &ParseSess,
476488
cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
477489
next_items: &mut Vec<MatcherPosHandle<'root, 'tt>>,
478-
eof_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
479490
bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
491+
eof_items: &mut EofItems<'root, 'tt>,
480492
token: &Token,
481-
) -> ParseResult<()> {
493+
) -> Result<(), (rustc_span::Span, String)> {
482494
// Pop items from `cur_items` until it is empty.
483495
while let Some(mut item) = cur_items.pop() {
484496
// When unzipped trees end, remove them. This corresponds to backtracking out of a
@@ -504,7 +516,7 @@ fn inner_parse_loop<'root, 'tt>(
504516
// We are repeating iff there is a parent. If the matcher is inside of a repetition,
505517
// then we could be at the end of a sequence or at the beginning of the next
506518
// repetition.
507-
if item.up.is_some() {
519+
if let Some(repetition) = &item.repetition {
508520
// At this point, regardless of whether there is a separator, we should add all
509521
// matches from the complete repetition of the sequence to the shared, top-level
510522
// `matches` list (actually, `up.matches`, which could itself not be the top-level,
@@ -515,7 +527,7 @@ fn inner_parse_loop<'root, 'tt>(
515527
// NOTE: removing the condition `idx == len` allows trailing separators.
516528
if idx == len {
517529
// Get the `up` matcher
518-
let mut new_pos = item.up.clone().unwrap();
530+
let mut new_pos = repetition.up.clone();
519531

520532
// Add matches from this repetition to the `matches` of `up`
521533
for idx in item.match_lo..item.match_hi {
@@ -530,32 +542,33 @@ fn inner_parse_loop<'root, 'tt>(
530542
}
531543

532544
// Check if we need a separator.
533-
if idx == len && item.sep.is_some() {
545+
if idx == len && repetition.sep.is_some() {
534546
// We have a separator, and it is the current token. We can advance past the
535547
// separator token.
536-
if item.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
548+
if repetition.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
537549
item.idx += 1;
538550
next_items.push(item);
539551
}
540-
}
541-
// We don't need a separator. Move the "dot" back to the beginning of the matcher
542-
// and try to match again UNLESS we are only allowed to have _one_ repetition.
543-
else if item.seq_op != Some(mbe::KleeneOp::ZeroOrOne) {
552+
} else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne {
553+
// We don't need a separator. Move the "dot" back to the beginning of the
554+
// matcher and try to match again UNLESS we are only allowed to have _one_
555+
// repetition.
544556
item.match_cur = item.match_lo;
545557
item.idx = 0;
546558
cur_items.push(item);
547559
}
560+
} else {
561+
// If we are not in a repetition, then being at the end of a matcher means that we
562+
// have reached the potential end of the input.
563+
*eof_items = match eof_items {
564+
EofItems::None => EofItems::One(item),
565+
EofItems::One(_) | EofItems::Multiple => EofItems::Multiple,
566+
}
548567
}
549-
// If we are not in a repetition, then being at the end of a matcher means that we have
550-
// reached the potential end of the input.
551-
else {
552-
eof_items.push(item);
553-
}
554-
}
555-
// We are in the middle of a matcher.
556-
else {
557-
// Look at what token in the matcher we are trying to match the current token (`token`)
558-
// against. Depending on that, we may generate new items.
568+
} else {
569+
// We are in the middle of a matcher. Look at what token in the matcher we are trying
570+
// to match the current token (`token`) against. Depending on that, we may generate new
571+
// items.
559572
match item.top_elts.get_tt(idx) {
560573
// Need to descend into a sequence
561574
TokenTree::Sequence(sp, seq) => {
@@ -578,22 +591,24 @@ fn inner_parse_loop<'root, 'tt>(
578591
let matches = create_matches(item.matches.len());
579592
cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos {
580593
stack: smallvec![],
581-
sep: seq.separator.clone(),
582-
seq_op: Some(seq.kleene.op),
583594
idx: 0,
584595
matches,
585596
match_lo: item.match_cur,
586597
match_cur: item.match_cur,
587598
match_hi: item.match_cur + seq.num_captures,
588-
up: Some(item),
599+
repetition: Some(MatcherPosRepetition {
600+
up: item,
601+
sep: seq.separator.clone(),
602+
seq_op: seq.kleene.op,
603+
}),
589604
top_elts: Tt(TokenTree::Sequence(sp, seq)),
590605
})));
591606
}
592607

593608
// We need to match a metavar (but the identifier is invalid)... this is an error
594609
TokenTree::MetaVarDecl(span, _, None) => {
595610
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
596-
return Error(span, "missing fragment specifier".to_string());
611+
return Err((span, "missing fragment specifier".to_string()));
597612
}
598613
}
599614

@@ -641,7 +656,7 @@ fn inner_parse_loop<'root, 'tt>(
641656
}
642657

643658
// Yay a successful parse (so far)!
644-
Success(())
659+
Ok(())
645660
}
646661

647662
/// Use the given sequence of token trees (`ms`) as a matcher. Match the token
@@ -659,17 +674,18 @@ pub(super) fn parse_tt(
659674
//
660675
// This MatcherPos instance is allocated on the stack. All others -- and
661676
// there are frequently *no* others! -- are allocated on the heap.
662-
let mut initial = initial_matcher_pos(ms);
677+
let mut initial = MatcherPos::new(ms);
663678
let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)];
664679
let mut next_items = Vec::new();
665680

666681
loop {
682+
assert!(next_items.is_empty());
683+
667684
// Matcher positions black-box parsed by parser.rs (`parser`)
668685
let mut bb_items = SmallVec::new();
669686

670687
// Matcher positions that would be valid if the macro invocation was over now
671-
let mut eof_items = SmallVec::new();
672-
assert!(next_items.is_empty());
688+
let mut eof_items = EofItems::None;
673689

674690
// Process `cur_items` until either we have finished the input or we need to get some
675691
// parsing from the black-box parser done. The result is that `next_items` will contain a
@@ -678,37 +694,34 @@ pub(super) fn parse_tt(
678694
parser.sess,
679695
&mut cur_items,
680696
&mut next_items,
681-
&mut eof_items,
682697
&mut bb_items,
698+
&mut eof_items,
683699
&parser.token,
684700
) {
685-
Success(_) => {}
686-
Failure(token, msg) => return Failure(token, msg),
687-
Error(sp, msg) => return Error(sp, msg),
688-
ErrorReported => return ErrorReported,
701+
Ok(()) => {}
702+
Err((sp, msg)) => return Error(sp, msg),
689703
}
690704

691705
// inner parse loop handled all cur_items, so it's empty
692706
assert!(cur_items.is_empty());
693707

694-
// We need to do some post processing after the `inner_parser_loop`.
708+
// We need to do some post processing after the `inner_parse_loop`.
695709
//
696710
// Error messages here could be improved with links to original rules.
697711

698712
// If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
699713
// either the parse is ambiguous (which should never happen) or there is a syntax error.
700714
if parser.token == token::Eof {
701-
if eof_items.len() == 1 {
702-
let matches =
703-
eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
704-
return nameize(parser.sess, ms, matches);
705-
} else if eof_items.len() > 1 {
706-
return Error(
707-
parser.token.span,
708-
"ambiguity: multiple successful parses".to_string(),
709-
);
710-
} else {
711-
return Failure(
715+
return match eof_items {
716+
EofItems::One(mut eof_item) => {
717+
let matches =
718+
eof_item.matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
719+
nameize(parser.sess, ms, matches)
720+
}
721+
EofItems::Multiple => {
722+
Error(parser.token.span, "ambiguity: multiple successful parses".to_string())
723+
}
724+
EofItems::None => Failure(
712725
Token::new(
713726
token::Eof,
714727
if parser.token.span.is_dummy() {
@@ -718,22 +731,23 @@ pub(super) fn parse_tt(
718731
},
719732
),
720733
"missing tokens in macro arguments",
721-
);
722-
}
734+
),
735+
};
723736
}
724-
// Performance hack: eof_items may share matchers via Rc with other things that we want
725-
// to modify. Dropping eof_items now may drop these refcounts to 1, preventing an
726-
// unnecessary implicit clone later in Rc::make_mut.
737+
// Performance hack: `eof_items` may share matchers via `Rc` with other things that we want
738+
// to modify. Dropping `eof_items` now may drop these refcounts to 1, preventing an
739+
// unnecessary implicit clone later in `Rc::make_mut`.
727740
drop(eof_items);
728741

729742
// If there are no possible next positions AND we aren't waiting for the black-box parser,
730743
// then there is a syntax error.
731744
if bb_items.is_empty() && next_items.is_empty() {
732745
return Failure(parser.token.clone(), "no rules expected this token in macro call");
733746
}
734-
// Another possibility is that we need to call out to parse some rust nonterminal
735-
// (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
736-
else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
747+
748+
if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
749+
// We need to call out to parse some rust nonterminal (black-box) parser. But something
750+
// is wrong, because there is not EXACTLY ONE of these.
737751
let nts = bb_items
738752
.iter()
739753
.map(|item| match item.top_elts.get_tt(item.idx) {
@@ -755,15 +769,15 @@ pub(super) fn parse_tt(
755769
),
756770
);
757771
}
758-
// Dump all possible `next_items` into `cur_items` for the next iteration.
759-
else if !next_items.is_empty() {
760-
// Now process the next token
772+
773+
if !next_items.is_empty() {
774+
// Dump all possible `next_items` into `cur_items` for the next iteration. Then process
775+
// the next token.
761776
cur_items.extend(next_items.drain(..));
762777
parser.to_mut().bump();
763-
}
764-
// Finally, we have the case where we need to call the black-box parser to get some
765-
// nonterminal.
766-
else {
778+
} else {
779+
// Finally, we have the case where we need to call the black-box parser to get some
780+
// nonterminal.
767781
assert_eq!(bb_items.len(), 1);
768782

769783
let mut item = bb_items.pop().unwrap();

0 commit comments

Comments
 (0)