Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 464d0ee

Browse files
committed Dec 17, 2024
Overhaul TokenTreeCursor.
- Move it to `rustc_parse`, which is the only crate that uses it. This lets us remove all the `pub` markers from it.
- Replace `next_ref` and `look_ahead` with `curr` and `bump`, which work better for the `rustc_parse` uses.
  - This requires adding a `TokenStream::get` method, which is simple.
- In `TokenCursor`, we currently duplicate the `DelimSpan`/`DelimSpacing`/`Delimiter` from the surrounding `TokenTree::Delimited` in the stack. This isn't necessary so long as we don't prematurely move past the `Delimited`, and removing the duplication is a small perf win on a very hot code path.
- In `parse_token_tree`, we can just clone the relevant `TokenTree::Delimited` instead of constructing an identical one from pieces.
1 parent fd83954 commit 464d0ee

File tree

3 files changed

+67
-70
lines changed

3 files changed

+67
-70
lines changed
 

‎compiler/rustc_ast/src/tokenstream.rs

+4-37
Original file line number | Diff line number | Diff line change
@@ -423,12 +423,12 @@ impl TokenStream {
423423
self.0.len()
424424
}
425425

426-
pub fn iter(&self) -> TokenStreamIter<'_> {
427-
TokenStreamIter::new(self)
426+
pub fn get(&self, index: usize) -> Option<&TokenTree> {
427+
self.0.get(index)
428428
}
429429

430-
pub fn into_trees(self) -> TokenTreeCursor {
431-
TokenTreeCursor::new(self)
430+
pub fn iter(&self) -> TokenStreamIter<'_> {
431+
TokenStreamIter::new(self)
432432
}
433433

434434
/// Compares two `TokenStream`s, checking equality without regarding span information.
@@ -695,39 +695,6 @@ impl<'t> Iterator for TokenStreamIter<'t> {
695695
}
696696
}
697697

698-
/// Owning by-value iterator over a [`TokenStream`], that produces `&TokenTree`
699-
/// items.
700-
///
701-
/// Doesn't impl `Iterator` because Rust doesn't permit an owning iterator to
702-
/// return `&T` from `next`; the need for an explicit lifetime in the `Item`
703-
/// associated type gets in the way. Instead, use `next_ref` (which doesn't
704-
/// involve associated types) for getting individual elements, or
705-
/// `TokenStreamIter` if you really want an `Iterator`, e.g. in a `for`
706-
/// loop.
707-
#[derive(Clone, Debug)]
708-
pub struct TokenTreeCursor {
709-
pub stream: TokenStream,
710-
index: usize,
711-
}
712-
713-
impl TokenTreeCursor {
714-
fn new(stream: TokenStream) -> Self {
715-
TokenTreeCursor { stream, index: 0 }
716-
}
717-
718-
#[inline]
719-
pub fn next_ref(&mut self) -> Option<&TokenTree> {
720-
self.stream.0.get(self.index).map(|tree| {
721-
self.index += 1;
722-
tree
723-
})
724-
}
725-
726-
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
727-
self.stream.0.get(self.index + n)
728-
}
729-
}
730-
731698
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
732699
pub struct DelimSpan {
733700
pub open: Span,

‎compiler/rustc_parse/src/parser/expr.rs

+3-1
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ use ast::token::IdentIsRaw;
88
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
99
use rustc_ast::ptr::P;
1010
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
11+
use rustc_ast::tokenstream::TokenTree;
1112
use rustc_ast::util::case::Case;
1213
use rustc_ast::util::classify;
1314
use rustc_ast::util::parser::{AssocOp, ExprPrecedence, Fixity, prec_let_scrutinee_needs_par};
@@ -2393,7 +2394,8 @@ impl<'a> Parser<'a> {
23932394
}
23942395

23952396
if self.token == TokenKind::Semi
2396-
&& matches!(self.token_cursor.stack.last(), Some((.., Delimiter::Parenthesis)))
2397+
&& let Some(last) = self.token_cursor.stack.last()
2398+
&& let Some(TokenTree::Delimited(_, _, Delimiter::Parenthesis, _)) = last.curr()
23972399
&& self.may_recover()
23982400
{
23992401
// It is likely that the closure body is a block but where the

‎compiler/rustc_parse/src/parser/mod.rs

+60-32
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,7 @@ use rustc_ast::ptr::P;
2424
use rustc_ast::token::{
2525
self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, Token, TokenKind,
2626
};
27-
use rustc_ast::tokenstream::{
28-
AttrsTarget, DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree, TokenTreeCursor,
29-
};
27+
use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree};
3028
use rustc_ast::util::case::Case;
3129
use rustc_ast::{
3230
self as ast, AnonConst, AttrArgs, AttrId, ByRef, Const, CoroutineKind, DUMMY_NODE_ID,
@@ -273,21 +271,48 @@ struct CaptureState {
273271
seen_attrs: IntervalSet<AttrId>,
274272
}
275273

276-
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
274+
#[derive(Clone, Debug)]
275+
struct TokenTreeCursor {
276+
stream: TokenStream,
277+
/// Points to the current token tree in the stream. In `TokenCursor::curr`,
278+
/// this can be any token tree. In `TokenCursor::stack`, this is always a
279+
/// `TokenTree::Delimited`.
280+
index: usize,
281+
}
282+
283+
impl TokenTreeCursor {
284+
#[inline]
285+
fn new(stream: TokenStream) -> Self {
286+
TokenTreeCursor { stream, index: 0 }
287+
}
288+
289+
#[inline]
290+
fn curr(&self) -> Option<&TokenTree> {
291+
self.stream.get(self.index)
292+
}
293+
294+
#[inline]
295+
fn bump(&mut self) {
296+
self.index += 1;
297+
}
298+
}
299+
300+
/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
277301
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
278302
/// use this type to emit them as a linear sequence. But a linear sequence is
279303
/// what the parser expects, for the most part.
280304
#[derive(Clone, Debug)]
281305
struct TokenCursor {
282-
// Cursor for the current (innermost) token stream. The delimiters for this
283-
// token stream are found in `self.stack.last()`; when that is `None` then
284-
// we are in the outermost token stream which never has delimiters.
285-
tree_cursor: TokenTreeCursor,
286-
287-
// Token streams surrounding the current one. The delimiters for stack[n]'s
288-
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
289-
// because it's the outermost token stream which never has delimiters.
290-
stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
306+
// Cursor for the current (innermost) token stream. The index within the
307+
// cursor can point to any token tree in the stream (or one past the end).
308+
// The delimiters for this token stream are found in `self.stack.last()`;
309+
// if that is `None` we are in the outermost token stream which never has
310+
// delimiters.
311+
curr: TokenTreeCursor,
312+
313+
// Token streams surrounding the current one. The index within each cursor
314+
// always points to a `TokenTree::Delimited`.
315+
stack: Vec<TokenTreeCursor>,
291316
}
292317

293318
impl TokenCursor {
@@ -302,32 +327,33 @@ impl TokenCursor {
302327
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
303328
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
304329
// below can be removed.
305-
if let Some(tree) = self.tree_cursor.next_ref() {
330+
if let Some(tree) = self.curr.curr() {
306331
match tree {
307332
&TokenTree::Token(ref token, spacing) => {
308333
debug_assert!(!matches!(
309334
token.kind,
310335
token::OpenDelim(_) | token::CloseDelim(_)
311336
));
312-
return (token.clone(), spacing);
337+
let res = (token.clone(), spacing);
338+
self.curr.bump();
339+
return res;
313340
}
314341
&TokenTree::Delimited(sp, spacing, delim, ref tts) => {
315-
let trees = tts.clone().into_trees();
316-
self.stack.push((
317-
mem::replace(&mut self.tree_cursor, trees),
318-
sp,
319-
spacing,
320-
delim,
321-
));
342+
let trees = TokenTreeCursor::new(tts.clone());
343+
self.stack.push(mem::replace(&mut self.curr, trees));
322344
if !delim.skip() {
323345
return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
324346
}
325347
// No open delimiter to return; continue on to the next iteration.
326348
}
327349
};
328-
} else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
350+
} else if let Some(parent) = self.stack.pop() {
329351
// We have exhausted this token stream. Move back to its parent token stream.
330-
self.tree_cursor = tree_cursor;
352+
let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
353+
panic!("parent should be Delimited")
354+
};
355+
self.curr = parent;
356+
self.curr.bump(); // move past the `Delimited`
331357
if !delim.skip() {
332358
return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
333359
}
@@ -466,7 +492,7 @@ impl<'a> Parser<'a> {
466492
capture_cfg: false,
467493
restrictions: Restrictions::empty(),
468494
expected_tokens: Vec::new(),
469-
token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
495+
token_cursor: TokenCursor { curr: TokenTreeCursor::new(stream), stack: Vec::new() },
470496
num_bump_calls: 0,
471497
break_last_token: 0,
472498
unmatched_angle_bracket_count: 0,
@@ -1192,7 +1218,7 @@ impl<'a> Parser<'a> {
11921218
if dist == 1 {
11931219
// The index is zero because the tree cursor's index always points
11941220
// to the next token to be gotten.
1195-
match self.token_cursor.tree_cursor.look_ahead(0) {
1221+
match self.token_cursor.curr.curr() {
11961222
Some(tree) => {
11971223
// Indexing stayed within the current token tree.
11981224
match tree {
@@ -1202,12 +1228,13 @@ impl<'a> Parser<'a> {
12021228
return looker(&Token::new(token::OpenDelim(delim), dspan.open));
12031229
}
12041230
}
1205-
};
1231+
}
12061232
}
12071233
None => {
12081234
// The tree cursor lookahead went (one) past the end of the
12091235
// current token tree. Try to return a close delimiter.
1210-
if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
1236+
if let Some(last) = self.token_cursor.stack.last()
1237+
&& let Some(&TokenTree::Delimited(span, _, delim, _)) = last.curr()
12111238
&& !delim.skip()
12121239
{
12131240
// We are not in the outermost token stream, so we have
@@ -1399,9 +1426,10 @@ impl<'a> Parser<'a> {
13991426
pub fn parse_token_tree(&mut self) -> TokenTree {
14001427
match self.token.kind {
14011428
token::OpenDelim(..) => {
1402-
// Grab the tokens within the delimiters.
1403-
let stream = self.token_cursor.tree_cursor.stream.clone();
1404-
let (_, span, spacing, delim) = *self.token_cursor.stack.last().unwrap();
1429+
// Clone the `TokenTree::Delimited` that we are currently
1430+
// within. That's what we are going to return.
1431+
let tree = self.token_cursor.stack.last().unwrap().curr().unwrap().clone();
1432+
debug_assert_matches!(tree, TokenTree::Delimited(..));
14051433

14061434
// Advance the token cursor through the entire delimited
14071435
// sequence. After getting the `OpenDelim` we are *within* the
@@ -1421,7 +1449,7 @@ impl<'a> Parser<'a> {
14211449

14221450
// Consume close delimiter
14231451
self.bump();
1424-
TokenTree::Delimited(span, spacing, delim, stream)
1452+
tree
14251453
}
14261454
token::CloseDelim(_) | token::Eof => unreachable!(),
14271455
_ => {

0 commit comments

Comments
 (0)
Please sign in to comment.