Skip to content

Commit 7874297

Browse files
committed
Overhaul TokenTreeCursor.
- Move it to `rustc_parse`, which is the only crate that uses it. This lets us remove all the `pub` markers from it.
- Change `next_ref` and `look_ahead` to `get` and `bump`, which work better for the way `rustc_parse` uses the cursor.
- This requires adding a `TokenStream::get` method, which is easy.
- In `TokenCursor`, we currently duplicate the `DelimSpan`/`DelimSpacing`/`Delimiter` from the surrounding `TokenTree::Delimited` in the stack. This isn't necessary so long as we don't prematurely move past the `Delimited`, and removing the duplication is a small perf win on a very hot code path.
- In `parse_token_tree`, we can just clone the relevant `TokenTree::Delimited` instead of constructing an identical one from pieces.
1 parent 0e5a56b commit 7874297

File tree

3 files changed

+57
-64
lines changed

3 files changed

+57
-64
lines changed

Diff for: compiler/rustc_ast/src/tokenstream.rs

+4-37
Original file line numberDiff line numberDiff line change
@@ -423,12 +423,12 @@ impl TokenStream {
423423
self.0.len()
424424
}
425425

426-
pub fn iter(&self) -> TokenStreamIter<'_> {
427-
TokenStreamIter::new(self)
426+
pub fn get(&self, index: usize) -> Option<&TokenTree> {
427+
self.0.get(index)
428428
}
429429

430-
pub fn into_trees(self) -> TokenTreeCursor {
431-
TokenTreeCursor::new(self)
430+
pub fn iter(&self) -> TokenStreamIter<'_> {
431+
TokenStreamIter::new(self)
432432
}
433433

434434
/// Compares two `TokenStream`s, checking equality without regarding span information.
@@ -695,39 +695,6 @@ impl<'t> Iterator for TokenStreamIter<'t> {
695695
}
696696
}
697697

698-
/// Owning by-value iterator over a [`TokenStream`], that produces `&TokenTree`
699-
/// items.
700-
///
701-
/// Doesn't impl `Iterator` because Rust doesn't permit an owning iterator to
702-
/// return `&T` from `next`; the need for an explicit lifetime in the `Item`
703-
/// associated type gets in the way. Instead, use `next_ref` (which doesn't
704-
/// involve associated types) for getting individual elements, or
705-
/// `TokenStreamIter` if you really want an `Iterator`, e.g. in a `for`
706-
/// loop.
707-
#[derive(Clone, Debug)]
708-
pub struct TokenTreeCursor {
709-
pub stream: TokenStream,
710-
index: usize,
711-
}
712-
713-
impl TokenTreeCursor {
714-
fn new(stream: TokenStream) -> Self {
715-
TokenTreeCursor { stream, index: 0 }
716-
}
717-
718-
#[inline]
719-
pub fn next_ref(&mut self) -> Option<&TokenTree> {
720-
self.stream.0.get(self.index).map(|tree| {
721-
self.index += 1;
722-
tree
723-
})
724-
}
725-
726-
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
727-
self.stream.0.get(self.index + n)
728-
}
729-
}
730-
731698
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
732699
pub struct DelimSpan {
733700
pub open: Span,

Diff for: compiler/rustc_parse/src/parser/expr.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use ast::token::IdentIsRaw;
88
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
99
use rustc_ast::ptr::P;
1010
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
11+
use rustc_ast::tokenstream::TokenTree;
1112
use rustc_ast::util::case::Case;
1213
use rustc_ast::util::classify;
1314
use rustc_ast::util::parser::{AssocOp, ExprPrecedence, Fixity, prec_let_scrutinee_needs_par};
@@ -2376,7 +2377,8 @@ impl<'a> Parser<'a> {
23762377
}
23772378

23782379
if self.token == TokenKind::Semi
2379-
&& matches!(self.token_cursor.stack.last(), Some((.., Delimiter::Parenthesis)))
2380+
&& let Some(last) = self.token_cursor.stack.last()
2381+
&& let Some(TokenTree::Delimited(_, _, Delimiter::Parenthesis, _)) = last.get()
23802382
&& self.may_recover()
23812383
{
23822384
// It is likely that the closure body is a block but where the

Diff for: compiler/rustc_parse/src/parser/mod.rs

+50-26
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,7 @@ use rustc_ast::ptr::P;
2424
use rustc_ast::token::{
2525
self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, Token, TokenKind,
2626
};
27-
use rustc_ast::tokenstream::{
28-
AttrsTarget, DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree, TokenTreeCursor,
29-
};
27+
use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree};
3028
use rustc_ast::util::case::Case;
3129
use rustc_ast::{
3230
self as ast, AnonConst, AttrArgs, AttrArgsEq, AttrId, ByRef, Const, CoroutineKind,
@@ -273,6 +271,29 @@ struct CaptureState {
273271
seen_attrs: IntervalSet<AttrId>,
274272
}
275273

274+
#[derive(Clone, Debug)]
275+
struct TokenTreeCursor {
276+
stream: TokenStream,
277+
index: usize,
278+
}
279+
280+
impl TokenTreeCursor {
281+
#[inline]
282+
fn new(stream: TokenStream) -> Self {
283+
TokenTreeCursor { stream, index: 0 }
284+
}
285+
286+
#[inline]
287+
fn get(&self) -> Option<&TokenTree> {
288+
self.stream.get(self.index)
289+
}
290+
291+
#[inline]
292+
fn bump(&mut self) {
293+
self.index += 1;
294+
}
295+
}
296+
276297
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
277298
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
278299
/// use this type to emit them as a linear sequence. But a linear sequence is
@@ -282,12 +303,12 @@ struct TokenCursor {
282303
// Cursor for the current (innermost) token stream. The delimiters for this
283304
// token stream are found in `self.stack.last()`; when that is `None` then
284305
// we are in the outermost token stream which never has delimiters.
285-
tree_cursor: TokenTreeCursor,
306+
curr: TokenTreeCursor,
286307

287-
// Token streams surrounding the current one. The delimiters for stack[n]'s
288-
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
289-
// because it's the outermost token stream which never has delimiters.
290-
stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
308+
// Token streams surrounding the current one. The current position in each
309+
// of these cursors is always a `TokenTree::Delimited`, never a
310+
// `TokenTree::Token`.
311+
stack: Vec<TokenTreeCursor>,
291312
}
292313

293314
impl TokenCursor {
@@ -302,32 +323,33 @@ impl TokenCursor {
302323
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
303324
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
304325
// below can be removed.
305-
if let Some(tree) = self.tree_cursor.next_ref() {
326+
if let Some(tree) = self.curr.get() {
306327
match tree {
307328
&TokenTree::Token(ref token, spacing) => {
308329
debug_assert!(!matches!(
309330
token.kind,
310331
token::OpenDelim(_) | token::CloseDelim(_)
311332
));
312-
return (token.clone(), spacing);
333+
let res = (token.clone(), spacing);
334+
self.curr.bump();
335+
return res;
313336
}
314337
&TokenTree::Delimited(sp, spacing, delim, ref tts) => {
315-
let trees = tts.clone().into_trees();
316-
self.stack.push((
317-
mem::replace(&mut self.tree_cursor, trees),
318-
sp,
319-
spacing,
320-
delim,
321-
));
338+
let trees = TokenTreeCursor::new(tts.clone());
339+
self.stack.push(mem::replace(&mut self.curr, trees));
322340
if !delim.skip() {
323341
return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
324342
}
325343
// No open delimiter to return; continue on to the next iteration.
326344
}
327345
};
328-
} else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
346+
} else if let Some(parent) = self.stack.pop() {
329347
// We have exhausted this token stream. Move back to its parent token stream.
330-
self.tree_cursor = tree_cursor;
348+
let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.get() else {
349+
panic!("parent should be Delimited")
350+
};
351+
self.curr = parent;
352+
self.curr.bump(); // move past the `Delimited`
331353
if !delim.skip() {
332354
return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
333355
}
@@ -466,7 +488,7 @@ impl<'a> Parser<'a> {
466488
capture_cfg: false,
467489
restrictions: Restrictions::empty(),
468490
expected_tokens: Vec::new(),
469-
token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
491+
token_cursor: TokenCursor { curr: TokenTreeCursor::new(stream), stack: Vec::new() },
470492
num_bump_calls: 0,
471493
break_last_token: 0,
472494
unmatched_angle_bracket_count: 0,
@@ -1192,7 +1214,7 @@ impl<'a> Parser<'a> {
11921214
if dist == 1 {
11931215
// The index is zero because the tree cursor's index always points
11941216
// to the next token to be gotten.
1195-
match self.token_cursor.tree_cursor.look_ahead(0) {
1217+
match self.token_cursor.curr.get() {
11961218
Some(tree) => {
11971219
// Indexing stayed within the current token tree.
11981220
match tree {
@@ -1207,7 +1229,8 @@ impl<'a> Parser<'a> {
12071229
None => {
12081230
// The tree cursor lookahead went (one) past the end of the
12091231
// current token tree. Try to return a close delimiter.
1210-
if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
1232+
if let Some(last) = self.token_cursor.stack.last()
1233+
&& let Some(&TokenTree::Delimited(span, _, delim, _)) = last.get()
12111234
&& !delim.skip()
12121235
{
12131236
// We are not in the outermost token stream, so we have
@@ -1399,9 +1422,10 @@ impl<'a> Parser<'a> {
13991422
pub fn parse_token_tree(&mut self) -> TokenTree {
14001423
match self.token.kind {
14011424
token::OpenDelim(..) => {
1402-
// Grab the tokens within the delimiters.
1403-
let stream = self.token_cursor.tree_cursor.stream.clone();
1404-
let (_, span, spacing, delim) = *self.token_cursor.stack.last().unwrap();
1425+
// Clone the `TokenTree::Delimited` that we are currently
1426+
// within. That's what we are going to return.
1427+
let tree = self.token_cursor.stack.last().unwrap().get().unwrap().clone();
1428+
debug_assert_matches!(tree, TokenTree::Delimited(..));
14051429

14061430
// Advance the token cursor through the entire delimited
14071431
// sequence. After getting the `OpenDelim` we are *within* the
@@ -1421,7 +1445,7 @@ impl<'a> Parser<'a> {
14211445

14221446
// Consume close delimiter
14231447
self.bump();
1424-
TokenTree::Delimited(span, spacing, delim, stream)
1448+
tree
14251449
}
14261450
token::CloseDelim(_) | token::Eof => unreachable!(),
14271451
_ => {

0 commit comments

Comments
 (0)