Skip to content

Commit

Permalink
Overhaul TokenTreeCursor.
Browse files Browse the repository at this point in the history
- Move it to `rustc_parse`, which is the only crate that uses it. This
  lets us remove all the `pub` markers from it.

- Change `next_ref` and `look_ahead` to `get` and `bump`, which work
  better for the way `rustc_parse` uses the cursor.

- This requires adding a `TokenStream::get` method, which is easy.

- In `TokenCursor`, we currently duplicate the
  `DelimSpan`/`DelimSpacing`/`Delimiter` from the surrounding
  `TokenTree::Delimited` in the stack. This isn't necessary so long as
  we don't prematurely move past the `Delimited`. Removing the
  duplication is a small perf win on a very hot code path.

- In `parse_token_tree`, we can just clone the relevant
  `TokenTree::Delimited` instead of constructing an identical one from
  pieces.
  • Loading branch information
nnethercote committed Dec 11, 2024
1 parent df7616e commit efad936
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 64 deletions.
41 changes: 4 additions & 37 deletions compiler/rustc_ast/src/tokenstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,12 +423,12 @@ impl TokenStream {
self.0.len()
}

pub fn iter(&self) -> TokenStreamIter<'_> {
TokenStreamIter::new(self)
pub fn get(&self, index: usize) -> Option<&TokenTree> {
self.0.get(index)
}

pub fn into_trees(self) -> TokenTreeCursor {
TokenTreeCursor::new(self)
pub fn iter(&self) -> TokenStreamIter<'_> {
TokenStreamIter::new(self)
}

/// Compares two `TokenStream`s, checking equality without regarding span information.
Expand Down Expand Up @@ -692,39 +692,6 @@ impl<'t> Iterator for TokenStreamIter<'t> {
}
}

/// Owning by-value iterator over a [`TokenStream`], that produces `&TokenTree`
/// items.
///
/// Doesn't impl `Iterator` because Rust doesn't permit an owning iterator to
/// return `&T` from `next`; the need for an explicit lifetime in the `Item`
/// associated type gets in the way. Instead, use `next_ref` (which doesn't
/// involve associated types) for getting individual elements, or
/// `TokenStreamIter` if you really want an `Iterator`, e.g. in a `for`
/// loop.
#[derive(Clone, Debug)]
pub struct TokenTreeCursor {
// The stream being walked; the cursor owns it.
pub stream: TokenStream,
// Index into `stream` of the next tree that `next_ref` will return.
index: usize,
}

impl TokenTreeCursor {
/// Creates a cursor positioned at the start of `stream`.
fn new(stream: TokenStream) -> Self {
TokenTreeCursor { stream, index: 0 }
}

/// Returns the next tree and advances past it, or returns `None` (leaving
/// the position unchanged) when no tree is found at the current index.
#[inline]
pub fn next_ref(&mut self) -> Option<&TokenTree> {
self.stream.0.get(self.index).map(|tree| {
// This closure only runs when a tree was found, so the index stops
// advancing once the end of the stream is reached.
self.index += 1;
tree
})
}

/// Returns the tree `n` positions after the one `next_ref` would return
/// next, without advancing; `look_ahead(0)` is that next tree itself.
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
self.stream.0.get(self.index + n)
}
}

#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub struct DelimSpan {
pub open: Span,
Expand Down
4 changes: 3 additions & 1 deletion compiler/rustc_parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use ast::token::IdentIsRaw;
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenTree;
use rustc_ast::util::case::Case;
use rustc_ast::util::classify;
use rustc_ast::util::parser::{AssocOp, ExprPrecedence, Fixity, prec_let_scrutinee_needs_par};
Expand Down Expand Up @@ -2376,7 +2377,8 @@ impl<'a> Parser<'a> {
}

if self.token == TokenKind::Semi
&& matches!(self.token_cursor.stack.last(), Some((.., Delimiter::Parenthesis)))
&& let Some(last) = self.token_cursor.stack.last()
&& let Some(TokenTree::Delimited(_, _, Delimiter::Parenthesis, _)) = last.get()
&& self.may_recover()
{
// It is likely that the closure body is a block but where the
Expand Down
76 changes: 50 additions & 26 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ use rustc_ast::ptr::P;
use rustc_ast::token::{
self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, Token, TokenKind,
};
use rustc_ast::tokenstream::{
AttrsTarget, DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree, TokenTreeCursor,
};
use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree};
use rustc_ast::util::case::Case;
use rustc_ast::{
self as ast, AnonConst, AttrArgs, AttrArgsEq, AttrId, ByRef, Const, CoroutineKind,
Expand Down Expand Up @@ -273,6 +271,29 @@ struct CaptureState {
seen_attrs: IntervalSet<AttrId>,
}

/// A cursor over the `TokenTree`s of an owned `TokenStream`.
///
/// Reading the current tree (`get`) and advancing (`bump`) are separate
/// operations, so a caller can inspect a tree without moving past it.
#[derive(Clone, Debug)]
struct TokenTreeCursor {
// The token stream being walked; the cursor owns it.
stream: TokenStream,
// Index into `stream` of the current tree. Starts at 0 and is only ever
// changed by `bump`.
index: usize,
}

impl TokenTreeCursor {
/// Creates a cursor positioned at the first tree of `stream`.
#[inline]
fn new(stream: TokenStream) -> Self {
TokenTreeCursor { stream, index: 0 }
}

/// Returns the tree at the current position without advancing, or `None`
/// when `stream` has no tree at that index.
#[inline]
fn get(&self) -> Option<&TokenTree> {
self.stream.get(self.index)
}

/// Advances the cursor by one tree. No bounds check happens here; `get`
/// is what reports running off the end, by returning `None`.
#[inline]
fn bump(&mut self) {
self.index += 1;
}
}

/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
/// use this type to emit them as a linear sequence. But a linear sequence is
Expand All @@ -282,12 +303,12 @@ struct TokenCursor {
// Cursor for the current (innermost) token stream. The delimiters for this
// token stream are found in `self.stack.last()`; when that is `None` then
// we are in the outermost token stream which never has delimiters.
tree_cursor: TokenTreeCursor,
curr: TokenTreeCursor,

// Token streams surrounding the current one. The delimiters for stack[n]'s
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
// because it's the outermost token stream which never has delimiters.
stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
// Token streams surrounding the current one. The current position in each
// of these cursors is always a `TokenTree::Delimited`, never a
// `TokenTree::Token`.
stack: Vec<TokenTreeCursor>,
}

impl TokenCursor {
Expand All @@ -302,32 +323,33 @@ impl TokenCursor {
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
// below can be removed.
if let Some(tree) = self.tree_cursor.next_ref() {
if let Some(tree) = self.curr.get() {
match tree {
&TokenTree::Token(ref token, spacing) => {
debug_assert!(!matches!(
token.kind,
token::OpenDelim(_) | token::CloseDelim(_)
));
return (token.clone(), spacing);
let res = (token.clone(), spacing);
self.curr.bump();
return res;
}
&TokenTree::Delimited(sp, spacing, delim, ref tts) => {
let trees = tts.clone().into_trees();
self.stack.push((
mem::replace(&mut self.tree_cursor, trees),
sp,
spacing,
delim,
));
let trees = TokenTreeCursor::new(tts.clone());
self.stack.push(mem::replace(&mut self.curr, trees));
if !delim.skip() {
return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
}
// No open delimiter to return; continue on to the next iteration.
}
};
} else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
} else if let Some(parent) = self.stack.pop() {
// We have exhausted this token stream. Move back to its parent token stream.
self.tree_cursor = tree_cursor;
let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.get() else {
panic!("parent should be Delimited")
};
self.curr = parent;
self.curr.bump(); // move past the `Delimited`
if !delim.skip() {
return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
}
Expand Down Expand Up @@ -466,7 +488,7 @@ impl<'a> Parser<'a> {
capture_cfg: false,
restrictions: Restrictions::empty(),
expected_tokens: Vec::new(),
token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
token_cursor: TokenCursor { curr: TokenTreeCursor::new(stream), stack: Vec::new() },
num_bump_calls: 0,
break_last_token: 0,
unmatched_angle_bracket_count: 0,
Expand Down Expand Up @@ -1192,7 +1214,7 @@ impl<'a> Parser<'a> {
if dist == 1 {
// No lookahead offset is needed because the tree cursor's index always
// points to the next token to be gotten.
match self.token_cursor.tree_cursor.look_ahead(0) {
match self.token_cursor.curr.get() {
Some(tree) => {
// Indexing stayed within the current token tree.
match tree {
Expand All @@ -1207,7 +1229,8 @@ impl<'a> Parser<'a> {
None => {
// The tree cursor lookahead went (one) past the end of the
// current token tree. Try to return a close delimiter.
if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
if let Some(last) = self.token_cursor.stack.last()
&& let Some(&TokenTree::Delimited(span, _, delim, _)) = last.get()
&& !delim.skip()
{
// We are not in the outermost token stream, so we have
Expand Down Expand Up @@ -1399,9 +1422,10 @@ impl<'a> Parser<'a> {
pub fn parse_token_tree(&mut self) -> TokenTree {
match self.token.kind {
token::OpenDelim(..) => {
// Grab the tokens within the delimiters.
let stream = self.token_cursor.tree_cursor.stream.clone();
let (_, span, spacing, delim) = *self.token_cursor.stack.last().unwrap();
// Clone the `TokenTree::Delimited` that we are currently
// within. That's what we are going to return.
let tree = self.token_cursor.stack.last().unwrap().get().unwrap().clone();
debug_assert_matches!(tree, TokenTree::Delimited(..));

// Advance the token cursor through the entire delimited
// sequence. After getting the `OpenDelim` we are *within* the
Expand All @@ -1421,7 +1445,7 @@ impl<'a> Parser<'a> {

// Consume close delimiter
self.bump();
TokenTree::Delimited(span, spacing, delim, stream)
tree
}
token::CloseDelim(_) | token::Eof => unreachable!(),
_ => {
Expand Down

0 comments on commit efad936

Please sign in to comment.