From 1cf79546970c4245a475207ab89b8fe55a2c00f2 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Wed, 20 Mar 2024 16:43:32 +0530 Subject: [PATCH] Use `TokenId` to track parser progress (#10486) ## Summary This PR updates the parser progress mechanism to use a token ID instead of the token kind and range. The ID is stored on the Parser and is incremented every time the `next_token` method is called. The old logic would lead to a panic if there were multiple levels of indentation before the program ends, as the lexer would then emit several `Dedent` tokens for the same range, making it seem that the parser isn't progressing. ## Test Plan Tested it with the following program, which gets stuck on `dhruv/parser` because the parser isn't able to recover from the invalid mapping key pattern: ```py match subject: case {*key}: pass ``` It only fails for invalid programs because otherwise the parsing logic (`parse_block`) would consume the `Dedent` token. --- crates/ruff_python_parser/src/parser/mod.rs | 15 ++++++++++++++- .../ruff_python_parser/src/parser/progress.rs | 19 ++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 9db294cd24f9bc..2d090426dd7a2c 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -8,7 +8,7 @@ use ruff_python_ast as ast; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::lexer::lex; -use crate::parser::progress::ParserProgress; +use crate::parser::progress::{ParserProgress, TokenId}; use crate::{ lexer::{LexResult, Spanned}, token_set::TokenSet, @@ -101,6 +101,10 @@ pub(crate) struct Parser<'src> { current: Spanned, + /// The ID of the current token. This is used to track the progress of the parser + /// to avoid infinite loops when the parser is stuck. + current_token_id: TokenId, + /// The end of the last processed. 
Used to determine a node's end. last_token_end: TextSize, @@ -164,6 +168,7 @@ impl<'src> Parser<'src> { recovery_context: RecoveryContext::empty(), last_token_end: tokens_range.start(), current, + current_token_id: TokenId::default(), tokens_range, } } @@ -307,6 +312,8 @@ impl<'src> Parser<'src> { .next() .unwrap_or_else(|| (Tok::EndOfFile, TextRange::empty(self.tokens_range.end()))); + self.current_token_id.increment(); + let current = std::mem::replace(&mut self.current, next); if !matches!( @@ -357,6 +364,12 @@ impl<'src> Parser<'src> { self.current.1 } + /// Returns the current token ID. + #[inline] + fn current_token_id(&self) -> TokenId { + self.current_token_id + } + /// Eat the current token if it is of the given kind, returning `true` in /// that case. Otherwise, return `false`. fn eat(&mut self, kind: TokenKind) -> bool { diff --git a/crates/ruff_python_parser/src/parser/progress.rs b/crates/ruff_python_parser/src/parser/progress.rs index d21eba150434e9..8f7e8751164ecb 100644 --- a/crates/ruff_python_parser/src/parser/progress.rs +++ b/crates/ruff_python_parser/src/parser/progress.rs @@ -1,10 +1,19 @@ use crate::parser::Parser; -use crate::TokenKind; -use ruff_text_size::TextSize; + +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] +pub(super) struct TokenId(u32); + +impl TokenId { + /// Increments the value of the token ID. + pub(super) fn increment(&mut self) { + // SAFETY: We don't support files larger than 4GB, so this should never overflow. 
+ self.0 = self.0.checked_add(1).expect("TokenId overflow"); + } +} /// Captures the progress of the parser and allows to test if the parsing is still making progress #[derive(Debug, Copy, Clone, Default)] -pub(super) struct ParserProgress(Option<(TokenKind, TextSize)>); +pub(super) struct ParserProgress(Option<TokenId>); impl ParserProgress { /// Returns true if the parser has passed this position @@ -12,7 +21,7 @@ impl ParserProgress { fn has_progressed(self, p: &Parser) -> bool { match self.0 { None => true, - Some(snapshot) => snapshot != (p.current_token_kind(), p.current_token_range().start()), + Some(prev_token_id) => prev_token_id != p.current_token_id(), } } @@ -31,6 +40,6 @@ impl ParserProgress { p.current_token_range(), ); - self.0 = Some((p.current_token_kind(), p.current_token_range().start())); + self.0 = Some(p.current_token_id()); } }