From 1cf79546970c4245a475207ab89b8fe55a2c00f2 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Wed, 20 Mar 2024 16:43:32 +0530
Subject: [PATCH] Use `TokenId` to track parser progress (#10486)

## Summary

This PR updates the parser progress mechanism to use a token ID instead
of the token kind and the start offset of its range. The ID is stored on
the `Parser` and is incremented every time the `next_token` method is
called.

The old logic could panic when the program ended after multiple levels
of indentation. In that case the lexer emits several `Dedent` tokens
with the same range, so consecutive tokens have an identical kind and
start offset, which made it look as though the parser wasn't
progressing.

## Test Plan

Tested it with the following program which gets stuck on `dhruv/parser`
because the parser isn't able to recover from the invalid mapping key
pattern:

```py
match subject:
    case {*key}:
        pass
```

It only fails for invalid programs because, for valid programs, the
parsing logic (`parse_block`) consumes the `Dedent` tokens.
---
 crates/ruff_python_parser/src/parser/mod.rs   | 15 ++++++++++++++-
 .../ruff_python_parser/src/parser/progress.rs | 19 ++++++++++++++-----
 2 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs
index 9db294cd24f9bc..2d090426dd7a2c 100644
--- a/crates/ruff_python_parser/src/parser/mod.rs
+++ b/crates/ruff_python_parser/src/parser/mod.rs
@@ -8,7 +8,7 @@ use ruff_python_ast as ast;
 use ruff_text_size::{Ranged, TextRange, TextSize};
 
 use crate::lexer::lex;
-use crate::parser::progress::ParserProgress;
+use crate::parser::progress::{ParserProgress, TokenId};
 use crate::{
     lexer::{LexResult, Spanned},
     token_set::TokenSet,
@@ -101,6 +101,10 @@ pub(crate) struct Parser<'src> {
 
     current: Spanned,
 
+    /// The ID of the current token. This is used to track the progress of the parser
+    /// to avoid infinite loops when the parser is stuck.
+    current_token_id: TokenId,
+
     /// The end of the last processed. Used to determine a node's end.
     last_token_end: TextSize,
 
@@ -164,6 +168,7 @@ impl<'src> Parser<'src> {
             recovery_context: RecoveryContext::empty(),
             last_token_end: tokens_range.start(),
             current,
+            current_token_id: TokenId::default(),
             tokens_range,
         }
     }
@@ -307,6 +312,8 @@ impl<'src> Parser<'src> {
             .next()
             .unwrap_or_else(|| (Tok::EndOfFile, TextRange::empty(self.tokens_range.end())));
 
+        self.current_token_id.increment();
+
         let current = std::mem::replace(&mut self.current, next);
 
         if !matches!(
@@ -357,6 +364,12 @@ impl<'src> Parser<'src> {
         self.current.1
     }
 
+    /// Returns the current token ID.
+    #[inline]
+    fn current_token_id(&self) -> TokenId {
+        self.current_token_id
+    }
+
     /// Eat the current token if it is of the given kind, returning `true` in
     /// that case. Otherwise, return `false`.
     fn eat(&mut self, kind: TokenKind) -> bool {
diff --git a/crates/ruff_python_parser/src/parser/progress.rs b/crates/ruff_python_parser/src/parser/progress.rs
index d21eba150434e9..8f7e8751164ecb 100644
--- a/crates/ruff_python_parser/src/parser/progress.rs
+++ b/crates/ruff_python_parser/src/parser/progress.rs
@@ -1,10 +1,19 @@
 use crate::parser::Parser;
-use crate::TokenKind;
-use ruff_text_size::TextSize;
+
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
+pub(super) struct TokenId(u32);
+
+impl TokenId {
+    /// Increments the value of the token ID.
+    pub(super) fn increment(&mut self) {
+        // NOTE: We don't support files larger than 4GB, so this should never overflow.
+        self.0 = self.0.checked_add(1).expect("TokenId overflow");
+    }
+}
 
 /// Captures the progress of the parser and allows to test if the parsing is still making progress
 #[derive(Debug, Copy, Clone, Default)]
-pub(super) struct ParserProgress(Option<(TokenKind, TextSize)>);
+pub(super) struct ParserProgress(Option<TokenId>);
 
 impl ParserProgress {
     /// Returns true if the parser has passed this position
@@ -12,7 +21,7 @@ impl ParserProgress {
     fn has_progressed(self, p: &Parser) -> bool {
         match self.0 {
             None => true,
-            Some(snapshot) => snapshot != (p.current_token_kind(), p.current_token_range().start()),
+            Some(prev_token_id) => prev_token_id != p.current_token_id(),
         }
     }
 
@@ -31,6 +40,6 @@ impl ParserProgress {
             p.current_token_range(),
         );
 
-        self.0 = Some((p.current_token_kind(), p.current_token_range().start()));
+        self.0 = Some(p.current_token_id());
     }
 }