Skip to content

Commit 53fc061

Browse files
authored
Fix unreachable panic in parser (#19183)
Parsing the (invalid) expression `f"{\t"i}"` caused a panic because the `TStringMiddle` character was "unreachable" due the way the parser recovered from the line continuation (it ate the t-string start). The cause of the issue is as follows: The parser begins parsing the f-string and expects to see a list of objects, essentially alternating between _interpolated elements_ and ordinary strings. It is happy to see the first left brace, but then there is a lexical error caused by the line-continuation character. So instead of the parser seeing a list of elements with just one member, it sees a list that starts like this: - Interpolated element with an invalid token, stored as a `Name` - Something else built from tokens beginning with `TStringStart` and `TStringMiddle` When it sees the `TStringStart` error recovery says "that's a list element I don't know what to do with, let's skip it". When it sees `TStringMiddle` it says "oh, that looks like the middle of _some interpolated string_ so let's try to parse it as one of the literal elements of my `FString`". Unfortunately, the function being used to parse individual list elements thinks (arguably correctly) that it's not possible to have a `TStringMiddle` sitting in your `FString`, and hits `unreachable`. Two potential ways (among many) to solve this issue are: 1. Allow a `TStringMiddle` as a valid "literal" part of an f-string during parsing (with the hope/understanding that this would only occur in an invalid context) 2. Skip the `TStringMiddle` as an "unexpected/invalid list item" in the same way that we skipped `TStringStart`. I have opted for the second approach since it seems somehow more morally correct, even though it loses more information. To implement this, the recovery context needs to know whether we are in an f-string or t-string - hence the changes to that enum. As a bonus we get slightly more specific error messages in some cases. Closes #18860
1 parent 59249f4 commit 53fc061

10 files changed

+96
-27
lines changed

crates/ruff_linter/resources/test/fixtures/ruff/RUF027_0.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,8 @@ def in_type_def():
7979
from typing import cast
8080
a = 'int'
8181
cast('f"{a}"','11')
82+
83+
# Regression test for parser bug
84+
# https://github.com/astral-sh/ruff/issues/18860
85+
def fuzz_bug():
86+
c('{\t"i}')

crates/ruff_python_parser/src/parser/expression.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1527,7 +1527,7 @@ impl<'src> Parser<'src> {
15271527
self.bump(kind.start_token());
15281528
let elements = self.parse_interpolated_string_elements(
15291529
flags,
1530-
InterpolatedStringElementsKind::Regular,
1530+
InterpolatedStringElementsKind::Regular(kind),
15311531
kind,
15321532
);
15331533

crates/ruff_python_parser/src/parser/mod.rs

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use ruff_text_size::{Ranged, TextRange, TextSize};
88
use crate::error::UnsupportedSyntaxError;
99
use crate::parser::expression::ExpressionContext;
1010
use crate::parser::progress::{ParserProgress, TokenId};
11+
use crate::string::InterpolatedStringKind;
1112
use crate::token::TokenValue;
1213
use crate::token_set::TokenSet;
1314
use crate::token_source::{TokenSource, TokenSourceCheckpoint};
@@ -799,15 +800,15 @@ impl WithItemKind {
799800
}
800801
}
801802

802-
#[derive(Debug, PartialEq, Copy, Clone)]
803+
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
803804
enum InterpolatedStringElementsKind {
804805
/// The regular f-string elements.
805806
///
806807
/// For example, the `"hello "`, `x`, and `" world"` elements in:
807808
/// ```py
808809
/// f"hello {x:.2f} world"
809810
/// ```
810-
Regular,
811+
Regular(InterpolatedStringKind),
811812

812813
/// The f-string elements are part of the format specifier.
813814
///
@@ -819,15 +820,13 @@ enum InterpolatedStringElementsKind {
819820
}
820821

821822
impl InterpolatedStringElementsKind {
822-
const fn list_terminators(self) -> TokenSet {
823+
const fn list_terminator(self) -> TokenKind {
823824
match self {
824-
InterpolatedStringElementsKind::Regular => {
825-
TokenSet::new([TokenKind::FStringEnd, TokenKind::TStringEnd])
826-
}
825+
InterpolatedStringElementsKind::Regular(string_kind) => string_kind.end_token(),
827826
// test_ok fstring_format_spec_terminator
828827
// f"hello {x:} world"
829828
// f"hello {x:.3f} world"
830-
InterpolatedStringElementsKind::FormatSpec => TokenSet::new([TokenKind::Rbrace]),
829+
InterpolatedStringElementsKind::FormatSpec => TokenKind::Rbrace,
831830
}
832831
}
833832
}
@@ -1121,7 +1120,7 @@ impl RecoveryContextKind {
11211120
.then_some(ListTerminatorKind::Regular),
11221121
},
11231122
RecoveryContextKind::InterpolatedStringElements(kind) => {
1124-
if p.at_ts(kind.list_terminators()) {
1123+
if p.at(kind.list_terminator()) {
11251124
Some(ListTerminatorKind::Regular)
11261125
} else {
11271126
// test_err unterminated_fstring_newline_recovery
@@ -1177,13 +1176,23 @@ impl RecoveryContextKind {
11771176
) || p.at_name_or_soft_keyword()
11781177
}
11791178
RecoveryContextKind::WithItems(_) => p.at_expr(),
1180-
RecoveryContextKind::InterpolatedStringElements(_) => matches!(
1181-
p.current_token_kind(),
1182-
// Literal element
1183-
TokenKind::FStringMiddle | TokenKind::TStringMiddle
1184-
// Expression element
1185-
| TokenKind::Lbrace
1186-
),
1179+
RecoveryContextKind::InterpolatedStringElements(elements_kind) => {
1180+
match elements_kind {
1181+
InterpolatedStringElementsKind::Regular(interpolated_string_kind) => {
1182+
p.current_token_kind() == interpolated_string_kind.middle_token()
1183+
|| p.current_token_kind() == TokenKind::Lbrace
1184+
}
1185+
InterpolatedStringElementsKind::FormatSpec => {
1186+
matches!(
1187+
p.current_token_kind(),
1188+
// Literal element
1189+
TokenKind::FStringMiddle | TokenKind::TStringMiddle
1190+
// Expression element
1191+
| TokenKind::Lbrace
1192+
)
1193+
}
1194+
}
1195+
}
11871196
}
11881197
}
11891198

@@ -1272,8 +1281,8 @@ impl RecoveryContextKind {
12721281
),
12731282
},
12741283
RecoveryContextKind::InterpolatedStringElements(kind) => match kind {
1275-
InterpolatedStringElementsKind::Regular => ParseErrorType::OtherError(
1276-
"Expected an f-string or t-string element or the end of the f-string or t-string".to_string(),
1284+
InterpolatedStringElementsKind::Regular(string_kind) => ParseErrorType::OtherError(
1285+
format!("Expected an element of or the end of the {string_kind}"),
12771286
),
12781287
InterpolatedStringElementsKind::FormatSpec => ParseErrorType::OtherError(
12791288
"Expected an f-string or t-string element or a '}'".to_string(),
@@ -1316,8 +1325,9 @@ bitflags! {
13161325
const WITH_ITEMS_PARENTHESIZED = 1 << 25;
13171326
const WITH_ITEMS_PARENTHESIZED_EXPRESSION = 1 << 26;
13181327
const WITH_ITEMS_UNPARENTHESIZED = 1 << 28;
1319-
const FT_STRING_ELEMENTS = 1 << 29;
1320-
const FT_STRING_ELEMENTS_IN_FORMAT_SPEC = 1 << 30;
1328+
const F_STRING_ELEMENTS = 1 << 29;
1329+
const T_STRING_ELEMENTS = 1 << 30;
1330+
const FT_STRING_ELEMENTS_IN_FORMAT_SPEC = 1 << 31;
13211331
}
13221332
}
13231333

@@ -1371,7 +1381,13 @@ impl RecoveryContext {
13711381
WithItemKind::Unparenthesized => RecoveryContext::WITH_ITEMS_UNPARENTHESIZED,
13721382
},
13731383
RecoveryContextKind::InterpolatedStringElements(kind) => match kind {
1374-
InterpolatedStringElementsKind::Regular => RecoveryContext::FT_STRING_ELEMENTS,
1384+
InterpolatedStringElementsKind::Regular(InterpolatedStringKind::FString) => {
1385+
RecoveryContext::F_STRING_ELEMENTS
1386+
}
1387+
InterpolatedStringElementsKind::Regular(InterpolatedStringKind::TString) => {
1388+
RecoveryContext::T_STRING_ELEMENTS
1389+
}
1390+
13751391
InterpolatedStringElementsKind::FormatSpec => {
13761392
RecoveryContext::FT_STRING_ELEMENTS_IN_FORMAT_SPEC
13771393
}
@@ -1442,8 +1458,11 @@ impl RecoveryContext {
14421458
RecoveryContext::WITH_ITEMS_UNPARENTHESIZED => {
14431459
RecoveryContextKind::WithItems(WithItemKind::Unparenthesized)
14441460
}
1445-
RecoveryContext::FT_STRING_ELEMENTS => RecoveryContextKind::InterpolatedStringElements(
1446-
InterpolatedStringElementsKind::Regular,
1461+
RecoveryContext::F_STRING_ELEMENTS => RecoveryContextKind::InterpolatedStringElements(
1462+
InterpolatedStringElementsKind::Regular(InterpolatedStringKind::FString),
1463+
),
1464+
RecoveryContext::T_STRING_ELEMENTS => RecoveryContextKind::InterpolatedStringElements(
1465+
InterpolatedStringElementsKind::Regular(InterpolatedStringKind::TString),
14471466
),
14481467
RecoveryContext::FT_STRING_ELEMENTS_IN_FORMAT_SPEC => {
14491468
RecoveryContextKind::InterpolatedStringElements(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
source: crates/ruff_python_parser/src/parser/tests.rs
3+
expression: error
4+
---
5+
ParseError {
6+
error: Lexical(
7+
LineContinuationError,
8+
),
9+
location: 3..4,
10+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
source: crates/ruff_python_parser/src/parser/tests.rs
3+
expression: error
4+
---
5+
ParseError {
6+
error: Lexical(
7+
TStringError(
8+
SingleRbrace,
9+
),
10+
),
11+
location: 8..9,
12+
}

crates/ruff_python_parser/src/parser/tests.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,26 @@ foo.bar[0].baz[2].egg??
134134
.unwrap();
135135
insta::assert_debug_snapshot!(parsed.syntax());
136136
}
137+
138+
#[test]
139+
fn test_fstring_expr_inner_line_continuation_and_t_string() {
140+
let source = r#"f'{\t"i}'"#;
141+
142+
let parsed = parse_expression(source);
143+
144+
let error = parsed.unwrap_err();
145+
146+
insta::assert_debug_snapshot!(error);
147+
}
148+
149+
#[test]
150+
fn test_fstring_expr_inner_line_continuation_newline_t_string() {
151+
let source = r#"f'{\
152+
t"i}'"#;
153+
154+
let parsed = parse_expression(source);
155+
156+
let error = parsed.unwrap_err();
157+
158+
insta::assert_debug_snapshot!(error);
159+
}

crates/ruff_python_parser/src/string.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ impl From<StringType> for Expr {
4141
}
4242
}
4343

44-
#[derive(Debug, Clone, Copy)]
44+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4545
pub(crate) enum InterpolatedStringKind {
4646
FString,
4747
TString,

crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_lambda_without_parentheses.py.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,5 +124,5 @@ Module(
124124

125125
|
126126
1 | f"{lambda x: x}"
127-
| ^ Syntax Error: Expected an f-string or t-string element or the end of the f-string or t-string
127+
| ^ Syntax Error: Expected an element of or the end of the f-string
128128
|

crates/ruff_python_parser/tests/snapshots/invalid_syntax@implicitly_concatenated_unterminated_string_multiline.py.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ Module(
221221
2 | 'hello'
222222
3 | f'world {x}
223223
4 | )
224-
| ^ Syntax Error: Expected an f-string or t-string element or the end of the f-string or t-string
224+
| ^ Syntax Error: Expected an element of or the end of the f-string
225225
5 | 1 + 1
226226
6 | (
227227
|

crates/ruff_python_parser/tests/snapshots/invalid_syntax@t_string_lambda_without_parentheses.py.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,5 +128,5 @@ Module(
128128
|
129129
1 | # parse_options: {"target-version": "3.14"}
130130
2 | t"{lambda x: x}"
131-
| ^ Syntax Error: Expected an f-string or t-string element or the end of the f-string or t-string
131+
| ^ Syntax Error: Expected an element of or the end of the t-string
132132
|

0 commit comments

Comments
 (0)