Skip to content

Commit 5078eed

Browse files
jx124, sunmy2019, Fidget-Spinner, pablogsal
authored
gh-104016: Fixed off by 1 error in f string tokenizer (#104047)
Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Co-authored-by: Ken Jin <kenjin@python.org>
Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
1 parent 2d526cd commit 5078eed

File tree

3 files changed

+25
-5
lines changed

3 files changed

+25
-5
lines changed

Diff for: Lib/test/test_fstring.py

+16
Original file line number | Diff line number | Diff line change
@@ -565,7 +565,23 @@ def test_fstring_nested_too_deeply(self):
565565
self.assertAllRaise(SyntaxError,
566566
"f-string: expressions nested too deeply",
567567
['f"{1+2:{1+2:{1+1:{1}}}}"'])
568+
569+
def create_nested_fstring(n):
570+
if n == 0:
571+
return "1+1"
572+
prev = create_nested_fstring(n-1)
573+
return f'f"{{{prev}}}"'
568574

575+
self.assertAllRaise(SyntaxError,
576+
"too many nested f-strings",
577+
[create_nested_fstring(160)])
578+
579+
def test_syntax_error_in_nested_fstring(self):
580+
# See gh-104016 for more information on this crash
581+
self.assertAllRaise(SyntaxError,
582+
"invalid syntax",
583+
['f"{1 1:' + ('{f"1:' * 199)])
584+
569585
def test_double_braces(self):
570586
self.assertEqual(f'{{', '{')
571587
self.assertEqual(f'a{{', 'a{')

Diff for: Parser/tokenizer.c

+5 -2
Original file line number | Diff line number | Diff line change
@@ -43,12 +43,12 @@
4343
#ifdef Py_DEBUG
4444
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
4545
assert(tok->tok_mode_stack_index >= 0);
46-
assert(tok->tok_mode_stack_index < MAXLEVEL);
46+
assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
4747
return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
4848
}
4949
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
5050
assert(tok->tok_mode_stack_index >= 0);
51-
assert(tok->tok_mode_stack_index < MAXLEVEL);
51+
assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
5252
return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
5353
}
5454
#else
@@ -2235,6 +2235,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
22352235

22362236
p_start = tok->start;
22372237
p_end = tok->cur;
2238+
if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
2239+
return MAKE_TOKEN(syntaxerror(tok, "too many nested f-strings"));
2240+
}
22382241
tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
22392242
the_current_tok->kind = TOK_FSTRING_MODE;
22402243
the_current_tok->f_string_quote = quote;

Diff for: Parser/tokenizer.h

+4 -3
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,9 @@ extern "C" {
1010

1111
#include "pycore_token.h" /* For token types */
1212

13-
#define MAXINDENT 100 /* Max indentation level */
14-
#define MAXLEVEL 200 /* Max parentheses level */
13+
#define MAXINDENT 100 /* Max indentation level */
14+
#define MAXLEVEL 200 /* Max parentheses level */
15+
#define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */
1516

1617
enum decoding_state {
1718
STATE_INIT,
@@ -123,7 +124,7 @@ struct tok_state {
123124
enum interactive_underflow_t interactive_underflow;
124125
int report_warnings;
125126
// TODO: Factor this into its own thing
126-
tokenizer_mode tok_mode_stack[MAXLEVEL];
127+
tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL];
127128
int tok_mode_stack_index;
128129
int tok_report_warnings;
129130
#ifdef Py_DEBUG

0 commit comments

Comments
 (0)