From d5a97074d24cd14cb2a35a2b1ad3074863cde264 Mon Sep 17 00:00:00 2001 From: chgnrdv <52372310+chgnrdv@users.noreply.github.com> Date: Mon, 1 May 2023 18:26:43 +0300 Subject: [PATCH] gh-103824: fix use-after-free error in Parser/tokenizer.c (#103993) --- Lib/test/test_tokenize.py | 15 ++++++++++++++- Parser/tokenizer.c | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 283a7c23609e67..911b53e5816588 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -11,7 +11,7 @@ from test.test_grammar import (VALID_UNDERSCORE_LITERALS, INVALID_UNDERSCORE_LITERALS) from test.support import os_helper -from test.support.script_helper import run_test_script, make_script +from test.support.script_helper import run_test_script, make_script, run_python_until_end import os import token @@ -1470,6 +1470,19 @@ def test_comment_at_the_end_of_the_source_without_newline(self): self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]]) self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER]) + def test_invalid_character_in_fstring_middle(self): + # See gh-103824 + script = b'''F""" + \xe5"""''' + + with os_helper.temp_dir() as temp_dir: + filename = os.path.join(temp_dir, "script.py") + with open(filename, 'wb') as file: + file.write(script) + rs, _ = run_python_until_end(filename) + self.assertIn(b"SyntaxError", rs.err) + + class UntokenizeTest(TestCase): def test_bad_input_order(self): diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 8de0572a1fc459..8fb9be7bfd0182 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2552,6 +2552,10 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct while (end_quote_size != current_tok->f_string_quote_size) { int c = tok_nextc(tok); if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { + if (tok->decoding_erred) { + return MAKE_TOKEN(ERRORTOKEN); + } + assert(tok->multi_line_start != NULL); // shift the tok_state's location into // the start of string, and report the error