From c1c5a05502bc845a07658ef076380e79cb074798 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 3 Oct 2023 11:42:44 +0100 Subject: [PATCH] gh-110259: Fix f-strings with multiline expressions and format specs --- Lib/test/test_tokenize.py | 61 +++++++++++++++++++ ...-10-03-11-43-48.gh-issue-110259.ka93x5.rst | 3 + Parser/tokenizer.c | 22 +++++-- 3 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 94fb6d933de1144..82671167860e986 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -566,6 +566,38 @@ def test_string(self): OP '=' (3, 0) (3, 1) OP '}' (3, 1) (3, 2) FSTRING_END "'''" (3, 2) (3, 5) + """) + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a' (2, 6) (2, 7) + NL '\\n' (2, 7) (2, 8) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + self.check_tokenize("""\ +f'__{ + x:d +}__'""", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE '__' (1, 2) (1, 4) + OP '{' (1, 4) (1, 5) + NL '\\n' (1, 5) (1, 6) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'd' (2, 6) (2, 7) + NL '\\n' (2, 7) (2, 8) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'" (3, 3) (3, 4) """) def test_function(self): @@ -2277,6 +2309,35 @@ def test_string(self): FSTRING_START \'f"\' (1, 0) (1, 2) FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16) FSTRING_END \'"\' (1, 16) (1, 17) + """) + + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a' (2, 6) (2, 7) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + self.check_tokenize("""\ +f'__{ + x:d +}__'""", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE '__' (1, 2) (1, 4) + LBRACE '{' (1, 4) (1, 5) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'd' (2, 6) (2, 7) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'" (3, 3) (3, 4) """) def test_function(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst new file mode 100644 index 000000000000000..55c743d0e4917e5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst @@ -0,0 +1,3 @@ +Correctly identify the format spec in f-strings (with single or triple +quotes) that have multiple lines in the expression part and include a +formatting spec. Patch by Pablo Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 41d0d16a471dd60..6b4bcb6a82c5bfa 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2690,11 +2690,26 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct if (tok->done == E_ERROR) { return MAKE_TOKEN(ERRORTOKEN); } + int in_format_spec = ( + current_tok->last_expr_end != -1 + && + INSIDE_FSTRING_EXPR(current_tok) + ); + + // If we are in a format spec and we found a newline, + // it means that the format spec ends here and we should + // return to the regular mode. + if (in_format_spec && c == '\n') { + tok_backup(tok, c); + TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(FSTRING_MIDDLE); + } if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { if (tok->decoding_erred) { return MAKE_TOKEN(ERRORTOKEN); } - assert(tok->multi_line_start != NULL); // shift the tok_state's location into // the start of string, and report the error @@ -2726,11 +2741,6 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct end_quote_size = 0; } - int in_format_spec = ( - current_tok->last_expr_end != -1 - && - INSIDE_FSTRING_EXPR(current_tok) - ); if (c == '{') { int peek = tok_nextc(tok); if (peek != '{' || in_format_spec) {