diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index a9a2b7673887c9..5ac17095b185f5 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1870,7 +1870,7 @@ def readline(encoding): TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'), TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'), TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'), - TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'), + TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1\n'), TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') ] for encoding in ["utf-8", "latin-1", "utf-16"]: diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst new file mode 100644 index 00000000000000..75a63033750826 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-11-37-53.gh-issue-105259.E2BGKL.rst @@ -0,0 +1,2 @@ +Don't include newline character for trailing ``NEWLINE`` tokens emitted in +the :mod:`tokenize` module. Patch by Pablo Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index fae613e3a18c1d..89594e6974fe04 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -114,6 +114,7 @@ tok_new(void) tok->report_warnings = 1; tok->tok_extra_tokens = 0; tok->comment_newline = 0; + tok->implicit_newline = 0; tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0}; tok->tok_mode_stack_index = 0; tok->tok_report_warnings = 1; @@ -355,10 +356,12 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) { return -1; } strcpy(new_str + current_size, line); + tok->implicit_newline = 0; if (last_char != '\n') { /* Last line does not end in \n, fake one */ new_str[current_size + line_size - 1] = '\n'; new_str[current_size + line_size] = '\0'; + tok->implicit_newline = 1; } tok->interactive_src_start = new_str; tok->interactive_src_end = new_str + current_size + line_size; @@ -1262,11 +1265,13 @@ tok_underflow_file(struct tok_state *tok) { tok->done = E_EOF; return 0; } + tok->implicit_newline = 0; if (tok->inp[-1] != '\n') { assert(tok->inp + 1 < tok->end); /* Last line does not end in \n, fake one */ *tok->inp++ = '\n'; *tok->inp = '\0'; + tok->implicit_newline = 1; } ADVANCE_LINENO(); @@ -1304,11 +1309,13 @@ tok_underflow_readline(struct tok_state* tok) { tok->done = E_EOF; return 0; } + tok->implicit_newline = 0; if (tok->inp[-1] != '\n') { assert(tok->inp + 1 < tok->end); /* Last line does not end in \n, fake one */ *tok->inp++ = '\n'; *tok->inp = '\0'; + tok->implicit_newline = 1; } ADVANCE_LINENO(); diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 600d4297b6865a..16e919a8931edd 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -131,6 +131,7 @@ struct tok_state { int tok_report_warnings; int tok_extra_tokens; int comment_newline; + int implicit_newline; #ifdef Py_DEBUG int debug; #endif diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index a7933b2d6b0187..223de54d658507 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -243,10 +243,12 @@ tokenizeriter_next(tokenizeriterobject *it) } else if (type == NEWLINE) { Py_DECREF(str); - if (it->tok->start[0] == '\r') { - str = PyUnicode_FromString("\r\n"); - } else { - str = PyUnicode_FromString("\n"); + if (!it->tok->implicit_newline) { + if (it->tok->start[0] == '\r') { + str = PyUnicode_FromString("\r\n"); + } else { + str = PyUnicode_FromString("\n"); + } } end_col_offset++; }