From cc318ee7a9481b99e461107e673225011ba12457 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 19 Apr 2020 16:07:07 -0700 Subject: [PATCH 01/31] bpo-28002: Roundtrip f-strings with ast.unparse better By attempting to avoid backslashes in f-string expressions. We also now proactively raise errors for some backslashes we can't avoid while unparsing FormattedValues --- Lib/ast.py | 38 ++++++++++++++++++++++++++++++++++---- Lib/test/test_unparse.py | 26 ++++++++++++++++---------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 2edb7171e96719..63f48377765f3f 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -25,6 +25,7 @@ :license: Python License. """ import sys +import unicodedata from _ast import * from contextlib import contextmanager, nullcontext from enum import IntEnum, auto @@ -646,7 +647,8 @@ class _Unparser(NodeVisitor): output source code for the abstract syntax; original formatting is disregarded.""" - def __init__(self): + def __init__(self, avoid_backslashes=False): + self.avoid_backslashes = avoid_backslashes self._source = [] self._buffer = [] self._precedences = {} @@ -1046,15 +1048,39 @@ def visit_AsyncWith(self, node): with self.block(extra=self.get_type_comment(node)): self.traverse(node.body) + def _write_str_avoiding_backslashes(self, value): + """Write string literal value with a best effort attempt to avoid backslashes.""" + # str.__repr__ will escape backslashes, quotes, \n, \r, \t and non-printable characters + # We'll handle quotes, \n, \t and space, using triple quotes if necessary, but we'll + # just let repr handle any other unicode control and separator characters. 
+ def should_use_repr(c): + return c == '\\' or ( + # This logic for determining non-printable characters is based + # on that in Tools/unicode/makeunicodedata.py + c not in (' ', '\n', '\t') and unicodedata.category(c)[0] in ("C", "Z") + ) + + if not any(should_use_repr(c) for c in value): + if "\n" in value: + quote_types = ["'''", '"""'] + else: + quote_types = ["'", '"', '"""', "'''"] + + for quote_type in quote_types: + if quote_type not in value: + self.write(f"{quote_type}{value}{quote_type}") + return + self.write(repr(value)) + def visit_JoinedStr(self, node): self.write("f") self._fstring_JoinedStr(node, self.buffer_writer) - self.write(repr(self.buffer)) + self._write_str_avoiding_backslashes(self.buffer) def visit_FormattedValue(self, node): self.write("f") self._fstring_FormattedValue(node, self.buffer_writer) - self.write(repr(self.buffer)) + self._write_str_avoiding_backslashes(self.buffer) def _fstring_JoinedStr(self, node, write): for value in node.values: @@ -1069,11 +1095,13 @@ def _fstring_Constant(self, node, write): def _fstring_FormattedValue(self, node, write): write("{") - unparser = type(self)() + unparser = type(self)(avoid_backslashes=True) unparser.set_precedence(_Precedence.TEST.next(), node.value) expr = unparser.visit(node.value) if expr.startswith("{"): write(" ") # Separate pair of opening brackets as "{ {" + if "\\" in expr: + raise ValueError("Unable to avoid backslash in f-string expression part") write(expr) if node.conversion != -1: conversion = chr(node.conversion) @@ -1117,6 +1145,8 @@ def _write_constant(self, value): if isinstance(value, (float, complex)): # Substitute overflowing decimal literal for AST infinities. 
self.write(repr(value).replace("inf", _INFSTR)) + elif self.avoid_backslashes and isinstance(value, str): + self._write_str_avoiding_backslashes(value) else: self.write(repr(value)) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 6d828721b7740e..a6429d18823a03 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -153,6 +153,15 @@ def test_fstrings(self): self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""") self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""") + def test_fstrings_complicated(self): + # See issue 28002 + self.check_ast_roundtrip("""f'''{"'"}'''""") + self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''') + self.check_ast_roundtrip('f"""{\'\'\'\n\'\'\'}"""') + self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""') + self.check_ast_roundtrip('''f"a\\r\\nb"''') + self.check_ast_roundtrip('''f"\\u2028{'x'}"''') + def test_strings(self): self.check_ast_roundtrip("u'foo'") self.check_ast_roundtrip("r'foo'") @@ -308,6 +317,9 @@ def test_invalid_fstring_conversion(self): ) ) + def test_invalid_fstring_backslash(self): + self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\"))) + def test_invalid_set(self): self.check_invalid(ast.Set(elts=[])) @@ -413,7 +425,6 @@ def test_simple_expressions_parens(self): self.check_src_roundtrip("call((yield x))") self.check_src_roundtrip("return x + (yield x)") - def test_class_bases_and_keywords(self): self.check_src_roundtrip("class X:\n pass") self.check_src_roundtrip("class X(A):\n pass") @@ -426,6 +437,10 @@ def test_class_bases_and_keywords(self): self.check_src_roundtrip("class X(*args):\n pass") self.check_src_roundtrip("class X(*args, **kwargs):\n pass") + def test_fstrings(self): + self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''') + self.check_src_roundtrip('''f"\\u2028{'x'}"''') + def test_docstrings(self): docstrings = ( '"""simple doc string"""', @@ -475,7 +490,6 @@ class DirectoryTestCase(ASTTestCase): lib_dir = 
pathlib.Path(__file__).parent / ".." test_directories = (lib_dir, lib_dir / "test") - skip_files = {"test_fstring.py"} run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py", "test_ast.py", "test_asdl_parser.py"} @@ -517,14 +531,6 @@ def test_files(self): if test.support.verbose: print(f"Testing {item.absolute()}") - # Some f-strings are not correctly round-tripped by - # Tools/parser/unparse.py. See issue 28002 for details. - # We need to skip files that contain such f-strings. - if item.name in self.skip_files: - if test.support.verbose: - print(f"Skipping {item.absolute()}: see issue 28002") - continue - with self.subTest(filename=item): source = read_pyfile(item) self.check_ast_roundtrip(source) From 4743d8df71704cf735ed91d13a2c5d2897f1a2cb Mon Sep 17 00:00:00 2001 From: Shantanu Date: Thu, 21 May 2020 13:03:12 -0700 Subject: [PATCH 02/31] Update Lib/ast.py Co-authored-by: Batuhan Taskaya --- Lib/ast.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 63f48377765f3f..955075313a3ae4 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1059,18 +1059,23 @@ def should_use_repr(c): # on that in Tools/unicode/makeunicodedata.py c not in (' ', '\n', '\t') and unicodedata.category(c)[0] in ("C", "Z") ) + use_repr = any(map(should_use_repr, value)) + + if not use_repr: + quote_types = ["'", '"', '"""', "'''"] - if not any(should_use_repr(c) for c in value): if "\n" in value: - quote_types = ["'''", '"""'] - else: - quote_types = ["'", '"', '"""', "'''"] + quote_types = quote_types[2:] for quote_type in quote_types: if quote_type not in value: self.write(f"{quote_type}{value}{quote_type}") - return - self.write(repr(value)) + break + else: + use_repr = True + + if use_repr: + self.write(repr(value)) def visit_JoinedStr(self, node): self.write("f") From 98878ba2dcba96717030657daab966634970916d Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Fri, 22 May 2020 15:07:25 -0700 Subject: [PATCH 
03/31] _Unparser: make avoid_backslashes keyword only --- Lib/ast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index 955075313a3ae4..4c495bbb798a7e 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -647,7 +647,7 @@ class _Unparser(NodeVisitor): output source code for the abstract syntax; original formatting is disregarded.""" - def __init__(self, avoid_backslashes=False): + def __init__(self, *, avoid_backslashes=False): self.avoid_backslashes = avoid_backslashes self._source = [] self._buffer = [] From a0a7b260bb33787ca77bd9fab20372c98d4b357e Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Fri, 22 May 2020 15:09:07 -0700 Subject: [PATCH 04/31] test_unparse: add test_fstring to run_always_files --- Lib/test/test_unparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index a6429d18823a03..3b4ff636d2ee6e 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -491,7 +491,7 @@ class DirectoryTestCase(ASTTestCase): lib_dir = pathlib.Path(__file__).parent / ".." 
test_directories = (lib_dir, lib_dir / "test") run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py", - "test_ast.py", "test_asdl_parser.py"} + "test_ast.py", "test_asdl_parser.py", "test_fstring.py"} _files_to_test = None From 0c95e56657cd27579328fd2c370f0ad7a76c7fd0 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Tue, 2 Jun 2020 17:43:54 -0700 Subject: [PATCH 05/31] unparse fstrings: improve cosmetics by escaping whitespace --- Lib/ast.py | 72 +++++++++++++++++++++++++++++++--------- Lib/test/test_unparse.py | 2 ++ 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 4c495bbb798a7e..d79cfcde8c3654 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1048,8 +1048,13 @@ def visit_AsyncWith(self, node): with self.block(extra=self.get_type_comment(node)): self.traverse(node.body) - def _write_str_avoiding_backslashes(self, value): - """Write string literal value with a best effort attempt to avoid backslashes.""" + def _str_literal_helper(self, value, quote_types, use_escaped_whitespace=False): + """Helper for writing string literals without using repr. + + Returns (possible quote types, string literal to write). + Throws ValueError if we need to use repr to write this string literal. + + """ # str.__repr__ will escape backslashes, quotes, \n, \r, \t and non-printable characters # We'll handle quotes, \n, \t and space, using triple quotes if necessary, but we'll # just let repr handle any other unicode control and separator characters. 
@@ -1059,28 +1064,63 @@ def should_use_repr(c): # on that in Tools/unicode/makeunicodedata.py c not in (' ', '\n', '\t') and unicodedata.category(c)[0] in ("C", "Z") ) - use_repr = any(map(should_use_repr, value)) - if not use_repr: - quote_types = ["'", '"', '"""', "'''"] + if any(map(should_use_repr, value)): + raise ValueError - if "\n" in value: - quote_types = quote_types[2:] + if use_escaped_whitespace: + value = value.replace("\n", "\\n") + value = value.replace("\t", "\\t") - for quote_type in quote_types: - if quote_type not in value: - self.write(f"{quote_type}{value}{quote_type}") - break - else: - use_repr = True + if "\n" in value: + quote_types = [quote for quote in quote_types if quote in ('"""', "'''")] + quote_types = [quote for quote in quote_types if quote not in value] + if not quote_types: + raise ValueError + return quote_types, value - if use_repr: + def _write_str_avoiding_backslashes(self, value): + """Write string literal value with a best effort attempt to avoid backslashes.""" + try: + quote_types, value = self._str_literal_helper(value, ["'", '"', '"""', "'''"]) + quote_type = quote_types[0] + self.write(f"{quote_type}{value}{quote_type}") + except ValueError: self.write(repr(value)) def visit_JoinedStr(self, node): self.write("f") - self._fstring_JoinedStr(node, self.buffer_writer) - self._write_str_avoiding_backslashes(self.buffer) + if self.avoid_backslashes: + self._fstring_JoinedStr(node, self.buffer_writer) + self._write_str_avoiding_backslashes(self.buffer) + return + + # If we don't need to avoid backslashes globally (i.e., we only need + # to avoid them inside FormattedValues), it's cosmetically preferred + # to use escaped whitespace. That is, it's preferred to use backslashes + # for cases like: f"{x}\n". To accomplish this, we keep track of what + # in our buffer corresponds to FormattedValues and what corresponds to + # Constant parts of the f-string, and allow escapes accordingly. 
+ buffer = [] + for value in node.values: + meth = getattr(self, "_fstring_" + type(value).__name__) + meth(value, self.buffer_writer) + buffer.append((self.buffer, isinstance(value, Constant))) + # This part is analagous to _write_str_avoiding_backslashes + try: + new_buffer = [] + quote_types = ["'", '"', '"""', "'''"] + for i in range(len(buffer)): + value, is_constant = buffer[i] + quote_types, value = self._str_literal_helper( + value, quote_types, use_escaped_whitespace=is_constant + ) + new_buffer.append(value) + quote_type = quote_types[0] + value = "".join(new_buffer) + self.write(f"{quote_type}{value}{quote_type}") + except ValueError: + self.write(repr("".join(b[0] for b in buffer))) def visit_FormattedValue(self, node): self.write("f") diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 3b4ff636d2ee6e..6f3b1da9a969e7 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -440,6 +440,8 @@ def test_class_bases_and_keywords(self): def test_fstrings(self): self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''') self.check_src_roundtrip('''f"\\u2028{'x'}"''') + self.check_src_roundtrip(r"f'{x}\n'") + self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''') def test_docstrings(self): docstrings = ( From ac49be13d85b435a5ab2c022d971fdccfbcc4f5e Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Tue, 2 Jun 2020 18:11:36 -0700 Subject: [PATCH 06/31] unparse: add another test case --- Lib/test/test_unparse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 6f3b1da9a969e7..94b6a0bb8f2fff 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -442,6 +442,7 @@ def test_fstrings(self): self.check_src_roundtrip('''f"\\u2028{'x'}"''') self.check_src_roundtrip(r"f'{x}\n'") self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''') + self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''') def test_docstrings(self): docstrings = ( From 
714a2697588228b608843216cd945a93c5534808 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Tue, 2 Jun 2020 18:56:32 -0700 Subject: [PATCH 07/31] unparse: fix final quote escaping issues The previous cosmetic fix means we use triple quotes less often, so this is less likely to occur in practice, but should be fixed anyway. --- Lib/ast.py | 11 ++++++++++- Lib/test/test_unparse.py | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index d79cfcde8c3654..7cdb27b0376973 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1077,6 +1077,9 @@ def should_use_repr(c): quote_types = [quote for quote in quote_types if quote not in value] if not quote_types: raise ValueError + if value: + # Sort so that we prefer '''"''' over """\"""" + quote_types.sort(key=lambda q: q[0] == value[-1]) return quote_types, value def _write_str_avoiding_backslashes(self, value): @@ -1084,6 +1087,9 @@ def _write_str_avoiding_backslashes(self, value): try: quote_types, value = self._str_literal_helper(value, ["'", '"', '"""', "'''"]) quote_type = quote_types[0] + # If we're using triple quotes and we'd need to escape a final quote, raise + if value and quote_type[0] == value[-1]: + raise ValueError self.write(f"{quote_type}{value}{quote_type}") except ValueError: self.write(repr(value)) @@ -1116,8 +1122,11 @@ def visit_JoinedStr(self, node): value, quote_types, use_escaped_whitespace=is_constant ) new_buffer.append(value) - quote_type = quote_types[0] value = "".join(new_buffer) + quote_type = quote_types[0] + # If we're using triple quotes and we'd need to escape a final quote, escape + if value and quote_type[0] == value[-1]: + value = value[:-1] + "\\" + value[-1] self.write(f"{quote_type}{value}{quote_type}") except ValueError: self.write(repr("".join(b[0] for b in buffer))) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 94b6a0bb8f2fff..5986636902bee9 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -152,11 
+152,14 @@ def test_fstrings(self): # See issue 25180 self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""") self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""") + self.check_ast_roundtrip("""f''""") + self.check_ast_roundtrip('''f"""'end' "quote\\""""''') def test_fstrings_complicated(self): # See issue 28002 self.check_ast_roundtrip("""f'''{"'"}'''""") self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''') + self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'single quote\\'\'\'\'''') self.check_ast_roundtrip('f"""{\'\'\'\n\'\'\'}"""') self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""') self.check_ast_roundtrip('''f"a\\r\\nb"''') From 85521c0a27e3305bdc2f2efe880be90e7341d2b5 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Tue, 2 Jun 2020 19:18:37 -0700 Subject: [PATCH 08/31] unparse: simplify, share code with docstring writing --- Lib/ast.py | 122 +++++++++++++++------------------------ Lib/test/test_unparse.py | 8 ++- 2 files changed, 53 insertions(+), 77 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 7cdb27b0376973..976ca7f12b31d6 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1048,51 +1048,46 @@ def visit_AsyncWith(self, node): with self.block(extra=self.get_type_comment(node)): self.traverse(node.body) - def _str_literal_helper(self, value, quote_types, use_escaped_whitespace=False): - """Helper for writing string literals without using repr. - + def _str_literal_helper( + self, value, quote_types=("'", '"', '"""', "'''"), escape="" + ): + """Helper for writing string literals, minimising escapes. Returns (possible quote types, string literal to write). - Throws ValueError if we need to use repr to write this string literal. - """ - # str.__repr__ will escape backslashes, quotes, \n, \r, \t and non-printable characters - # We'll handle quotes, \n, \t and space, using triple quotes if necessary, but we'll - # just let repr handle any other unicode control and separator characters. 
- def should_use_repr(c): - return c == '\\' or ( - # This logic for determining non-printable characters is based - # on that in Tools/unicode/makeunicodedata.py - c not in (' ', '\n', '\t') and unicodedata.category(c)[0] in ("C", "Z") - ) - - if any(map(should_use_repr, value)): - raise ValueError - - if use_escaped_whitespace: - value = value.replace("\n", "\\n") - value = value.replace("\t", "\\t") - - if "\n" in value: - quote_types = [quote for quote in quote_types if quote in ('"""', "'''")] - quote_types = [quote for quote in quote_types if quote not in value] - if not quote_types: - raise ValueError - if value: + # Escape characters we've been told to escape and any non-printable + # characters. The logic for determining non-printable characters is + # based on that in Tools/unicode/makeunicodedata.py + escape = set(escape) | { + c for c in value + if c not in ' \n\t' and unicodedata.category(c)[0] in ("C", "Z") + } + qts = quote_types + val = value.replace("\\", "\\\\") + for c in escape: + val = val.replace(c, c.encode('unicode_escape').decode('ascii')) + if "\n" in val: + qts = [quote for quote in qts if quote in ('"""', "'''")] + qts = [quote for quote in qts if quote not in val] + if not qts: + # If there aren't any possible quote_types, fallback to using repr + # on the original value. Try to use a quote_type from quote_types. 
+ value = repr(value) + quote_type = next((q for q in quote_types if value[0] in q), value[0]) + return value[1:-1], [quote_type] + if val: # Sort so that we prefer '''"''' over """\"""" - quote_types.sort(key=lambda q: q[0] == value[-1]) - return quote_types, value + qts.sort(key=lambda q: q[0] == val[-1]) + # If we're using triple quotes and we'd need to escape a final quote, escape + if qts[0][0] == val[-1]: + assert len(qts[0]) == 3 + val = val[:-1] + "\\" + val[-1] + return val, qts def _write_str_avoiding_backslashes(self, value): """Write string literal value with a best effort attempt to avoid backslashes.""" - try: - quote_types, value = self._str_literal_helper(value, ["'", '"', '"""', "'''"]) - quote_type = quote_types[0] - # If we're using triple quotes and we'd need to escape a final quote, raise - if value and quote_type[0] == value[-1]: - raise ValueError - self.write(f"{quote_type}{value}{quote_type}") - except ValueError: - self.write(repr(value)) + value, quote_types = self._str_literal_helper(value) + quote_type = quote_types[0] + self.write(f"{quote_type}{value}{quote_type}") def visit_JoinedStr(self, node): self.write("f") @@ -1113,23 +1108,16 @@ def visit_JoinedStr(self, node): meth(value, self.buffer_writer) buffer.append((self.buffer, isinstance(value, Constant))) # This part is analagous to _write_str_avoiding_backslashes - try: - new_buffer = [] - quote_types = ["'", '"', '"""', "'''"] - for i in range(len(buffer)): - value, is_constant = buffer[i] - quote_types, value = self._str_literal_helper( - value, quote_types, use_escaped_whitespace=is_constant - ) - new_buffer.append(value) - value = "".join(new_buffer) - quote_type = quote_types[0] - # If we're using triple quotes and we'd need to escape a final quote, escape - if value and quote_type[0] == value[-1]: - value = value[:-1] + "\\" + value[-1] - self.write(f"{quote_type}{value}{quote_type}") - except ValueError: - self.write(repr("".join(b[0] for b in buffer))) + new_buffer = [] + 
quote_types = ["'", '"', '"""', "'''"] + for value, is_constant in buffer: + value, quote_types = self._str_literal_helper( + value, quote_types, escape='\n\t' if is_constant else '' + ) + new_buffer.append(value) + value = "".join(new_buffer) + quote_type = quote_types[0] + self.write(f"{quote_type}{value}{quote_type}") def visit_FormattedValue(self, node): self.write("f") @@ -1172,28 +1160,12 @@ def visit_Name(self, node): self.write(node.id) def _write_docstring(self, node): - def esc_char(c): - if c in ("\n", "\t"): - # In the AST form, we don't know the author's intentation - # about how this should be displayed. We'll only escape - # \n and \t, because they are more likely to be unescaped - # in the source - return c - return c.encode('unicode_escape').decode('ascii') - self.fill() if node.kind == "u": self.write("u") - - value = node.value - if value: - # Preserve quotes in the docstring by escaping them - value = "".join(map(esc_char, value)) - if value[-1] == '"': - value = value.replace('"', '\\"', -1) - value = value.replace('"""', '""\\"') - - self.write(f'"""{value}"""') + value, quote_types = self._str_literal_helper(node.value, ('"""', "'''")) + quote_type = quote_types[0] + self.write(f"{quote_type}{value}{quote_type}") def _write_constant(self, value): if isinstance(value, (float, complex)): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 5986636902bee9..8f23b5b229357a 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -342,8 +342,8 @@ def test_docstrings(self): '\r\\r\t\\t\n\\n', '""">>> content = \"\"\"blabla\"\"\" <<<"""', r'foo\n\x00', - '🐍⛎𩸽üéş^\X\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}' - + "' \\'\\'\\'\"\"\" \"\"\\'\\' \\'", + '🐍⛎𩸽üéş^\\\\X\\\\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}' ) for docstring in docstrings: # check as Module docstrings for easy testing @@ -461,6 +461,10 @@ def test_docstrings(self): '""""""', '"""\'\'\'"""', '"""\'\'\'\'\'\'"""', + '"""🐍⛎𩸽üéş^\\\\X\\\\BB⟿"""', + '"""end in single 
\'quote\'"""', + "'''end in double \"quote\"'''", + '"""almost end in double "quote"."""', ) for prefix in docstring_prefixes: From 4e4a4b49d1c11fbb1857bb4bb55078509750eee7 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Wed, 3 Jun 2020 12:43:57 -0700 Subject: [PATCH 09/31] remove comment --- Lib/ast.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index f2820a370230a6..95ff66ff5ce7cb 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1114,7 +1114,6 @@ def visit_JoinedStr(self, node): meth = getattr(self, "_fstring_" + type(value).__name__) meth(value, self.buffer_writer) buffer.append((self.buffer, isinstance(value, Constant))) - # This part is analagous to _write_str_avoiding_backslashes new_buffer = [] quote_types = ["'", '"', '"""', "'''"] for value, is_constant in buffer: From 91430f5f2fcdf331ff38bf0279cb0b9f44e06115 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Thu, 4 Jun 2020 12:04:59 -0700 Subject: [PATCH 10/31] reduce assignments to qts --- Lib/ast.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 95ff66ff5ce7cb..6da9b908b31b23 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1068,13 +1068,12 @@ def _str_literal_helper( c for c in value if c not in ' \n\t' and unicodedata.category(c)[0] in ("C", "Z") } - qts = quote_types val = value.replace("\\", "\\\\") for c in escape: val = val.replace(c, c.encode('unicode_escape').decode('ascii')) + qts = [quote for quote in quote_types if quote not in val] if "\n" in val: qts = [quote for quote in qts if quote in ('"""', "'''")] - qts = [quote for quote in qts if quote not in val] if not qts: # If there aren't any possible quote_types, fallback to using repr # on the original value. Try to use a quote_type from quote_types. 
From 81ff3cbbba73e57969131989127e6cce198ca7fe Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Thu, 4 Jun 2020 12:10:50 -0700 Subject: [PATCH 11/31] use join instead of multiple replace --- Lib/ast.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 6da9b908b31b23..01df9b7387ccf8 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1069,8 +1069,9 @@ def _str_literal_helper( if c not in ' \n\t' and unicodedata.category(c)[0] in ("C", "Z") } val = value.replace("\\", "\\\\") - for c in escape: - val = val.replace(c, c.encode('unicode_escape').decode('ascii')) + val = "".join( + (c.encode('unicode_escape').decode('ascii') if c in escape else c) for c in val + ) qts = [quote for quote in quote_types if quote not in val] if "\n" in val: qts = [quote for quote in qts if quote in ('"""', "'''")] From bd054287a70c04c13f5a483837647fe919d2d9ef Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Thu, 4 Jun 2020 12:15:55 -0700 Subject: [PATCH 12/31] use write_str_avoiding_backslashes for docstrings --- Lib/ast.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 01df9b7387ccf8..bf7a944c3360ac 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1090,9 +1090,9 @@ def _str_literal_helper( val = val[:-1] + "\\" + val[-1] return val, qts - def _write_str_avoiding_backslashes(self, value): + def _write_str_avoiding_backslashes(self, value, **kwargs): """Write string literal value with a best effort attempt to avoid backslashes.""" - value, quote_types = self._str_literal_helper(value) + value, quote_types = self._str_literal_helper(value, **kwargs) quote_type = quote_types[0] self.write(f"{quote_type}{value}{quote_type}") @@ -1169,9 +1169,7 @@ def _write_docstring(self, node): self.fill() if node.kind == "u": self.write("u") - value, quote_types = self._str_literal_helper(node.value, ('"""', "'''")) - quote_type = quote_types[0] - self.write(f"{quote_type}{value}{quote_type}") + 
self._write_str_avoiding_backslashes(node.value, quote_types=('"""', "'''")) def _write_constant(self, value): if isinstance(value, (float, complex)): From cd2c1074571178188a9fc3c569b5d66615411d86 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Thu, 4 Jun 2020 12:35:46 -0700 Subject: [PATCH 13/31] don't use replace for backslashes --- Lib/ast.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index bf7a944c3360ac..3c2d1a54457d02 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1061,16 +1061,15 @@ def _str_literal_helper( """Helper for writing string literals, minimising escapes. Returns (possible quote types, string literal to write). """ - # Escape characters we've been told to escape and any non-printable - # characters. The logic for determining non-printable characters is - # based on that in Tools/unicode/makeunicodedata.py - escape = set(escape) | { + # Escape characters we've been told to escape, backslashes, and any + # non-printable characters. The logic for determining non-printable + # characters is based on that in Tools/unicode/makeunicodedata.py + escape = {*escape, '\\'} | { c for c in value if c not in ' \n\t' and unicodedata.category(c)[0] in ("C", "Z") } - val = value.replace("\\", "\\\\") val = "".join( - (c.encode('unicode_escape').decode('ascii') if c in escape else c) for c in val + (c.encode('unicode_escape').decode('ascii') if c in escape else c) for c in value ) qts = [quote for quote in quote_types if quote not in val] if "\n" in val: From 4a7037aa86c3d7a03b9c1e1c0ff0c6cb84b6ba9f Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Thu, 4 Jun 2020 12:46:18 -0700 Subject: [PATCH 14/31] use isprintable I didn't know this existed, since I was just tracing the C code for repr. 
Also must have missed it on google --- Lib/ast.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 3c2d1a54457d02..3bd8bcfb6b8ccd 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -25,7 +25,6 @@ :license: Python License. """ import sys -import unicodedata from _ast import * from contextlib import contextmanager, nullcontext from enum import IntEnum, auto @@ -1062,14 +1061,12 @@ def _str_literal_helper( Returns (possible quote types, string literal to write). """ # Escape characters we've been told to escape, backslashes, and any - # non-printable characters. The logic for determining non-printable - # characters is based on that in Tools/unicode/makeunicodedata.py - escape = {*escape, '\\'} | { - c for c in value - if c not in ' \n\t' and unicodedata.category(c)[0] in ("C", "Z") - } + # non-printable characters. + escape = {*escape, '\\'} val = "".join( - (c.encode('unicode_escape').decode('ascii') if c in escape else c) for c in value + c.encode('unicode_escape').decode('ascii') + if c in escape or (not c.isprintable() and c not in '\n\t') else c + for c in value ) qts = [quote for quote in quote_types if quote not in val] if "\n" in val: From 228f0b32257a64ac9f1c1b6b5cbeb5bb1402c7c2 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Thu, 4 Jun 2020 12:49:59 -0700 Subject: [PATCH 15/31] fix comment --- Lib/ast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 3bd8bcfb6b8ccd..5b0e880dc2beaa 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1060,8 +1060,8 @@ def _str_literal_helper( """Helper for writing string literals, minimising escapes. Returns (possible quote types, string literal to write). """ - # Escape characters we've been told to escape, backslashes, and any - # non-printable characters. + # Escape characters we've been told to escape, backslashes, and + # non-printable characters other than \n and \t. 
escape = {*escape, '\\'} val = "".join( c.encode('unicode_escape').decode('ascii') From debc3dda6fb65c3faba043f2aa00911f7bee7c17 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 12:15:02 -0700 Subject: [PATCH 16/31] Use American English --- Lib/ast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index 5b0e880dc2beaa..b4c416b954b0a9 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1057,7 +1057,7 @@ def visit_AsyncWith(self, node): def _str_literal_helper( self, value, quote_types=("'", '"', '"""', "'''"), escape="" ): - """Helper for writing string literals, minimising escapes. + """Helper for writing string literals, minimizing escapes. Returns (possible quote types, string literal to write). """ # Escape characters we've been told to escape, backslashes, and From 32d34e61bc84b02c9671ceb78811667884fb5377 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 12:17:04 -0700 Subject: [PATCH 17/31] s/qts/possible_quotes/g --- Lib/ast.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index b4c416b954b0a9..aa365aba6daadc 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1068,10 +1068,10 @@ def _str_literal_helper( if c in escape or (not c.isprintable() and c not in '\n\t') else c for c in value ) - qts = [quote for quote in quote_types if quote not in val] + possible_quotes = [quote for quote in quote_types if quote not in val] if "\n" in val: - qts = [quote for quote in qts if quote in ('"""', "'''")] - if not qts: + possible_quotes = [quote for quote in possible_quotes if quote in ('"""', "'''")] + if not possible_quotes: # If there aren't any possible quote_types, fallback to using repr # on the original value. Try to use a quote_type from quote_types. 
value = repr(value) @@ -1079,12 +1079,12 @@ def _str_literal_helper( return value[1:-1], [quote_type] if val: # Sort so that we prefer '''"''' over """\"""" - qts.sort(key=lambda q: q[0] == val[-1]) + possible_quotes.sort(key=lambda q: q[0] == val[-1]) # If we're using triple quotes and we'd need to escape a final quote, escape - if qts[0][0] == val[-1]: - assert len(qts[0]) == 3 + if possible_quotes[0][0] == val[-1]: + assert len(possible_quotes[0]) == 3 val = val[:-1] + "\\" + val[-1] - return val, qts + return val, possible_quotes def _write_str_avoiding_backslashes(self, value, **kwargs): """Write string literal value with a best effort attempt to avoid backslashes.""" From 1fdb0752d14619ba531a3daaac17f06f810693d4 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 12:18:28 -0700 Subject: [PATCH 18/31] Prefix underscore to avoid_backslashes --- Lib/ast.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index aa365aba6daadc..b93a2f1fae7685 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -653,8 +653,8 @@ class _Unparser(NodeVisitor): output source code for the abstract syntax; original formatting is disregarded.""" - def __init__(self, *, avoid_backslashes=False): - self.avoid_backslashes = avoid_backslashes + def __init__(self, *, _avoid_backslashes=False): + self._avoid_backslashes = _avoid_backslashes self._source = [] self._buffer = [] self._precedences = {} @@ -1094,7 +1094,7 @@ def _write_str_avoiding_backslashes(self, value, **kwargs): def visit_JoinedStr(self, node): self.write("f") - if self.avoid_backslashes: + if self._avoid_backslashes: self._fstring_JoinedStr(node, self.buffer_writer) self._write_str_avoiding_backslashes(self.buffer) return @@ -1139,7 +1139,7 @@ def _fstring_Constant(self, node, write): def _fstring_FormattedValue(self, node, write): write("{") - unparser = type(self)(avoid_backslashes=True) + unparser = type(self)(_avoid_backslashes=True)
unparser.set_precedence(_Precedence.TEST.next(), node.value) expr = unparser.visit(node.value) if expr.startswith("{"): @@ -1171,7 +1171,7 @@ def _write_constant(self, value): if isinstance(value, (float, complex)): # Substitute overflowing decimal literal for AST infinities. self.write(repr(value).replace("inf", _INFSTR)) - elif self.avoid_backslashes and isinstance(value, str): + elif self._avoid_backslashes and isinstance(value, str): self._write_str_avoiding_backslashes(value) else: self.write(repr(value)) From c3fd073ce21ab5227d2e7a0f8f0da85e5859060b Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 12:34:28 -0700 Subject: [PATCH 19/31] Rename value and val --- Lib/ast.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index b93a2f1fae7685..0b848160d1ca49 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1055,7 +1055,7 @@ def visit_AsyncWith(self, node): self.traverse(node.body) def _str_literal_helper( - self, value, quote_types=("'", '"', '"""', "'''"), escape="" + self, string, quote_types=("'", '"', '"""', "'''"), escape="" ): """Helper for writing string literals, minimizing escapes. Returns (possible quote types, string literal to write). @@ -1063,34 +1063,34 @@ def _str_literal_helper( # Escape characters we've been told to escape, backslashes, and # non-printable characters other than \n and \t. 
escape = {*escape, '\\'} - val = "".join( + escaped_string = "".join( c.encode('unicode_escape').decode('ascii') if c in escape or (not c.isprintable() and c not in '\n\t') else c - for c in value + for c in string ) - possible_quotes = [quote for quote in quote_types if quote not in val] - if "\n" in val: + possible_quotes = [quote for quote in quote_types if quote not in escaped_string] + if "\n" in escaped_string: possible_quotes = [quote for quote in possible_quotes if quote in ('"""', "'''")] if not possible_quotes: # If there aren't any possible quote_types, fallback to using repr # on the original value. Try to use a quote_type from quote_types. - value = repr(value) - quote_type = next((q for q in quote_types if value[0] in q), value[0]) - return value[1:-1], [quote_type] - if val: + string = repr(string) + quote_type = next((q for q in quote_types if string[0] in q), string[0]) + return string[1:-1], [quote_type] + if escaped_string: # Sort so that we prefer '''"''' over """\"""" - possible_quotes.sort(key=lambda q: q[0] == val[-1]) + possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) # If we're using triple quotes and we'd need to escape a final quote, escape - if possible_quotes[0][0] == val[-1]: + if possible_quotes[0][0] == escaped_string[-1]: assert len(possible_quotes[0]) == 3 - val = val[:-1] + "\\" + val[-1] - return val, possible_quotes + escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] + return escaped_string, possible_quotes - def _write_str_avoiding_backslashes(self, value, **kwargs): + def _write_str_avoiding_backslashes(self, string, **kwargs): """Write string literal value with a best effort attempt to avoid backslashes.""" - value, quote_types = self._str_literal_helper(value, **kwargs) + string, quote_types = self._str_literal_helper(string, **kwargs) quote_type = quote_types[0] - self.write(f"{quote_type}{value}{quote_type}") + self.write(f"{quote_type}{string}{quote_type}") def visit_JoinedStr(self, node): 
self.write("f") From a3b48a03a2623dd73dd56e420f4ecacbe096ac80 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 12:39:50 -0700 Subject: [PATCH 20/31] Reformat string escaping --- Lib/ast.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 0b848160d1ca49..f6a2105c9a222c 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1064,8 +1064,11 @@ def _str_literal_helper( # non-printable characters other than \n and \t. escape = {*escape, '\\'} escaped_string = "".join( - c.encode('unicode_escape').decode('ascii') - if c in escape or (not c.isprintable() and c not in '\n\t') else c + ( + c.encode('unicode_escape').decode('ascii') + if c in escape or (not c.isprintable() and c not in '\n\t') + else c + ) for c in string ) possible_quotes = [quote for quote in quote_types if quote not in escaped_string] From 84485abd9cd703ab381a45cc10731fe789971710 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 12:46:03 -0700 Subject: [PATCH 21/31] Add a comment to visit_JoinedStr --- Lib/ast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/ast.py b/Lib/ast.py index f6a2105c9a222c..abf0978932ff2d 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1116,6 +1116,7 @@ def visit_JoinedStr(self, node): new_buffer = [] quote_types = ["'", '"', '"""', "'''"] for value, is_constant in buffer: + # Repeatedly narrow down the list of possible quote_types value, quote_types = self._str_literal_helper( value, quote_types, escape='\n\t' if is_constant else '' ) From 4f32cf0f9eb3aa380654f690d754779f7e8e6932 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 13:00:43 -0700 Subject: [PATCH 22/31] Fix line lengths --- Lib/ast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index abf0978932ff2d..85a546ce619b55 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1071,9 +1071,9 @@ def _str_literal_helper( ) for c in string ) - possible_quotes = [quote for quote in 
quote_types if quote not in escaped_string] + possible_quotes = [q for q in quote_types if q not in escaped_string] if "\n" in escaped_string: - possible_quotes = [quote for quote in possible_quotes if quote in ('"""', "'''")] + possible_quotes = [q for q in possible_quotes if q in ('"""', "'''")] if not possible_quotes: # If there aren't any possible quote_types, fallback to using repr # on the original value. Try to use a quote_type from quote_types. @@ -1083,7 +1083,8 @@ def _str_literal_helper( if escaped_string: # Sort so that we prefer '''"''' over """\"""" possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) - # If we're using triple quotes and we'd need to escape a final quote, escape + # If we're using triple quotes and we'd need to escape a final + # quote, escape it if possible_quotes[0][0] == escaped_string[-1]: assert len(possible_quotes[0]) == 3 escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] From d31b376bc85dded9d059259737b204e544707d07 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 7 Jun 2020 15:16:20 -0700 Subject: [PATCH 23/31] Update comment for variable name change --- Lib/ast.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 85a546ce619b55..6a159590e907a2 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1075,11 +1075,11 @@ def _str_literal_helper( if "\n" in escaped_string: possible_quotes = [q for q in possible_quotes if q in ('"""', "'''")] if not possible_quotes: - # If there aren't any possible quote_types, fallback to using repr - # on the original value. Try to use a quote_type from quote_types. + # If there aren't any possible_quotes, fallback to using repr + # on the original string. Try to use a quote from quote_types. 
string = repr(string) - quote_type = next((q for q in quote_types if string[0] in q), string[0]) - return string[1:-1], [quote_type] + quote = next((q for q in quote_types if string[0] in q), string[0]) + return string[1:-1], [quote] if escaped_string: # Sort so that we prefer '''"''' over """\"""" possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) From 428fab735b5c8c0131363fc193fd4235d9d9e4ad Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 12:19:41 -0700 Subject: [PATCH 24/31] move variable from start of __init__ to end --- Lib/ast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index 6a159590e907a2..62072caa53ce70 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -654,12 +654,12 @@ class _Unparser(NodeVisitor): is disregarded.""" def __init__(self, *, _avoid_backslashes=False): - self._avoid_backslashes = _avoid_backslashes self._source = [] self._buffer = [] self._precedences = {} self._type_ignores = {} self._indent = 0 + self._avoid_backslashes = _avoid_backslashes def interleave(self, inter, f, seq): """Call f on each item in seq, calling inter() in between.""" From ec3895270e76b02af833460218bac17cf774eeea Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 12:23:52 -0700 Subject: [PATCH 25/31] use global constants for quote types --- Lib/ast.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 62072caa53ce70..aa9d38a33f6800 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -648,6 +648,11 @@ def next(self): except ValueError: return self + +_SINGLE_QUOTES = ("'", '"') +_MULTI_QUOTES = ('"""', "'''") +_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) + class _Unparser(NodeVisitor): """Methods in this class recursively traverse an AST and output source code for the abstract syntax; original formatting @@ -1055,7 +1060,7 @@ def visit_AsyncWith(self, node): self.traverse(node.body) def _str_literal_helper( - self, string, 
quote_types=("'", '"', '"""', "'''"), escape="" + self, string, quote_types=_ALL_QUOTES, escape="" ): """Helper for writing string literals, minimizing escapes. Returns (possible quote types, string literal to write). @@ -1073,7 +1078,7 @@ def _str_literal_helper( ) possible_quotes = [q for q in quote_types if q not in escaped_string] if "\n" in escaped_string: - possible_quotes = [q for q in possible_quotes if q in ('"""', "'''")] + possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] if not possible_quotes: # If there aren't any possible_quotes, fallback to using repr # on the original string. Try to use a quote from quote_types. @@ -1115,7 +1120,7 @@ def visit_JoinedStr(self, node): meth(value, self.buffer_writer) buffer.append((self.buffer, isinstance(value, Constant))) new_buffer = [] - quote_types = ["'", '"', '"""', "'''"] + quote_types = _ALL_QUOTES for value, is_constant in buffer: # Repeatedly narrow down the list of possible quote_types value, quote_types = self._str_literal_helper( @@ -1170,7 +1175,7 @@ def _write_docstring(self, node): self.fill() if node.kind == "u": self.write("u") - self._write_str_avoiding_backslashes(node.value, quote_types=('"""', "'''")) + self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) def _write_constant(self, value): if isinstance(value, (float, complex)): From b903dcf4bcb9661ddae92ac4e3e5ad6a49f3756d Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 12:27:47 -0700 Subject: [PATCH 26/31] make docstring explicit about returning a tuple --- Lib/ast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index aa9d38a33f6800..9cc817dd605b0e 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1063,7 +1063,7 @@ def _str_literal_helper( self, string, quote_types=_ALL_QUOTES, escape="" ): """Helper for writing string literals, minimizing escapes. - Returns (possible quote types, string literal to write). 
+ Returns the tuple (string literal to write, possible quote types). """ # Escape characters we've been told to escape, backslashes, and # non-printable characters other than \n and \t. From 007bb4aabf002e71d6024ae7480aae7e2b6b603c Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 12:33:00 -0700 Subject: [PATCH 27/31] use kw only args --- Lib/ast.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 9cc817dd605b0e..65cdb477db65b5 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1060,7 +1060,7 @@ def visit_AsyncWith(self, node): self.traverse(node.body) def _str_literal_helper( - self, string, quote_types=_ALL_QUOTES, escape="" + self, string, *, quote_types=_ALL_QUOTES, escape="" ): """Helper for writing string literals, minimizing escapes. Returns the tuple (string literal to write, possible quote types). @@ -1095,9 +1095,9 @@ def _str_literal_helper( escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] return escaped_string, possible_quotes - def _write_str_avoiding_backslashes(self, string, **kwargs): + def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): """Write string literal value with a best effort attempt to avoid backslashes.""" - string, quote_types = self._str_literal_helper(string, **kwargs) + string, quote_types = self._str_literal_helper(string, quote_types=quote_types) quote_type = quote_types[0] self.write(f"{quote_type}{string}{quote_type}") @@ -1124,7 +1124,7 @@ def visit_JoinedStr(self, node): for value, is_constant in buffer: # Repeatedly narrow down the list of possible quote_types value, quote_types = self._str_literal_helper( - value, quote_types, escape='\n\t' if is_constant else '' + value, quote_types=quote_types, escape='\n\t' if is_constant else '' ) new_buffer.append(value) value = "".join(new_buffer) From 4c09b961d9b937029a84f4b846958b9c1e7e0a9a Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 12:39:23 -0700 
Subject: [PATCH 28/31] add escape_special_whitespace boolean argument --- Lib/ast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 65cdb477db65b5..c38935a3864cb9 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1060,14 +1060,14 @@ def visit_AsyncWith(self, node): self.traverse(node.body) def _str_literal_helper( - self, string, *, quote_types=_ALL_QUOTES, escape="" + self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False ): """Helper for writing string literals, minimizing escapes. Returns the tuple (string literal to write, possible quote types). """ # Escape characters we've been told to escape, backslashes, and # non-printable characters other than \n and \t. - escape = {*escape, '\\'} + escape = ('\n', '\t', '\\') if escape_special_whitespace else ('\\',) escaped_string = "".join( ( c.encode('unicode_escape').decode('ascii') @@ -1124,7 +1124,8 @@ def visit_JoinedStr(self, node): for value, is_constant in buffer: # Repeatedly narrow down the list of possible quote_types value, quote_types = self._str_literal_helper( - value, quote_types=quote_types, escape='\n\t' if is_constant else '' + value, quote_types=quote_types, + escape_special_whitespace=is_constant ) new_buffer.append(value) value = "".join(new_buffer) From 7735da54bcd80a8e6b06c4a6c57306b47c986382 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 12:57:14 -0700 Subject: [PATCH 29/31] use an inner function instead of a comprehension --- Lib/ast.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index c38935a3864cb9..43110d77f23282 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1065,17 +1065,17 @@ def _str_literal_helper( """Helper for writing string literals, minimizing escapes. Returns the tuple (string literal to write, possible quote types). 
""" - # Escape characters we've been told to escape, backslashes, and - # non-printable characters other than \n and \t. - escape = ('\n', '\t', '\\') if escape_special_whitespace else ('\\',) - escaped_string = "".join( - ( - c.encode('unicode_escape').decode('ascii') - if c in escape or (not c.isprintable() and c not in '\n\t') - else c - ) - for c in string - ) + def escape_char(c): + # \n and \t are non-printable, but we only escape them if + # escape_special_whitespace is True + if not escape_special_whitespace and c in "\n\t": + return c + # Always escape backslashes and other non-printable characters + if c == "\\" or not c.isprintable(): + return c.encode("unicode_escape").decode("ascii") + return c + + escaped_string = "".join(map(escape_char, string)) possible_quotes = [q for q in quote_types if q not in escaped_string] if "\n" in escaped_string: possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] From 593b0d2af6f435e613909bcbd2bd2a3ffb26cd34 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 13:08:55 -0700 Subject: [PATCH 30/31] add to comment when falling back to repr --- Lib/ast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index 43110d77f23282..10bec27aca425d 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1081,7 +1081,8 @@ def escape_char(c): possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] if not possible_quotes: # If there aren't any possible_quotes, fallback to using repr - # on the original string. Try to use a quote from quote_types. + # on the original string. Try to use a quote from quote_types, + # e.g., so that we use triple quotes for docstrings. 
string = repr(string) quote = next((q for q in quote_types if string[0] in q), string[0]) return string[1:-1], [quote] From fc6166f79be4029dbe66f81c6fbfd11026027830 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 13:27:42 -0700 Subject: [PATCH 31/31] swap the order of the iterations to save a little effort --- Lib/ast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index 10bec27aca425d..0de28e4ed49f0b 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1076,9 +1076,10 @@ def escape_char(c): return c escaped_string = "".join(map(escape_char, string)) - possible_quotes = [q for q in quote_types if q not in escaped_string] + possible_quotes = quote_types if "\n" in escaped_string: possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] + possible_quotes = [q for q in possible_quotes if q not in escaped_string] if not possible_quotes: # If there aren't any possible_quotes, fallback to using repr # on the original string. Try to use a quote from quote_types,