Skip to content

bpo-28002: Roundtrip f-strings with ast.unparse better #19612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 32 commits into from
Nov 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
cc318ee
bpo-28002: Roundtrip f-strings with ast.unparse better
Apr 19, 2020
4743d8d
Update Lib/ast.py
hauntsaninja May 21, 2020
98878ba
_Unparser: make avoid_backslashes keyword only
May 22, 2020
a0a7b26
test_unparse: add test_fstring to run_always_files
May 22, 2020
0c95e56
unparse fstrings: improve cosmetics by escaping whitespace
Jun 3, 2020
ac49be1
unparse: add another test case
Jun 3, 2020
714a269
unparse: fix final quote escaping issues
Jun 3, 2020
85521c0
unparse: simplify, share code with docstring writing
Jun 3, 2020
106cdd2
Merge remote-tracking branch 'upstream/master' into astunparse
Jun 3, 2020
4e4a4b4
remove comment
Jun 3, 2020
91430f5
reduce assignments to qts
Jun 4, 2020
81ff3cb
use join instead of multiple replace
Jun 4, 2020
bd05428
use write_str_avoiding_backslashes for docstrings
Jun 4, 2020
cd2c107
don't use replace for backslashes
Jun 4, 2020
4a7037a
use isprintable
Jun 4, 2020
228f0b3
fix comment
Jun 4, 2020
debc3dd
Use American English
Jun 7, 2020
32d34e6
s/qts/possible_quotes/g
Jun 7, 2020
1fdb075
Prefix undesrscore to avoid_backslashes
Jun 7, 2020
c3fd073
Rename value and val
Jun 7, 2020
a3b48a0
Reformat string escaping
Jun 7, 2020
84485ab
Add a comment to visit_JoinedStr
Jun 7, 2020
4f32cf0
Fix line lengths
Jun 7, 2020
d31b376
Update comment for variable name change
Jun 7, 2020
428fab7
move variable from start of __init__ to end
Oct 18, 2020
ec38952
use global constants for quote types
Oct 18, 2020
b903dcf
make docstring explicit about returning a tuple
Oct 18, 2020
007bb4a
use kw only args
Oct 18, 2020
4c09b96
add escape_special_whitespace boolean argument
Oct 18, 2020
7735da5
use an inner function instead of a comprehension
Oct 18, 2020
593b0d2
add to comment when falling back to repr
Oct 18, 2020
fc6166f
swap the order of the iterations to save a little effort
Oct 18, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 86 additions & 24 deletions Lib/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,17 +648,23 @@ def next(self):
except ValueError:
return self


_SINGLE_QUOTES = ("'", '"')
_MULTI_QUOTES = ('"""', "'''")
_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES)

class _Unparser(NodeVisitor):
"""Methods in this class recursively traverse an AST and
output source code for the abstract syntax; original formatting
is disregarded."""

def __init__(self):
def __init__(self, *, _avoid_backslashes=False):
self._source = []
self._buffer = []
self._precedences = {}
self._type_ignores = {}
self._indent = 0
self._avoid_backslashes = _avoid_backslashes

def interleave(self, inter, f, seq):
"""Call f on each item in seq, calling inter() in between."""
Expand Down Expand Up @@ -1053,15 +1059,85 @@ def visit_AsyncWith(self, node):
with self.block(extra=self.get_type_comment(node)):
self.traverse(node.body)

def _str_literal_helper(
self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False
):
"""Helper for writing string literals, minimizing escapes.
Returns the tuple (string literal to write, possible quote types).
"""
def escape_char(c):
# \n and \t are non-printable, but we only escape them if
# escape_special_whitespace is True
if not escape_special_whitespace and c in "\n\t":
return c
# Always escape backslashes and other non-printable characters
if c == "\\" or not c.isprintable():
return c.encode("unicode_escape").decode("ascii")
return c

escaped_string = "".join(map(escape_char, string))
possible_quotes = quote_types
if "\n" in escaped_string:
possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES]
possible_quotes = [q for q in possible_quotes if q not in escaped_string]
if not possible_quotes:
# If there aren't any possible_quotes, fallback to using repr
# on the original string. Try to use a quote from quote_types,
# e.g., so that we use triple quotes for docstrings.
string = repr(string)
quote = next((q for q in quote_types if string[0] in q), string[0])
return string[1:-1], [quote]
if escaped_string:
# Sort so that we prefer '''"''' over """\""""
possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1])
# If we're using triple quotes and we'd need to escape a final
# quote, escape it
if possible_quotes[0][0] == escaped_string[-1]:
assert len(possible_quotes[0]) == 3
escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1]
return escaped_string, possible_quotes

def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES):
"""Write string literal value with a best effort attempt to avoid backslashes."""
string, quote_types = self._str_literal_helper(string, quote_types=quote_types)
quote_type = quote_types[0]
self.write(f"{quote_type}{string}{quote_type}")

def visit_JoinedStr(self, node):
self.write("f")
self._fstring_JoinedStr(node, self.buffer_writer)
self.write(repr(self.buffer))
if self._avoid_backslashes:
self._fstring_JoinedStr(node, self.buffer_writer)
self._write_str_avoiding_backslashes(self.buffer)
return

# If we don't need to avoid backslashes globally (i.e., we only need
# to avoid them inside FormattedValues), it's cosmetically preferred
# to use escaped whitespace. That is, it's preferred to use backslashes
# for cases like: f"{x}\n". To accomplish this, we keep track of what
# in our buffer corresponds to FormattedValues and what corresponds to
# Constant parts of the f-string, and allow escapes accordingly.
buffer = []
for value in node.values:
meth = getattr(self, "_fstring_" + type(value).__name__)
meth(value, self.buffer_writer)
buffer.append((self.buffer, isinstance(value, Constant)))
new_buffer = []
quote_types = _ALL_QUOTES
for value, is_constant in buffer:
# Repeatedly narrow down the list of possible quote_types
value, quote_types = self._str_literal_helper(
value, quote_types=quote_types,
escape_special_whitespace=is_constant
)
new_buffer.append(value)
value = "".join(new_buffer)
quote_type = quote_types[0]
self.write(f"{quote_type}{value}{quote_type}")

def visit_FormattedValue(self, node):
self.write("f")
self._fstring_FormattedValue(node, self.buffer_writer)
self.write(repr(self.buffer))
self._write_str_avoiding_backslashes(self.buffer)

def _fstring_JoinedStr(self, node, write):
for value in node.values:
Expand All @@ -1076,11 +1152,13 @@ def _fstring_Constant(self, node, write):

def _fstring_FormattedValue(self, node, write):
write("{")
unparser = type(self)()
unparser = type(self)(_avoid_backslashes=True)
unparser.set_precedence(_Precedence.TEST.next(), node.value)
expr = unparser.visit(node.value)
if expr.startswith("{"):
write(" ") # Separate pair of opening brackets as "{ {"
if "\\" in expr:
raise ValueError("Unable to avoid backslash in f-string expression part")
write(expr)
if node.conversion != -1:
conversion = chr(node.conversion)
Expand All @@ -1097,33 +1175,17 @@ def visit_Name(self, node):
self.write(node.id)

def _write_docstring(self, node):
def esc_char(c):
if c in ("\n", "\t"):
# In the AST form, we don't know the author's intentation
# about how this should be displayed. We'll only escape
# \n and \t, because they are more likely to be unescaped
# in the source
return c
return c.encode('unicode_escape').decode('ascii')

self.fill()
if node.kind == "u":
self.write("u")

value = node.value
if value:
# Preserve quotes in the docstring by escaping them
value = "".join(map(esc_char, value))
if value[-1] == '"':
value = value.replace('"', '\\"', -1)
value = value.replace('"""', '""\\"')

self.write(f'"""{value}"""')
self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES)

def _write_constant(self, value):
if isinstance(value, (float, complex)):
# Substitute overflowing decimal literal for AST infinities.
self.write(repr(value).replace("inf", _INFSTR))
elif self._avoid_backslashes and isinstance(value, str):
self._write_str_avoiding_backslashes(value)
else:
self.write(repr(value))

Expand Down
42 changes: 29 additions & 13 deletions Lib/test/test_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,18 @@ def test_fstrings(self):
# See issue 25180
self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""")
self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""")
self.check_ast_roundtrip("""f''""")
self.check_ast_roundtrip('''f"""'end' "quote\\""""''')

def test_fstrings_complicated(self):
# See issue 28002
self.check_ast_roundtrip("""f'''{"'"}'''""")
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'single quote\\'\'\'\'''')
self.check_ast_roundtrip('f"""{\'\'\'\n\'\'\'}"""')
self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""')
self.check_ast_roundtrip('''f"a\\r\\nb"''')
self.check_ast_roundtrip('''f"\\u2028{'x'}"''')

def test_strings(self):
self.check_ast_roundtrip("u'foo'")
Expand Down Expand Up @@ -311,6 +323,9 @@ def test_invalid_fstring_conversion(self):
)
)

def test_invalid_fstring_backslash(self):
self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\")))

def test_invalid_set(self):
self.check_invalid(ast.Set(elts=[]))

Expand All @@ -330,8 +345,8 @@ def test_docstrings(self):
'\r\\r\t\\t\n\\n',
'""">>> content = \"\"\"blabla\"\"\" <<<"""',
r'foo\n\x00',
'🐍⛎𩸽üéş^\N{LONG RIGHTWARDS SQUIGGLE ARROW}'

"' \\'\\'\\'\"\"\" \"\"\\'\\' \\'",
'🐍⛎𩸽üéş^\\\\X\\\\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
)
for docstring in docstrings:
# check as Module docstrings for easy testing
Expand Down Expand Up @@ -416,7 +431,6 @@ def test_simple_expressions_parens(self):
self.check_src_roundtrip("call((yield x))")
self.check_src_roundtrip("return x + (yield x)")


def test_class_bases_and_keywords(self):
self.check_src_roundtrip("class X:\n pass")
self.check_src_roundtrip("class X(A):\n pass")
Expand All @@ -429,6 +443,13 @@ def test_class_bases_and_keywords(self):
self.check_src_roundtrip("class X(*args):\n pass")
self.check_src_roundtrip("class X(*args, **kwargs):\n pass")

def test_fstrings(self):
self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
self.check_src_roundtrip('''f"\\u2028{'x'}"''')
self.check_src_roundtrip(r"f'{x}\n'")
self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''')
self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''')

def test_docstrings(self):
docstrings = (
'"""simple doc string"""',
Expand All @@ -443,6 +464,10 @@ def test_docstrings(self):
'""""""',
'"""\'\'\'"""',
'"""\'\'\'\'\'\'"""',
'"""🐍⛎𩸽üéş^\\\\X\\\\BB⟿"""',
'"""end in single \'quote\'"""',
"'''end in double \"quote\"'''",
'"""almost end in double "quote"."""',
)

for prefix in docstring_prefixes:
Expand Down Expand Up @@ -483,9 +508,8 @@ class DirectoryTestCase(ASTTestCase):

lib_dir = pathlib.Path(__file__).parent / ".."
test_directories = (lib_dir, lib_dir / "test")
skip_files = {"test_fstring.py"}
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
"test_ast.py", "test_asdl_parser.py"}
"test_ast.py", "test_asdl_parser.py", "test_fstring.py"}

_files_to_test = None

Expand Down Expand Up @@ -525,14 +549,6 @@ def test_files(self):
if test.support.verbose:
print(f"Testing {item.absolute()}")

# Some f-strings are not correctly round-tripped by
# Tools/parser/unparse.py. See issue 28002 for details.
# We need to skip files that contain such f-strings.
if item.name in self.skip_files:
if test.support.verbose:
print(f"Skipping {item.absolute()}: see issue 28002")
continue

with self.subTest(filename=item):
source = read_pyfile(item)
self.check_ast_roundtrip(source)
Expand Down