Skip to content

Commit

Permalink
Add PEP 701 support (#3822)
Browse files Browse the repository at this point in the history
Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Co-authored-by: hauntsaninja <hauntsaninja@gmail.com>
Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
  • Loading branch information
4 people authored Apr 22, 2024
1 parent 944b99a commit 551ede2
Show file tree
Hide file tree
Showing 16 changed files with 941 additions and 102 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

<!-- Include any especially major or disruptive changes here -->

- Add support for the new Python 3.12 f-string syntax introduced by PEP 701 (#3822)

### Stable style

<!-- Changes that affect Black's stable style -->
Expand Down
31 changes: 13 additions & 18 deletions src/black/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,7 @@
from black.mode import FUTURE_FLAG_TO_FEATURE, VERSION_TO_FEATURES, Feature
from black.mode import Mode as Mode # re-exported
from black.mode import Preview, TargetVersion, supports_feature
from black.nodes import (
STARS,
is_number_token,
is_simple_decorator_expression,
is_string_token,
syms,
)
from black.nodes import STARS, is_number_token, is_simple_decorator_expression, syms
from black.output import color_diff, diff, dump_to_file, err, ipynb_diff, out
from black.parsing import ( # noqa F401
ASTSafetyError,
Expand All @@ -91,7 +85,6 @@
sanitized_lines,
)
from black.report import Changed, NothingChanged, Report
from black.trans import iter_fexpr_spans
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, Node

Expand Down Expand Up @@ -1265,7 +1258,10 @@ def _format_str_once(
elt = EmptyLineTracker(mode=mode)
split_line_features = {
feature
for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
for feature in {
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
}
if supports_feature(versions, feature)
}
block: Optional[LinesBlock] = None
Expand Down Expand Up @@ -1337,15 +1333,14 @@ def get_features_used( # noqa: C901
}

for n in node.pre_order():
if is_string_token(n):
value_head = n.value[:2]
if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
features.add(Feature.F_STRINGS)
if Feature.DEBUG_F_STRINGS not in features:
for span_beg, span_end in iter_fexpr_spans(n.value):
if n.value[span_beg : span_end - 1].rstrip().endswith("="):
features.add(Feature.DEBUG_F_STRINGS)
break
if n.type == token.FSTRING_START:
features.add(Feature.F_STRINGS)
elif (
n.type == token.RBRACE
and n.parent is not None
and any(child.type == token.EQUAL for child in n.parent.children)
):
features.add(Feature.DEBUG_F_STRINGS)

elif is_number_token(n):
if "_" in n.value:
Expand Down
45 changes: 45 additions & 0 deletions src/black/linegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,45 @@ def visit_NUMBER(self, leaf: Leaf) -> Iterator[Line]:
normalize_numeric_literal(leaf)
yield from self.visit_default(leaf)

def visit_fstring(self, node: Node) -> Iterator[Line]:
    """Emit lines for an `fstring` node by handling it as one STRING leaf.

    The node is collapsed into a single leaf with `_fstring_to_string`, that
    leaf is passed to `node.replace`, and the leaf is then processed by
    `visit_STRING` exactly like an ordinary string literal.
    """
    # For the time being f-strings are neither formatted internally nor split,
    # so the structured node is flattened before any further processing.
    flattened = _fstring_to_string(node)
    node.replace(flattened)
    yield from self.visit_STRING(flattened)

    # TODO: Uncomment Implementation to format f-string children
    # fstring_start = node.children[0]
    # fstring_end = node.children[-1]
    # assert isinstance(fstring_start, Leaf)
    # assert isinstance(fstring_end, Leaf)

    # quote_char = fstring_end.value[0]
    # quote_idx = fstring_start.value.index(quote_char)
    # prefix, quote = (
    #     fstring_start.value[:quote_idx],
    #     fstring_start.value[quote_idx:]
    # )

    # if not is_docstring(node, self.mode):
    #     prefix = normalize_string_prefix(prefix)

    # assert quote == fstring_end.value

    # is_raw_fstring = "r" in prefix or "R" in prefix
    # middles = [
    #     leaf
    #     for leaf in node.leaves()
    #     if leaf.type == token.FSTRING_MIDDLE
    # ]

    # if self.mode.string_normalization:
    #     middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring)

    # fstring_start.value = prefix + quote
    # fstring_end.value = quote

    # yield from self.visit_default(node)

def __post_init__(self) -> None:
"""You are in a twisty little maze of passages."""
self.current_line = Line(mode=self.mode)
Expand Down Expand Up @@ -535,6 +574,12 @@ def __post_init__(self) -> None:
self.visit_guard = partial(v, keywords=Ø, parens={"if"})


def _fstring_to_string(node: Node) -> Leaf:
    """Collapse an fstring node into a single STRING leaf.

    The returned leaf carries the node's `prefix` as its own prefix and the
    rest of the node's rendered text as its value.
    """
    prefix = node.prefix
    rendered = str(node)
    # Drop the first len(prefix) characters of the rendered text (presumably
    # the prefix itself) so that it is not repeated inside the leaf's value.
    body = rendered[len(prefix) :]
    return Leaf(token.STRING, body, prefix=prefix)


def _hugging_power_ops_line_to_string(
line: Line,
features: Collection[Feature],
Expand Down
7 changes: 6 additions & 1 deletion src/black/lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,12 @@ def append(
Inline comments are put aside.
"""
has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
has_value = (
leaf.type in BRACKETS
# empty fstring-middles must not be truncated
or leaf.type == token.FSTRING_MIDDLE
or bool(leaf.value.strip())
)
if not has_value:
return

Expand Down
2 changes: 2 additions & 0 deletions src/black/mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class Feature(Enum):
DEBUG_F_STRINGS = 16
PARENTHESIZED_CONTEXT_MANAGERS = 17
TYPE_PARAMS = 18
FSTRING_PARSING = 19
FORCE_OPTIONAL_PARENTHESES = 50

# __future__ flags
Expand Down Expand Up @@ -156,6 +157,7 @@ class Feature(Enum):
Feature.EXCEPT_STAR,
Feature.VARIADIC_GENERICS,
Feature.TYPE_PARAMS,
Feature.FSTRING_PARSING,
},
}

Expand Down
46 changes: 28 additions & 18 deletions src/black/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,13 @@
OPENING_BRACKETS: Final = set(BRACKET.keys())
CLOSING_BRACKETS: Final = set(BRACKET.values())
BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS
ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {
token.COMMA,
STANDALONE_COMMENT,
token.FSTRING_MIDDLE,
token.FSTRING_END,
token.BANG,
}

RARROW = 55

Expand Down Expand Up @@ -211,6 +217,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
}:
return NO

if t == token.LBRACE and p.type == syms.fstring_replacement_field:
return NO

prev = leaf.prev_sibling
if not prev:
prevp = preceding_leaf(p)
Expand Down Expand Up @@ -272,6 +281,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
elif prev.type in OPENING_BRACKETS:
return NO

elif prev.type == token.BANG:
return NO

if p.type in {syms.parameters, syms.arglist}:
# untyped function signatures or calls
if not prev or prev.type != token.COMMA:
Expand Down Expand Up @@ -393,6 +405,7 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
return NO

# TODO: add fstring here?
elif t in {token.NAME, token.NUMBER, token.STRING}:
return NO

Expand Down Expand Up @@ -542,31 +555,32 @@ def is_arith_like(node: LN) -> bool:
}


def is_docstring(leaf: Leaf, mode: Mode) -> bool:
if leaf.type != token.STRING:
return False
def is_docstring(node: NL, mode: Mode) -> bool:
if isinstance(node, Leaf):
if node.type != token.STRING:
return False

prefix = get_string_prefix(leaf.value)
if set(prefix).intersection("bBfF"):
return False
prefix = get_string_prefix(node.value)
if set(prefix).intersection("bBfF"):
return False

if (
Preview.unify_docstring_detection in mode
and leaf.parent
and leaf.parent.type == syms.simple_stmt
and not leaf.parent.prev_sibling
and leaf.parent.parent
and leaf.parent.parent.type == syms.file_input
and node.parent
and node.parent.type == syms.simple_stmt
and not node.parent.prev_sibling
and node.parent.parent
and node.parent.parent.type == syms.file_input
):
return True

if prev_siblings_are(
leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
node.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
):
return True

# Multiline docstring on the same line as the `def`.
if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
if prev_siblings_are(node.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
# `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
# grammar. We're safe to return True without further checks.
return True
Expand Down Expand Up @@ -954,10 +968,6 @@ def is_rpar_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.RPAR


def is_string_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.STRING


def is_number_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.NUMBER

Expand Down
71 changes: 68 additions & 3 deletions src/black/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re
import sys
from functools import lru_cache
from typing import Final, List, Match, Pattern
from typing import Final, List, Match, Pattern, Tuple

from black._width_table import WIDTH_TABLE
from blib2to3.pytree import Leaf
Expand Down Expand Up @@ -169,8 +169,7 @@ def _cached_compile(pattern: str) -> Pattern[str]:
def normalize_string_quotes(s: str) -> str:
"""Prefer double quotes but only if it doesn't cause more escaping.
Adds or removes backslashes as appropriate. Doesn't parse and fix
strings nested in f-strings.
Adds or removes backslashes as appropriate.
"""
value = s.lstrip(STRING_PREFIX_CHARS)
if value[:3] == '"""':
Expand Down Expand Up @@ -211,6 +210,7 @@ def normalize_string_quotes(s: str) -> str:
s = f"{prefix}{orig_quote}{body}{orig_quote}"
new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)

if "f" in prefix.casefold():
matches = re.findall(
r"""
Expand Down Expand Up @@ -240,6 +240,71 @@ def normalize_string_quotes(s: str) -> str:
return f"{prefix}{new_quote}{new_body}{new_quote}"


def normalize_fstring_quotes(
    quote: str,
    middles: List[Leaf],
    is_raw_fstring: bool,
) -> Tuple[List[Leaf], str]:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate.

    Args:
        quote: the quote sequence that currently delimits the f-string
            (expected to be one or three single or double quote characters).
        middles: the FSTRING_MIDDLE leaves of the f-string. Their `value`
            attributes are rewritten in place when escapes are changed.
        is_raw_fstring: whether the f-string has a raw prefix; if so, no
            backslash is ever added or removed.

    Returns:
        The same `middles` list together with the quote sequence that should
        delimit the f-string (either `quote` itself or its replacement).
    """
    if quote == '"""':
        return middles, quote

    elif quote == "'''":
        new_quote = '"""'
    elif quote == '"':
        new_quote = "'"
    else:
        new_quote = '"'

    if not middles:
        # Nothing can need escaping, so the escape-count comparison below has
        # nothing to look at. Only switch when that moves towards double
        # quotes; never turn an existing double quote into a single one.
        return middles, quote if quote == '"' else new_quote

    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    if is_raw_fstring:
        if any(unescaped_new_quote.search(middle.value) for middle in middles):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return middles, quote

        if quote == '"':
            # Already using the preferred quote.
            return middles, quote

        if new_quote == '"""' and middles[-1].value.endswith('"'):
            # A trailing `"` would merge with the new closing triple quote and
            # it cannot be escaped in a raw string, so keep the original.
            return middles, quote

        # Do not introduce or remove backslashes in raw strings; only swap the
        # delimiter (which is `"""` for a triple-quoted string, not `"`).
        return middles, new_quote

    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){quote}")

    new_segments = []
    for middle in middles:
        segment = middle.value
        # remove unnecessary escapes
        new_segment = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", segment)
        if segment != new_segment:
            # Consider the string without unnecessary escapes as the original
            middle.value = new_segment

        new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment)
        new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment)
        new_segments.append(new_segment)

    if new_quote == '"""' and new_segments[-1].endswith('"'):
        # edge case: a final `"` would run into the closing `"""`, so it has
        # to be escaped.
        new_segments[-1] = new_segments[-1][:-1] + '\\"'

    for middle, new_segment in zip(middles, new_segments):
        orig_escape_count = middle.value.count("\\")
        new_escape_count = new_segment.count("\\")

        if new_escape_count > orig_escape_count:
            return middles, quote  # Do not introduce more escaping

        if new_escape_count == orig_escape_count and quote == '"':
            return middles, quote  # Prefer double quotes

    for middle, new_segment in zip(middles, new_segments):
        middle.value = new_segment

    return middles, new_quote


def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
"""Replace hex codes in Unicode escape sequences with lowercase representation."""
text = leaf.value
Expand Down
7 changes: 6 additions & 1 deletion src/blib2to3/Grammar.txt
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' |
'[' [listmaker] ']' |
'{' [dictsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
NAME | NUMBER | (STRING | fstring)+ | '.' '.' '.')
listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
lambdef: 'lambda' [varargslist] ':' test
Expand Down Expand Up @@ -254,3 +254,8 @@ case_block: "case" patterns [guard] ':' suite
guard: 'if' namedexpr_test
patterns: pattern (',' pattern)* [',']
pattern: (expr|star_expr) ['as' expr]

fstring: FSTRING_START fstring_middle* FSTRING_END
fstring_middle: fstring_replacement_field | FSTRING_MIDDLE
fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' fstring_format_spec* ] '}'
fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field
4 changes: 3 additions & 1 deletion src/blib2to3/pgen2/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,9 @@ def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) ->
if type in {token.INDENT, token.DEDENT}:
prefix = _prefix
lineno, column = end
if value.endswith("\n"):
# FSTRING_MIDDLE is the only token that can end with a newline, and
# `end` will point to the next line. For that case, don't increment lineno.
if value.endswith("\n") and type != token.FSTRING_MIDDLE:
lineno += 1
column = 0
else:
Expand Down
1 change: 1 addition & 0 deletions src/blib2to3/pgen2/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def report(self) -> None:
//= DOUBLESLASHEQUAL
-> RARROW
:= COLONEQUAL
! BANG
"""

opmap = {}
Expand Down
Loading

0 comments on commit 551ede2

Please sign in to comment.