Skip to content

Commit

Permalink
Simplify string tokenization regexes (#4331)
Browse files (browse the repository at this point in the history)
  • Loading branch information
JelleZijlstra authored Apr 25, 2024
1 parent 5683242 commit ba88fc3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

<!-- Changes that improve Black's performance. -->

- Fix bad performance on certain complex string literals (#4331)

### Output

<!-- Changes to Black's terminal output and error messages -->
Expand Down
24 changes: 12 additions & 12 deletions src/blib2to3/pgen2/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,13 @@ def _combinations(*l: str) -> Set[str]:
# NOTE(review): `group` and the three number patterns are defined outside this
# excerpt; presumably `group` combines them as regex alternatives -- confirm.
Number = group(Imagnumber, Floatnumber, Intnumber)

# The four "tail end" patterns below match the remainder of a string literal
# up to and including its closing delimiter.
#
# Each name is assigned twice in this excerpt. The first assignment is the
# older "unrolled" form: a run of ordinary characters followed by repeated
# (escape + run) groups, i.e. a `*` nested inside another `*`. The second
# assignment, which is the one that takes effect, is a single flat alternation
# that consumes either one escape pair (`\\.`) or one ordinary character per
# iteration.
# NOTE(review): presumably the nested-quantifier form is what caused the bad
# performance on complex string literals mentioned in the changelog -- confirm.

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
Single = r"(?:\\.|[^'\\])*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
Double = r'(?:\\.|[^"\\])*"'
# Tail end of ''' string.
# A lone quote is allowed in the body only when it is not followed by two more
# quotes (`'(?!'')`), so the body cannot run past the closing `'''`.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
# Tail end of """ string.
# Same structure as Single3, with `"` as the quote character.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
# Optional literal prefix: a single u/U/r/R/b/B, or r/R followed by b/B, or
# b/B/u/U followed by r/R.
_litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?"
# Mandatory f-string prefix: f/F alone, or f/F combined with r/R in either
# order and any mix of upper and lower case.
_fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)"
Triple = group(
Expand All @@ -136,12 +136,12 @@ def _combinations(*l: str) -> Set[str]:
)

# Beginning of a single-quoted f-string, ending at a `{`. The match must not
# end with `{{` or `\N{`: the trailing `(?<!\\N){(?!{)` rejects a `{` that is
# preceded by `\N` or followed by another `{`.
# The body before that `{` may contain `\N{`, any other backslash escape
# (`\\.`), a doubled `{{`, or ordinary characters (anything except the quote,
# a backslash or `{`).
# Each name is assigned twice in this excerpt: the older unrolled form first,
# then the flat-alternation replacement, which is the one that takes effect.
SingleLbrace = r"[^'\\{]*(?:(?:\\N{|\\.|{{)[^'\\{]*)*(?<!\\N){(?!{)"
DoubleLbrace = r'[^"\\{]*(?:(?:\\N{|\\.|{{)[^"\\{]*)*(?<!\\N){(?!{)'
SingleLbrace = r"(?:\\N{|\\.|{{|[^'\\{])*(?<!\\N){(?!{)"
DoubleLbrace = r'(?:\\N{|\\.|{{|[^"\\{])*(?<!\\N){(?!{)'

# Beginning of a triple-quoted f-string. Must not end with `{{` or `\N{`.
# Differences from the single-quoted patterns above: a lone quote is allowed in
# the body when it is not followed by two more quotes, and the escape
# alternative is `\\[^{]` (a backslash followed by anything except `{`).
# NOTE(review): the replacement patterns add `\\` to the excluded character
# class. Since the escape alternative is `\\[^{]`, a backslash immediately
# followed by `{` (other than in `\N{`) matches no alternative in the
# replacement, whereas the older form's leading run could consume that
# backslash -- confirm this difference is intended.
Single3Lbrace = r"[^'{]*(?:(?:\\N{|\\[^{]|{{|'(?!''))[^'{]*)*(?<!\\N){(?!{)"
Double3Lbrace = r'[^"{]*(?:(?:\\N{|\\[^{]|{{|"(?!""))[^"{]*)*(?<!\\N){(?!{)'
Single3Lbrace = r"(?:\\N{|\\[^{]|{{|'(?!'')|[^'{\\])*(?<!\\N){(?!{)"
Double3Lbrace = r'(?:\\N{|\\[^{]|{{|"(?!"")|[^"{\\])*(?<!\\N){(?!{)'

# ! format specifier inside an fstring brace, ensure it's not a `!=` token:
# the negative lookahead `(?!=)` rejects a `!` that is immediately followed
# by `=`.
# NOTE(review): `Whitespace` and `group` are defined outside this excerpt;
# presumably `Whitespace` permits optional blanks before the `!` -- confirm.
Bang = Whitespace + group("!") + r"(?!=)"
Expand Down Expand Up @@ -171,12 +171,12 @@ def _combinations(*l: str) -> Set[str]:
# Special: either a line ending (`\n`, optionally preceded by `\r`) or one of
# the single characters  : ; . , ` @
# NOTE(review): `group`, `Operator` and `Bracket` are defined outside this
# excerpt; presumably `group` joins its arguments as regex alternatives --
# confirm.
Special = group(r"\r?\n", r"[:;.,`@]")
# Funny: built from the operator, bracket and special patterns.
Funny = group(Operator, Bracket, Special)

# Body of a single-line ' or " string: zero or more items, each either an
# ordinary character (anything except a newline, the quote or a backslash) or
# a backslash escape pair (`\\.`). No closing quote is included.
# Each name is assigned twice in this excerpt: the older unrolled form first,
# then the flat-alternation replacement, which is the one that takes effect.
_string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*"
_string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*'
_string_middle_single = r"(?:[^\n'\\]|\\.)*"
_string_middle_double = r'(?:[^\n"\\]|\\.)*'

# FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{`
# The body may contain `\N{`, a backslash followed by anything except `{`,
# a doubled `{{`, or ordinary characters. The terminating `{` is captured as
# group 1 and must be neither preceded by `\N` nor followed by another `{`.
# NOTE(review): the replacement patterns add `\\` to the excluded character
# class. Since the escape alternative is `\\[^{]`, a backslash immediately
# followed by `{` (other than in `\N{`) matches no alternative in the
# replacement, whereas the older form's leading run could consume that
# backslash -- confirm this difference is intended.
_fstring_middle_single = r"[^\n'{]*(?:(?:\\N{|\\[^{]|{{)[^\n'{]*)*(?<!\\N)({)(?!{)"
_fstring_middle_double = r'[^\n"{]*(?:(?:\\N{|\\[^{]|{{)[^\n"{]*)*(?<!\\N)({)(?!{)'
_fstring_middle_single = r"(?:\\N{|\\[^{]|{{|[^\n'{\\])*(?<!\\N)({)(?!{)"
_fstring_middle_double = r'(?:\\N{|\\[^{]|{{|[^\n"{\\])*(?<!\\N)({)(?!{)'

# First (or only) line of ' or " string.
ContStr = group(
Expand Down
4 changes: 4 additions & 0 deletions tests/data/cases/pep_701.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@
level=0,
)

f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}'

# output

x = f"foo"
Expand Down Expand Up @@ -240,3 +242,5 @@
f"{self.writer._transport.get_extra_info('peername')}", # type: ignore[attr-defined]
level=0,
)

f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}'

0 comments on commit ba88fc3

Please sign in to comment.