diff --git a/CHANGES.md b/CHANGES.md index b39f9ab4f54..79e7b0b1444 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -30,6 +30,8 @@ +- Fix bad performance on certain complex string literals (#4331) + ### Output diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index d6b684ab1aa..fd0b5564f43 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -119,13 +119,13 @@ def _combinations(*l: str) -> Set[str]: Number = group(Imagnumber, Floatnumber, Intnumber) # Tail end of ' string. -Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +Single = r"(?:\\.|[^'\\])*'" # Tail end of " string. -Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +Double = r'(?:\\.|[^"\\])*"' # Tail end of ''' string. -Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''" # Tail end of """ string. -Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""' _litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?" _fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)" Triple = group( @@ -136,12 +136,12 @@ def _combinations(*l: str) -> Set[str]: ) # beginning of a single quoted f-string. must not end with `{{` or `\N{` -SingleLbrace = r"[^'\\{]*(?:(?:\\N{|\\.|{{)[^'\\{]*)*(? Set[str]: Special = group(r"\r?\n", r"[:;.,`@]") Funny = group(Operator, Bracket, Special) -_string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*" -_string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*' +_string_middle_single = r"(?:[^\n'\\]|\\.)*" +_string_middle_double = r'(?:[^\n"\\]|\\.)*' # FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{` -_fstring_middle_single = r"[^\n'{]*(?:(?:\\N{|\\[^{]|{{)[^\n'{]*)*(?