Skip to content

Commit 459eb35

Browse files
committed Feb 22, 2020
[add] StringTrailerParser class and convert StringParensStripper.do_match(...) to use it
1 parent 9796abb commit 459eb35

File tree

1 file changed

+140
-32
lines changed

1 file changed

+140
-32
lines changed
 

‎black.py

Lines changed: 140 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from concurrent.futures import Executor, ProcessPoolExecutor
66
from contextlib import contextmanager
77
from datetime import datetime
8-
from enum import Enum
8+
from enum import auto, Enum
99
from functools import lru_cache, partial, wraps
1010
import io
1111
import itertools
@@ -2872,7 +2872,7 @@ def do_match(self, line: Line) -> FixMatchResult:
28722872
return Ok(i)
28732873

28742874
cant_fix = CantFix(
2875-
f"This line ({self.line_str}) has no strings that need merging."
2875+
f"This line ({self.line_str!r}) has no strings that need merging."
28762876
)
28772877
return Err(cant_fix)
28782878

@@ -3131,6 +3131,116 @@ def __validate_msg(line: Line, string_idx: int) -> FixResult[None]:
31313131
return Ok(None)
31323132

31333133

3134+
class STP_State(Enum):
    """States of the (S)tring (T)railer (P)arser state machine.

    The numeric values come from `auto()` and carry no meaning of their own;
    members are only ever compared with each other.
    """

    # Initial state: no leaf after the string has been consumed yet.
    START = auto()

    # Method-call trailer: a `.` was consumed, then the NAME that follows it.
    DOT = auto()
    NAME = auto()

    # Old-style formatting trailer: a `%` was consumed, then a lone
    # (unparenthesized) format argument.
    PERCENT = auto()
    SINGLE_FMT_ARG = auto()

    # Inside a parenthesized argument list, then directly after the RPAR
    # that closed it.
    LPAR = auto()
    RPAR = auto()

    # Terminal states: the trailer is finished / no transition was found.
    DONE = auto()
    ERROR = auto()
3151+
3152+
class StringTrailerParser:
    """
    A state machine that aids in parsing a string's "trailer", which can be
    either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
    (varX, varY)`), or a method call (e.g. `.format(varX, varY)`).

    NOTE: `parse(...)` resets the machine before it starts, so a single
    StringTrailerParser object may be reused for several strings. Code that
    drives `_parse(...)` directly must still use a fresh (or freshly reset)
    parser for each trailer.

    NOTE: A `.NAME` that is not followed by an opening paren (i.e. a bare
    attribute access) has no transition in the table, so `_parse(...)` raises
    a RuntimeError when it meets the leaf that follows the NAME.
    """

    def __init__(self) -> None:
        self.state = STP_State.START
        self.unmatched_lpars = 0

        # Transition table keyed by (current state, token type). A token type
        # of -1 is the "any other token" fallback for that state. It stays a
        # defaultdict so direct subscripting keeps yielding ERROR for unknown
        # keys, but the parser itself only reads it through `.get(...)` / `in`
        # so that lookups never insert new entries.
        self.goto: Dict[Tuple[STP_State, int], STP_State] = defaultdict(
            lambda: STP_State.ERROR
        )

        self.goto[STP_State.START, token.DOT] = STP_State.DOT
        self.goto[STP_State.START, token.PERCENT] = STP_State.PERCENT
        self.goto[STP_State.START, -1] = STP_State.DONE

        self.goto[STP_State.DOT, token.NAME] = STP_State.NAME

        self.goto[STP_State.NAME, token.LPAR] = STP_State.LPAR

        self.goto[STP_State.PERCENT, token.LPAR] = STP_State.LPAR
        self.goto[STP_State.PERCENT, -1] = STP_State.SINGLE_FMT_ARG
        self.goto[STP_State.SINGLE_FMT_ARG, -1] = STP_State.DONE

        self.goto[STP_State.RPAR, -1] = STP_State.DONE

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * `leaves[string_idx].type == token.STRING`

        Returns:
            The index directly after the last leaf which is a part of the
            string trailer, if a "trailer" exists.
                OR
            string_idx + 1, if no string "trailer" exists.

        Raises:
            RuntimeError, if a leaf is encountered for which the state machine
            has neither a specific nor a fallback transition.
        """
        assert leaves[string_idx].type == token.STRING

        # Start from a clean slate so that state left over from a previous
        # call cannot leak into this one.
        self.state = STP_State.START
        self.unmatched_lpars = 0

        idx = string_idx + 1
        while idx < len(leaves) and self._parse(leaves[idx]):
            idx += 1
        return idx

    def _parse(self, leaf: Leaf) -> bool:
        """
        Pre-conditions:
            * On the first call to this function @leaf MUST be the leaf that
            was directly after the string leaf in question (e.g. if our target
            string is `line.leaves[i]` then the first call to this method must
            be `line.leaves[i + 1]`).
            * On the next call to this function, the leaf parameter passed in
            MUST be the leaf directly following @leaf.

        Returns:
            True iff @leaf is a part of the string's trailer.

        Raises:
            RuntimeError, if the current state has no transition for @leaf's
            token type and no `-1` fallback transition either.
        """
        # Empty-valued parens are skipped without touching the state.
        if leaf.type in (token.LPAR, token.RPAR) and leaf.value == "":
            return True

        next_token = leaf.type
        if next_token == token.LPAR:
            self.unmatched_lpars += 1

        last_state = self.state
        if last_state == STP_State.LPAR:
            # Inside the argument list everything is consumed; only the RPAR
            # that balances the first LPAR moves the machine forward.
            if next_token == token.RPAR:
                self.unmatched_lpars -= 1
                if self.unmatched_lpars == 0:
                    self.state = STP_State.RPAR
        else:
            # `.get` (rather than subscripting the defaultdict) so that a miss
            # does not add an ERROR entry to the transition table.
            self.state = self.goto.get((last_state, next_token), STP_State.ERROR)

        if self.state == STP_State.ERROR:
            # No specific transition: fall back to the state's wildcard, if any.
            if (last_state, -1) in self.goto:
                self.state = self.goto[last_state, -1]
            else:
                raise RuntimeError(f"{self.__class__.__name__} ERROR!")

        return self.state != STP_State.DONE
3242+
3243+
31343244
class StringParensStripper(StringFixer):
31353245
"""StringFixer that strips surrounding parentheses from strings.
31363246
@@ -3150,45 +3260,43 @@ class StringParensStripper(StringFixer):
31503260
def do_match(self, line: Line) -> FixMatchResult:
    """Find a string in @line whose surrounding parentheses can be stripped.

    A string leaf qualifies when ALL of the following hold:
        * It is directly preceded by an LPAR leaf.
        * That LPAR is not directly preceded by a NAME leaf (i.e. it does
        not look like a function call).
        * The string's trailer (see StringTrailerParser) is directly followed
        by a `)` RPAR leaf.
        * That RPAR is not directly followed by a DOT leaf.

    Returns:
        Ok(i), where `line.leaves[i]` is the first qualifying string leaf.
            OR
        Err(CantFix), if no string leaf qualifies. When the only thing that
        disqualified some candidate was a DOT after its RPAR, the error says
        so; otherwise it reports that no string is wrapped in parens.
    """
    LL = line.leaves

    # Set when a candidate was rejected solely because its RPAR was followed
    # by a dot. We keep scanning (a later string may still qualify) and only
    # report the dot-specific error if nothing else matched.
    saw_rpar_followed_by_dot = False

    for (i, leaf) in enumerate(LL):
        if leaf.type != token.STRING:
            continue

        # The string must sit directly after an opening paren...
        if i == 0 or LL[i - 1].type != token.LPAR:
            continue

        # ...and that paren must not belong to something like `foo("...")`.
        if i >= 2 and LL[i - 2].type == token.NAME:
            continue

        # When the string is the last leaf this is simply `len(LL)`, which
        # fails the bounds check below.
        after_string_idx = StringTrailerParser().parse(LL, i)

        if not (
            after_string_idx < len(LL)
            and LL[after_string_idx].type == token.RPAR
            and LL[after_string_idx].value == ")"
        ):
            continue

        if (
            after_string_idx + 1 < len(LL)
            and LL[after_string_idx + 1].type == token.DOT
        ):
            saw_rpar_followed_by_dot = True
            continue

        return Ok(i)

    if saw_rpar_followed_by_dot:
        cant_fix = CantFix(
            "String is wrapped in parens, but the RPAR is directly followed"
            " by a dot, which is a deal breaker."
        )
        return Err(cant_fix)

    cant_fix = CantFix(
        f"This line ({self.line_str!r}) has no strings wrapped in parens."
    )
    return Err(cant_fix)
31923300

31933301
def do_transform(self, line: Line, string_idx: int) -> Iterator[FixResult[Line]]:
31943302
LL = line.leaves

0 commit comments

Comments
 (0)