5
5
from concurrent .futures import Executor , ProcessPoolExecutor
6
6
from contextlib import contextmanager
7
7
from datetime import datetime
8
- from enum import Enum
8
+ from enum import auto , Enum
9
9
from functools import lru_cache , partial , wraps
10
10
import io
11
11
import itertools
@@ -2872,7 +2872,7 @@ def do_match(self, line: Line) -> FixMatchResult:
2872
2872
return Ok (i )
2873
2873
2874
2874
cant_fix = CantFix (
2875
- f"This line ({ self .line_str } ) has no strings that need merging."
2875
+ f"This line ({ self .line_str !r } ) has no strings that need merging."
2876
2876
)
2877
2877
return Err (cant_fix )
2878
2878
@@ -3131,6 +3131,116 @@ def __validate_msg(line: Line, string_idx: int) -> FixResult[None]:
3131
3131
return Ok (None )
3132
3132
3133
3133
3134
class STP_State(Enum):
    """(S)tring (T)railer (P)arser State"""

    # No leaf after the string has been consumed yet.
    START = auto()

    # Method-call trailer: `.` then a name (then an argument list).
    DOT = auto()
    NAME = auto()

    # Old-style formatting trailer: `%` then either one leaf or `(...)`.
    PERCENT = auto()
    SINGLE_FMT_ARG = auto()

    # Inside / just past a parenthesized argument list.
    LPAR = auto()
    RPAR = auto()

    # Terminal states.
    DONE = auto()
    ERROR = auto()


class StringTrailerParser:
    """
    A state machine that aids in parsing a string's "trailer", which can be
    either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
    (varX, varY)`), or a method call (e.g. `.format(varX, varY)`).

    The transition table `self.goto` maps `(state, token type)` to the next
    state. A key whose token type is `DEFAULT_TOKEN` is the fallback used when
    no entry exists for the actual token type. A (state, token) pair with
    neither a specific nor a fallback entry is a parse error.

    NOTE: A new StringTrailerParser object must be instantiated for each string
    trailer we parse.

    NOTE: An unparenthesized `%` argument is treated as exactly one leaf; the
    leaf after it always ends the trailer.
    """

    # Wildcard token type used in `goto` keys to mean "any other token".
    DEFAULT_TOKEN = -1

    def __init__(self) -> None:
        self.state = STP_State.START
        self.unmatched_lpars = 0

        # A plain dict (not a defaultdict) so that failed lookups never insert
        # entries; the fallback logic in `_parse` relies on key membership.
        self.goto: Dict[Tuple[STP_State, int], STP_State] = {
            (STP_State.START, token.DOT): STP_State.DOT,
            (STP_State.START, token.PERCENT): STP_State.PERCENT,
            (STP_State.START, self.DEFAULT_TOKEN): STP_State.DONE,
            (STP_State.DOT, token.NAME): STP_State.NAME,
            (STP_State.NAME, token.LPAR): STP_State.LPAR,
            (STP_State.PERCENT, token.LPAR): STP_State.LPAR,
            (STP_State.PERCENT, self.DEFAULT_TOKEN): STP_State.SINGLE_FMT_ARG,
            (STP_State.SINGLE_FMT_ARG, self.DEFAULT_TOKEN): STP_State.DONE,
            (STP_State.RPAR, self.DEFAULT_TOKEN): STP_State.DONE,
        }

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * `leaves[string_idx].type == token.STRING`

        Returns:
            The index directly after the last leaf which is a part of the
            string trailer, if a "trailer" exists.
                OR
            string_idx + 1, if no string "trailer" exists.

        Raises:
            RuntimeError: if the leaves after the string do not form a trailer
            this parser understands (e.g. a `.` not followed by a name).
        """
        assert leaves[string_idx].type == token.STRING

        idx = string_idx + 1
        while idx < len(leaves) and self._parse(leaves[idx]):
            idx += 1
        return idx

    def _parse(self, leaf: Leaf) -> bool:
        """
        Pre-conditions:
            * On the first call to this function @leaf MUST be the leaf that
            was directly after the string leaf in question (e.g. if our target
            string is `line.leaves[i]` then the first call to this method must
            be `line.leaves[i + 1]`).
            * On the next call to this function, the leaf parameter passed in
            MUST be the leaf directly following @leaf.

        Returns:
            True iff @leaf is a part of the string's trailer.

        Raises:
            RuntimeError: if there is no transition (specific or fallback) out
            of the current state for @leaf's token type.
        """
        # Parentheses with an empty value are skipped without touching state.
        if leaf.type in (token.LPAR, token.RPAR) and leaf.value == "":
            return True

        next_token = leaf.type
        if next_token == token.LPAR:
            self.unmatched_lpars += 1

        last_state = self.state

        # Inside an argument list every leaf belongs to the trailer; we only
        # watch for the RPAR that balances the opening LPAR.
        if last_state == STP_State.LPAR:
            if next_token == token.RPAR:
                self.unmatched_lpars -= 1
                if self.unmatched_lpars == 0:
                    self.state = STP_State.RPAR
            return True

        # Specific transition first, then the state's wildcard fallback.
        next_state = self.goto.get((last_state, next_token))
        if next_state is None:
            next_state = self.goto.get((last_state, self.DEFAULT_TOKEN))

        if next_state is None:
            self.state = STP_State.ERROR
            raise RuntimeError(
                f"{self.__class__.__name__} ERROR! No transition from state"
                f" {last_state.name} on token type {next_token}."
            )

        self.state = next_state
        return next_state != STP_State.DONE
3242
+
3243
+
3134
3244
class StringParensStripper (StringFixer ):
3135
3245
"""StringFixer that strips surrounding parentheses from strings.
3136
3246
@@ -3150,45 +3260,43 @@ class StringParensStripper(StringFixer):
3150
3260
def do_match(self, line: Line) -> FixMatchResult:
    """Locate the first string leaf that is wrapped in its own parentheses.

    A string leaf qualifies when all of the following hold:
        * the leaf directly before it is an LPAR,
        * the leaf two positions before it is not a NAME (otherwise the
          LPAR would be opening a call's argument list),
        * it is not the last leaf of the line, and
        * the leaf directly after the string's trailer (as determined by
          a fresh StringTrailerParser) is an RPAR whose value is ")".

    Returns:
        Ok(index of the qualifying string leaf)
            OR
        Err(CantFix), if the qualifying string's RPAR is directly followed
        by a DOT, or if no string leaf on the line qualifies.
    """
    leaves = line.leaves
    leaf_count = len(leaves)

    for idx, leaf in enumerate(leaves):
        if leaf.type != token.STRING:
            continue

        # Must be immediately preceded by an opening paren...
        if idx == 0 or leaves[idx - 1].type != token.LPAR:
            continue

        # ...that is not itself preceded by a name.
        if idx >= 2 and leaves[idx - 2].type == token.NAME:
            continue

        # Nothing follows the string, so there can be no closing paren.
        if idx + 1 >= leaf_count:
            continue

        closer_idx = StringTrailerParser().parse(leaves, idx)
        if closer_idx >= leaf_count:
            continue

        closer = leaves[closer_idx]
        if closer.type != token.RPAR or closer.value != ")":
            continue

        follower_idx = closer_idx + 1
        if follower_idx < leaf_count and leaves[follower_idx].type == token.DOT:
            return Err(
                CantFix(
                    "String is wrapped in parens, but the RPAR is directly followed"
                    " by a dot, which is a deal breaker."
                )
            )

        return Ok(idx)

    return Err(
        CantFix(f"This line ({self.line_str!r}) has no strings wrapped in parens.")
    )
3192
3300
3193
3301
def do_transform (self , line : Line , string_idx : int ) -> Iterator [FixResult [Line ]]:
3194
3302
LL = line .leaves
0 commit comments