Skip to content

Commit

Permalink
String formatting linting (#443)
Browse files Browse the repository at this point in the history
* Add lint rule for f-strings without placeholders

* Add linting for string.format(...)

* Add linting for % formatting
  • Loading branch information
asottile authored and myint committed Jul 30, 2019
1 parent c0193b2 commit eeb6263
Show file tree
Hide file tree
Showing 3 changed files with 593 additions and 3 deletions.
347 changes: 344 additions & 3 deletions pyflakes/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import functools
import os
import re
import string
import sys
import tokenize

Expand All @@ -29,6 +30,8 @@

builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))

parse_format_string = string.Formatter().parse

if PY2:
tokenize_tokenize = tokenize.generate_tokens
else:
Expand Down Expand Up @@ -81,6 +84,87 @@ def getAlternatives(n):
TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')


MAPPING_KEY_RE = re.compile(r'\(([^()]*)\)')
CONVERSION_FLAG_RE = re.compile('[#0+ -]*')
WIDTH_RE = re.compile(r'(?:\*|\d*)')
PRECISION_RE = re.compile(r'(?:\.(?:\*|\d*))?')
LENGTH_RE = re.compile('[hlL]?')
# https://docs.python.org/3/library/stdtypes.html#old-string-formatting
VALID_CONVERSIONS = frozenset('diouxXeEfFgGcrsa%')


def _must_match(regex, string, pos):
# type: (Pattern[str], str, int) -> Match[str]
match = regex.match(string, pos)
assert match is not None
return match


def parse_percent_format(s): # type: (str) -> Tuple[PercentFormat, ...]
"""Parses the string component of a `'...' % ...` format call
Copied from https://github.com/asottile/pyupgrade at v1.20.1
"""

def _parse_inner():
# type: () -> Generator[PercentFormat, None, None]
string_start = 0
string_end = 0
in_fmt = False

i = 0
while i < len(s):
if not in_fmt:
try:
i = s.index('%', i)
except ValueError: # no more % fields!
yield s[string_start:], None
return
else:
string_end = i
i += 1
in_fmt = True
else:
key_match = MAPPING_KEY_RE.match(s, i)
if key_match:
key = key_match.group(1) # type: Optional[str]
i = key_match.end()
else:
key = None

conversion_flag_match = _must_match(CONVERSION_FLAG_RE, s, i)
conversion_flag = conversion_flag_match.group() or None
i = conversion_flag_match.end()

width_match = _must_match(WIDTH_RE, s, i)
width = width_match.group() or None
i = width_match.end()

precision_match = _must_match(PRECISION_RE, s, i)
precision = precision_match.group() or None
i = precision_match.end()

# length modifier is ignored
i = _must_match(LENGTH_RE, s, i).end()

try:
conversion = s[i]
except IndexError:
raise ValueError('end-of-string while parsing format')
i += 1

fmt = (key, conversion_flag, width, precision, conversion)
yield s[string_start:string_end], fmt

in_fmt = False
string_start = i

if in_fmt:
raise ValueError('end-of-string while parsing format')

return tuple(_parse_inner())


class _FieldsOrder(dict):
"""Fix order of AST node fields."""

Expand Down Expand Up @@ -1241,10 +1325,250 @@ def ignore(self, node):
PASS = ignore

# "expr" type nodes
BOOLOP = BINOP = UNARYOP = IFEXP = SET = \
CALL = REPR = ATTRIBUTE = SUBSCRIPT = \
BOOLOP = UNARYOP = IFEXP = SET = \
REPR = ATTRIBUTE = SUBSCRIPT = \
STARRED = NAMECONSTANT = handleChildren

def _handle_string_dot_format(self, node):
try:
placeholders = tuple(parse_format_string(node.func.value.s))
except ValueError as e:
self.report(messages.StringDotFormatInvalidFormat, node, e)
return

class state: # py2-compatible `nonlocal`
auto = None
next_auto = 0

placeholder_positional = set()
placeholder_named = set()

def _add_key(fmtkey):
"""Returns True if there is an error which should early-exit"""
if fmtkey is None: # end of string or `{` / `}` escapes
return False

# attributes / indices are allowed in `.format(...)`
fmtkey, _, _ = fmtkey.partition('.')
fmtkey, _, _ = fmtkey.partition('[')

try:
fmtkey = int(fmtkey)
except ValueError:
pass
else: # fmtkey was an integer
if state.auto is True:
self.report(messages.StringDotFormatMixingAutomatic, node)
return True
else:
state.auto = False

if fmtkey == '':
if state.auto is False:
self.report(messages.StringDotFormatMixingAutomatic, node)
return True
else:
state.auto = True

fmtkey = state.next_auto
state.next_auto += 1

if isinstance(fmtkey, int):
placeholder_positional.add(fmtkey)
else:
placeholder_named.add(fmtkey)

return False

for _, fmtkey, spec, _ in placeholders:
if _add_key(fmtkey):
return

# spec can also contain format specifiers
if spec is not None:
try:
spec_placeholders = tuple(parse_format_string(spec))
except ValueError as e:
self.report(messages.StringDotFormatInvalidFormat, node, e)
return

for _, spec_fmtkey, spec_spec, _ in spec_placeholders:
# can't recurse again
if spec_spec is not None and '{' in spec_spec:
self.report(
messages.StringDotFormatInvalidFormat,
node,
'Max string recursion exceeded',
)
return
if _add_key(spec_fmtkey):
return

# bail early if there is *args or **kwargs
if (
# python 2.x *args / **kwargs
getattr(node, 'starargs', None) or
getattr(node, 'kwargs', None) or
# python 3.x *args
any(
isinstance(arg, getattr(ast, 'Starred', ()))
for arg in node.args
) or
# python 3.x **kwargs
any(kwd.arg is None for kwd in node.keywords)
):
return

substitution_positional = set(range(len(node.args)))
substitution_named = {kwd.arg for kwd in node.keywords}

extra_positional = substitution_positional - placeholder_positional
extra_named = substitution_named - placeholder_named

missing_arguments = (
(placeholder_positional | placeholder_named) -
(substitution_positional | substitution_named)
)

if extra_positional:
self.report(
messages.StringDotFormatExtraPositionalArguments,
node,
', '.join(sorted(str(x) for x in extra_positional)),
)
if extra_named:
self.report(
messages.StringDotFormatExtraNamedArguments,
node,
', '.join(sorted(extra_named)),
)
if missing_arguments:
self.report(
messages.StringDotFormatMissingArgument,
node,
', '.join(sorted(str(x) for x in missing_arguments)),
)

def CALL(self, node):
if (
isinstance(node.func, ast.Attribute) and
isinstance(node.func.value, ast.Str) and
node.func.attr == 'format'
):
self._handle_string_dot_format(node)
self.handleChildren(node)

def _handle_percent_format(self, node):
try:
placeholders = parse_percent_format(node.left.s)
except ValueError:
self.report(
messages.PercentFormatInvalidFormat,
node,
'incomplete format',
)
return

named = set()
positional_count = 0
positional = None
for _, placeholder in placeholders:
if placeholder is None:
continue
name, _, width, precision, conversion = placeholder

if conversion == '%':
continue

if conversion not in VALID_CONVERSIONS:
self.report(
messages.PercentFormatUnsupportedFormatCharacter,
node,
conversion,
)

if positional is None and conversion:
positional = name is None

for part in (width, precision):
if part is not None and '*' in part:
if not positional:
self.report(
messages.PercentFormatStarRequiresSequence,
node,
)
else:
positional_count += 1

if positional and name is not None:
self.report(
messages.PercentFormatMixedPositionalAndNamed,
node,
)
return
elif not positional and name is None:
self.report(
messages.PercentFormatMixedPositionalAndNamed,
node,
)
return

if positional:
positional_count += 1
else:
named.add(name)

if (
isinstance(node.right, (ast.List, ast.Tuple)) and
# does not have any *splats (py35+ feature)
not any(
isinstance(elt, getattr(ast, 'Starred', ()))
for elt in node.right.elts
)
):
substitution_count = len(node.right.elts)
if positional and positional_count != substitution_count:
self.report(
messages.PercentFormatPositionalCountMismatch,
node,
positional_count,
substitution_count,
)
elif not positional:
self.report(messages.PercentFormatExpectedMapping, node)

if (
isinstance(node.right, ast.Dict) and
all(isinstance(k, ast.Str) for k in node.right.keys)
):
if positional and positional_count > 1:
self.report(messages.PercentFormatExpectedSequence, node)
return

substitution_keys = {k.s for k in node.right.keys}
extra_keys = substitution_keys - named
missing_keys = named - substitution_keys
if not positional and extra_keys:
self.report(
messages.PercentFormatExtraNamedArguments,
node,
', '.join(sorted(extra_keys)),
)
if not positional and missing_keys:
self.report(
messages.PercentFormatMissingArgument,
node,
', '.join(sorted(missing_keys)),
)

def BINOP(self, node):
if (
isinstance(node.op, ast.Mod) and
isinstance(node.left, ast.Str)
):
self._handle_percent_format(node)
self.handleChildren(node)

NUM = STR = BYTES = ELLIPSIS = CONSTANT = ignore

# "slice" type nodes
Expand Down Expand Up @@ -1273,7 +1597,24 @@ def RAISE(self, node):
self.report(messages.RaiseNotImplemented, node)

# additional node types
COMPREHENSION = KEYWORD = FORMATTEDVALUE = JOINEDSTR = handleChildren
COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren

_in_fstring = False

def JOINEDSTR(self, node):
if (
# the conversion / etc. flags are parsed as f-strings without
# placeholders
not self._in_fstring and
not any(isinstance(x, ast.FormattedValue) for x in node.values)
):
self.report(messages.FStringMissingPlaceholders, node)

self._in_fstring, orig = True, self._in_fstring
try:
self.handleChildren(node)
finally:
self._in_fstring = orig

def DICT(self, node):
# Complain if there are duplicate keys with different values
Expand Down
Loading

0 comments on commit eeb6263

Please sign in to comment.