Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String formatting linting #443

Merged
merged 3 commits into from
Jul 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
347 changes: 344 additions & 3 deletions pyflakes/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import functools
import os
import re
import string
import sys
import tokenize

Expand All @@ -29,6 +30,8 @@

builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))

parse_format_string = string.Formatter().parse

if PY2:
tokenize_tokenize = tokenize.generate_tokens
else:
Expand Down Expand Up @@ -81,6 +84,87 @@ def getAlternatives(n):
TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')


MAPPING_KEY_RE = re.compile(r'\(([^()]*)\)')
CONVERSION_FLAG_RE = re.compile('[#0+ -]*')
WIDTH_RE = re.compile(r'(?:\*|\d*)')
PRECISION_RE = re.compile(r'(?:\.(?:\*|\d*))?')
LENGTH_RE = re.compile('[hlL]?')
# https://docs.python.org/3/library/stdtypes.html#old-string-formatting
VALID_CONVERSIONS = frozenset('diouxXeEfFgGcrsa%')


def _must_match(regex, string, pos):
# type: (Pattern[str], str, int) -> Match[str]
match = regex.match(string, pos)
assert match is not None
return match


def parse_percent_format(s): # type: (str) -> Tuple[PercentFormat, ...]
"""Parses the string component of a `'...' % ...` format call

Copied from https://github.com/asottile/pyupgrade at v1.20.1
"""

def _parse_inner():
# type: () -> Generator[PercentFormat, None, None]
string_start = 0
string_end = 0
in_fmt = False

i = 0
while i < len(s):
if not in_fmt:
try:
i = s.index('%', i)
except ValueError: # no more % fields!
yield s[string_start:], None
return
else:
string_end = i
i += 1
in_fmt = True
else:
key_match = MAPPING_KEY_RE.match(s, i)
if key_match:
key = key_match.group(1) # type: Optional[str]
i = key_match.end()
else:
key = None

conversion_flag_match = _must_match(CONVERSION_FLAG_RE, s, i)
conversion_flag = conversion_flag_match.group() or None
i = conversion_flag_match.end()

width_match = _must_match(WIDTH_RE, s, i)
width = width_match.group() or None
i = width_match.end()

precision_match = _must_match(PRECISION_RE, s, i)
precision = precision_match.group() or None
i = precision_match.end()

# length modifier is ignored
i = _must_match(LENGTH_RE, s, i).end()

try:
conversion = s[i]
except IndexError:
raise ValueError('end-of-string while parsing format')
i += 1

fmt = (key, conversion_flag, width, precision, conversion)
yield s[string_start:string_end], fmt

in_fmt = False
string_start = i

if in_fmt:
raise ValueError('end-of-string while parsing format')

return tuple(_parse_inner())


class _FieldsOrder(dict):
"""Fix order of AST node fields."""

Expand Down Expand Up @@ -1231,10 +1315,250 @@ def ignore(self, node):
PASS = ignore

# "expr" type nodes
BOOLOP = BINOP = UNARYOP = IFEXP = SET = \
CALL = REPR = ATTRIBUTE = SUBSCRIPT = \
BOOLOP = UNARYOP = IFEXP = SET = \
REPR = ATTRIBUTE = SUBSCRIPT = \
STARRED = NAMECONSTANT = handleChildren

def _handle_string_dot_format(self, node):
try:
placeholders = tuple(parse_format_string(node.func.value.s))
except ValueError as e:
self.report(messages.StringDotFormatInvalidFormat, node, e)
return

class state: # py2-compatible `nonlocal`
auto = None
next_auto = 0

placeholder_positional = set()
placeholder_named = set()

def _add_key(fmtkey):
"""Returns True if there is an error which should early-exit"""
if fmtkey is None: # end of string or `{` / `}` escapes
return False

# attributes / indices are allowed in `.format(...)`
fmtkey, _, _ = fmtkey.partition('.')
fmtkey, _, _ = fmtkey.partition('[')

try:
fmtkey = int(fmtkey)
except ValueError:
pass
else: # fmtkey was an integer
if state.auto is True:
self.report(messages.StringDotFormatMixingAutomatic, node)
return True
else:
state.auto = False

if fmtkey == '':
if state.auto is False:
self.report(messages.StringDotFormatMixingAutomatic, node)
return True
else:
state.auto = True

fmtkey = state.next_auto
state.next_auto += 1

if isinstance(fmtkey, int):
placeholder_positional.add(fmtkey)
else:
placeholder_named.add(fmtkey)

return False

for _, fmtkey, spec, _ in placeholders:
if _add_key(fmtkey):
return

# spec can also contain format specifiers
if spec is not None:
try:
spec_placeholders = tuple(parse_format_string(spec))
except ValueError as e:
self.report(messages.StringDotFormatInvalidFormat, node, e)
return

for _, spec_fmtkey, spec_spec, _ in spec_placeholders:
# can't recurse again
if spec_spec is not None and '{' in spec_spec:
self.report(
messages.StringDotFormatInvalidFormat,
node,
'Max string recursion exceeded',
)
return
if _add_key(spec_fmtkey):
return

# bail early if there is *args or **kwargs
if (
# python 2.x *args / **kwargs
getattr(node, 'starargs', None) or
getattr(node, 'kwargs', None) or
# python 3.x *args
any(
isinstance(arg, getattr(ast, 'Starred', ()))
for arg in node.args
) or
# python 3.x **kwargs
any(kwd.arg is None for kwd in node.keywords)
):
return

substitution_positional = set(range(len(node.args)))
substitution_named = {kwd.arg for kwd in node.keywords}

extra_positional = substitution_positional - placeholder_positional
extra_named = substitution_named - placeholder_named

missing_arguments = (
(placeholder_positional | placeholder_named) -
(substitution_positional | substitution_named)
)

if extra_positional:
self.report(
messages.StringDotFormatExtraPositionalArguments,
node,
', '.join(sorted(str(x) for x in extra_positional)),
)
if extra_named:
self.report(
messages.StringDotFormatExtraNamedArguments,
node,
', '.join(sorted(extra_named)),
)
if missing_arguments:
self.report(
messages.StringDotFormatMissingArgument,
node,
', '.join(sorted(str(x) for x in missing_arguments)),
)

def CALL(self, node):
if (
isinstance(node.func, ast.Attribute) and
isinstance(node.func.value, ast.Str) and
node.func.attr == 'format'
):
self._handle_string_dot_format(node)
self.handleChildren(node)

def _handle_percent_format(self, node):
try:
placeholders = parse_percent_format(node.left.s)
except ValueError:
self.report(
messages.PercentFormatInvalidFormat,
node,
'incomplete format',
)
return

named = set()
positional_count = 0
positional = None
for _, placeholder in placeholders:
if placeholder is None:
continue
name, _, width, precision, conversion = placeholder

if conversion == '%':
continue

if conversion not in VALID_CONVERSIONS:
self.report(
messages.PercentFormatUnsupportedFormatCharacter,
node,
conversion,
)

if positional is None and conversion:
positional = name is None

for part in (width, precision):
if part is not None and '*' in part:
if not positional:
self.report(
messages.PercentFormatStarRequiresSequence,
node,
)
else:
positional_count += 1

if positional and name is not None:
self.report(
messages.PercentFormatMixedPositionalAndNamed,
node,
)
return
elif not positional and name is None:
self.report(
messages.PercentFormatMixedPositionalAndNamed,
node,
)
return

if positional:
positional_count += 1
else:
named.add(name)

if (
isinstance(node.right, (ast.List, ast.Tuple)) and
# does not have any *splats (py35+ feature)
not any(
isinstance(elt, getattr(ast, 'Starred', ()))
for elt in node.right.elts
)
):
substitution_count = len(node.right.elts)
if positional and positional_count != substitution_count:
self.report(
messages.PercentFormatPositionalCountMismatch,
node,
positional_count,
substitution_count,
)
elif not positional:
self.report(messages.PercentFormatExpectedMapping, node)

if (
isinstance(node.right, ast.Dict) and
all(isinstance(k, ast.Str) for k in node.right.keys)
):
if positional and positional_count > 1:
self.report(messages.PercentFormatExpectedSequence, node)
return

substitution_keys = {k.s for k in node.right.keys}
extra_keys = substitution_keys - named
missing_keys = named - substitution_keys
if not positional and extra_keys:
self.report(
messages.PercentFormatExtraNamedArguments,
node,
', '.join(sorted(extra_keys)),
)
if not positional and missing_keys:
self.report(
messages.PercentFormatMissingArgument,
node,
', '.join(sorted(missing_keys)),
)

def BINOP(self, node):
if (
isinstance(node.op, ast.Mod) and
isinstance(node.left, ast.Str)
):
self._handle_percent_format(node)
self.handleChildren(node)

NUM = STR = BYTES = ELLIPSIS = CONSTANT = ignore

# "slice" type nodes
Expand Down Expand Up @@ -1263,7 +1587,24 @@ def RAISE(self, node):
self.report(messages.RaiseNotImplemented, node)

# additional node types
COMPREHENSION = KEYWORD = FORMATTEDVALUE = JOINEDSTR = handleChildren
COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren

_in_fstring = False

def JOINEDSTR(self, node):
if (
# the conversion / etc. flags are parsed as f-strings without
# placeholders
not self._in_fstring and
not any(isinstance(x, ast.FormattedValue) for x in node.values)
):
self.report(messages.FStringMissingPlaceholders, node)

self._in_fstring, orig = True, self._in_fstring
try:
self.handleChildren(node)
finally:
self._in_fstring = orig

def DICT(self, node):
# Complain if there are duplicate keys with different values
Expand Down
Loading