PyCQA · myint · Jul 30, 2019 · Apr 4, 2019 · Apr 4, 2019 · Apr 4, 2019
diff --git a/pyflakes/checker.py b/pyflakes/checker.py
@@ -12,6 +12,7 @@
 import functools
 import os
 import re
+import string
 import sys
 import tokenize
 
@@ -29,6 +30,8 @@
 
 builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))
 
+parse_format_string = string.Formatter().parse
+
 if PY2:
     tokenize_tokenize = tokenize.generate_tokens
 else:
@@ -81,6 +84,87 @@ def getAlternatives(n):
 TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')
 
 
+MAPPING_KEY_RE = re.compile(r'\(([^()]*)\)')
+CONVERSION_FLAG_RE = re.compile('[#0+ -]*')
+WIDTH_RE = re.compile(r'(?:\*|\d*)')
+PRECISION_RE = re.compile(r'(?:\.(?:\*|\d*))?')
+LENGTH_RE = re.compile('[hlL]?')
+# https://docs.python.org/3/library/stdtypes.html#old-string-formatting
+VALID_CONVERSIONS = frozenset('diouxXeEfFgGcrsa%')
+
+
+def _must_match(regex, string, pos):
+    # type: (Pattern[str], str, int) -> Match[str]
+    match = regex.match(string, pos)
+    assert match is not None
+    return match
+
+
+def parse_percent_format(s):  # type: (str) -> Tuple[PercentFormat, ...]
+    """Parses the string component of a `'...' % ...` format call
+
+    Copied from https://github.com/asottile/pyupgrade at v1.20.1
+    """
+
+    def _parse_inner():
+        # type: () -> Generator[PercentFormat, None, None]
+        string_start = 0
+        string_end = 0
+        in_fmt = False
+
+        i = 0
+        while i < len(s):
+            if not in_fmt:
+                try:
+                    i = s.index('%', i)
+                except ValueError:  # no more % fields!
+                    yield s[string_start:], None
+                    return
+                else:
+                    string_end = i
+                    i += 1
+                    in_fmt = True
+            else:
+                key_match = MAPPING_KEY_RE.match(s, i)
+                if key_match:
+                    key = key_match.group(1)  # type: Optional[str]
+                    i = key_match.end()
+                else:
+                    key = None
+
+                conversion_flag_match = _must_match(CONVERSION_FLAG_RE, s, i)
+                conversion_flag = conversion_flag_match.group() or None
+                i = conversion_flag_match.end()
+
+                width_match = _must_match(WIDTH_RE, s, i)
+                width = width_match.group() or None
+                i = width_match.end()
+
+                precision_match = _must_match(PRECISION_RE, s, i)
+                precision = precision_match.group() or None
+                i = precision_match.end()
+
+                # length modifier is ignored
+                i = _must_match(LENGTH_RE, s, i).end()
+
+                try:
+                    conversion = s[i]
+                except IndexError:
+                    raise ValueError('end-of-string while parsing format')
+                i += 1
+
+                fmt = (key, conversion_flag, width, precision, conversion)
+                yield s[string_start:string_end], fmt
+
+                in_fmt = False
+                string_start = i
+
+        if in_fmt:
+            raise ValueError('end-of-string while parsing format')
+
+    return tuple(_parse_inner())
+
+
 class _FieldsOrder(dict):
     """Fix order of AST node fields."""
 
@@ -1231,10 +1315,250 @@ def ignore(self, node):
     PASS = ignore
 
     # "expr" type nodes
-    BOOLOP = BINOP = UNARYOP = IFEXP = SET = \
-        CALL = REPR = ATTRIBUTE = SUBSCRIPT = \
+    BOOLOP = UNARYOP = IFEXP = SET = \
+        REPR = ATTRIBUTE = SUBSCRIPT = \
         STARRED = NAMECONSTANT = handleChildren
 
+    def _handle_string_dot_format(self, node):
+        try:
+            placeholders = tuple(parse_format_string(node.func.value.s))
+        except ValueError as e:
+            self.report(messages.StringDotFormatInvalidFormat, node, e)
+            return
+
+        class state:  # py2-compatible `nonlocal`
+            auto = None
+            next_auto = 0
+
+        placeholder_positional = set()
+        placeholder_named = set()
+
+        def _add_key(fmtkey):
+            """Returns True if there is an error which should early-exit"""
+            if fmtkey is None:  # end of string or `{` / `}` escapes
+                return False
+
+            # attributes / indices are allowed in `.format(...)`
+            fmtkey, _, _ = fmtkey.partition('.')
+            fmtkey, _, _ = fmtkey.partition('[')
+
+            try:
+                fmtkey = int(fmtkey)
+            except ValueError:
+                pass
+            else:  # fmtkey was an integer
+                if state.auto is True:
+                    self.report(messages.StringDotFormatMixingAutomatic, node)
+                    return True
+                else:
+                    state.auto = False
+
+            if fmtkey == '':
+                if state.auto is False:
+                    self.report(messages.StringDotFormatMixingAutomatic, node)
+                    return True
+                else:
+                    state.auto = True
+
+                fmtkey = state.next_auto
+                state.next_auto += 1
+
+            if isinstance(fmtkey, int):
+                placeholder_positional.add(fmtkey)
+            else:
+                placeholder_named.add(fmtkey)
+
+            return False
+
+        for _, fmtkey, spec, _ in placeholders:
+            if _add_key(fmtkey):
+                return
+
+            # spec can also contain format specifiers
+            if spec is not None:
+                try:
+                    spec_placeholders = tuple(parse_format_string(spec))
+                except ValueError as e:
+                    self.report(messages.StringDotFormatInvalidFormat, node, e)
+                    return
+
+                for _, spec_fmtkey, spec_spec, _ in spec_placeholders:
+                    # can't recurse again
+                    if spec_spec is not None and '{' in spec_spec:
+                        self.report(
+                            messages.StringDotFormatInvalidFormat,
+                            node,
+                            'Max string recursion exceeded',
+                        )
+                        return
+                    if _add_key(spec_fmtkey):
+                        return
+
+        # bail early if there is *args or **kwargs
+        if (
+                # python 2.x *args / **kwargs
+                getattr(node, 'starargs', None) or
+                getattr(node, 'kwargs', None) or
+                # python 3.x *args
+                any(
+                    isinstance(arg, getattr(ast, 'Starred', ()))
+                    for arg in node.args
+                ) or
+                # python 3.x **kwargs
+                any(kwd.arg is None for kwd in node.keywords)
+        ):
+            return
+
+        substitution_positional = set(range(len(node.args)))
+        substitution_named = {kwd.arg for kwd in node.keywords}
+
+        extra_positional = substitution_positional - placeholder_positional
+        extra_named = substitution_named - placeholder_named
+
+        missing_arguments = (
+            (placeholder_positional | placeholder_named) -
+            (substitution_positional | substitution_named)
+        )
+
+        if extra_positional:
+            self.report(
+                messages.StringDotFormatExtraPositionalArguments,
+                node,
+                ', '.join(sorted(str(x) for x in extra_positional)),
+            )
+        if extra_named:
+            self.report(
+                messages.StringDotFormatExtraNamedArguments,
+                node,
+                ', '.join(sorted(extra_named)),
+            )
+        if missing_arguments:
+            self.report(
+                messages.StringDotFormatMissingArgument,
+                node,
+                ', '.join(sorted(str(x) for x in missing_arguments)),
+            )
+
+    def CALL(self, node):
+        if (
+                isinstance(node.func, ast.Attribute) and
+                isinstance(node.func.value, ast.Str) and
+                node.func.attr == 'format'
+        ):
+            self._handle_string_dot_format(node)
+        self.handleChildren(node)
+
+    def _handle_percent_format(self, node):
+        try:
+            placeholders = parse_percent_format(node.left.s)
+        except ValueError:
+            self.report(
+                messages.PercentFormatInvalidFormat,
+                node,
+                'incomplete format',
+            )
+            return
+
+        named = set()
+        positional_count = 0
+        positional = None
+        for _, placeholder in placeholders:
+            if placeholder is None:
+                continue
+            name, _, width, precision, conversion = placeholder
+
+            if conversion == '%':
+                continue
+
+            if conversion not in VALID_CONVERSIONS:
+                self.report(
+                    messages.PercentFormatUnsupportedFormatCharacter,
+                    node,
+                    conversion,
+                )
+
+            if positional is None and conversion:
+                positional = name is None
+
+            for part in (width, precision):
+                if part is not None and '*' in part:
+                    if not positional:
+                        self.report(
+                            messages.PercentFormatStarRequiresSequence,
+                            node,
+                        )
+                    else:
+                        positional_count += 1
+
+            if positional and name is not None:
+                self.report(
+                    messages.PercentFormatMixedPositionalAndNamed,
+                    node,
+                )
+                return
+            elif not positional and name is None:
+                self.report(
+                    messages.PercentFormatMixedPositionalAndNamed,
+                    node,
+                )
+                return
+
+            if positional:
+                positional_count += 1
+            else:
+                named.add(name)
+
+        if (
+                isinstance(node.right, (ast.List, ast.Tuple)) and
+                # does not have any *splats (py35+ feature)
+                not any(
+                    isinstance(elt, getattr(ast, 'Starred', ()))
+                    for elt in node.right.elts
+                )
+        ):
+            substitution_count = len(node.right.elts)
+            if positional and positional_count != substitution_count:
+                self.report(
+                    messages.PercentFormatPositionalCountMismatch,
+                    node,
+                    positional_count,
+                    substitution_count,
+                )
+            elif not positional:
+                self.report(messages.PercentFormatExpectedMapping, node)
+
+        if (
+                isinstance(node.right, ast.Dict) and
+                all(isinstance(k, ast.Str) for k in node.right.keys)
+        ):
+            if positional and positional_count > 1:
+                self.report(messages.PercentFormatExpectedSequence, node)
+                return
+
+            substitution_keys = {k.s for k in node.right.keys}
+            extra_keys = substitution_keys - named
+            missing_keys = named - substitution_keys
+            if not positional and extra_keys:
+                self.report(
+                    messages.PercentFormatExtraNamedArguments,
+                    node,
+                    ', '.join(sorted(extra_keys)),
+                )
+            if not positional and missing_keys:
+                self.report(
+                    messages.PercentFormatMissingArgument,
+                    node,
+                    ', '.join(sorted(missing_keys)),
+                )
+
+    def BINOP(self, node):
+        if (
+                isinstance(node.op, ast.Mod) and
+                isinstance(node.left, ast.Str)
+        ):
+            self._handle_percent_format(node)
+        self.handleChildren(node)
+
     NUM = STR = BYTES = ELLIPSIS = CONSTANT = ignore
 
     # "slice" type nodes
@@ -1263,7 +1587,24 @@ def RAISE(self, node):
             self.report(messages.RaiseNotImplemented, node)
 
     # additional node types
-    COMPREHENSION = KEYWORD = FORMATTEDVALUE = JOINEDSTR = handleChildren
+    COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren
+
+    _in_fstring = False
+
+    def JOINEDSTR(self, node):
+        if (
+                # the conversion / etc. flags are parsed as f-strings without
+                # placeholders
+                not self._in_fstring and
+                not any(isinstance(x, ast.FormattedValue) for x in node.values)
+        ):
+            self.report(messages.FStringMissingPlaceholders, node)
+
+        self._in_fstring, orig = True, self._in_fstring
+        try:
+            self.handleChildren(node)
+        finally:
+            self._in_fstring = orig
 
     def DICT(self, node):
         # Complain if there are duplicate keys with different values