From d202b0a549bccd99cea5c8a17acb03f245aa2747 Mon Sep 17 00:00:00 2001 From: johndoknjas Date: Tue, 22 Oct 2024 18:58:35 -0700 Subject: [PATCH] Get the recursion feature to work for class/instance methods as well. Also, disable the feature by default, and add a `--recursion` flag for if the user wants to do it. --- CHANGELOG.md | 4 + README.md | 9 +++ tests/__init__.py | 5 ++ tests/test_recursion.py | 167 ++++++++++++++++++++++++++++++---------- vulture/config.py | 4 + vulture/core.py | 20 ++++- vulture/utils.py | 60 +++++++++++---- 7 files changed, 206 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a944eb1..bce1f3da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# next (unreleased) + +* Add an option to mark most functions only called recursively as unused (John Doknjas, #374). + # 2.13 (2024-10-02) * Add support for Python 3.13 (Jendrik Seipp, #369). diff --git a/README.md b/README.md index 45bb3c44..3570adb2 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,15 @@ function arguments, e.g., `def foo(x, _y)`. Raise the minimum [confidence value](#types-of-unused-code) with the `--min-confidence` flag. +#### Verbose output + +For more verbose output, use the `--verbose` (or `-v`) flag. + +#### Not counting recursion + +It's possible that a function is only called by itself. The `--recursion` (or `-r`) flag will get +vulture to mark most such functions as unused. Note that this will have some performance cost. + #### Unreachable code If Vulture complains about code like `if False:`, you can use a Boolean diff --git a/tests/__init__.py b/tests/__init__.py index c54b3877..c362e160 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -42,3 +42,8 @@ def check_unreachable(v, lineno, size, name): @pytest.fixture def v(): return core.Vulture(verbose=True) + + +@pytest.fixture +def v_rec(): + return core.Vulture(verbose=True, recursion=True) diff --git a/tests/test_recursion.py b/tests/test_recursion.py index c434aeab..eab920ba 100644 --- a/tests/test_recursion.py +++ b/tests/test_recursion.py @@ -1,63 +1,104 @@ -from . import check, v +from . import check, v, v_rec -assert v # Silence pyflakes. +assert v +assert v_rec -def test_recursion1(v): - v.scan( - """\ +def test_recursion1(v, v_rec): + code = """\ def Rec(): Rec() """ - ) + v_rec.scan(code) + check(v_rec.defined_funcs, ["Rec"]) + check(v_rec.unused_funcs, ["Rec"]) + v.scan(code) check(v.defined_funcs, ["Rec"]) - check(v.unused_funcs, ["Rec"]) - + check(v.unused_funcs, []) -def test_recursion2(v): - v.scan( - """\ -def Rec(): - Rec() +def test_recursion2(v, v_rec): + code = """\ class MyClass: def __init__(self): pass - def Rec(): - Rec() # calls global Rec() + def inst(self): + self.inst() + + def inst2(self): + self.inst2() + + def Rec2(): + MyClass.Rec2() -def main(): - main() + @classmethod + def Rec3(): + MyClass.Rec3() + + @staticmethod + def Rec4(): + MyClass.Rec4() + +def inst2(): + o = MyClass() + o.inst2() + +def Rec3(): + Rec3() + +def Rec4(): + Rec4() """ - ) - check(v.defined_funcs, ["Rec", "Rec", "main"]) - check(v.unused_funcs, ["main"]) + v_rec.scan(code) + check(v_rec.defined_funcs, ["Rec2", "inst2", "Rec3", "Rec4"]) + check(v_rec.unused_funcs, ["Rec2", "Rec3", "Rec4"]) + check(v_rec.defined_methods, ["inst", "inst2", "Rec3", "Rec4"]) + check(v_rec.unused_methods, ["inst", "Rec3", "Rec4"]) + v.scan(code) + check(v.defined_funcs, ["Rec2", "inst2", "Rec3", "Rec4"]) + check(v.unused_funcs, []) + check(v.defined_methods, ["inst", "inst2", "Rec3", "Rec4"]) + check(v.unused_methods, []) -def test_recursion3(v): - v.scan( - """\ +def test_recursion3(v, v_rec): + code = """\ class MyClass: def __init__(self): pass + @classmethod def Rec(): - pass + MyClass.Rec() -def Rec(): - MyClass.Rec() +def aa(): + aa() """ + v_rec.scan(code) + check( + v_rec.defined_funcs, + [ + "aa", + ], ) - check(v.defined_funcs, ["Rec", "Rec"]) + check(v_rec.defined_methods, ["Rec"]) + check(v_rec.unused_funcs, ["aa"]) + check(v_rec.unused_methods, ["Rec"]) + v.scan(code) + check( + v.defined_funcs, + [ + "aa", + ], + ) + check(v.defined_methods, ["Rec"]) check(v.unused_funcs, []) - # MyClass.Rec() is not treated as a recursive call. So, MyClass.Rec is marked as used, causing Rec to also - # be marked as used (in Vulture's current behaviour) since they share the same name. + check(v.unused_methods, []) -def test_recursion4(v): - v.scan( - """\ +def test_recursion4(v, v_rec): + code = """\ def Rec(): Rec() @@ -68,23 +109,65 @@ def __init__(self): def Rec(): pass """ - ) + v_rec.scan(code) + check(v_rec.defined_funcs, ["Rec", "Rec"]) + check(v_rec.unused_funcs, ["Rec", "Rec"]) + v.scan(code) check(v.defined_funcs, ["Rec", "Rec"]) - check(v.unused_funcs, ["Rec", "Rec"]) + check(v.unused_funcs, []) -def test_recursion5(v): - v.scan( - """\ +def test_recursion5(v, v_rec): + code = """\ def rec(): - if (5 > 4): - rec() + if 5 > 4: + if 5 > 4: + rec() def outer(): def inner(): - outer() # these calls aren't considered for recursion + # the following calls are within a function within a function, so they + # are disregarded from recursion candidacy (to keep things simple) + outer() inner() """ - ) + v_rec.scan(code) + check(v_rec.defined_funcs, ["rec", "outer", "inner"]) + check(v_rec.unused_funcs, ["rec"]) + v.scan(code) check(v.defined_funcs, ["rec", "outer", "inner"]) - check(v.unused_funcs, ["rec"]) + check(v.unused_funcs, []) + + +def test_recursion6(v, v_rec): + code = """\ +def rec(num: int): + if num > 4: + x = 1 + (rec ((num + num) / 3) / 2) + return x +""" + v_rec.scan(code) + check(v_rec.defined_funcs, ["rec"]) + check(v_rec.unused_funcs, ["rec"]) + v.scan(code) + check(v.defined_funcs, ["rec"]) + check(v.unused_funcs, []) + + +def test_recursion7(v, v_rec): + code = """\ +def rec(num: int): + for i in (1, num): + rec(i) +rec(2) +""" + v_rec.scan(code) + check(v_rec.defined_funcs, ["rec"]) + check(v_rec.unused_funcs, []) + check(v_rec.defined_vars, ["num", "i"]) + check(v_rec.unused_vars, []) + v.scan(code) + check(v.defined_funcs, ["rec"]) + check(v.unused_funcs, []) + check(v.defined_vars, ["num", "i"]) + check(v.unused_vars, []) diff --git a/vulture/config.py b/vulture/config.py index 87dd5739..0bb3655f 100644 --- a/vulture/config.py +++ b/vulture/config.py @@ -23,6 +23,7 @@ "make_whitelist": False, "sort_by_size": False, "verbose": False, + "recursion": False, } @@ -169,6 +170,9 @@ def csv(exclude): "-v", "--verbose", action="store_true", default=missing ) parser.add_argument("--version", action="version", version=version) + parser.add_argument( + "-r", "--recursion", action="store_true", default=missing + ) namespace = parser.parse_args(args) cli_args = { key: value diff --git a/vulture/core.py b/vulture/core.py index 41a68285..01c86fca 100644 --- a/vulture/core.py +++ b/vulture/core.py @@ -195,9 +195,14 @@ class Vulture(ast.NodeVisitor): """Find dead code.""" def __init__( - self, verbose=False, ignore_names=None, ignore_decorators=None + self, + verbose=False, + ignore_names=None, + ignore_decorators=None, + recursion=False, ): self.verbose = verbose + self.recursion = recursion def get_list(typ): return utils.LoggingList(typ, self.verbose) @@ -250,7 +255,8 @@ def handle_syntax_error(e): ) self.exit_code = ExitCode.InvalidInput else: - utils.add_parent_info(node) + if self.recursion: + utils.add_parents(node) # When parsing type comments, visiting can throw a SyntaxError: try: self.visit(node) @@ -509,7 +515,9 @@ def visit_AsyncFunctionDef(self, node): def visit_Attribute(self, node): if isinstance(node.ctx, ast.Store): self._define(self.defined_attrs, node.attr, node) - elif isinstance(node.ctx, ast.Load): + elif isinstance(node.ctx, ast.Load) and not getattr( + node, "recursive", None + ): self.used_names.add(node.attr) def visit_BinOp(self, node): @@ -548,6 +556,9 @@ def visit_Call(self, node): ): self._handle_new_format_string(node.func.value.value) + if self.recursion and isinstance(node.func, (ast.Name, ast.Attribute)): + node.func.recursive = utils.recursive_call(node.func) + def _handle_new_format_string(self, s): def is_identifier(name): return bool(re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", name)) @@ -643,7 +654,7 @@ def visit_Name(self, node): if ( isinstance(node.ctx, (ast.Load, ast.Del)) and node.id not in IGNORED_VARIABLE_NAMES - and not utils.top_lvl_recursive_call(node) + and not getattr(node, "recursive", None) ): self.used_names.add(node.id) elif isinstance(node.ctx, (ast.Param, ast.Store)): @@ -734,6 +745,7 @@ def main(): verbose=config["verbose"], ignore_names=config["ignore_names"], ignore_decorators=config["ignore_decorators"], + recursion=config["recursion"], ) vulture.scavenge(config["paths"], exclude=config["exclude"]) sys.exit( diff --git a/vulture/utils.py b/vulture/utils.py index ac476980..1434ccfc 100644 --- a/vulture/utils.py +++ b/vulture/utils.py @@ -112,7 +112,7 @@ def read_file(filename): raise VultureInputException from err -def add_parent_info(root: ast.AST) -> None: +def add_parents(root: ast.AST) -> None: # https://stackoverflow.com/a/43311383/7743427: root.parent = None for node in ast.walk(root): @@ -120,22 +120,48 @@ def add_parent_info(root: ast.AST) -> None: child.parent = node -def ancestor(node: ast.AST, target_ancestor_types) -> Optional[ast.AST]: - while node and not isinstance(node, target_ancestor_types): - node = getattr(node, "parent", None) - return node - - -def top_lvl_recursive_call(node: ast.Name) -> bool: - """Returns true if a recursive call is made from a top level function to itself.""" - assert isinstance(node, ast.Name) - enclosing_func = ancestor(node, (ast.FunctionDef, ast.AsyncFunctionDef)) - return bool( - enclosing_func - and node.id == enclosing_func.name - and enclosing_func.col_offset == 0 - and ancestor(node, ast.Call) - ) +def parent(node: Optional[ast.AST]) -> ast.AST | None: + return getattr(node, "parent", None) + + +def ancestor( + node: ast.AST, target_ancestor_types, limit: int +) -> Optional[ast.AST]: + """`limit` is how far back to search before giving up and returning `None` instead""" + assert limit < 100 + if limit == 0: + return None + if not (node := parent(node)) or isinstance(node, target_ancestor_types): + return node + return ancestor(node, target_ancestor_types, limit - 1) + + +def call_info_no_args(call_node: ast.Call) -> str: + """Returns a string of the portion of the function call that's before the parenthesized arg list.""" + return ast.unparse(call_node).split("(")[0] + + +def recursive_call(node: ast.Name | ast.Attribute) -> Optional[bool]: + """Returns a boolean if it can be determined the node is part of a recursive call. + Otherwise if the function is nested in a complicated way, `None` is returned.""" + if not isinstance((call_node := parent(node)), ast.Call) or not ( + func := ancestor(node, (ast.FunctionDef, ast.AsyncFunctionDef), 10) + ): + return False + if isinstance((func_parent := parent(func)), ast.Module): + return call_info_no_args(call_node) == func.name + if not isinstance(func_parent, ast.ClassDef) or not isinstance( + parent(func_parent), ast.Module + ): + return None + return call_info_no_args(call_node).split(".") == [ + ( + "self" + if "self" == next((x.arg for x in func.args.args), None) + else func_parent.name + ), + func.name, + ] class LoggingList(list):