Skip to content

Commit

Permalink
Support imports using importlib.import_module (#782)
Browse files Browse the repository at this point in the history
* test: add coverage for dynamic import with importlib

* style: lint

* feat(WIP): attempt to get importlib imports

* test: add test coverage for both importlib formats

* feat: also handle case where importlib.import_module is aliased

* refactor: consolidate proof of concept test into existing test

* refactor(clippy): collapse nested `if let` statements in `Stmt::Expr` match arm

* docs: note which sorts of `importlib` import are supported

* test: check nested packages with import_module resolve to the top level package (e.g. `http.server` -> `http`)

* test: test coverage for aliased import (not being collected)

* fix: working import alias coverage

* feat: full import path so importlib aliases are acknowledged too

* docs: Clarify cases where `importlib.import_module` usage is/is not detected

* docs: docstring for `ImportVisitor` explaining what `import_module_names` is

* refactor: split long match statement arms into new functions with docstrings (`handle_import`, `handle_import_from`, `handle_expr`)

* style(consistency): Add language hint to code block in docs/usage.md

Co-authored-by: Mathieu Kniewallner <mathieu.kniewallner@gmail.com>

* docs: Link to importlib stdlib docs URL in docs/usage.md

Co-authored-by: Mathieu Kniewallner <mathieu.kniewallner@gmail.com>

* fix(consistency): Use full language hint on code block in docs/usage.md

Co-authored-by: Mathieu Kniewallner <mathieu.kniewallner@gmail.com>

* fix(typo): No capitalisation in sentence punctuated by code block in docs/usage.md

Co-authored-by: Mathieu Kniewallner <mathieu.kniewallner@gmail.com>

* fix(typo): wrong type names used in refactored function

---------

Co-authored-by: Mathieu Kniewallner <mathieu.kniewallner@gmail.com>
  • Loading branch information
lmmx and mkniewallner authored Jul 29, 2024
1 parent 355292c commit 722313a
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 32 deletions.
15 changes: 15 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,21 @@ if TYPE_CHECKING:
import mypy_boto3_s3
```

There is some support for imports created with [`importlib.import_module`](https://docs.python.org/3/library/importlib.html#importlib.import_module) that use a string literal:

```python
import importlib

importlib.import_module("foo") # package 'foo' imported
```

but not where the argument is provided dynamically from a variable, attribute, etc., e.g.:

```python
bar = "foo"
importlib.import_module(bar) # Not detected
```

## Excluding files and directories

To determine issues with imported modules and dependencies, _deptry_ will scan the working directory and its subdirectories recursively for `.py` and `.ipynb` files, so it can
Expand Down
131 changes: 111 additions & 20 deletions src/visitor.rs
Original file line number Diff line number Diff line change
@@ -1,47 +1,138 @@
use ruff_python_ast::visitor::{walk_stmt, Visitor};
use ruff_python_ast::{self, Expr, ExprAttribute, ExprName, Stmt, StmtIf};
use ruff_python_ast::{
self, Expr, ExprAttribute, ExprName, Stmt, StmtExpr, StmtIf, StmtImport, StmtImportFrom,
};
use ruff_text_size::TextRange;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};

#[derive(Debug, Clone)]
pub struct ImportVisitor {
imports: HashMap<String, Vec<TextRange>>,
import_module_names: HashSet<String>,
}

/// Visitor for tracking Python imports in AST.
///
/// `imports`: Maps top-level module names to their import locations.
///
/// `import_module_names`: Tracks import names (including aliases) that
/// refer to the `importlib.import_module` function itself, i.e. the import
/// alias for `importlib` package and/or the `importlib.import_module` function.
impl ImportVisitor {
pub fn new() -> Self {
Self {
imports: HashMap::new(),
import_module_names: HashSet::new(),
}
}

pub fn get_imports(self) -> HashMap<String, Vec<TextRange>> {
self.imports
}
}

impl<'a> Visitor<'a> for ImportVisitor {
fn visit_stmt(&mut self, stmt: &'a Stmt) {
match stmt {
Stmt::Import(import_stmt) => {
for alias in &import_stmt.names {
let top_level_module = get_top_level_module_name(&alias.name);
self.imports
.entry(top_level_module)
.or_default()
.push(alias.range);
/// Handles regular import statements (e.g., `import foo` or `import foo as bar`).
///
/// Processes each name in the import statement (dealiasing if needed), and adds the
/// top-level module to the `imports` map.
///
/// Imports of the `importlib` package are handled as a special case: its name (or alias),
/// suffixed with ".import_module", is stored in `import_module_names`. This is done to
/// indicate how we should look out for dynamic imports in the code that follows.
fn handle_import(&mut self, import_stmt: &StmtImport) {
for alias in &import_stmt.names {
let top_level_module = get_top_level_module_name(alias.name.as_str());
self.imports
.entry(top_level_module)
.or_default()
.push(alias.range);

if alias.name.as_str() == "importlib" {
let name = alias
.asname
.as_ref()
.map(|id| id.as_str())
.unwrap_or("importlib");
self.import_module_names
.insert(format!("{}.import_module", name));
}
}
}

/// Handles "from" import statements (e.g., `from foo import bar` or `from foo import bar as baz`).
///
/// Processes the module which the names are being imported from, adds it to the `imports` map
/// if it's a top-level import.
///
/// Imports of `import_module` from the `importlib` package are handled as a special case: its
/// name (or alias), prefixed with "importlib.", is stored in `import_module_names`. This is done to
/// indicate how we should look out for dynamic imports in the code that follows.
fn handle_import_from(&mut self, import_from_stmt: &StmtImportFrom) {
if let Some(module) = &import_from_stmt.module {
if import_from_stmt.level == 0 {
let module_name = module.as_str();
self.imports
.entry(get_top_level_module_name(module_name))
.or_default()
.push(import_from_stmt.range);

if module_name == "importlib" {
for alias in &import_from_stmt.names {
if alias.name.as_str() == "import_module" {
let name = alias
.asname
.as_ref()
.map(|id| id.as_str())
.unwrap_or("import_module");
self.import_module_names.insert(name.to_string());
}
}
}
}
Stmt::ImportFrom(import_from_stmt) => {
if let Some(module) = &import_from_stmt.module {
if import_from_stmt.level == 0 {
self.imports
.entry(get_top_level_module_name(module.as_str()))
.or_default()
.push(import_from_stmt.range);
}
}

/// Handles expression statements, looking for dynamic imports using `importlib.import_module`.
///
/// This function checks if the expression is a call to a function that might be `importlib.import_module`
/// (which could be import aliased at either the package or function name). If so, processes the imported
/// module name (which must be given as a string literal, not a variable) and adds it to the `imports` map.
fn handle_expr(&mut self, expr_stmt: &StmtExpr) {
// Check for dynamic imports using `importlib.import_module`
if let Expr::Call(call_expr) = expr_stmt.value.as_ref() {
let is_import_module = match call_expr.func.as_ref() {
Expr::Attribute(attr_expr) => {
if let Expr::Name(name) = attr_expr.value.as_ref() {
self.import_module_names
.contains(&format!("{}.import_module", name.id.as_str()))
} else {
false
}
}
Expr::Name(name) => self.import_module_names.contains(name.id.as_str()),
_ => false,
};

if is_import_module {
if let Some(Expr::StringLiteral(string_literal)) = call_expr.arguments.args.first()
{
let top_level_module =
get_top_level_module_name(&string_literal.value.to_string());
self.imports
.entry(top_level_module)
.or_default()
.push(expr_stmt.range);
}
}
}
}
}

impl<'a> Visitor<'a> for ImportVisitor {
fn visit_stmt(&mut self, stmt: &'a Stmt) {
match stmt {
Stmt::Import(import_stmt) => self.handle_import(import_stmt),
Stmt::ImportFrom(import_from_stmt) => self.handle_import_from(import_from_stmt),
Stmt::Expr(expr_stmt) => self.handle_expr(expr_stmt),
Stmt::If(if_stmt) if is_guarded_by_type_checking(if_stmt) => {
// Avoid parsing imports that are only evaluated by type checkers.
}
Expand Down
10 changes: 10 additions & 0 deletions tests/data/some_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
from os import chdir, walk
from pathlib import Path
from typing import List, TYPE_CHECKING
from importlib import import_module
from importlib import import_module as im
import importlib
import importlib as il

import numpy as np
import pandas
Expand Down Expand Up @@ -40,3 +44,9 @@ def func():
class MyClass:
def __init__(self):
import module_in_class

import_module("patito")
importlib.import_module("polars")
im("uvicorn")
import_module("http.server")
il.import_module("xml.etree.ElementTree")
35 changes: 23 additions & 12 deletions tests/unit/imports/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,37 @@ def test_import_parser_py() -> None:
some_imports_path = Path("tests/data/some_imports.py")

assert get_imported_modules_from_list_of_files([some_imports_path]) == {
"barfoo": [Location(some_imports_path, 21, 8)],
"baz": [Location(some_imports_path, 17, 5)],
"click": [Location(some_imports_path, 31, 12)],
"foobar": [Location(some_imports_path, 19, 12)],
"httpx": [Location(some_imports_path, 15, 12)],
"module_in_class": [Location(some_imports_path, 42, 16)],
"module_in_func": [Location(some_imports_path, 37, 12)],
"not_click": [Location(some_imports_path, 33, 12)],
"barfoo": [Location(some_imports_path, 25, 8)],
"baz": [Location(some_imports_path, 21, 5)],
"click": [Location(some_imports_path, 35, 12)],
"foobar": [Location(some_imports_path, 23, 12)],
"http": [Location(some_imports_path, line=51, column=1)],
"httpx": [Location(some_imports_path, 19, 12)],
"importlib": [
Location(some_imports_path, line=5, column=1),
Location(some_imports_path, line=6, column=1),
Location(some_imports_path, line=7, column=8),
Location(some_imports_path, line=8, column=8),
],
"module_in_class": [Location(some_imports_path, 46, 16)],
"module_in_func": [Location(some_imports_path, 41, 12)],
"not_click": [Location(some_imports_path, 37, 12)],
"numpy": [
Location(some_imports_path, 6, 8),
Location(some_imports_path, 8, 1),
Location(some_imports_path, 10, 8),
Location(some_imports_path, 12, 1),
],
"os": [Location(some_imports_path, 2, 1)],
"pandas": [Location(some_imports_path, 7, 8)],
"pandas": [Location(some_imports_path, 11, 8)],
"pathlib": [Location(some_imports_path, 3, 1)],
"randomizer": [Location(some_imports_path, 22, 1)],
"patito": [Location(some_imports_path, line=48, column=1)],
"polars": [Location(some_imports_path, line=49, column=1)],
"randomizer": [Location(some_imports_path, 26, 1)],
"typing": [
Location(some_imports_path, 1, 8),
Location(some_imports_path, 4, 1),
],
"uvicorn": [Location(some_imports_path, line=50, column=1)],
"xml": [Location(some_imports_path, line=52, column=1)],
}


Expand Down

0 comments on commit 722313a

Please sign in to comment.