Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement import extraction for notebooks in Rust #606

Merged
merged 13 commits into from
Mar 18, 2024
38 changes: 38 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ rayon = "1.9.0"
regex = "1.10.3"
rustpython-ast = { version = "0.3.0", features = ["visitor"] }
rustpython-parser = "0.3.0"
serde_json = "1.0.114"

[profile.release]
lto = true
Expand Down
13 changes: 1 addition & 12 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"chardet>=4.0.0",
"click>=8.0.0,<9",
"pathspec>=0.9.0",
"colorama>=0.4.6; sys_platform == 'win32'",
Expand Down
21 changes: 6 additions & 15 deletions python/deptry/imports/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from collections import defaultdict
from typing import TYPE_CHECKING

from deptry.imports.extractors import NotebookImportExtractor
from deptry.rust import get_imports_from_py_files
from deptry.rust import get_imports_from_ipynb_files, get_imports_from_py_files

if TYPE_CHECKING:
from pathlib import Path
Expand All @@ -19,7 +18,7 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
logging.info("Scanning %d %s...", len(list_of_files), "files" if len(list_of_files) > 1 else "file")

py_files = [str(file) for file in list_of_files if file.suffix == ".py"]
ipynb_files = [file for file in list_of_files if file.suffix == ".ipynb"]
ipynb_files = [str(file) for file in list_of_files if file.suffix == ".ipynb"]

modules: dict[str, list[Location]] = defaultdict(list)

Expand All @@ -29,25 +28,17 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
modules[module].extend(locations)

# Process each .ipynb file individually
for file in ipynb_files:
for module, locations in get_imported_modules_from_ipynb_file(file).items():
# Process all .ipynb files in parallel using Rust
if ipynb_files:
rust_result = get_imports_from_ipynb_files(ipynb_files)
for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
modules[module].extend(locations)

logging.debug("All imported modules: %s\n", modules)

return modules


def get_imported_modules_from_ipynb_file(path_to_file: Path) -> dict[str, list[Location]]:
    """Return the modules imported by a single notebook file.

    The extraction itself is delegated to ``NotebookImportExtractor``; this
    wrapper only adds debug logging before and after the call.

    Args:
        path_to_file: Path of the ``.ipynb`` file to scan.

    Returns:
        The mapping produced by ``NotebookImportExtractor.extract_imports()``,
        keyed by module name.
    """
    logging.debug("Scanning %s...", path_to_file)

    extractor = NotebookImportExtractor(path_to_file)
    found_modules = extractor.extract_imports()

    logging.debug("Found the following imports in %s: %s", path_to_file, found_modules)
    return found_modules


def convert_rust_locations_to_python_locations(
imported_modules: dict[str, list[RustLocation]],
) -> dict[str, list[Location]]:
Expand Down
5 changes: 0 additions & 5 deletions python/deptry/imports/extractors/__init__.py

This file was deleted.

55 changes: 0 additions & 55 deletions python/deptry/imports/extractors/base.py

This file was deleted.

58 changes: 0 additions & 58 deletions python/deptry/imports/extractors/notebook_import_extractor.py

This file was deleted.

2 changes: 1 addition & 1 deletion python/deptry/rust.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .rust import Location as RustLocation

def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
def get_imports_from_py_file(file_path: str) -> dict[str, list[RustLocation]]: ...
def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...

class Location:
file: str
Expand Down
73 changes: 73 additions & 0 deletions src/imports/ipynb.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use crate::file_utils;
use crate::location;

use file_utils::read_file;
use location::Location;
use pyo3::exceptions::PySyntaxError;
use pyo3::prelude::*;
use pyo3::types::PyString;
use rayon::prelude::*;
use std::collections::HashMap;

use super::shared;

/// Extracts import statements and their locations from several Jupyter notebook
/// (`.ipynb`) files in parallel.
///
/// Every path in `file_paths` is handed to [`_get_imports_from_ipynb_file`] on the
/// rayon thread pool. The per-file results are combined by
/// `shared::merge_results_from_threads`, and the merged imports are converted to a
/// Python object by `shared::convert_to_python_dict` (typed on the Python side as a
/// dictionary mapping module names to lists of locations).
///
/// # Errors
///
/// Returns the underlying `PyErr` when one of the given paths cannot be converted
/// to a Rust string, or when the final conversion to a Python object fails.
/// Failures for individual notebooks are not returned: they are passed to
/// `shared::log_python_errors_as_warnings`.
#[pyfunction]
pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
    // Copy the paths into owned Rust strings before entering the parallel section.
    // A path that cannot be represented as UTF-8 is reported to the caller as a
    // Python exception instead of panicking inside the extension module.
    let rust_file_paths = file_paths
        .iter()
        .map(|py_str| py_str.to_str().map(str::to_owned))
        .collect::<PyResult<Vec<String>>>()?;

    let results: Vec<_> = rust_file_paths
        .par_iter()
        .map(|path_str| {
            let result = _get_imports_from_ipynb_file(path_str);
            shared::ThreadResult {
                file: path_str.to_string(),
                result,
            }
        })
        .collect();

    let (all_imports, errors) = shared::merge_results_from_threads(results);
    shared::log_python_errors_as_warnings(&errors);

    shared::convert_to_python_dict(py, all_imports)
}

/// Extracts the imported modules and their locations from a single `.ipynb` file.
///
/// The notebook is read from `path_str`, parsed as JSON, and the source of its code
/// cells is concatenated by [`_extract_code_from_notebook_cells`]. That text is then
/// turned into an AST and scanned for imports using the helpers in `shared`.
/// Locations are computed against the concatenated text, not against the raw
/// notebook file.
///
/// # Errors
///
/// - Propagates any error returned by `read_file`.
/// - Returns a `PySyntaxError` when the file is not valid JSON or when its
///   top-level `"cells"` entry is not an array.
/// - Propagates any error returned by `shared::get_ast_from_file_content`.
fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
    let raw_json = read_file(path_str)?;

    let document = match serde_json::from_str::<serde_json::Value>(&raw_json) {
        Ok(value) => value,
        Err(err) => return Err(PySyntaxError::new_err(err.to_string())),
    };

    // Indexing a JSON value with a missing key yields `Null`, so a notebook without
    // a "cells" entry ends up in the `None` arm as well.
    let source_code = match document["cells"].as_array() {
        Some(cells) => _extract_code_from_notebook_cells(cells),
        None => return Err(PySyntaxError::new_err("Expected 'cells' to be an array")),
    };

    let tree = shared::get_ast_from_file_content(&source_code, path_str)?;
    let imports = shared::extract_imports_from_ast(tree);
    let locations = shared::convert_imports_with_textranges_to_location_objects(
        imports,
        path_str,
        &source_code,
    );

    Ok(locations)
}

/// Extracts and concatenates code from notebook code cells.
fn _extract_code_from_notebook_cells(cells: &[serde_json::Value]) -> String {
let code_lines: Vec<String> = cells
.iter()
.filter(|cell| cell["cell_type"] == "code")
.flat_map(|cell| cell["source"].as_array())
.flatten()
.filter_map(|line| line.as_str())
.map(str::to_owned)
.collect();

code_lines.join("\n")
}
3 changes: 3 additions & 0 deletions src/imports/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod ipynb;
pub mod py;
pub mod shared;
Loading