Skip to content

Commit

Permalink
Implement import extraction for notebooks in Rust (#606)
Browse files Browse the repository at this point in the history
* Move the functionality for extraction of imports from `ipynb` files to Rust.
* Introduced the use of type aliases
* Removed the function `get_imports_from_py_file`, since it was not used.
  • Loading branch information
fpgmaas authored Mar 18, 2024
1 parent 6c73675 commit b3b1e8e
Show file tree
Hide file tree
Showing 16 changed files with 276 additions and 234 deletions.
38 changes: 38 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ rayon = "1.9.0"
regex = "1.10.3"
rustpython-ast = { version = "0.3.0", features = ["visitor"] }
rustpython-parser = "0.3.0"
serde_json = "1.0.114"

[profile.release]
lto = true
Expand Down
13 changes: 1 addition & 12 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"chardet>=4.0.0",
"click>=8.0.0,<9",
"pathspec>=0.9.0",
"colorama>=0.4.6; sys_platform == 'win32'",
Expand Down
21 changes: 6 additions & 15 deletions python/deptry/imports/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from collections import defaultdict
from typing import TYPE_CHECKING

from deptry.imports.extractors import NotebookImportExtractor
from deptry.rust import get_imports_from_py_files
from deptry.rust import get_imports_from_ipynb_files, get_imports_from_py_files

if TYPE_CHECKING:
from pathlib import Path
Expand All @@ -19,7 +18,7 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
logging.info("Scanning %d %s...", len(list_of_files), "files" if len(list_of_files) > 1 else "file")

py_files = [str(file) for file in list_of_files if file.suffix == ".py"]
ipynb_files = [file for file in list_of_files if file.suffix == ".ipynb"]
ipynb_files = [str(file) for file in list_of_files if file.suffix == ".ipynb"]

modules: dict[str, list[Location]] = defaultdict(list)

Expand All @@ -29,25 +28,17 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
modules[module].extend(locations)

# Process each .ipynb file individually
for file in ipynb_files:
for module, locations in get_imported_modules_from_ipynb_file(file).items():
# Process all .ipynb files in parallel using Rust
if ipynb_files:
rust_result = get_imports_from_ipynb_files(ipynb_files)
for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
modules[module].extend(locations)

logging.debug("All imported modules: %s\n", modules)

return modules


def get_imported_modules_from_ipynb_file(path_to_file: Path) -> dict[str, list[Location]]:
logging.debug("Scanning %s...", path_to_file)

modules = NotebookImportExtractor(path_to_file).extract_imports()

logging.debug("Found the following imports in %s: %s", path_to_file, modules)
return modules


def convert_rust_locations_to_python_locations(
imported_modules: dict[str, list[RustLocation]],
) -> dict[str, list[Location]]:
Expand Down
5 changes: 0 additions & 5 deletions python/deptry/imports/extractors/__init__.py

This file was deleted.

55 changes: 0 additions & 55 deletions python/deptry/imports/extractors/base.py

This file was deleted.

58 changes: 0 additions & 58 deletions python/deptry/imports/extractors/notebook_import_extractor.py

This file was deleted.

2 changes: 1 addition & 1 deletion python/deptry/rust.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .rust import Location as RustLocation

def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
def get_imports_from_py_file(file_path: str) -> dict[str, list[RustLocation]]: ...
def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...

class Location:
file: str
Expand Down
73 changes: 73 additions & 0 deletions src/imports/ipynb.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use crate::file_utils;
use crate::location;

use file_utils::read_file;
use location::Location;
use pyo3::exceptions::PySyntaxError;
use pyo3::prelude::*;
use pyo3::types::PyString;
use rayon::prelude::*;
use std::collections::HashMap;

use super::shared;

/// Processes multiple Jupyter notebook (`.ipynb`) files in parallel to extract import statements
/// and their locations.
///
/// Accepts a list of file paths and returns a dictionary mapping module names to their import
/// locations. Per-file errors collected by `shared::merge_results_from_threads` are handed to
/// `shared::log_python_errors_as_warnings`; they do not abort the call.
///
/// # Errors
///
/// Returns a Python exception if one of the given paths cannot be represented as a UTF-8 Rust
/// string, or if `shared::convert_to_python_dict` fails.
#[pyfunction]
pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
    // Copy the paths into owned Rust strings before fanning out to worker threads. A path that
    // is not valid UTF-8 (e.g. a surrogate-escaped file name) is surfaced as a Python exception
    // instead of panicking inside the extension module.
    let rust_file_paths: Vec<String> = file_paths
        .iter()
        .map(|py_str| py_str.to_str().map(str::to_owned))
        .collect::<PyResult<Vec<String>>>()?;

    // Each notebook is handled independently; the outcome is paired with its path so that
    // failures can later be attributed to the right file.
    let results: Vec<_> = rust_file_paths
        .par_iter()
        .map(|path_str| {
            let result = _get_imports_from_ipynb_file(path_str);
            shared::ThreadResult {
                file: path_str.to_string(),
                result,
            }
        })
        .collect();

    let (all_imports, errors) = shared::merge_results_from_threads(results);
    shared::log_python_errors_as_warnings(&errors);

    shared::convert_to_python_dict(py, all_imports)
}

/// Extracts the imported modules and their locations from a single `.ipynb` file.
///
/// The notebook is read from `path_str`, parsed as JSON, and the source of its code cells is
/// concatenated into one piece of Python code, which is then parsed and scanned for imports.
///
/// # Errors
///
/// * Propagates any error returned by `read_file`.
/// * Returns a `PySyntaxError` when the file is not valid JSON or when its top-level `cells`
///   entry is not an array.
/// * Propagates any error returned by `shared::get_ast_from_file_content`.
fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
    let raw_json = read_file(path_str)?;

    let parsed = match serde_json::from_str::<serde_json::Value>(&raw_json) {
        Ok(value) => value,
        Err(err) => return Err(PySyntaxError::new_err(err.to_string())),
    };

    let code_cells = match parsed["cells"].as_array() {
        Some(cells) => cells,
        None => return Err(PySyntaxError::new_err("Expected 'cells' to be an array")),
    };

    // All code cells are merged into a single source text; the locations computed below are
    // therefore relative to this merged text.
    let merged_source = _extract_code_from_notebook_cells(code_cells);
    let module_ast = shared::get_ast_from_file_content(&merged_source, path_str)?;
    let imports_with_ranges = shared::extract_imports_from_ast(module_ast);

    let locations = shared::convert_imports_with_textranges_to_location_objects(
        imports_with_ranges,
        path_str,
        &merged_source,
    );
    Ok(locations)
}

/// Extracts and concatenates code from notebook code cells.
fn _extract_code_from_notebook_cells(cells: &[serde_json::Value]) -> String {
let code_lines: Vec<String> = cells
.iter()
.filter(|cell| cell["cell_type"] == "code")
.flat_map(|cell| cell["source"].as_array())
.flatten()
.filter_map(|line| line.as_str())
.map(str::to_owned)
.collect();

code_lines.join("\n")
}
3 changes: 3 additions & 0 deletions src/imports/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod ipynb;
pub mod py;
pub mod shared;
Loading

0 comments on commit b3b1e8e

Please sign in to comment.