From b6bffda11c284bdd43fca02ca1124dd702673c57 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 15:09:34 +0100
Subject: [PATCH 1/9] first draft of notebook extractor in rust

---
 Cargo.lock                                    |  38 +++++++
 Cargo.toml                                    |   1 +
 python/deptry/imports/extract.py              |  21 +---
 python/deptry/imports/extractors/__init__.py  |   5 -
 python/deptry/imports/extractors/base.py      |  55 ---------
 .../extractors/notebook_import_extractor.py   |  58 ----------
 python/deptry/rust.pyi                        |   2 +
 src/imports/ipynb.rs                          | 107 ++++++++++++++++++
 src/imports/mod.rs                            |   3 +
 src/imports/py.rs                             |  79 +++++++++++++
 src/{imports.rs => imports/shared.rs}         |  75 +-----------
 src/lib.rs                                    |  12 +-
 tests/unit/imports/test_extract.py            |  10 +-
 13 files changed, 255 insertions(+), 211 deletions(-)
 delete mode 100644 python/deptry/imports/extractors/__init__.py
 delete mode 100644 python/deptry/imports/extractors/base.py
 delete mode 100644 python/deptry/imports/extractors/notebook_import_extractor.py
 create mode 100644 src/imports/ipynb.rs
 create mode 100644 src/imports/mod.rs
 create mode 100644 src/imports/py.rs
 rename src/{imports.rs => imports/shared.rs} (51%)

diff --git a/Cargo.lock b/Cargo.lock
index cf366939..efcac224 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -139,6 +139,7 @@ dependencies = [
  "regex",
  "rustpython-ast",
  "rustpython-parser",
+ "serde_json",
 ]

 [[package]]
@@ -254,6 +255,12 @@ dependencies = [
  "either",
 ]

+[[package]]
+name = "itoa"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
+
 [[package]]
 name = "keccak"
 version = "0.1.5"
@@ -781,6 +788,37 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"

+[[package]]
+name = "serde"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.52",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.114"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
 [[package]]
 name = "sha3"
 version = "0.9.1"
diff --git a/Cargo.toml b/Cargo.toml
index 07afc45f..44d02e46 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,3 +18,4 @@ rayon = "1.9.0"
 regex = "1.10.3"
 rustpython-ast = { version = "0.3.0", features = ["visitor"] }
 rustpython-parser = "0.3.0"
+serde_json = "1.0.114"
diff --git a/python/deptry/imports/extract.py b/python/deptry/imports/extract.py
index 36701dbd..293630f6 100644
--- a/python/deptry/imports/extract.py
+++ b/python/deptry/imports/extract.py
@@ -4,8 +4,7 @@
 from collections import defaultdict
 from typing import TYPE_CHECKING

-from deptry.imports.extractors import NotebookImportExtractor
-from deptry.rust import get_imports_from_py_files
+from deptry.rust import get_imports_from_ipynb_files, get_imports_from_py_files

 if TYPE_CHECKING:
     from pathlib import Path
@@ -19,7 +18,7 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
     logging.info("Scanning %d %s...", len(list_of_files), "files" if len(list_of_files) > 1 else "file")

     py_files = [str(file) for file in list_of_files if file.suffix == ".py"]
-    ipynb_files = [file for file in list_of_files if file.suffix == ".ipynb"]
+    ipynb_files = [str(file) for file in list_of_files if file.suffix == ".ipynb"]

     modules: dict[str, list[Location]] = defaultdict(list)

@@ -29,9 +28,10 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
     for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
         modules[module].extend(locations)

-    # Process each .ipynb file individually
-    for file in ipynb_files:
-        for module, locations in get_imported_modules_from_ipynb_file(file).items():
+    # Process all .ipynb files in parallel using Rust
+    if ipynb_files:
+        rust_result = get_imports_from_ipynb_files(ipynb_files)
+        for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
             modules[module].extend(locations)

     logging.debug("All imported modules: %s\n", modules)
@@ -39,15 +39,6 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
     return modules


-def get_imported_modules_from_ipynb_file(path_to_file: Path) -> dict[str, list[Location]]:
-    logging.debug("Scanning %s...", path_to_file)
-
-    modules = NotebookImportExtractor(path_to_file).extract_imports()
-
-    logging.debug("Found the following imports in %s: %s", path_to_file, modules)
-    return modules
-
-
 def convert_rust_locations_to_python_locations(
     imported_modules: dict[str, list[RustLocation]],
 ) -> dict[str, list[Location]]:
diff --git a/python/deptry/imports/extractors/__init__.py b/python/deptry/imports/extractors/__init__.py
deleted file mode 100644
index ba141ce1..00000000
--- a/python/deptry/imports/extractors/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from __future__ import annotations
-
-from deptry.imports.extractors.notebook_import_extractor import NotebookImportExtractor
-
-__all__ = ("NotebookImportExtractor",)
diff --git a/python/deptry/imports/extractors/base.py b/python/deptry/imports/extractors/base.py
deleted file mode 100644
index 7df4bdc9..00000000
--- a/python/deptry/imports/extractors/base.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from __future__ import annotations
-
-import ast
-from abc import ABC, abstractmethod
-from collections import defaultdict
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
-import chardet
-
-from deptry.imports.location import Location
-
-if TYPE_CHECKING:
-    from pathlib import Path
-
-
-@dataclass
-class ImportExtractor(ABC):
-    """
-    Base class for other classes that can be used to extract the imported modules from a file.
-    """
-
-    file: Path
-
-    @abstractmethod
-    def extract_imports(self) -> dict[str, list[Location]]:
-        raise NotImplementedError()
-
-    def _extract_imports_from_ast(self, tree: ast.AST) -> dict[str, list[Location]]:
-        """
-        Given an Abstract Syntax Tree, find the imported top-level modules.
-        For example, given the source tree of a file with contents:
-
-            from pandas.tools import scatter_matrix
-
-        Will return the set {"pandas"}.
- """ - - imported_modules: dict[str, list[Location]] = defaultdict(list) - - for node in ast.walk(tree): - if isinstance(node, ast.Import): - for module in node.names: - imported_modules[module.name.split(".")[0]].append( - Location(self.file, node.lineno, node.col_offset) - ) - elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0: - imported_modules[node.module.split(".")[0]].append(Location(self.file, node.lineno, node.col_offset)) - - return imported_modules - - @staticmethod - def _get_file_encoding(file: Path) -> str: - with file.open("rb") as f: - return chardet.detect(f.read())["encoding"] diff --git a/python/deptry/imports/extractors/notebook_import_extractor.py b/python/deptry/imports/extractors/notebook_import_extractor.py deleted file mode 100644 index 18b307b7..00000000 --- a/python/deptry/imports/extractors/notebook_import_extractor.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -import ast -import itertools -import json -import logging -import re -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any - -from deptry.imports.extractors.base import ImportExtractor - -if TYPE_CHECKING: - from pathlib import Path - - from deptry.imports.location import Location - - -@dataclass -class NotebookImportExtractor(ImportExtractor): - """Extract import statements from a Jupyter notebook.""" - - def extract_imports(self) -> dict[str, list[Location]]: - """Extract the imported top-level modules from all code cells in the Jupyter Notebook.""" - notebook = self._read_ipynb_file(self.file) - if not notebook: - return {} - - cells = self._keep_code_cells(notebook) - import_statements = [self._extract_import_statements_from_cell(cell) for cell in cells] - tree = ast.parse("\n".join(itertools.chain.from_iterable(import_statements)), str(self.file)) - return self._extract_imports_from_ast(tree) - - @classmethod - def _read_ipynb_file(cls, path_to_ipynb: Path) -> dict[str, Any] | None: - try: - with path_to_ipynb.open() as ipynb_file: - notebook: dict[str, Any] = json.load(ipynb_file) - except ValueError: - try: - with path_to_ipynb.open(encoding=cls._get_file_encoding(path_to_ipynb)) as ipynb_file: - notebook = json.load(ipynb_file, strict=False) - except UnicodeDecodeError: - logging.warning("Warning: File %s could not be decoded. Skipping...", path_to_ipynb) - return None - return notebook - - @staticmethod - def _keep_code_cells(notebook: dict[str, Any]) -> list[dict[str, Any]]: - return [cell for cell in notebook["cells"] if cell["cell_type"] == "code"] - - @staticmethod - def _contains_import_statements(line: str) -> bool: - return re.search(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?", line) is not None - - @classmethod - def _extract_import_statements_from_cell(cls, cell: dict[str, Any]) -> list[str]: - return [line for line in cell["source"] if cls._contains_import_statements(line)] diff --git a/python/deptry/rust.pyi b/python/deptry/rust.pyi index f4344df9..9cad70fa 100644 --- a/python/deptry/rust.pyi +++ b/python/deptry/rust.pyi @@ -1,7 +1,9 @@ from .rust import Location as RustLocation def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ... +def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ... def get_imports_from_py_file(file_path: str) -> dict[str, list[RustLocation]]: ... +def get_imports_from_ipynb_file(file_path: str) -> dict[str, list[RustLocation]]: ... 
 class Location:
     file: str
diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
new file mode 100644
index 00000000..00bbd5af
--- /dev/null
+++ b/src/imports/ipynb.rs
@@ -0,0 +1,107 @@
+use crate::file_utils;
+use crate::location;
+
+use file_utils::read_file;
+use location::Location;
+use pyo3::exceptions::PySyntaxError;
+use pyo3::prelude::*;
+use pyo3::types::PyString;
+use rayon::prelude::*;
+use regex::Regex;
+use std::collections::HashMap;
+
+use super::shared::{
+    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
+    extract_imports_from_ast, get_ast_from_file_content,
+};
+
+/// Processes multiple Python files in parallel to extract import statements and their locations.
+/// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
+    let rust_file_paths: Vec<String> = file_paths
+        .iter()
+        .map(|py_str| py_str.to_str().unwrap().to_owned())
+        .collect();
+
+    // Process each file in parallel and collect results
+    let results: PyResult<Vec<HashMap<String, Vec<Location>>>> = rust_file_paths
+        .par_iter()
+        .map(|path_str| _get_imports_from_ipynb_file(path_str))
+        .collect();
+
+    let results = results?;
+
+    // Merge results from each thread
+    let mut all_imports = HashMap::new();
+    for file_result in results {
+        for (module, locations) in file_result {
+            all_imports
+                .entry(module)
+                .or_insert_with(Vec::new)
+                .extend(locations);
+        }
+    }
+
+    convert_to_python_dict(py, all_imports)
+}
+
+/// Processes a single Python file to extract import statements and their locations.
+/// Accepts a single file path and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult<PyObject> {
+    let path_str = file_path.to_str()?;
+    let result = _get_imports_from_ipynb_file(path_str)?;
+
+    convert_to_python_dict(py, result)
+}
+
+/// Core helper function that extracts import statements and their locations from the content of a single Python file.
+/// Used internally by both parallel and single file processing functions.
+fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
+    let file_content = match read_file(path_str) {
+        Ok(content) => content,
+        Err(_) => {
+            log::warn!("Warning: File {} could not be read. Skipping...", path_str);
+            return Ok(HashMap::new());
+        }
+    };
+
+    let notebook: serde_json::Value = match serde_json::from_str(&file_content) {
+        Ok(content) => content,
+        Err(_) => {
+            log::warn!("Warning: File {} is not valid JSON. Skipping...", path_str);
+            return Ok(HashMap::new());
+        }
+    };
+
+    let cells = notebook["cells"].as_array().ok_or_else(|| {
+        PySyntaxError::new_err("Invalid notebook structure: 'cells' is not an array")
+    })?;
+
+    let import_regex =
+        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
+
+    let import_statements: Vec<String> = cells
+        .iter()
+        .filter(|cell| cell["cell_type"] == "code")
+        .flat_map(|cell| cell["source"].as_array())
+        .flatten()
+        .filter_map(|line| line.as_str())
+        .filter(|line| import_regex.is_match(line))
+        .map(|line| line.to_string())
+        .collect();
+
+    let imports_script = import_statements.join("\n");
+
+    let ast = get_ast_from_file_content(&imports_script, path_str)
+        .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
+
+    let imported_modules = extract_imports_from_ast(ast);
+
+    Ok(convert_imports_with_textranges_to_location_objects(
+        imported_modules,
+        path_str,
+        &file_content,
+    ))
+}
diff --git a/src/imports/mod.rs b/src/imports/mod.rs
new file mode 100644
index 00000000..07dca57d
--- /dev/null
+++ b/src/imports/mod.rs
@@ -0,0 +1,3 @@
+pub mod ipynb;
+pub mod py;
+pub mod shared;
diff --git a/src/imports/py.rs b/src/imports/py.rs
new file mode 100644
index 00000000..f3fb740c
--- /dev/null
+++ b/src/imports/py.rs
@@ -0,0 +1,79 @@
+use crate::file_utils;
+use crate::location;
+
+use file_utils::read_file;
+use location::Location;
+use pyo3::exceptions::PySyntaxError;
+use pyo3::prelude::*;
+use pyo3::types::PyString;
+use rayon::prelude::*;
+use std::collections::HashMap;
+
+use super::shared::{
+    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
+    extract_imports_from_ast, get_ast_from_file_content,
+};
+
+/// Processes multiple Python files in parallel to extract import statements and their locations.
+/// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
+    let rust_file_paths: Vec<String> = file_paths
+        .iter()
+        .map(|py_str| py_str.to_str().unwrap().to_owned())
+        .collect();
+
+    // Process each file in parallel and collect results
+    let results: PyResult<Vec<HashMap<String, Vec<Location>>>> = rust_file_paths
+        .par_iter()
+        .map(|path_str| _get_imports_from_py_file(path_str))
+        .collect();
+
+    let results = results?;
+
+    // Merge results from each thread
+    let mut all_imports = HashMap::new();
+    for file_result in results {
+        for (module, locations) in file_result {
+            all_imports
+                .entry(module)
+                .or_insert_with(Vec::new)
+                .extend(locations);
+        }
+    }
+
+    convert_to_python_dict(py, all_imports)
+}
+
+/// Processes a single Python file to extract import statements and their locations.
+/// Accepts a single file path and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_py_file(py: Python, file_path: &PyString) -> PyResult<PyObject> {
+    let path_str = file_path.to_str()?;
+    let result = _get_imports_from_py_file(path_str)?;
+
+    convert_to_python_dict(py, result)
+}
+
+/// Core helper function that extracts import statements and their locations from the content of a single Python file.
+/// Used internally by both parallel and single file processing functions.
+fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
+    let file_content = match read_file(path_str) {
+        Ok(content) => content,
+        Err(_) => {
+            log::warn!("Warning: File {} could not be read. Skipping...", path_str);
+            return Ok(HashMap::new());
+        }
+    };
+
+    let ast = get_ast_from_file_content(&file_content, path_str)
+        .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
+
+    let imported_modules = extract_imports_from_ast(ast);
+
+    Ok(convert_imports_with_textranges_to_location_objects(
+        imported_modules,
+        path_str,
+        &file_content,
+    ))
+}
diff --git a/src/imports.rs b/src/imports/shared.rs
similarity index 51%
rename from src/imports.rs
rename to src/imports/shared.rs
index 25db77c7..65cca9ae 100644
--- a/src/imports.rs
+++ b/src/imports/shared.rs
@@ -1,13 +1,10 @@
-use crate::file_utils;
 use crate::location;
 use crate::visitor;

-use file_utils::read_file;
 use location::Location;
 use pyo3::exceptions::PySyntaxError;
 use pyo3::prelude::*;
-use pyo3::types::{PyDict, PyList, PyString};
-use rayon::prelude::*;
+use pyo3::types::{PyDict, PyList};
 use rustpython_ast::Mod;
 use rustpython_ast::Visitor;
 use rustpython_parser::source_code::LineIndex;
@@ -16,70 +13,6 @@ use rustpython_parser::{parse, Mode};
 use std::collections::HashMap;
 use visitor::ImportVisitor;

-/// Processes multiple Python files in parallel to extract import statements and their locations.
-/// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
-#[pyfunction]
-pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
-    let rust_file_paths: Vec<String> = file_paths
-        .iter()
-        .map(|py_str| py_str.to_str().unwrap().to_owned())
-        .collect();
-
-    // Process each file in parallel and collect results
-    let results: PyResult<Vec<HashMap<String, Vec<Location>>>> = rust_file_paths
-        .par_iter()
-        .map(|path_str| _get_imports_from_py_file(path_str))
-        .collect();
-
-    let results = results?;
-
-    // Merge results from each thread
-    let mut all_imports = HashMap::new();
-    for file_result in results {
-        for (module, locations) in file_result {
-            all_imports
-                .entry(module)
-                .or_insert_with(Vec::new)
-                .extend(locations);
-        }
-    }
-
-    convert_to_python_dict(py, all_imports)
-}
-
-/// Processes a single Python file to extract import statements and their locations.
-/// Accepts a single file path and returns a dictionary mapping module names to their import locations.
-#[pyfunction]
-pub fn get_imports_from_py_file(py: Python, file_path: &PyString) -> PyResult<PyObject> {
-    let path_str = file_path.to_str()?;
-    let result = _get_imports_from_py_file(path_str)?;
-
-    convert_to_python_dict(py, result)
-}
-
-/// Core helper function that extracts import statements and their locations from the content of a single Python file.
-/// Used internally by both parallel and single file processing functions.
-fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
-    let file_content = match read_file(path_str) {
-        Ok(content) => content,
-        Err(_) => {
-            log::warn!("Warning: File {} could not be read. Skipping...", path_str);
-            return Ok(HashMap::new());
-        }
-    };
-
-    let ast = get_ast_from_file_content(&file_content, path_str)
-        .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
-
-    let imported_modules = extract_imports_from_ast(ast);
-
-    Ok(convert_imports_with_textranges_to_location_objects(
-        imported_modules,
-        path_str,
-        &file_content,
-    ))
-}
-
 /// Parses the content of a Python file into an abstract syntax tree (AST).
 pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResult<Mod> {
     parse(file_content, Mode::Module, file_path)
@@ -88,7 +21,7 @@ pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResul

 /// Iterates through an AST to identify and collect import statements, and returns them together with their
 /// respective TextRange for each occurrence.
-fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {
+pub fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {
     let mut visitor = ImportVisitor::new();

     if let Mod::Module(module) = ast {
@@ -102,7 +35,7 @@ fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {

 /// Converts textual ranges of import statements into structured location objects.
 /// Facilitates the mapping of imports to detailed, file-specific location data (file, line, column).
-fn convert_imports_with_textranges_to_location_objects(
+pub fn convert_imports_with_textranges_to_location_objects(
     imports: HashMap<String, Vec<TextRange>>,
     file_path: &str,
     source_code: &str,
@@ -132,7 +65,7 @@ fn convert_imports_with_textranges_to_location_objects(
 }

 /// Transforms a Rust HashMap containing import data into a Python dictionary suitable for Python-side consumption.
-fn convert_to_python_dict(
+pub fn convert_to_python_dict(
     py: Python<'_>,
     imports_with_locations: HashMap<String, Vec<Location>>,
 ) -> PyResult<PyObject> {
diff --git a/src/lib.rs b/src/lib.rs
index defb4cb6..ba37d250 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,8 +13,16 @@ use location::Location;
 fn rust(_py: Python, m: &PyModule) -> PyResult<()> {
     pyo3_log::init(); // Initialize logging to forward to Python's logger

-    m.add_function(wrap_pyfunction!(imports::get_imports_from_py_files, m)?)?;
-    m.add_function(wrap_pyfunction!(imports::get_imports_from_py_file, m)?)?;
+    m.add_function(wrap_pyfunction!(imports::py::get_imports_from_py_files, m)?)?;
+    m.add_function(wrap_pyfunction!(imports::py::get_imports_from_py_file, m)?)?;
+    m.add_function(wrap_pyfunction!(
+        imports::ipynb::get_imports_from_ipynb_files,
+        m
+    )?)?;
+    m.add_function(wrap_pyfunction!(
+        imports::ipynb::get_imports_from_ipynb_file,
+        m
+    )?)?;
     m.add_class::<Location>()?;
     Ok(())
 }
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 05dd79a5..2a6fdd3a 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -9,7 +9,7 @@

 import pytest

-from deptry.imports.extract import get_imported_modules_from_ipynb_file, get_imported_modules_from_list_of_files
+from deptry.imports.extract import get_imported_modules_from_list_of_files
 from deptry.imports.location import Location
 from tests.utils import run_within_dir

@@ -44,10 +44,10 @@ def test_import_parser_py() -> None:
 def test_import_parser_ipynb() -> None:
     notebook_path = Path("tests/data/example_project/src/notebook.ipynb")

-    assert get_imported_modules_from_ipynb_file(notebook_path) == {
-        "click": [Location(notebook_path, 1, 0)],
-        "toml": [Location(notebook_path, 5, 0)],
-        "urllib3": [Location(notebook_path, 3, 0)],
+    assert get_imported_modules_from_list_of_files([notebook_path]) == {
+        "click": [Location(notebook_path, 2, 6)],
+        "toml": [Location(notebook_path, 5, 9)],
+        "urllib3": [Location(notebook_path, 3, 1)],
     }

From a7a04e40f4917fdc72be8c918745b2f2ad95bafe Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 15:47:34 +0100
Subject: [PATCH 2/9] improved modules related to import extraction in Rust

---
 src/imports/ipynb.rs               | 52 ++++++++++++++++++------------
 tests/unit/imports/test_extract.py |  4 +--
 2 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
index 00bbd5af..1cea7c7c 100644
--- a/src/imports/ipynb.rs
+++ b/src/imports/ipynb.rs
@@ -15,7 +15,7 @@ use super::shared::{
     extract_imports_from_ast, get_ast_from_file_content,
 };

-/// Processes multiple Python files in parallel to extract import statements and their locations.
+/// Processes multiple .ipynb files in parallel to extract import statements and their locations.
 /// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
 #[pyfunction]
 pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
@@ -46,7 +46,7 @@ pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> P
     convert_to_python_dict(py, all_imports)
 }

-/// Processes a single Python file to extract import statements and their locations.
+/// Processes a single .ipynb file to extract import statements and their locations.
 /// Accepts a single file path and returns a dictionary mapping module names to their import locations.
 #[pyfunction]
 pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult
@@ -56,8 +56,24 @@ pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult
     convert_to_python_dict(py, result)
 }

-/// Core helper function that extracts import statements and their locations from the content of a single Python file.
-/// Used internally by both parallel and single file processing functions.
+fn _extract_import_statements_from_notebook_cells(cells: &[serde_json::Value]) -> String {
+    let import_regex =
+        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
+
+    let import_statements: Vec<String> = cells
+        .iter()
+        .filter(|cell| cell["cell_type"] == "code")
+        .flat_map(|cell| cell["source"].as_array())
+        .flatten()
+        .filter_map(|line| line.as_str())
+        .filter(|line| import_regex.is_match(line))
+        .map(|line| line.to_string())
+        .collect();
+
+    import_statements.join("\n")
+}
+
+/// Core helper function that extracts import statements and their locations from the content of a single .ipynb file.
 fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
     let file_content = match read_file(path_str) {
         Ok(content) => content,
@@ -75,24 +91,18 @@ fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<
         }
     };

-    let cells = notebook["cells"].as_array().ok_or_else(|| {
-        PySyntaxError::new_err("Invalid notebook structure: 'cells' is not an array")
-    })?;
-
-    let import_regex =
-        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
-
-    let import_statements: Vec<String> = cells
-        .iter()
-        .filter(|cell| cell["cell_type"] == "code")
-        .flat_map(|cell| cell["source"].as_array())
-        .flatten()
-        .filter_map(|line| line.as_str())
-        .filter(|line| import_regex.is_match(line))
-        .map(|line| line.to_string())
-        .collect();
+    let cells = match notebook["cells"].as_array() {
+        Some(cells) => cells,
+        None => {
+            log::warn!(
+                "Warning: File {} is not a valid notebook: 'cells' is not an array. Skipping...",
+                path_str
+            );
+            return Ok(HashMap::new());
+        }
+    };

-    let imports_script = import_statements.join("\n");
+    let imports_script = _extract_import_statements_from_notebook_cells(cells);

     let ast = get_ast_from_file_content(&imports_script, path_str)
         .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 2a6fdd3a..03b849f0 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -119,7 +119,7 @@ def test_import_parser_file_encodings_ipynb(code_cell_content: list[str], encodi
         }
         f.write(json.dumps(file_content))

-    assert get_imported_modules_from_list_of_files([random_file]) == {"foo": [Location(random_file, 1, 0)]}
+    assert get_imported_modules_from_list_of_files([random_file]) == {"foo": [Location(random_file, 1, 8)]}


 def test_import_parser_file_encodings_warning(tmp_path: Path, caplog: LogCaptureFixture) -> None:
@@ -135,6 +135,6 @@ def test_import_parser_file_encodings_warning(tmp_path: Path, caplog: LogCapture

     # //TODO logging from Rust still includes its own warning and file + line number. Can we get rid of that?
     pattern = re.compile(
-        r"WARNING deptry.imports:imports.rs:\d+ Warning: File file1.py could not be read. Skipping...\n"
+        r"WARNING deptry.imports.py:py.rs:\d+ Warning: File file1.py could not be read. Skipping...\n"
     )
     assert pattern.search(caplog.text) is not None

From f76794f12cecc5653c472e98647b3ca3afb020fe Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 16:21:06 +0100
Subject: [PATCH 3/9] fix notebook extraction

---
 src/imports/ipynb.rs               | 17 ++++++-----------
 tests/unit/imports/test_extract.py |  4 ++--
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
index 1cea7c7c..dd4d6d3a 100644
--- a/src/imports/ipynb.rs
+++ b/src/imports/ipynb.rs
@@ -7,7 +7,6 @@ use pyo3::exceptions::PySyntaxError;
 use pyo3::prelude::*;
 use pyo3::types::PyString;
 use rayon::prelude::*;
-use regex::Regex;
 use std::collections::HashMap;

 use super::shared::{
@@ -56,21 +55,17 @@ pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult
     convert_to_python_dict(py, result)
 }

-fn _extract_import_statements_from_notebook_cells(cells: &[serde_json::Value]) -> String {
-    let import_regex =
-        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
-
-    let import_statements: Vec<String> = cells
+fn _extract_code_from_notebook_cells(cells: &[serde_json::Value]) -> String {
+    let code_lines: Vec<String> = cells
         .iter()
         .filter(|cell| cell["cell_type"] == "code")
         .flat_map(|cell| cell["source"].as_array())
         .flatten()
         .filter_map(|line| line.as_str())
-        .filter(|line| import_regex.is_match(line))
         .map(|line| line.to_string())
         .collect();

-    import_statements.join("\n")
+    code_lines.join("\n")
 }

 /// Core helper function that extracts import statements and their locations from the content of a single .ipynb file.
@@ -102,9 +97,9 @@ fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<
         }
     };

-    let imports_script = _extract_import_statements_from_notebook_cells(cells);
+    let imports_script = _extract_code_from_notebook_cells(cells);

     let ast = get_ast_from_file_content(&imports_script, path_str)
         .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 03b849f0..69177a53 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -46,8 +46,8 @@ def test_import_parser_ipynb() -> None:
     notebook_path = Path("tests/data/example_project/src/notebook.ipynb")

     assert get_imported_modules_from_list_of_files([notebook_path]) == {
-        "click": [Location(notebook_path, 2, 6)],
-        "toml": [Location(notebook_path, 5, 9)],
+        "click": [Location(notebook_path, 1, 8)],
+        "toml": [Location(notebook_path, 5, 8)],
         "urllib3": [Location(notebook_path, 3, 1)],
     }

From 08f2f64a514fd51d1655e9de94a6afcea626e5b4 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 16:25:01 +0100
Subject: [PATCH 4/9] remove chardet

---
 pdm.lock       | 13 +------------
 pyproject.toml |  1 -
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/pdm.lock b/pdm.lock
index 28439bd4..3a38d00d 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -5,7 +5,7 @@ groups = ["default", "dev", "docs", "typing"]
 strategy = ["cross_platform", "inherit_metadata"]
 lock_version = "4.4.1"
-content_hash = "sha256:b6907482d32747a9b5502c17aff64523173d8d2cdab8357e8755558793c14d81"
+content_hash = "sha256:075a94a78d7250b4bdc64a8e66445f285b6afec47004ca0e45aab3b0744eec29"

 [[package]]
 name = "babel"
@@ -43,17 +43,6 @@ files = [
     {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
 ]

-[[package]]
-name = "chardet"
-version = "5.2.0"
-requires_python = ">=3.7"
-summary = "Universal encoding detector for Python 3"
-groups = ["default"]
-files = [
-    {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"},
-    {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"},
-]
-
 [[package]]
 name = "charset-normalizer"
 version = "3.3.2"
diff --git a/pyproject.toml b/pyproject.toml
index eb5d4b41..a34128f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,6 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython",
 ]
 dependencies = [
-    "chardet>=4.0.0",
     "click>=8.0.0,<9",
     "pathspec>=0.9.0",
     "colorama>=0.4.6; sys_platform == 'win32'",

From d1e7813b8e2b14fe411f8e5b3a4ffe7e44575960 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sun, 17 Mar 2024 09:34:54 +0100
Subject: [PATCH 5/9] fix test

---
 tests/functional/cli/test_cli_requirements_txt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/functional/cli/test_cli_requirements_txt.py b/tests/functional/cli/test_cli_requirements_txt.py
index 59eccc17..4ca6f23b 100644
--- a/tests/functional/cli/test_cli_requirements_txt.py
+++ b/tests/functional/cli/test_cli_requirements_txt.py
@@ -98,7 +98,7 @@ def test_cli_single_requirements_txt(pip_venv_factory: PipVenvFactory) -> None:
                     "location": {
                         "file": str(Path("src/notebook.ipynb")),
                         "line": 3,
-                        "column": 0,
+                        "column": 1,
                     },
                 },
             ]

From 55d1dc25bce362741c9bed2f27623ea3ed4bf4ca Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 16:32:31 +0100
Subject: [PATCH 6/9] fix unit tests

---
 notebook_ok.ipynb                  |  1 -
 notebook_with_bad_encoding.ipynb   |  1 -
 notebook_with_syntax_error.ipynb   |  1 -
 tests/unit/imports/test_extract.py | 67 +++++++++++++++---------------
 4 files changed, 34 insertions(+), 36 deletions(-)
 delete mode 100644 notebook_ok.ipynb
 delete mode 100644 notebook_with_bad_encoding.ipynb
 delete mode 100644 notebook_with_syntax_error.ipynb

diff --git a/notebook_ok.ipynb b/notebook_ok.ipynb
deleted file mode 100644
index 7d0b4c84..00000000
--- a/notebook_ok.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"cells": [{"cell_type": "code", "source": ["import numpy\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 2}
diff --git a/notebook_with_bad_encoding.ipynb b/notebook_with_bad_encoding.ipynb
deleted file mode 100644
index efead06e..00000000
--- a/notebook_with_bad_encoding.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"cells": [{"cell_type": "code", "source": ["print('\u00e6\u00f8\u00e5')"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 2}
diff --git a/notebook_with_syntax_error.ipynb b/notebook_with_syntax_error.ipynb
deleted file mode 100644
index 2bcb357c..00000000
--- a/notebook_with_syntax_error.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"cells": [{"cell_type": "code", "source": ["import n invalid_syntax:::\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 2}
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 69177a53..091b2459 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -158,38 +158,39 @@ def test_import_parser_for_ipynb_errors(tmp_path: Path, caplog: LogCaptureFixtur
     notebook_ok = Path("notebook_ok.ipynb")
     notebook_with_syntax_error = Path("notebook_with_syntax_error.ipynb")

-    # Create a well-formed notebook
-    with notebook_ok.open("w") as f:
-        json.dump(
-            {
-                "cells": [{"cell_type": "code", "source": ["import numpy\n"]}],
-                "metadata": {},
-                "nbformat": 4,
-                "nbformat_minor": 2,
-            },
-            f,
-        )
+    with run_within_dir(tmp_path):
+        # Create a well-formed notebook
+        with notebook_ok.open("w") as f:
+            json.dump(
+                {
+                    "cells": [{"cell_type": "code", "source": ["import numpy\n"]}],
+                    "metadata": {},
+                    "nbformat": 4,
+                    "nbformat_minor": 2,
+                },
+                f,
+            )
+
+        # Create a notebook with invalid Python syntax in a code cell
+        with notebook_with_syntax_error.open("w") as f:
+            json.dump(
+                {
+                    "cells": [{"cell_type": "code", "source": ["import n invalid_syntax:::\n"]}],
+                    "metadata": {},
+                    "nbformat": 4,
+                    "nbformat_minor": 2,
+                },
+                f,
+            )
+
+        # Execute function and assert the result for well-formed notebook
+        with caplog.at_level(logging.WARNING):
+            assert get_imported_modules_from_list_of_files([
+                notebook_ok,
+                notebook_with_syntax_error,
+            ]) == {"numpy": [Location(file=Path("notebook_ok.ipynb"), line=1, column=8)]}

-    # Create a notebook with invalid Python syntax in a code cell
-    with notebook_with_syntax_error.open("w") as f:
-        json.dump(
-            {
-                "cells": [{"cell_type": "code", "source": ["import n invalid_syntax:::\n"]}],
-                "metadata": {},
-                "nbformat": 4,
-                "nbformat_minor": 2,
-            },
-            f,
+        assert re.search(
+            r"WARNING .*:ipynb.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
+            caplog.text,
         )
-
-    # Execute function and assert the result for well-formed notebook
-    with caplog.at_level(logging.WARNING):
-        assert get_imported_modules_from_list_of_files([
-            notebook_ok,
-            notebook_with_syntax_error,
-        ]) == {"numpy": [Location(file=Path("notebook_ok.ipynb"), line=1, column=8)]}
-
-    assert re.search(
-        r"WARNING .*:ipynb.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
-        caplog.text,
-    )

From fbfbbaf71accb49f34912f84deab7f0e083e2a1e Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 17:22:59 +0100
Subject: [PATCH 7/9] added type aliases

---
 package-lock.json                  |  6 +++
 src/imports/ipynb.rs               | 61 +++++++-----------------------
 src/imports/py.rs                  | 54 +++++++-------------------
 src/imports/shared.rs              | 45 +++++++++++++++++++++-
 tests/unit/imports/test_extract.py |  6 +--
 5 files changed, 79 insertions(+), 93 deletions(-)
 create mode 100644 package-lock.json

diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 00000000..11460110
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,6 @@
+{
+  "name": "deptry",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {}
+}
diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
index 0c638891..496908e8 100644
--- a/src/imports/ipynb.rs
+++ b/src/imports/ipynb.rs
@@ -9,12 +9,9 @@ use pyo3::types::PyString;
 use rayon::prelude::*;
 use std::collections::HashMap;

-use super::shared::{
-    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
-    extract_imports_from_ast, get_ast_from_file_content,
-};
+use super::shared;

 /// Processes multiple Python files in parallel to extract import statements and their locations.
 /// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
 #[pyfunction]
 pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
@@ -23,68 +20,38 @@ pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> P
         .map(|py_str| py_str.to_str().unwrap().to_owned())
         .collect();

-    // Process each file in parallel and collect results
     let results: Vec<_> = rust_file_paths
         .par_iter()
-        .map(|path_str| match _get_imports_from_ipynb_file(path_str) {
-            Ok(result) => (path_str, Ok(result)),
-            Err(e) => (path_str, Err(e)),
+        .map(|path_str| {
+            let result = _get_imports_from_ipynb_file(path_str);
+            shared::ThreadResult {
+                file: path_str.to_string(),
+                result,
+            }
         })
         .collect();

-    // Merge results from each thread
-    let mut all_imports = HashMap::new();
-    let mut errors = Vec::new();
-
-    for (path, file_result) in results {
-        match file_result {
-            Ok(file_result) => {
-                for (module, locations) in file_result {
-                    all_imports
-                        .entry(module)
-                        .or_insert_with(Vec::new)
-                        .extend(locations);
-                }
-            }
-            Err(e) => errors.push((path.to_string(), e)),
-        }
-    }
-
-    for (path, error) in errors {
-        log::warn!(
-            "Warning: Skipping processing of {} because of the following error: \"{}\".",
-            path,
-            error
-        );
-    }
+    let (all_imports, errors) = shared::merge_results_from_threads(results);
+    shared::log_python_errors_as_warnings(&errors);

-    convert_to_python_dict(py, all_imports)
+    shared::convert_to_python_dict(py, all_imports)
 }

 /// Core helper function that extracts import statements and their locations from a single .ipynb file.
 /// Ensures robust error handling and provides clearer, more detailed comments.
 fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
-    // Read the content of the .ipynb file, handling potential IO errors.
     let file_content = read_file(path_str)?;
-
-    // Deserialize the JSON content of the notebook, handling syntax errors.
     let notebook: serde_json::Value =
         serde_json::from_str(&file_content).map_err(|e| PySyntaxError::new_err(e.to_string()))?;
-
-    // Extract the code cells from the notebook, handling unexpected data structures.
     let cells = notebook["cells"]
         .as_array()
         .ok_or_else(|| PySyntaxError::new_err("Expected 'cells' to be an array"))?;
-
-    // Concatenate the code from all code cells into a single string.
     let python_code = _extract_code_from_notebook_cells(cells);

-    // Parse the Python code to AST and extract import statements.
-    let ast = get_ast_from_file_content(&python_code, path_str)?;
-    let imported_modules = extract_imports_from_ast(ast);
+    let ast = shared::get_ast_from_file_content(&python_code, path_str)?;
+    let imported_modules = shared::extract_imports_from_ast(ast);

-    // Convert the extracted import data into location objects.
-    Ok(convert_imports_with_textranges_to_location_objects(
+    Ok(shared::convert_imports_with_textranges_to_location_objects(
         imported_modules,
         path_str,
         &python_code,
diff --git a/src/imports/py.rs b/src/imports/py.rs
index 46f93ff7..e27a8684 100644
--- a/src/imports/py.rs
+++ b/src/imports/py.rs
@@ -8,10 +8,7 @@ use pyo3::types::PyString;
 use rayon::prelude::*;
 use std::collections::HashMap;

-use super::shared::{
-    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
-    extract_imports_from_ast, get_ast_from_file_content,
-};
+use super::shared;

 /// Processes multiple Python files in parallel to extract import statements and their locations.
 /// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
@@ -22,54 +19,29 @@ pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyRe
         .map(|py_str| py_str.to_str().unwrap().to_owned())
         .collect();

-    // Process each file in parallel and collect results
     let results: Vec<_> = rust_file_paths
         .par_iter()
-        .map(|path_str| match _get_imports_from_py_file(path_str) {
-            Ok(result) => (path_str, Ok(result)),
-            Err(e) => (path_str, Err(e)),
+        .map(|path_str| {
+            let result = _get_imports_from_py_file(path_str);
+            shared::ThreadResult {
+                file: path_str.to_string(),
+                result,
+            }
         })
         .collect();

-    // Merge results from each thread
-    let mut all_imports = HashMap::new();
-    let mut errors = Vec::new();
-
-    for (path, file_result) in results {
-        match file_result {
-            Ok(file_result) => {
-                for (module, locations) in file_result {
-                    all_imports
-                        .entry(module)
-                        .or_insert_with(Vec::new)
-                        .extend(locations);
-                }
-            }
-            Err(e) => errors.push((path.to_string(), e)),
-        }
-    }
-
-    for (path, error) in errors {
-        log::warn!(
-            "Warning: Skipping processing of {} because of the following error: \"{}\".",
-            path,
-            error
-        );
-    }
-
-    convert_to_python_dict(py, all_imports)
+    let (all_imports, errors) = shared::merge_results_from_threads(results);
+    shared::log_python_errors_as_warnings(&errors);
+    shared::convert_to_python_dict(py, all_imports)
 }

 /// Core helper function that extracts import statements and their locations from the content of a single Python file.
 /// Used internally by both parallel and single file processing functions.
 fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
     let file_content = read_file(path_str)?;
-
-    let ast = get_ast_from_file_content(&file_content, path_str)?;
-
-    let imported_modules = extract_imports_from_ast(ast);
-
-    Ok(convert_imports_with_textranges_to_location_objects(
+    let ast = shared::get_ast_from_file_content(&file_content, path_str)?;
+    let imported_modules = shared::extract_imports_from_ast(ast);
+    Ok(shared::convert_imports_with_textranges_to_location_objects(
         imported_modules,
         path_str,
         &file_content,
diff --git a/src/imports/shared.rs b/src/imports/shared.rs
index b2afd04b..87ea3996 100644
--- a/src/imports/shared.rs
+++ b/src/imports/shared.rs
@@ -13,6 +13,14 @@ use rustpython_parser::{parse, Mode};
 use std::collections::HashMap;
 use visitor::ImportVisitor;

+pub type FileToImportsMap = HashMap<String, Vec<Location>>;
+pub type ErrorList = Vec<(String, PyErr)>;
+
+pub struct ThreadResult {
+    pub file: String,
+    pub result: PyResult<FileToImportsMap>,
+}
+
 /// Parses the content of a Python file into an abstract syntax tree (AST).
 pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResult<Mod> {
     let ast = parse(file_content, Mode::Module, file_path)
@@ -40,7 +48,7 @@ pub fn convert_imports_with_textranges_to_location_objects(
     imports: HashMap<String, Vec<TextRange>>,
     file_path: &str,
     source_code: &str,
-) -> HashMap<String, Vec<Location>> {
+) -> FileToImportsMap {
     let line_index = LineIndex::from_source_text(source_code);

     let mut imports_with_locations = HashMap::<String, Vec<Location>>::new();
@@ -68,7 +76,7 @@ pub fn convert_imports_with_textranges_to_location_objects(
 pub fn convert_to_python_dict(
     py: Python<'_>,
-    imports_with_locations: HashMap<String, Vec<Location>>,
+    imports_with_locations: FileToImportsMap,
 ) -> PyResult<PyObject> {
     let imports_dict = PyDict::new(py);
@@ -83,3 +91,36 @@ pub fn convert_to_python_dict(

     Ok(imports_dict.into())
 }
+
+// Shared logic for merging results from different threads.
+pub fn merge_results_from_threads(results: Vec<ThreadResult>) -> (FileToImportsMap, ErrorList) {
+    let mut all_imports = HashMap::new();
+    let mut errors = Vec::new();
+
+    for thread_result in results {
+        match thread_result.result {
+            Ok(file_result) => {
+                for (module, locations) in file_result {
+                    all_imports
+                        .entry(module)
+                        .or_insert_with(Vec::new)
+                        .extend(locations);
+                }
+            }
+            Err(e) => errors.push((thread_result.file, e)),
+        }
+    }
+
+    (all_imports, errors)
+}
+
+// Shared logic for logging errors.
+pub fn log_python_errors_as_warnings(errors: &[(String, PyErr)]) {
+    for (path, error) in errors {
+        log::warn!(
+            "Warning: Skipping processing of {} because of the following error: \"{}\".",
+            path,
+            error
+        );
+    }
+}
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 091b2459..165eebcd 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -145,11 +145,11 @@ def test_import_parser_errors(tmp_path: Path, caplog: LogCaptureFixture) -> None
         ]) == {"black": [Location(file=Path("file_ok.py"), line=1, column=8)]}

     assert re.search(
-        r"WARNING deptry.imports.py:py.rs:\d+ Warning: Skipping processing of file_with_bad_encoding.py because of the following error: \"OSError: Failed to decode file content with the detected encoding.\".",
+        r"WARNING .*:shared.rs:\d+ Warning: Skipping processing of file_with_bad_encoding.py because of the following error: \"OSError: Failed to decode file content with the detected encoding.\".",
         caplog.text,
     )
     assert re.search(
-        r"WARNING deptry.imports.py:py.rs:\d+ Warning: Skipping processing of file_with_syntax_error.py because of the following error: \"SyntaxError: invalid syntax. Got unexpected token ':' at byte offset 15\".",
+        r"WARNING .*:shared.rs:\d+ Warning: Skipping processing of file_with_syntax_error.py because of the following error: \"SyntaxError: invalid syntax. Got unexpected token ':' at byte offset 15\".",
         caplog.text,
     )

@@ -191,6 +191,6 @@ def test_import_parser_for_ipynb_errors(tmp_path: Path, caplog: LogCaptureFixtur
         ]) == {"numpy": [Location(file=Path("notebook_ok.ipynb"), line=1, column=8)]}

         assert re.search(
-            r"WARNING .*:ipynb.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
+            r"WARNING .*:shared.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
             caplog.text,
         )

From 4e8cea5314fcc0c39c7c5751f49c480f4d7cef34 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 17:29:31 +0100
Subject: [PATCH 8/9] remove missing function

---
 python/deptry/rust.pyi | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/deptry/rust.pyi b/python/deptry/rust.pyi
index 9cad70fa..98ae6f4a 100644
--- a/python/deptry/rust.pyi
+++ b/python/deptry/rust.pyi
@@ -2,8 +2,6 @@ from .rust import Location as RustLocation

 def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
 def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
-def get_imports_from_py_file(file_path: str) -> dict[str, list[RustLocation]]: ...
-def get_imports_from_ipynb_file(file_path: str) -> dict[str, list[RustLocation]]: ...

 class Location:
     file: str

From 818722edc20abd05d8cfae45bbc03adc58f05932 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 17:31:51 +0100
Subject: [PATCH 9/9] remove file that was added by accident

---
 package-lock.json | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 package-lock.json

diff --git a/package-lock.json b/package-lock.json
deleted file mode 100644
index 11460110..00000000
--- a/package-lock.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "name": "deptry",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {}
-}