From b6bffda11c284bdd43fca02ca1124dd702673c57 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 15:09:34 +0100
Subject: [PATCH 1/9] first draft of notebook extractor in rust

---
 Cargo.lock                                    |  38 +++++++
 Cargo.toml                                    |   1 +
 python/deptry/imports/extract.py              |  21 +---
 python/deptry/imports/extractors/__init__.py  |   5 -
 python/deptry/imports/extractors/base.py      |  55 ---------
 .../extractors/notebook_import_extractor.py   |  58 ----------
 python/deptry/rust.pyi                        |   2 +
 src/imports/ipynb.rs                          | 107 ++++++++++++++++++
 src/imports/mod.rs                            |   3 +
 src/imports/py.rs                             |  79 +++++++++++++
 src/{imports.rs => imports/shared.rs}         |  75 +-----------
 src/lib.rs                                    |  12 +-
 tests/unit/imports/test_extract.py            |  10 +-
 13 files changed, 255 insertions(+), 211 deletions(-)
 delete mode 100644 python/deptry/imports/extractors/__init__.py
 delete mode 100644 python/deptry/imports/extractors/base.py
 delete mode 100644 python/deptry/imports/extractors/notebook_import_extractor.py
 create mode 100644 src/imports/ipynb.rs
 create mode 100644 src/imports/mod.rs
 create mode 100644 src/imports/py.rs
 rename src/{imports.rs => imports/shared.rs} (51%)

diff --git a/Cargo.lock b/Cargo.lock
index cf366939..efcac224 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -139,6 +139,7 @@ dependencies = [
  "regex",
  "rustpython-ast",
  "rustpython-parser",
+ "serde_json",
 ]

 [[package]]
@@ -254,6 +255,12 @@ dependencies = [
  "either",
 ]

+[[package]]
+name = "itoa"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
+
 [[package]]
 name = "keccak"
 version = "0.1.5"
@@ -781,6 +788,37 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"

+[[package]]
+name = "serde"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.52",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.114"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
 [[package]]
 name = "sha3"
 version = "0.9.1"
diff --git a/Cargo.toml b/Cargo.toml
index 07afc45f..44d02e46 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,3 +18,4 @@ rayon = "1.9.0"
 regex = "1.10.3"
 rustpython-ast = { version = "0.3.0", features = ["visitor"] }
 rustpython-parser = "0.3.0"
+serde_json = "1.0.114"
diff --git a/python/deptry/imports/extract.py b/python/deptry/imports/extract.py
index 36701dbd..293630f6 100644
--- a/python/deptry/imports/extract.py
+++ b/python/deptry/imports/extract.py
@@ -4,8 +4,7 @@
 from collections import defaultdict
 from typing import TYPE_CHECKING

-from deptry.imports.extractors import NotebookImportExtractor
-from deptry.rust import get_imports_from_py_files
+from deptry.rust import get_imports_from_ipynb_files, get_imports_from_py_files

 if TYPE_CHECKING:
     from pathlib import Path
@@ -19,7 +18,7 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
     logging.info("Scanning %d %s...", len(list_of_files), "files" if len(list_of_files) > 1 else "file")

     py_files = [str(file) for file in list_of_files if file.suffix == ".py"]
-    ipynb_files = [file for file in list_of_files if file.suffix == ".ipynb"]
+    ipynb_files = [str(file) for file in list_of_files if file.suffix == ".ipynb"]

     modules: dict[str, list[Location]] = defaultdict(list)

@@ -29,9 +28,10 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
     for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
         modules[module].extend(locations)

-    # Process each .ipynb file individually
-    for file in ipynb_files:
-        for module, locations in get_imported_modules_from_ipynb_file(file).items():
+    # Process all .ipynb files in parallel using Rust
+    if ipynb_files:
+        rust_result = get_imports_from_ipynb_files(ipynb_files)
+        for module, locations in convert_rust_locations_to_python_locations(rust_result).items():
             modules[module].extend(locations)

     logging.debug("All imported modules: %s\n", modules)
@@ -39,15 +39,6 @@ def get_imported_modules_from_list_of_files(list_of_files: list[Path]) -> dict[s
     return modules


-def get_imported_modules_from_ipynb_file(path_to_file: Path) -> dict[str, list[Location]]:
-    logging.debug("Scanning %s...", path_to_file)
-
-    modules = NotebookImportExtractor(path_to_file).extract_imports()
-
-    logging.debug("Found the following imports in %s: %s", path_to_file, modules)
-    return modules
-
-
 def convert_rust_locations_to_python_locations(
     imported_modules: dict[str, list[RustLocation]],
 ) -> dict[str, list[Location]]:
diff --git a/python/deptry/imports/extractors/__init__.py b/python/deptry/imports/extractors/__init__.py
deleted file mode 100644
index ba141ce1..00000000
--- a/python/deptry/imports/extractors/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from __future__ import annotations
-
-from deptry.imports.extractors.notebook_import_extractor import NotebookImportExtractor
-
-__all__ = ("NotebookImportExtractor",)
diff --git a/python/deptry/imports/extractors/base.py b/python/deptry/imports/extractors/base.py
deleted file mode 100644
index 7df4bdc9..00000000
--- a/python/deptry/imports/extractors/base.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from __future__ import annotations
-
-import ast
-from abc import ABC, abstractmethod
-from collections import defaultdict
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
-import chardet
-
-from deptry.imports.location import Location
-
-if TYPE_CHECKING:
-    from pathlib import Path
-
-
-@dataclass
-class ImportExtractor(ABC):
-    """
-    Base class for other classes that can be used to extract the imported modules from a file.
-    """
-
-    file: Path
-
-    @abstractmethod
-    def extract_imports(self) -> dict[str, list[Location]]:
-        raise NotImplementedError()
-
-    def _extract_imports_from_ast(self, tree: ast.AST) -> dict[str, list[Location]]:
-        """
-        Given an Abstract Syntax Tree, find the imported top-level modules.
-        For example, given the source tree of a file with contents:
-
-            from pandas.tools import scatter_matrix
-
-        Will return the set {"pandas"}.
- """ - - imported_modules: dict[str, list[Location]] = defaultdict(list) - - for node in ast.walk(tree): - if isinstance(node, ast.Import): - for module in node.names: - imported_modules[module.name.split(".")[0]].append( - Location(self.file, node.lineno, node.col_offset) - ) - elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0: - imported_modules[node.module.split(".")[0]].append(Location(self.file, node.lineno, node.col_offset)) - - return imported_modules - - @staticmethod - def _get_file_encoding(file: Path) -> str: - with file.open("rb") as f: - return chardet.detect(f.read())["encoding"] diff --git a/python/deptry/imports/extractors/notebook_import_extractor.py b/python/deptry/imports/extractors/notebook_import_extractor.py deleted file mode 100644 index 18b307b7..00000000 --- a/python/deptry/imports/extractors/notebook_import_extractor.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -import ast -import itertools -import json -import logging -import re -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any - -from deptry.imports.extractors.base import ImportExtractor - -if TYPE_CHECKING: - from pathlib import Path - - from deptry.imports.location import Location - - -@dataclass -class NotebookImportExtractor(ImportExtractor): - """Extract import statements from a Jupyter notebook.""" - - def extract_imports(self) -> dict[str, list[Location]]: - """Extract the imported top-level modules from all code cells in the Jupyter Notebook.""" - notebook = self._read_ipynb_file(self.file) - if not notebook: - return {} - - cells = self._keep_code_cells(notebook) - import_statements = [self._extract_import_statements_from_cell(cell) for cell in cells] - tree = ast.parse("\n".join(itertools.chain.from_iterable(import_statements)), str(self.file)) - return self._extract_imports_from_ast(tree) - - @classmethod - def _read_ipynb_file(cls, path_to_ipynb: Path) -> dict[str, Any] | None: - try: - with path_to_ipynb.open() as ipynb_file: - notebook: dict[str, Any] = json.load(ipynb_file) - except ValueError: - try: - with path_to_ipynb.open(encoding=cls._get_file_encoding(path_to_ipynb)) as ipynb_file: - notebook = json.load(ipynb_file, strict=False) - except UnicodeDecodeError: - logging.warning("Warning: File %s could not be decoded. Skipping...", path_to_ipynb) - return None - return notebook - - @staticmethod - def _keep_code_cells(notebook: dict[str, Any]) -> list[dict[str, Any]]: - return [cell for cell in notebook["cells"] if cell["cell_type"] == "code"] - - @staticmethod - def _contains_import_statements(line: str) -> bool: - return re.search(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?", line) is not None - - @classmethod - def _extract_import_statements_from_cell(cls, cell: dict[str, Any]) -> list[str]: - return [line for line in cell["source"] if cls._contains_import_statements(line)] diff --git a/python/deptry/rust.pyi b/python/deptry/rust.pyi index f4344df9..9cad70fa 100644 --- a/python/deptry/rust.pyi +++ b/python/deptry/rust.pyi @@ -1,7 +1,9 @@ from .rust import Location as RustLocation def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ... +def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ... def get_imports_from_py_file(file_path: str) -> dict[str, list[RustLocation]]: ... +def get_imports_from_ipynb_file(file_path: str) -> dict[str, list[RustLocation]]: ... 
 class Location:
     file: str
diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
new file mode 100644
index 00000000..00bbd5af
--- /dev/null
+++ b/src/imports/ipynb.rs
@@ -0,0 +1,107 @@
+use crate::file_utils;
+use crate::location;
+
+use file_utils::read_file;
+use location::Location;
+use pyo3::exceptions::PySyntaxError;
+use pyo3::prelude::*;
+use pyo3::types::PyString;
+use rayon::prelude::*;
+use regex::Regex;
+use std::collections::HashMap;
+
+use super::shared::{
+    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
+    extract_imports_from_ast, get_ast_from_file_content,
+};
+
+/// Processes multiple Python files in parallel to extract import statements and their locations.
+/// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
+    let rust_file_paths: Vec<String> = file_paths
+        .iter()
+        .map(|py_str| py_str.to_str().unwrap().to_owned())
+        .collect();
+
+    // Process each file in parallel and collect results
+    let results: PyResult<Vec<HashMap<String, Vec<Location>>>> = rust_file_paths
+        .par_iter()
+        .map(|path_str| _get_imports_from_ipynb_file(path_str))
+        .collect();
+
+    let results = results?;
+
+    // Merge results from each thread
+    let mut all_imports = HashMap::new();
+    for file_result in results {
+        for (module, locations) in file_result {
+            all_imports
+                .entry(module)
+                .or_insert_with(Vec::new)
+                .extend(locations);
+        }
+    }
+
+    convert_to_python_dict(py, all_imports)
+}
+
+/// Processes a single Python file to extract import statements and their locations.
+/// Accepts a single file path and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult<PyObject> {
+    let path_str = file_path.to_str()?;
+    let result = _get_imports_from_ipynb_file(path_str)?;
+
+    convert_to_python_dict(py, result)
+}
+
+/// Core helper function that extracts import statements and their locations from the content of a single Python file.
+/// Used internally by both parallel and single file processing functions.
+fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
+    let file_content = match read_file(path_str) {
+        Ok(content) => content,
+        Err(_) => {
+            log::warn!("Warning: File {} could not be read. Skipping...", path_str);
+            return Ok(HashMap::new());
+        }
+    };
+
+    let notebook: serde_json::Value = match serde_json::from_str(&file_content) {
+        Ok(content) => content,
+        Err(_) => {
+            log::warn!("Warning: File {} is not valid JSON. Skipping...", path_str);
+            return Ok(HashMap::new());
+        }
+    };
+
+    let cells = notebook["cells"].as_array().ok_or_else(|| {
+        PySyntaxError::new_err("Invalid notebook structure: 'cells' is not an array")
+    })?;
+
+    let import_regex =
+        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
+
+    let import_statements: Vec<String> = cells
+        .iter()
+        .filter(|cell| cell["cell_type"] == "code")
+        .flat_map(|cell| cell["source"].as_array())
+        .flatten()
+        .filter_map(|line| line.as_str())
+        .filter(|line| import_regex.is_match(line))
+        .map(|line| line.to_string())
+        .collect();
+
+    let imports_script = import_statements.join("\n");
+
+    let ast = get_ast_from_file_content(&imports_script, path_str)
+        .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
+
+    let imported_modules = extract_imports_from_ast(ast);
+
+    Ok(convert_imports_with_textranges_to_location_objects(
+        imported_modules,
+        path_str,
+        &file_content,
+    ))
+}
diff --git a/src/imports/mod.rs b/src/imports/mod.rs
new file mode 100644
index 00000000..07dca57d
--- /dev/null
+++ b/src/imports/mod.rs
@@ -0,0 +1,3 @@
+pub mod ipynb;
+pub mod py;
+pub mod shared;
diff --git a/src/imports/py.rs b/src/imports/py.rs
new file mode 100644
index 00000000..f3fb740c
--- /dev/null
+++ b/src/imports/py.rs
@@ -0,0 +1,79 @@
+use crate::file_utils;
+use crate::location;
+
+use file_utils::read_file;
+use location::Location;
+use pyo3::exceptions::PySyntaxError;
+use pyo3::prelude::*;
+use pyo3::types::PyString;
+use rayon::prelude::*;
+use std::collections::HashMap;
+
+use super::shared::{
+    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
+    extract_imports_from_ast, get_ast_from_file_content,
+};
+
+/// Processes multiple Python files in parallel to extract import statements and their locations.
+/// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
+    let rust_file_paths: Vec<String> = file_paths
+        .iter()
+        .map(|py_str| py_str.to_str().unwrap().to_owned())
+        .collect();
+
+    // Process each file in parallel and collect results
+    let results: PyResult<Vec<HashMap<String, Vec<Location>>>> = rust_file_paths
+        .par_iter()
+        .map(|path_str| _get_imports_from_py_file(path_str))
+        .collect();
+
+    let results = results?;
+
+    // Merge results from each thread
+    let mut all_imports = HashMap::new();
+    for file_result in results {
+        for (module, locations) in file_result {
+            all_imports
+                .entry(module)
+                .or_insert_with(Vec::new)
+                .extend(locations);
+        }
+    }
+
+    convert_to_python_dict(py, all_imports)
+}
+
+/// Processes a single Python file to extract import statements and their locations.
+/// Accepts a single file path and returns a dictionary mapping module names to their import locations.
+#[pyfunction]
+pub fn get_imports_from_py_file(py: Python, file_path: &PyString) -> PyResult<PyObject> {
+    let path_str = file_path.to_str()?;
+    let result = _get_imports_from_py_file(path_str)?;
+
+    convert_to_python_dict(py, result)
+}
+
+/// Core helper function that extracts import statements and their locations from the content of a single Python file.
+/// Used internally by both parallel and single file processing functions.
+fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
+    let file_content = match read_file(path_str) {
+        Ok(content) => content,
+        Err(_) => {
+            log::warn!("Warning: File {} could not be read. Skipping...", path_str);
+            return Ok(HashMap::new());
+        }
+    };
+
+    let ast = get_ast_from_file_content(&file_content, path_str)
+        .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
+
+    let imported_modules = extract_imports_from_ast(ast);
+
+    Ok(convert_imports_with_textranges_to_location_objects(
+        imported_modules,
+        path_str,
+        &file_content,
+    ))
+}
diff --git a/src/imports.rs b/src/imports/shared.rs
similarity index 51%
rename from src/imports.rs
rename to src/imports/shared.rs
index 25db77c7..65cca9ae 100644
--- a/src/imports.rs
+++ b/src/imports/shared.rs
@@ -1,13 +1,10 @@
-use crate::file_utils;
 use crate::location;
 use crate::visitor;

-use file_utils::read_file;
 use location::Location;
 use pyo3::exceptions::PySyntaxError;
 use pyo3::prelude::*;
-use pyo3::types::{PyDict, PyList, PyString};
-use rayon::prelude::*;
+use pyo3::types::{PyDict, PyList};
 use rustpython_ast::Mod;
 use rustpython_ast::Visitor;
 use rustpython_parser::source_code::LineIndex;
@@ -16,70 +13,6 @@ use rustpython_parser::{parse, Mode};
 use std::collections::HashMap;
 use visitor::ImportVisitor;

-/// Processes multiple Python files in parallel to extract import statements and their locations.
-/// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
-#[pyfunction]
-pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
-    let rust_file_paths: Vec<String> = file_paths
-        .iter()
-        .map(|py_str| py_str.to_str().unwrap().to_owned())
-        .collect();
-
-    // Process each file in parallel and collect results
-    let results: PyResult<Vec<HashMap<String, Vec<Location>>>> = rust_file_paths
-        .par_iter()
-        .map(|path_str| _get_imports_from_py_file(path_str))
-        .collect();
-
-    let results = results?;
-
-    // Merge results from each thread
-    let mut all_imports = HashMap::new();
-    for file_result in results {
-        for (module, locations) in file_result {
-            all_imports
-                .entry(module)
-                .or_insert_with(Vec::new)
-                .extend(locations);
-        }
-    }
-
-    convert_to_python_dict(py, all_imports)
-}
-
-/// Processes a single Python file to extract import statements and their locations.
-/// Accepts a single file path and returns a dictionary mapping module names to their import locations.
-#[pyfunction]
-pub fn get_imports_from_py_file(py: Python, file_path: &PyString) -> PyResult<PyObject> {
-    let path_str = file_path.to_str()?;
-    let result = _get_imports_from_py_file(path_str)?;
-
-    convert_to_python_dict(py, result)
-}
-
-/// Core helper function that extracts import statements and their locations from the content of a single Python file.
-/// Used internally by both parallel and single file processing functions.
-fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
-    let file_content = match read_file(path_str) {
-        Ok(content) => content,
-        Err(_) => {
-            log::warn!("Warning: File {} could not be read. Skipping...", path_str);
-            return Ok(HashMap::new());
-        }
-    };
-
-    let ast = get_ast_from_file_content(&file_content, path_str)
-        .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
-
-    let imported_modules = extract_imports_from_ast(ast);
-
-    Ok(convert_imports_with_textranges_to_location_objects(
-        imported_modules,
-        path_str,
-        &file_content,
-    ))
-}
-
 /// Parses the content of a Python file into an abstract syntax tree (AST).
 pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResult<Mod> {
     parse(file_content, Mode::Module, file_path)
@@ -88,7 +21,7 @@ pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResul

 /// Iterates through an AST to identify and collect import statements, and returns them together with their
 /// respective TextRange for each occurrence.
-fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {
+pub fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {
     let mut visitor = ImportVisitor::new();

     if let Mod::Module(module) = ast {
@@ -102,7 +35,7 @@ fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {

 /// Converts textual ranges of import statements into structured location objects.
 /// Facilitates the mapping of imports to detailed, file-specific location data (file, line, column).
-fn convert_imports_with_textranges_to_location_objects(
+pub fn convert_imports_with_textranges_to_location_objects(
     imports: HashMap<String, Vec<TextRange>>,
     file_path: &str,
     source_code: &str,
@@ -132,7 +65,7 @@ fn convert_imports_with_textranges_to_location_objects(
 }

 /// Transforms a Rust HashMap containing import data into a Python dictionary suitable for Python-side consumption.
-fn convert_to_python_dict(
+pub fn convert_to_python_dict(
     py: Python<'_>,
     imports_with_locations: HashMap<String, Vec<Location>>,
 ) -> PyResult<PyObject> {
diff --git a/src/lib.rs b/src/lib.rs
index defb4cb6..ba37d250 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,8 +13,16 @@ use location::Location;
 fn rust(_py: Python, m: &PyModule) -> PyResult<()> {
     pyo3_log::init(); // Initialize logging to forward to Python's logger

-    m.add_function(wrap_pyfunction!(imports::get_imports_from_py_files, m)?)?;
-    m.add_function(wrap_pyfunction!(imports::get_imports_from_py_file, m)?)?;
+    m.add_function(wrap_pyfunction!(imports::py::get_imports_from_py_files, m)?)?;
+    m.add_function(wrap_pyfunction!(imports::py::get_imports_from_py_file, m)?)?;
+    m.add_function(wrap_pyfunction!(
+        imports::ipynb::get_imports_from_ipynb_files,
+        m
+    )?)?;
+    m.add_function(wrap_pyfunction!(
+        imports::ipynb::get_imports_from_ipynb_file,
+        m
+    )?)?;
     m.add_class::<Location>()?;
     Ok(())
 }
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 05dd79a5..2a6fdd3a 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -9,7 +9,7 @@

 import pytest

-from deptry.imports.extract import get_imported_modules_from_ipynb_file, get_imported_modules_from_list_of_files
+from deptry.imports.extract import get_imported_modules_from_list_of_files
 from deptry.imports.location import Location
 from tests.utils import run_within_dir

@@ -44,10 +44,10 @@ def test_import_parser_py() -> None:
 def test_import_parser_ipynb() -> None:
     notebook_path = Path("tests/data/example_project/src/notebook.ipynb")

-    assert get_imported_modules_from_ipynb_file(notebook_path) == {
-        "click": [Location(notebook_path, 1, 0)],
-        "toml": [Location(notebook_path, 5, 0)],
-        "urllib3": [Location(notebook_path, 3, 0)],
+    assert get_imported_modules_from_list_of_files([notebook_path]) == {
+        "click": [Location(notebook_path, 2, 6)],
+        "toml": [Location(notebook_path, 5, 9)],
+        "urllib3": [Location(notebook_path, 3, 1)],
     }

From a7a04e40f4917fdc72be8c918745b2f2ad95bafe Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 15:47:34 +0100
Subject: [PATCH 2/9] improved modules related to import extraction in Rust

---
 src/imports/ipynb.rs               | 52 ++++++++++++++++++------------
 tests/unit/imports/test_extract.py |  4 +--
 2 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
index 00bbd5af..1cea7c7c 100644
--- a/src/imports/ipynb.rs
+++ b/src/imports/ipynb.rs
@@ -15,7 +15,7 @@ use super::shared::{
     extract_imports_from_ast, get_ast_from_file_content,
 };

-/// Processes multiple Python files in parallel to extract import statements and their locations.
+/// Processes multiple .ipynb files in parallel to extract import statements and their locations.
 /// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
 #[pyfunction]
 pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
@@ -46,7 +46,7 @@ pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> P
     convert_to_python_dict(py, all_imports)
 }

-/// Processes a single Python file to extract import statements and their locations.
+/// Processes a single .ipynb file to extract import statements and their locations.
 /// Accepts a single file path and returns a dictionary mapping module names to their import locations.
 #[pyfunction]
 pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult
@@ -56,8 +56,24 @@ pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult
     convert_to_python_dict(py, result)
 }

-/// Core helper function that extracts import statements and their locations from the content of a single Python file.
-/// Used internally by both parallel and single file processing functions.
+fn _extract_import_statements_from_notebook_cells(cells: &[serde_json::Value]) -> String {
+    let import_regex =
+        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
+
+    let import_statements: Vec<String> = cells
+        .iter()
+        .filter(|cell| cell["cell_type"] == "code")
+        .flat_map(|cell| cell["source"].as_array())
+        .flatten()
+        .filter_map(|line| line.as_str())
+        .filter(|line| import_regex.is_match(line))
+        .map(|line| line.to_string())
+        .collect();
+
+    import_statements.join("\n")
+}
+
+/// Core helper function that extracts import statements and their locations from the content of a single .ipynb file.
 fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
     let file_content = match read_file(path_str) {
         Ok(content) => content,
@@ -75,24 +91,18 @@ fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<
         }
     };

-    let cells = notebook["cells"].as_array().ok_or_else(|| {
-        PySyntaxError::new_err("Invalid notebook structure: 'cells' is not an array")
-    })?;
-
-    let import_regex =
-        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
-
-    let import_statements: Vec<String> = cells
-        .iter()
-        .filter(|cell| cell["cell_type"] == "code")
-        .flat_map(|cell| cell["source"].as_array())
-        .flatten()
-        .filter_map(|line| line.as_str())
-        .filter(|line| import_regex.is_match(line))
-        .map(|line| line.to_string())
-        .collect();
+    let cells = match notebook["cells"].as_array() {
+        Some(cells) => cells,
+        None => {
+            log::warn!(
+                "Warning: File {} is not a valid notebook: 'cells' is not an array. Skipping...",
+                path_str
+            );
+            return Ok(HashMap::new());
+        }
+    };

-    let imports_script = import_statements.join("\n");
+    let imports_script = _extract_import_statements_from_notebook_cells(cells);

     let ast = get_ast_from_file_content(&imports_script, path_str)
         .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 2a6fdd3a..03b849f0 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -119,7 +119,7 @@ def test_import_parser_file_encodings_ipynb(code_cell_content: list[str], encodi
         }
         f.write(json.dumps(file_content))

-    assert get_imported_modules_from_list_of_files([random_file]) == {"foo": [Location(random_file, 1, 0)]}
+    assert get_imported_modules_from_list_of_files([random_file]) == {"foo": [Location(random_file, 1, 8)]}


 def test_import_parser_file_encodings_warning(tmp_path: Path, caplog: LogCaptureFixture) -> None:
@@ -135,6 +135,6 @@ def test_import_parser_file_encodings_warning(tmp_path: Path, caplog: LogCapture

     # //TODO logging from Rust still includes its own warning and file + line number. Can we get rid of that?
     pattern = re.compile(
-        r"WARNING deptry.imports:imports.rs:\d+ Warning: File file1.py could not be read. Skipping...\n"
+        r"WARNING deptry.imports.py:py.rs:\d+ Warning: File file1.py could not be read. Skipping...\n"
     )
     assert pattern.search(caplog.text) is not None

From f76794f12cecc5653c472e98647b3ca3afb020fe Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 16:21:06 +0100
Subject: [PATCH 3/9] fix notebook extraction

---
 src/imports/ipynb.rs               | 17 ++++++-----------
 tests/unit/imports/test_extract.py |  4 ++--
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
index 1cea7c7c..dd4d6d3a 100644
--- a/src/imports/ipynb.rs
+++ b/src/imports/ipynb.rs
@@ -7,7 +7,6 @@ use pyo3::exceptions::PySyntaxError;
 use pyo3::prelude::*;
 use pyo3::types::PyString;
 use rayon::prelude::*;
-use regex::Regex;
 use std::collections::HashMap;

 use super::shared::{
@@ -56,21 +55,17 @@ pub fn get_imports_from_ipynb_file(py: Python, file_path: &PyString) -> PyResult
     convert_to_python_dict(py, result)
 }

-fn _extract_import_statements_from_notebook_cells(cells: &[serde_json::Value]) -> String {
-    let import_regex =
-        Regex::new(r"^(?:from\s+(\w+)(?:\.\w+)?\s+)?import\s+([^\s,.]+)(?:\.\w+)?").unwrap();
-
-    let import_statements: Vec<String> = cells
+fn _extract_code_from_notebook_cells(cells: &[serde_json::Value]) -> String {
+    let code_lines: Vec<String> = cells
         .iter()
         .filter(|cell| cell["cell_type"] == "code")
         .flat_map(|cell| cell["source"].as_array())
         .flatten()
         .filter_map(|line| line.as_str())
-        .filter(|line| import_regex.is_match(line))
         .map(|line| line.to_string())
         .collect();

-    import_statements.join("\n")
+    code_lines.join("\n")
 }

 /// Core helper function that extracts import statements and their locations from the content of a single .ipynb file.
@@ -102,9 +97,9 @@ fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<
         }
     };

-    let imports_script = _extract_import_statements_from_notebook_cells(cells);
+    let imports_script = _extract_code_from_notebook_cells(cells);

     let ast = get_ast_from_file_content(&imports_script, path_str)
         .map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 03b849f0..69177a53 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -46,8 +46,8 @@ def test_import_parser_ipynb() -> None:
     notebook_path = Path("tests/data/example_project/src/notebook.ipynb")

     assert get_imported_modules_from_list_of_files([notebook_path]) == {
-        "click": [Location(notebook_path, 2, 6)],
-        "toml": [Location(notebook_path, 5, 9)],
+        "click": [Location(notebook_path, 1, 8)],
+        "toml": [Location(notebook_path, 5, 8)],
         "urllib3": [Location(notebook_path, 3, 1)],
     }

From 08f2f64a514fd51d1655e9de94a6afcea626e5b4 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sat, 16 Mar 2024 16:25:01 +0100
Subject: [PATCH 4/9] remove chardet

---
 pdm.lock       | 13 +------------
 pyproject.toml |  1 -
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/pdm.lock b/pdm.lock
index 28439bd4..3a38d00d 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -5,7 +5,7 @@ groups = ["default", "dev", "docs", "typing"]
 strategy = ["cross_platform", "inherit_metadata"]
 lock_version = "4.4.1"
-content_hash = "sha256:b6907482d32747a9b5502c17aff64523173d8d2cdab8357e8755558793c14d81"
+content_hash = "sha256:075a94a78d7250b4bdc64a8e66445f285b6afec47004ca0e45aab3b0744eec29"

 [[package]]
 name = "babel"
@@ -43,17 +43,6 @@ files = [
     {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
 ]

-[[package]]
-name = "chardet"
-version = "5.2.0"
-requires_python = ">=3.7"
-summary = "Universal encoding detector for Python 3"
-groups = ["default"]
-files = [
-    {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"},
-    {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"},
-]
-
 [[package]]
 name = "charset-normalizer"
 version = "3.3.2"
diff --git a/pyproject.toml b/pyproject.toml
index eb5d4b41..a34128f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,6 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython",
 ]
 dependencies = [
-    "chardet>=4.0.0",
     "click>=8.0.0,<9",
     "pathspec>=0.9.0",
     "colorama>=0.4.6; sys_platform == 'win32'",

From d1e7813b8e2b14fe411f8e5b3a4ffe7e44575960 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Sun, 17 Mar 2024 09:34:54 +0100
Subject: [PATCH 5/9] fix test

---
 tests/functional/cli/test_cli_requirements_txt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/functional/cli/test_cli_requirements_txt.py b/tests/functional/cli/test_cli_requirements_txt.py
index 59eccc17..4ca6f23b 100644
--- a/tests/functional/cli/test_cli_requirements_txt.py
+++ b/tests/functional/cli/test_cli_requirements_txt.py
@@ -98,7 +98,7 @@ def test_cli_single_requirements_txt(pip_venv_factory: PipVenvFactory) -> None:
                     "location": {
                         "file": str(Path("src/notebook.ipynb")),
                         "line": 3,
-                        "column": 0,
+                        "column": 1,
                     },
                 },
             ]

From 55d1dc25bce362741c9bed2f27623ea3ed4bf4ca Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 16:32:31 +0100
Subject: [PATCH 6/9] fix unit tests

---
 notebook_ok.ipynb                  |  1 -
 notebook_with_bad_encoding.ipynb   |  1 -
 notebook_with_syntax_error.ipynb   |  1 -
 tests/unit/imports/test_extract.py | 67 +++++++++++++++---------------
 4 files changed, 34 insertions(+), 36 deletions(-)
 delete mode 100644 notebook_ok.ipynb
 delete mode 100644 notebook_with_bad_encoding.ipynb
 delete mode 100644 notebook_with_syntax_error.ipynb

diff --git a/notebook_ok.ipynb b/notebook_ok.ipynb
deleted file mode 100644
index 7d0b4c84..00000000
--- a/notebook_ok.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"cells": [{"cell_type": "code", "source": ["import numpy\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 2}
diff --git a/notebook_with_bad_encoding.ipynb b/notebook_with_bad_encoding.ipynb
deleted file mode 100644
index efead06e..00000000
--- a/notebook_with_bad_encoding.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"cells": [{"cell_type": "code", "source": ["print('\u00e6\u00f8\u00e5')"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 2}
diff --git a/notebook_with_syntax_error.ipynb b/notebook_with_syntax_error.ipynb
deleted file mode 100644
index 2bcb357c..00000000
--- a/notebook_with_syntax_error.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"cells": [{"cell_type": "code", "source": ["import n invalid_syntax:::\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 2}
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 69177a53..091b2459 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -158,38 +158,39 @@ def test_import_parser_for_ipynb_errors(tmp_path: Path, caplog: LogCaptureFixtur
     notebook_ok = Path("notebook_ok.ipynb")
     notebook_with_syntax_error = Path("notebook_with_syntax_error.ipynb")

-    # Create a well-formed notebook
-    with notebook_ok.open("w") as f:
-        json.dump(
-            {
-                "cells": [{"cell_type": "code", "source": ["import numpy\n"]}],
-                "metadata": {},
-                "nbformat": 4,
-                "nbformat_minor": 2,
-            },
-            f,
-        )
+    with run_within_dir(tmp_path):
+        # Create a well-formed notebook
+        with notebook_ok.open("w") as f:
+            json.dump(
+                {
+                    "cells": [{"cell_type": "code", "source": ["import numpy\n"]}],
+                    "metadata": {},
+                    "nbformat": 4,
+                    "nbformat_minor": 2,
+                },
+                f,
+            )
+
+        # Create a notebook with invalid Python syntax in a code cell
+        with notebook_with_syntax_error.open("w") as f:
+            json.dump(
+                {
+                    "cells": [{"cell_type": "code", "source": ["import n invalid_syntax:::\n"]}],
+                    "metadata": {},
+                    "nbformat": 4,
+                    "nbformat_minor": 2,
+                },
+                f,
+            )
+
+        # Execute function and assert the result for well-formed notebook
+        with caplog.at_level(logging.WARNING):
+            assert get_imported_modules_from_list_of_files([
+                notebook_ok,
+                notebook_with_syntax_error,
+            ]) == {"numpy": [Location(file=Path("notebook_ok.ipynb"), line=1, column=8)]}

-    # Create a notebook with invalid Python syntax in a code cell
-    with notebook_with_syntax_error.open("w") as f:
-        json.dump(
-            {
-                "cells": [{"cell_type": "code", "source": ["import n invalid_syntax:::\n"]}],
-                "metadata": {},
-                "nbformat": 4,
-                "nbformat_minor": 2,
-            },
-            f,
+        assert re.search(
+            r"WARNING .*:ipynb.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
+            caplog.text,
         )
-
-    # Execute function and assert the result for well-formed notebook
-    with caplog.at_level(logging.WARNING):
-        assert get_imported_modules_from_list_of_files([
-            notebook_ok,
-            notebook_with_syntax_error,
-        ]) == {"numpy": [Location(file=Path("notebook_ok.ipynb"), line=1, column=8)]}
-
-    assert re.search(
-        r"WARNING .*:ipynb.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
-        caplog.text,
-    )

From fbfbbaf71accb49f34912f84deab7f0e083e2a1e Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 17:22:59 +0100
Subject: [PATCH 7/9] added type aliases

---
 package-lock.json                  |  6 +++
 src/imports/ipynb.rs               | 61 +++++++-----------------------
 src/imports/py.rs                  | 54 +++++++-------------------
 src/imports/shared.rs              | 45 +++++++++++++++++++++-
 tests/unit/imports/test_extract.py |  6 +--
 5 files changed, 79 insertions(+), 93 deletions(-)
 create mode 100644 package-lock.json

diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 00000000..11460110
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,6 @@
+{
+  "name": "deptry",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {}
+}
diff --git a/src/imports/ipynb.rs b/src/imports/ipynb.rs
index 0c638891..496908e8 100644
--- a/src/imports/ipynb.rs
+++ b/src/imports/ipynb.rs
@@ -9,12 +9,9 @@ use pyo3::types::PyString;
 use rayon::prelude::*;
 use std::collections::HashMap;

-use super::shared::{
-    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
-    extract_imports_from_ast, get_ast_from_file_content,
-};
+use super::shared;

 /// Processes multiple Python files in parallel to extract import statements and their locations.
 /// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
 #[pyfunction]
 pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> PyResult<PyObject> {
@@ -23,68 +20,38 @@ pub fn get_imports_from_ipynb_files(py: Python, file_paths: Vec<&PyString>) -> P
         .map(|py_str| py_str.to_str().unwrap().to_owned())
         .collect();

-    // Process each file in parallel and collect results
     let results: Vec<_> = rust_file_paths
         .par_iter()
-        .map(|path_str| match _get_imports_from_ipynb_file(path_str) {
-            Ok(result) => (path_str, Ok(result)),
-            Err(e) => (path_str, Err(e)),
+        .map(|path_str| {
+            let result = _get_imports_from_ipynb_file(path_str);
+            shared::ThreadResult {
+                file: path_str.to_string(),
+                result,
+            }
         })
         .collect();

-    // Merge results from each thread
-    let mut all_imports = HashMap::new();
-    let mut errors = Vec::new();
-
-    for (path, file_result) in results {
-        match file_result {
-            Ok(file_result) => {
-                for (module, locations) in file_result {
-                    all_imports
-                        .entry(module)
-                        .or_insert_with(Vec::new)
-                        .extend(locations);
-                }
-            }
-            Err(e) => errors.push((path.to_string(), e)),
-        }
-    }
-
-    for (path, error) in errors {
-        log::warn!(
-            "Warning: Skipping processing of {} because of the following error: \"{}\".",
-            path,
-            error
-        );
-    }
+    let (all_imports, errors) = shared::merge_results_from_threads(results);
+    shared::log_python_errors_as_warnings(&errors);

-    convert_to_python_dict(py, all_imports)
+    shared::convert_to_python_dict(py, all_imports)
 }

 /// Core helper function that extracts import statements and their locations from a single .ipynb file.
 /// Ensures robust error handling and provides clearer, more detailed comments.
 fn _get_imports_from_ipynb_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
-    // Read the content of the .ipynb file, handling potential IO errors.
     let file_content = read_file(path_str)?;
-
-    // Deserialize the JSON content of the notebook, handling syntax errors.
     let notebook: serde_json::Value =
         serde_json::from_str(&file_content).map_err(|e| PySyntaxError::new_err(e.to_string()))?;
-
-    // Extract the code cells from the notebook, handling unexpected data structures.
     let cells = notebook["cells"]
         .as_array()
         .ok_or_else(|| PySyntaxError::new_err("Expected 'cells' to be an array"))?;
-
-    // Concatenate the code from all code cells into a single string.
     let python_code = _extract_code_from_notebook_cells(cells);

-    // Parse the Python code to AST and extract import statements.
-    let ast = get_ast_from_file_content(&python_code, path_str)?;
-    let imported_modules = extract_imports_from_ast(ast);
+    let ast = shared::get_ast_from_file_content(&python_code, path_str)?;
+    let imported_modules = shared::extract_imports_from_ast(ast);

-    // Convert the extracted import data into location objects.
-    Ok(convert_imports_with_textranges_to_location_objects(
+    Ok(shared::convert_imports_with_textranges_to_location_objects(
         imported_modules,
         path_str,
         &python_code,
diff --git a/src/imports/py.rs b/src/imports/py.rs
index 46f93ff7..e27a8684 100644
--- a/src/imports/py.rs
+++ b/src/imports/py.rs
@@ -8,10 +8,7 @@ use pyo3::types::PyString;
 use rayon::prelude::*;
 use std::collections::HashMap;

-use super::shared::{
-    convert_imports_with_textranges_to_location_objects, convert_to_python_dict,
-    extract_imports_from_ast, get_ast_from_file_content,
-};
+use super::shared;

 /// Processes multiple Python files in parallel to extract import statements and their locations.
 /// Accepts a list of file paths and returns a dictionary mapping module names to their import locations.
@@ -22,54 +19,29 @@ pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyRe
         .map(|py_str| py_str.to_str().unwrap().to_owned())
         .collect();

-    // Process each file in parallel and collect results
     let results: Vec<_> = rust_file_paths
         .par_iter()
-        .map(|path_str| match _get_imports_from_py_file(path_str) {
-            Ok(result) => (path_str, Ok(result)),
-            Err(e) => (path_str, Err(e)),
+        .map(|path_str| {
+            let result = _get_imports_from_py_file(path_str);
+            shared::ThreadResult {
+                file: path_str.to_string(),
+                result,
+            }
         })
         .collect();

-    // Merge results from each thread
-    let mut all_imports = HashMap::new();
-    let mut errors = Vec::new();
-
-    for (path, file_result) in results {
-        match file_result {
-            Ok(file_result) => {
-                for (module, locations) in file_result {
-                    all_imports
-                        .entry(module)
-                        .or_insert_with(Vec::new)
-                        .extend(locations);
-                }
-            }
-            Err(e) => errors.push((path.to_string(), e)),
-        }
-    }
-
-    for (path, error) in errors {
-        log::warn!(
-            "Warning: Skipping processing of {} because of the following error: \"{}\".",
-            path,
-            error
-        );
-    }
-
-    convert_to_python_dict(py, all_imports)
+    let (all_imports, errors) = shared::merge_results_from_threads(results);
+    shared::log_python_errors_as_warnings(&errors);
+    shared::convert_to_python_dict(py, all_imports)
 }

 /// Core helper function that extracts import statements and their locations from the content of a single Python file.
 /// Used internally by both parallel and single file processing functions.
 fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
     let file_content = read_file(path_str)?;
-
-    let ast = get_ast_from_file_content(&file_content, path_str)?;
-
-    let imported_modules = extract_imports_from_ast(ast);
-
-    Ok(convert_imports_with_textranges_to_location_objects(
+    let ast = shared::get_ast_from_file_content(&file_content, path_str)?;
+    let imported_modules = shared::extract_imports_from_ast(ast);
+    Ok(shared::convert_imports_with_textranges_to_location_objects(
         imported_modules,
         path_str,
         &file_content,
diff --git a/src/imports/shared.rs b/src/imports/shared.rs
index b2afd04b..87ea3996 100644
--- a/src/imports/shared.rs
+++ b/src/imports/shared.rs
@@ -13,6 +13,14 @@ use rustpython_parser::{parse, Mode};
 use std::collections::HashMap;
 use visitor::ImportVisitor;

+pub type FileToImportsMap = HashMap<String, Vec<Location>>;
+pub type ErrorList = Vec<(String, PyErr)>;
+
+pub struct ThreadResult {
+    pub file: String,
+    pub result: PyResult<FileToImportsMap>,
+}
+
 /// Parses the content of a Python file into an abstract syntax tree (AST).
 pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResult<Mod> {
     let ast = parse(file_content, Mode::Module, file_path)
@@ -40,7 +48,7 @@ pub fn convert_imports_with_textranges_to_location_objects(
     imports: HashMap<String, Vec<TextRange>>,
     file_path: &str,
     source_code: &str,
-) -> HashMap<String, Vec<Location>> {
+) -> FileToImportsMap {
     let line_index = LineIndex::from_source_text(source_code);

     let mut imports_with_locations = HashMap::<String, Vec<Location>>::new();
@@ -68,7 +76,7 @@ pub fn convert_imports_with_textranges_to_location_objects(
 pub fn convert_to_python_dict(
     py: Python<'_>,
-    imports_with_locations: HashMap<String, Vec<Location>>,
+    imports_with_locations: FileToImportsMap,
 ) -> PyResult<PyObject> {
     let imports_dict = PyDict::new(py);
@@ -83,3 +91,36 @@ pub fn convert_to_python_dict(

     Ok(imports_dict.into())
 }
+
+// Shared logic for merging results from different threads.
+pub fn merge_results_from_threads(results: Vec<ThreadResult>) -> (FileToImportsMap, ErrorList) {
+    let mut all_imports = HashMap::new();
+    let mut errors = Vec::new();
+
+    for thread_result in results {
+        match thread_result.result {
+            Ok(file_result) => {
+                for (module, locations) in file_result {
+                    all_imports
+                        .entry(module)
+                        .or_insert_with(Vec::new)
+                        .extend(locations);
+                }
+            }
+            Err(e) => errors.push((thread_result.file, e)),
+        }
+    }
+
+    (all_imports, errors)
+}
+
+// Shared logic for logging errors.
+pub fn log_python_errors_as_warnings(errors: &[(String, PyErr)]) {
+    for (path, error) in errors {
+        log::warn!(
+            "Warning: Skipping processing of {} because of the following error: \"{}\".",
+            path,
+            error
+        );
+    }
+}
diff --git a/tests/unit/imports/test_extract.py b/tests/unit/imports/test_extract.py
index 091b2459..165eebcd 100644
--- a/tests/unit/imports/test_extract.py
+++ b/tests/unit/imports/test_extract.py
@@ -145,11 +145,11 @@ def test_import_parser_errors(tmp_path: Path, caplog: LogCaptureFixture) -> None
         ]) == {"black": [Location(file=Path("file_ok.py"), line=1, column=8)]}

     assert re.search(
-        r"WARNING deptry.imports.py:py.rs:\d+ Warning: Skipping processing of file_with_bad_encoding.py because of the following error: \"OSError: Failed to decode file content with the detected encoding.\".",
+        r"WARNING .*:shared.rs:\d+ Warning: Skipping processing of file_with_bad_encoding.py because of the following error: \"OSError: Failed to decode file content with the detected encoding.\".",
         caplog.text,
     )
     assert re.search(
-        r"WARNING deptry.imports.py:py.rs:\d+ Warning: Skipping processing of file_with_syntax_error.py because of the following error: \"SyntaxError: invalid syntax. Got unexpected token ':' at byte offset 15\".",
+        r"WARNING .*:shared.rs:\d+ Warning: Skipping processing of file_with_syntax_error.py because of the following error: \"SyntaxError: invalid syntax. Got unexpected token ':' at byte offset 15\".",
         caplog.text,
     )

@@ -191,6 +191,6 @@ def test_import_parser_for_ipynb_errors(tmp_path: Path, caplog: LogCaptureFixtur
         ]) == {"numpy": [Location(file=Path("notebook_ok.ipynb"), line=1, column=8)]}

         assert re.search(
-            r"WARNING .*:ipynb.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
+            r"WARNING .*:shared.rs:\d+ Warning: Skipping processing of notebook_with_syntax_error.ipynb because of the following error: \"SyntaxError: invalid syntax. Got unexpected token 'invalid_syntax' at byte offset 9\"",
             caplog.text,
         )

From 4e8cea5314fcc0c39c7c5751f49c480f4d7cef34 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 17:29:31 +0100
Subject: [PATCH 8/9] remove missing function

---
 python/deptry/rust.pyi | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/deptry/rust.pyi b/python/deptry/rust.pyi
index 9cad70fa..98ae6f4a 100644
--- a/python/deptry/rust.pyi
+++ b/python/deptry/rust.pyi
@@ -2,8 +2,6 @@ from .rust import Location as RustLocation

 def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
 def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
-def get_imports_from_py_file(file_path: str) -> dict[str, list[RustLocation]]: ...
-def get_imports_from_ipynb_file(file_path: str) -> dict[str, list[RustLocation]]: ...

 class Location:
     file: str

From 818722edc20abd05d8cfae45bbc03adc58f05932 Mon Sep 17 00:00:00 2001
From: Florian Maas
Date: Mon, 18 Mar 2024 17:31:51 +0100
Subject: [PATCH 9/9] remove file that was added by accident

---
 package-lock.json | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 package-lock.json

diff --git a/package-lock.json b/package-lock.json
deleted file mode 100644
index 11460110..00000000
--- a/package-lock.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "name": "deptry",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {}
-}