42 changes: 36 additions & 6 deletions src/kit/pr_review/line_ref_fixer.py
@@ -2,10 +2,13 @@

import bisect
import re
from typing import Dict, List, Tuple
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union

from .diff_parser import DiffParser

if TYPE_CHECKING:
from .diff_parser import FileDiff


class LineRefFixer:
"""Utility to validate and auto-fix file:line references in an AI review comment."""
@@ -15,8 +18,22 @@ class LineRefFixer:
REF_PATTERN = re.compile(r"([\w./+-]+\.[a-zA-Z0-9]{1,10}):(\d+)(?:-(\d+))?")

@classmethod
def _build_valid_line_map(cls, diff_text: str) -> Dict[str, set[int]]:
diff_files = DiffParser.parse_diff(diff_text)
def _build_valid_line_map(
cls,
diff_text_or_parsed: Union[str, Dict[str, "FileDiff"]],
) -> Dict[str, set[int]]:
"""Build map of valid line numbers from diff.

Args:
diff_text_or_parsed: Either raw diff text (str) or pre-parsed diff dict.
Passing a pre-parsed diff avoids redundant parsing when the caller
has already parsed the diff.
"""
if isinstance(diff_text_or_parsed, str):
diff_files = DiffParser.parse_diff(diff_text_or_parsed)
else:
diff_files = diff_text_or_parsed

valid: Dict[str, set[int]] = {}
for filename, fd in diff_files.items():
line_set: set[int] = set()
@@ -31,12 +48,25 @@ def _build_valid_line_map(cls, diff_text: str) -> Dict[str, set[int]]:
return valid

@classmethod
def fix_comment(cls, comment: str, diff_text: str) -> Tuple[str, List[Tuple[str, int, int]]]:
def fix_comment(
cls,
comment: str,
diff_text: str,
parsed_diff: Optional[Dict[str, "FileDiff"]] = None,
) -> Tuple[str, List[Tuple[str, int, int]]]:
"""Return (fixed_comment, fixes).

fixes list items are (filename, old_line, new_line).
Args:
comment: The review comment text to fix.
diff_text: Raw diff text (used if parsed_diff not provided).
parsed_diff: Pre-parsed diff dict. If provided, avoids re-parsing
the diff, which saves ~0.1ms per call.

Returns:
Tuple of (fixed_comment, fixes) where fixes is a list of
(filename, old_line, new_line) tuples.
"""
valid_map = cls._build_valid_line_map(diff_text)
valid_map = cls._build_valid_line_map(parsed_diff if parsed_diff else diff_text)
# Convert sets to sorted lists once for O(log n) lookups
sorted_lines_cache: Dict[str, List[int]] = {}
fixes: List[Tuple[str, int, int]] = []
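A minimal usage sketch of the new fast path: parse the diff once, then hand the parsed result to fix_comment so it skips re-parsing. The diff file name, comment text, and package-level import paths are assumptions for illustration; the signatures mirror the diff above.

    from kit.pr_review.diff_parser import DiffParser
    from kit.pr_review.line_ref_fixer import LineRefFixer

    with open("pr.diff") as f:  # hypothetical diff file
        diff_text = f.read()

    # Parse once, then reuse the result for every comment that needs fixing.
    parsed = DiffParser.parse_diff(diff_text)

    comment = "Possible off-by-one in src/app.py:12"  # hypothetical review comment
    fixed_comment, fixes = LineRefFixer.fix_comment(comment, diff_text, parsed_diff=parsed)
    for filename, old_line, new_line in fixes:
        print(f"{filename}: {old_line} -> {new_line}")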
13 changes: 10 additions & 3 deletions src/kit/pr_review/reviewer.py
@@ -394,7 +394,9 @@ def review_pr(self, pr_input: str) -> str:
if validation.metrics.get("line_reference_errors", 0) > 0:
from .line_ref_fixer import LineRefFixer

analysis, fixes = LineRefFixer.fix_comment(analysis, pr_diff)
# Use cached parsed diff to avoid re-parsing
cached_parsed = self.get_parsed_diff(owner, repo, pr_number)
analysis, fixes = LineRefFixer.fix_comment(analysis, pr_diff, parsed_diff=cached_parsed)
if fixes and not quiet:
print(
f"🔧 Auto-fixed {len(fixes) // (2 if any(f[1] != f[2] for f in fixes) else 1)} line reference(s)"
@@ -443,7 +445,11 @@ def review_pr(self, pr_input: str) -> str:
if validation.metrics.get("line_reference_errors", 0) > 0:
from .line_ref_fixer import LineRefFixer

analysis, fixes = LineRefFixer.fix_comment(analysis, pr_diff)
# Use cached parsed diff to avoid re-parsing
cached_parsed = self.get_parsed_diff(owner, repo, pr_number)
analysis, fixes = LineRefFixer.fix_comment(
analysis, pr_diff, parsed_diff=cached_parsed
)
if fixes and not quiet:
print(
f"🔧 Auto-fixed {len(fixes) // (2 if any(f[1] != f[2] for f in fixes) else 1)} line reference(s)"
@@ -605,7 +611,8 @@ def review_local_diff(self, diff_spec: str, repo_path: str = ".") -> str:
if validation.metrics.get("line_reference_errors", 0) > 0:
from .line_ref_fixer import LineRefFixer

analysis, fixes = LineRefFixer.fix_comment(analysis, diff_content)
# Reuse already-parsed diff to avoid re-parsing
analysis, fixes = LineRefFixer.fix_comment(analysis, diff_content, parsed_diff=parsed_diff)
if fixes and not quiet:
is_different = [f[1] != f[2] for f in fixes]
divisor = 2 if any(is_different) else 1
79 changes: 63 additions & 16 deletions src/kit/vector_searcher.py
@@ -327,25 +327,72 @@ def __init__(self, repo, embed_fn, backend: Optional[VectorDBBackend] = None, pe
self.chunk_metadatas: List[Dict[str, Any]] = []
self.chunk_embeddings: List[List[float]] = []

def build_index(self, chunk_by: str = "symbols"):
def build_index(self, chunk_by: str = "symbols", parallel: bool = True, max_workers: Optional[int] = None):
"""Build the vector index from repository files.

Args:
chunk_by: Chunking strategy - "symbols" or "lines"
parallel: Whether to process files in parallel (default True)
max_workers: Max parallel workers. Defaults to min(4, cpu_count).
May also be set via the KIT_INDEXER_MAX_WORKERS env var
(consulted only when this argument is None).
"""
self.chunk_metadatas = []
chunk_codes: List[str] = []

for file in self.repo.get_file_tree():
if file["is_dir"]:
continue
path = file["path"]
if chunk_by == "symbols":
chunks = self.repo.chunk_file_by_symbols(path)
for chunk in chunks:
code = chunk["code"]
self.chunk_metadatas.append({"file": path, **chunk})
chunk_codes.append(code)
else:
chunks = self.repo.chunk_file_by_lines(path, max_lines=50)
for code in chunks:
self.chunk_metadatas.append({"file": path, "code": code})
chunk_codes.append(code)
files_to_process = [f["path"] for f in self.repo.get_file_tree() if not f["is_dir"]]

if parallel and len(files_to_process) > 1:
# Parallel processing for better performance on multi-core systems
from concurrent.futures import ThreadPoolExecutor, as_completed

if max_workers is None:
import os as _os

env_workers = _os.environ.get("KIT_INDEXER_MAX_WORKERS")
if env_workers:
try:
max_workers = int(env_workers)
except ValueError:
max_workers = None
if max_workers is None:
cpu_count = _os.cpu_count() or 4
max_workers = min(4, cpu_count)

def process_file(path: str) -> List[Dict[str, Any]]:
"""Process a single file and return its chunks."""
if chunk_by == "symbols":
chunks = self.repo.chunk_file_by_symbols(path)
return [{"file": path, **chunk} for chunk in chunks]
else:
chunks = self.repo.chunk_file_by_lines(path, max_lines=50)
return [{"file": path, "code": code} for code in chunks]

with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {executor.submit(process_file, path): path for path in files_to_process}
for future in as_completed(futures):
try:
file_chunks = future.result()
for chunk in file_chunks:
code = chunk.get("code", "")
self.chunk_metadatas.append(chunk)
chunk_codes.append(code)
except Exception:
# Skip files that fail to process
pass
else:
# Sequential processing (fallback or single file)
for path in files_to_process:
if chunk_by == "symbols":
chunks = self.repo.chunk_file_by_symbols(path)
for chunk in chunks:
code = chunk["code"]
self.chunk_metadatas.append({"file": path, **chunk})
chunk_codes.append(code)
else:
chunks = self.repo.chunk_file_by_lines(path, max_lines=50)
for code in chunks:
self.chunk_metadatas.append({"file": path, "code": code})
chunk_codes.append(code)

# Embed in batch (attempt). Fallback to per-item if embed_fn doesn't support list input.
if chunk_codes:
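A hedged sketch of driving the new parallel indexing path. Only the parameters visible in this diff are exercised; `searcher` is assumed to be an already-constructed VectorSearcher instance, since its constructor signature is truncated in the hunk header above.

    import os

    # Optional cap via the environment; build_index reads this only when the
    # max_workers argument is left as None.
    os.environ["KIT_INDEXER_MAX_WORKERS"] = "2"

    searcher.build_index(chunk_by="symbols", parallel=True)   # threaded path
    searcher.build_index(chunk_by="lines", parallel=False)    # sequential fallback
    searcher.build_index(chunk_by="symbols", max_workers=8)   # explicit value wins over the env var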