Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 185 additions & 1 deletion amplifier_foundation/modules/install_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,105 @@
Tracks fingerprints of installed modules to skip redundant `uv pip install` calls.
When a module's pyproject.toml/requirements.txt hasn't changed, we can skip
the install step entirely, significantly speeding up startup.

Self-healing: Also verifies that declared dependencies are actually installed.
This catches stale state after `uv tool install --force` wipes the venv.
"""

from __future__ import annotations

import hashlib
import importlib.metadata
import json
import logging
import re
import sys
import tempfile
from pathlib import Path

logger = logging.getLogger(__name__)


def _extract_dependencies_from_pyproject(pyproject_path: Path) -> list[str]:
    """Extract dependency names from a pyproject.toml file.

    Only the ``[project].dependencies`` array is read. Each entry is reduced
    to its bare distribution name: extras, version specifiers and direct-URL
    references are stripped.

    Requirements that carry an environment marker (``name; marker``) are
    skipped. Markers cannot be evaluated with the standard library alone, and
    a requirement whose marker is false for this interpreter/platform is
    legitimately absent; reporting it would make the caller believe the
    module needs reinstalling on every run.

    Args:
        pyproject_path: Path to pyproject.toml file.

    Returns:
        List of dependency package names (without version specifiers), in
        declaration order. Empty if the file does not exist, cannot be
        parsed, no TOML parser is importable, or ``[project].dependencies``
        is absent or not a list.
    """
    if not pyproject_path.exists():
        return []

    try:
        # Use tomllib (Python 3.11+) or tomli as fallback
        try:
            import tomllib
        except ImportError:
            try:
                import tomli as tomllib  # type: ignore[import-not-found]
            except ImportError:
                # No TOML parser available - skip dependency check
                logger.debug("No TOML parser available, skipping dependency extraction")
                return []

        with open(pyproject_path, "rb") as f:
            config = tomllib.load(f)
    except Exception as e:
        # Best effort: an unreadable/invalid file simply yields no dependencies.
        logger.debug(f"Failed to parse {pyproject_path}: {e}")
        return []

    # Guard against malformed metadata: iterating a string here would yield
    # one bogus "dependency" per character.
    project = config.get("project")
    project_deps = project.get("dependencies") if isinstance(project, dict) else None
    if not isinstance(project_deps, list):
        return []

    # Name may be preceded by whitespace and must be followed by one of:
    # whitespace, extras "[", parenthesised specifier "(", a version operator,
    # a URL reference "@", or end of string.
    name_pattern = r"^\s*([a-zA-Z0-9._-]+?)(?:\s|\[|\(|[<>=!~;@]|$)"

    deps = []
    for dep in project_deps:
        if not isinstance(dep, str):
            continue

        # Conditional requirement: cannot tell whether it applies here.
        if ";" in dep:
            continue

        match = re.match(name_pattern, dep)
        if match:
            deps.append(match.group(1))

    return deps


def _check_dependency_installed(dep_name: str) -> bool:
    """Report whether a distribution called ``dep_name`` is installed.

    The lookup goes through importlib.metadata, so it is keyed on the
    distribution name rather than the import name. That matters for packages
    whose two names differ (e.g., Pillow -> PIL, beautifulsoup4 -> bs4,
    scikit-learn -> sklearn).

    Args:
        dep_name: Package/distribution name (e.g., "aiohttp", "Pillow").

    Returns:
        True if any spelling of the name resolves to an installed
        distribution, False otherwise.
    """
    # Package names are case-insensitive and hyphen/underscore/dot are
    # interchangeable, so derive the two common alternate spellings.
    underscored = dep_name.lower().replace("-", "_").replace(".", "_")
    hyphenated = underscored.replace("_", "-")

    # Probe the name exactly as declared first, then the normalized forms.
    for candidate in (dep_name, underscored, hyphenated):
        try:
            importlib.metadata.distribution(candidate)
        except importlib.metadata.PackageNotFoundError:
            continue
        return True

    return False


class InstallStateManager:
"""Tracks module installation state for fast startup.

Expand Down Expand Up @@ -105,11 +190,17 @@ def _compute_fingerprint(self, module_path: Path) -> str:
def is_installed(self, module_path: Path) -> bool:
"""Check if module is already installed with matching fingerprint.

Also verifies that declared dependencies are actually present in the
Python environment. This catches stale install state after operations
like `uv tool install --force` that wipe the venv but don't clear
the install-state.json file.

Args:
module_path: Path to the module directory.

Returns:
True if module is installed and fingerprint matches.
True if module is installed, fingerprint matches, AND all
dependencies are actually present.
"""
path_key = str(module_path.resolve())
entry = self._state["modules"].get(path_key)
Expand All @@ -127,6 +218,25 @@ def is_installed(self, module_path: Path) -> bool:
)
return False

# Self-healing: Verify dependencies are actually installed
# This catches stale state after venv wipe (e.g., uv tool install --force)
pyproject_path = module_path / "pyproject.toml"
deps = _extract_dependencies_from_pyproject(pyproject_path)

missing_deps = []
for dep in deps:
if not _check_dependency_installed(dep):
missing_deps.append(dep)

if missing_deps:
logger.info(
f"Module {module_path.name} has missing dependencies: {missing_deps}. "
f"Will reinstall."
)
# Invalidate this entry so save() will persist the change
self.invalidate(module_path)
return False

return True

def mark_installed(self, module_path: Path) -> None:
Expand Down Expand Up @@ -191,3 +301,77 @@ def invalidate(self, module_path: Path | None = None) -> None:
del self._state["modules"][path_key]
self._dirty = True
logger.debug(f"Invalidated install state for {module_path.name}")

def clear(self) -> None:
"""Clear all module install state.

This is a convenience method equivalent to `invalidate(None)`.
Use after operations that may have invalidated the Python environment,
such as `amplifier reset --remove cache`.

Changes are not persisted until `save()` is called.
"""
self.invalidate(None)

def invalidate_modules_with_missing_deps(self) -> tuple[int, int]:
    """Surgically invalidate only modules whose dependencies are missing.

    Checks each tracked module's declared dependencies (read from its
    pyproject.toml) against what's actually installed in the Python
    environment. Entries are invalidated when at least one declared
    dependency is missing, or when the module directory itself no longer
    exists.

    This is useful after operations like `uv tool install --force` that
    recreate the Python environment but don't clear install-state.json.
    Modules with all dependencies still satisfied won't be reinstalled.

    Returns:
        Tuple of (modules_checked, modules_invalidated). Modules whose
        directory has disappeared are invalidated without being counted
        as checked, so the second number may exceed the first.

    Note:
        Changes are persisted immediately (calls save() internally), but
        only when at least one entry was invalidated.
    """
    modules = self._state.get("modules", {})
    if not modules:
        logger.debug("No modules in install state to check")
        return (0, 0)

    modules_checked = 0
    # Maps state key -> reason, so the log line says why each entry was dropped.
    # Removal is deferred so the mapping is not mutated while being iterated.
    stale: dict[str, str] = {}

    for module_path_str in modules:
        module_path = Path(module_path_str)

        # Module directory no longer exists - nothing to inspect.
        if not module_path.exists():
            stale[module_path_str] = "module directory no longer exists"
            continue

        deps = _extract_dependencies_from_pyproject(module_path / "pyproject.toml")
        modules_checked += 1

        missing_deps = [dep for dep in deps if not _check_dependency_installed(dep)]
        if missing_deps:
            logger.debug(
                f"Module {module_path.name} has missing deps: {missing_deps}"
            )
            stale[module_path_str] = "missing dependencies"

    # Remove invalidated entries
    for path_str, reason in stale.items():
        del self._state["modules"][path_str]
        module_name = Path(path_str).name
        logger.info(f"Invalidated install state for {module_name} ({reason})")

    if stale:
        self._dirty = True
        self.save()

    return (modules_checked, len(stale))
Loading