diff --git a/.gitignore b/.gitignore index 23c92f9..83793bb 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,6 @@ dmypy.json # pixi .pixi/ + +# Used in debugging +explicit.txt diff --git a/conda_pypi/__init__.py b/conda_pypi/__init__.py index b6d2aa1..b2d8e7f 100644 --- a/conda_pypi/__init__.py +++ b/conda_pypi/__init__.py @@ -1,3 +1,5 @@ """ conda-pypi """ + +__version__ = "0.1.0" diff --git a/conda_pypi/cli/__init__.py b/conda_pypi/cli/__init__.py new file mode 100644 index 0000000..cb72ca6 --- /dev/null +++ b/conda_pypi/cli/__init__.py @@ -0,0 +1,3 @@ +from . import install, list, pip + +__all__ = ["install", "list", "pip"] diff --git a/conda_pypi/cli/install.py b/conda_pypi/cli/install.py new file mode 100644 index 0000000..7036b9b --- /dev/null +++ b/conda_pypi/cli/install.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import sys +from logging import getLogger +from pathlib import Path +from typing import TYPE_CHECKING + +from conda.base.context import context +from conda.common.io import Spinner +from conda.exceptions import CondaVerificationError, CondaFileIOError + +from ..main import run_pip_install, compute_record_sum, PyPIDistribution +from ..utils import get_env_site_packages + +if TYPE_CHECKING: + from typing import Iterable, Literal + +log = getLogger(f"conda.{__name__}") + + +def _prepare_pypi_transaction(lines: Iterable[str]) -> dict[str, dict[str, str]]: + pkgs = {} + for line in lines: + dist = PyPIDistribution.from_lockfile_line(line) + pkgs[(dist.name, dist.version)] = { + "url": dist.find_wheel_url(), + "hashes": dist.record_checksums, + } + return pkgs + + +def _verify_pypi_transaction( + prefix: str, + pkgs: dict[str, dict[str, str]], + on_error: Literal["ignore", "warn", "error"] = "warn", +): + site_packages = get_env_site_packages(prefix) + errors = [] + dist_infos = [path for path in site_packages.glob("*.dist-info") if path.is_dir()] + for (name, version), pkg in pkgs.items(): + norm_name = name.lower().replace("-", 
"_").replace(".", "_") + dist_info = next( + ( + d + for d in dist_infos + if d.stem.rsplit("-", 1) in ([name, version], [norm_name, version]) + ), + None, + ) + if not dist_info: + errors.append(f"Could not find installation for {name}=={version}") + continue + + expected_hashes = pkg.get("hashes") + if expected_hashes: + found_hashes = compute_record_sum(dist_info / "RECORD", expected_hashes.keys()) + log.info("Verifying %s==%s with %s", name, version, ", ".join(expected_hashes)) + for algo, expected_hash in expected_hashes.items(): + found_hash = found_hashes.get(algo) + if found_hash and expected_hash != found_hash: + msg = ( + "%s checksum for %s==%s didn't match! Expected=%s, found=%s", + algo, + name, + version, + expected_hash, + found_hash, + ) + if on_error == "warn": + log.warning(*msg) + elif on_error == "error": + errors.append(msg[0] % msg[1:]) + else: + log.debug(*msg) + if errors: + errors = "\n- ".join(errors) + raise CondaVerificationError(f"PyPI packages checksum verification failed:\n- {errors}") + + +def post_command(command: str) -> int: + if command not in ("install", "create"): + return 0 + + pypi_lines = _pypi_lines_from_paths() + if not pypi_lines: + return 0 + + with Spinner("\nPreparing PyPI transaction", enabled=not context.quiet, json=context.json): + pkgs = _prepare_pypi_transaction(pypi_lines) + + with Spinner("Executing PyPI transaction", enabled=not context.quiet, json=context.json): + run_pip_install( + context.target_prefix, + args=[pkg["url"] for pkg in pkgs.values()], + dry_run=context.dry_run, + quiet=context.quiet, + verbosity=context.verbosity, + force_reinstall=context.force_reinstall, + yes=context.always_yes, + check=True, + ) + + with Spinner("Verifying PyPI transaction", enabled=not context.quiet, json=context.json): + on_error_dict = {"disabled": "ignore", "warn": "warn", "enabled": "error"} + on_error = on_error_dict.get(context.safety_checks, "warn") + _verify_pypi_transaction(context.target_prefix, pkgs, 
on_error=on_error) + + return 0 + + +def _pypi_lines_from_paths(paths: Iterable[str] | None = None) -> list[str]: + if paths is None: + file_arg = context.raw_data["cmd_line"].get("file") + if file_arg is None: + return [] + paths = file_arg.value(None) + lines = [] + line_prefix = PyPIDistribution._line_prefix + for path in paths: + path = path.value(None) + try: + with open(path) as f: + for line in f: + if line.startswith(line_prefix): + lines.append(line[len(line_prefix) :]) + except OSError as exc: + raise CondaFileIOError(f"Could not process {path}") from exc + return lines diff --git a/conda_pypi/cli/list.py b/conda_pypi/cli/list.py new file mode 100644 index 0000000..9cf732d --- /dev/null +++ b/conda_pypi/cli/list.py @@ -0,0 +1,22 @@ +import sys +from conda.base.context import context + +from .. import __version__ +from ..main import pypi_lines_for_explicit_lockfile + + +def post_command(command: str): + if command != "list": + return + cmd_line = context.raw_data.get("cmd_line", {}) + if "--explicit" not in sys.argv and not cmd_line.get("explicit").value(None): + return + if "--no-pip" in sys.argv or not cmd_line.get("pip"): + return + checksums = ("md5",) if ("--md5" in sys.argv or cmd_line.get("md5").value(None)) else None + to_print = pypi_lines_for_explicit_lockfile(context.target_prefix, checksums=checksums) + if to_print: + sys.stdout.flush() + print(f"# The following lines were added by conda-pypi v{__version__}") + print("# This is an experimental feature subject to change. 
Do not use in production.") + print(*to_print, sep="\n") diff --git a/conda_pypi/cli.py b/conda_pypi/cli/pip.py similarity index 95% rename from conda_pypi/cli.py rename to conda_pypi/cli/pip.py index 5c6f940..507e203 100644 --- a/conda_pypi/cli.py +++ b/conda_pypi/cli/pip.py @@ -20,7 +20,7 @@ def configure_parser(parser: argparse.ArgumentParser): - from .dependencies import BACKENDS + from ..dependencies import BACKENDS add_parser_help(parser) add_parser_prefix(parser) @@ -69,14 +69,14 @@ def configure_parser(parser: argparse.ArgumentParser): def execute(args: argparse.Namespace) -> int: from conda.common.io import Spinner from conda.models.match_spec import MatchSpec - from .dependencies import analyze_dependencies - from .main import ( + from ..dependencies import analyze_dependencies + from ..main import ( validate_target_env, ensure_externally_managed, run_conda_install, run_pip_install, ) - from .utils import get_prefix + from ..utils import get_prefix prefix = get_prefix(args.prefix, args.name) packages_not_installed = validate_target_env(prefix, args.packages) @@ -150,7 +150,7 @@ def execute(args: argparse.Namespace) -> int: if pypi_specs: if not args.quiet or not args.json: print("Running pip install...") - retcode = run_pip_install( + process = run_pip_install( prefix, pypi_specs, dry_run=args.dry_run, @@ -159,8 +159,8 @@ def execute(args: argparse.Namespace) -> int: force_reinstall=args.force_reinstall, yes=args.yes, ) - if retcode: - return retcode + if process.returncode: + return process.returncode if os.environ.get("CONDA_BUILD_STATE") != "BUILD": ensure_externally_managed(prefix) return 0 diff --git a/conda_pypi/dependencies/pip.py b/conda_pypi/dependencies/pip.py index 8137463..fbd1c9b 100644 --- a/conda_pypi/dependencies/pip.py +++ b/conda_pypi/dependencies/pip.py @@ -1,15 +1,9 @@ from __future__ import annotations import json -import os from logging import getLogger from collections import defaultdict -from subprocess import run -from tempfile 
import NamedTemporaryFile - -from conda.exceptions import CondaError - -from ..utils import get_env_python +from ..main import dry_run_pip_json logger = getLogger(f"conda.{__name__}") @@ -19,43 +13,7 @@ def _analyze_with_pip( prefix: str | None = None, force_reinstall: bool = False, ) -> tuple[dict[str, list[str]], dict[str, list[str]]]: - # pip can output to stdout via `--report -` (dash), but this - # creates issues on Windows due to undecodable characters on some - # project descriptions (e.g. charset-normalizer, amusingly), which - # makes pip crash internally. Probably a bug on their end. - # So we use a temporary file instead to work with bytes. - json_output = NamedTemporaryFile(suffix=".json", delete=False) - json_output.close() # Prevent access errors on Windows - - cmd = [ - str(get_env_python(prefix)), - "-mpip", - "install", - "--dry-run", - "--ignore-installed", - *(("--force-reinstall",) if force_reinstall else ()), - "--report", - json_output.name, - *packages, - ] - process = run(cmd, capture_output=True, text=True) - if process.returncode != 0: - raise CondaError( - f"Failed to analyze dependencies with pip:\n" - f" command: {' '.join(cmd)}\n" - f" exit code: {process.returncode}\n" - f" stderr:\n{process.stderr}\n" - f" stdout:\n{process.stdout}\n" - ) - logger.debug("pip (%s) provided the following report:\n%s", " ".join(cmd), process.stdout) - - with open(json_output.name, "rb") as f: - # We need binary mode because the JSON output might - # contain weird unicode stuff (as part of the project - # description or README). 
- report = json.loads(f.read()) - os.unlink(json_output.name) - + report = dry_run_pip_json(("--prefix", prefix, *packages), force_reinstall) deps_from_pip = defaultdict(list) conda_deps = defaultdict(list) for item in report["install"]: diff --git a/conda_pypi/main.py b/conda_pypi/main.py index a2a2ea0..53545ac 100644 --- a/conda_pypi/main.py +++ b/conda_pypi/main.py @@ -1,24 +1,44 @@ from __future__ import annotations +import argparse +import json import os +import shlex +import sys +from csv import reader as csv_reader +from email.parser import HeaderParser from logging import getLogger from pathlib import Path -from subprocess import run -from typing import Iterable +from subprocess import run, CompletedProcess +from tempfile import NamedTemporaryFile +from typing import Any, Iterable, Literal try: from importlib.resources import files as importlib_files except ImportError: from importlib_resources import files as importlib_files -from conda.history import History + from conda.base.context import context +from conda.common.pkg_formats.python import PythonDistribution from conda.core.prefix_data import PrefixData +from conda.exceptions import InvalidVersionSpec +from conda.gateways.disk.read import compute_sum +from conda.models.enums import PackageType +from conda.models.records import PackageRecord +from conda.history import History from conda.cli.python_api import run_command from conda.exceptions import CondaError, CondaSystemExit from conda.models.match_spec import MatchSpec +from packaging.requirements import Requirement +from packaging.tags import parse_tag -from .utils import get_env_python, get_externally_managed_path, pypi_spec_variants +from .utils import ( + get_env_python, + get_env_site_packages, + get_externally_managed_path, + pypi_spec_variants, +) logger = getLogger(f"conda.{__name__}") HERE = Path(__file__).parent.resolve() @@ -83,24 +103,30 @@ def run_conda_install( def run_pip_install( prefix: Path, - specs: Iterable[str], + args: 
Iterable[str], upgrade: bool = False, dry_run: bool = False, quiet: bool = False, verbosity: int = 0, force_reinstall: bool = False, yes: bool = False, -) -> int: - if not specs: + capture_output: bool = False, + check: bool = True, +) -> CompletedProcess: + if not args: return 0 command = [ get_env_python(prefix), "-mpip", "install", "--no-deps", - "--prefix", - str(prefix), ] + if any( + flag in args for flag in ("--platform", "--abi", "--implementation", "--python-version") + ): + command += ["--target", str(get_env_site_packages(prefix))] + else: + command += ["--prefix", str(prefix)] if dry_run: command.append("--dry-run") if quiet: @@ -111,11 +137,21 @@ def run_pip_install( command.append("--force-reinstall") if upgrade: command.append("--upgrade") - command.extend(specs) + command.extend(args) logger.info("pip install command: %s", command) - process = run(command) - return process.returncode + process = run(command, capture_output=capture_output or check, text=capture_output or check) + if check and process.returncode: + raise CondaError( + f"Failed to run pip:\n" + f" command: {shlex.join(command)}\n" + f" exit code: {process.returncode}\n" + f" stderr:\n{process.stderr}\n" + f" stdout:\n{process.stdout}" + ) + logger.debug("pip install stdout:\n%s", process.stdout) + logger.debug("pip install stderr:\n%s", process.stderr) + return process def ensure_externally_managed(prefix: os.PathLike = None) -> Path: @@ -168,3 +204,345 @@ def ensure_target_env_has_externally_managed(command: str): path.unlink() else: raise ValueError(f"command {command} not recognized.") + + +def pypi_lines_for_explicit_lockfile( + prefix: Path | str, checksums: Iterable[Literal["md5", "sha256"]] | None = None +) -> list[str]: + """ + Write pip install pseudo commands for each non-conda-installed Python package in prefix. + See `PyPIDistribution.to_lockfile_line()` for more details. 
+ """ + PrefixData._cache_.clear() + pd = PrefixData(str(prefix), pip_interop_enabled=True) + pd.load() + lines = [] + python_record = list(pd.query("python")) + assert len(python_record) == 1 + python_record = python_record[0] + for record in pd.iter_records(): + if record.package_type != PackageType.VIRTUAL_PYTHON_WHEEL: + continue + pypi_dist = PyPIDistribution.from_conda_record( + record, python_record, prefix, checksums=checksums + ) + if pypi_dist.editable: + continue + lines.append(pypi_dist.to_lockfile_line()) + return lines + + +def dry_run_pip_json( + args: Iterable[str], + ignore_installed: bool = True, + force_reinstall: bool = False, + python_version: str = "", + implementation: str = "", + abi: Iterable[str] = (), + platform: Iterable[str] = (), +) -> dict[str, Any]: + """ + Runs pip in dry-run mode with the goal of obtaining a JSON report that encodes + what would have been done. This is useful to invoke pip as a solver only, or + to obtain the URL of which wheel would have been installed for a particular set of constraints. + + It returns the parsed JSON payload as a dict. + """ + # pip can output to stdout via `--report -` (dash), but this + # creates issues on Windows due to undecodable characters on some + # project descriptions (e.g. charset-normalizer, amusingly), which + # makes pip crash internally. Probably a bug on their end. + # So we use a temporary file instead to work with bytes. 
+ json_output = NamedTemporaryFile(suffix=".json", delete=False) + json_output.close() # Prevent access errors on Windows + + try: + cmd = [ + sys.executable, + "-mpip", + "install", + "--dry-run", + "--report", + json_output.name, + "--target", + json_output.name + ".dir", # This won't be created + ] + if ignore_installed: + cmd.append("--ignore-installed") + if force_reinstall: + cmd.append("--force-reinstall") + if python_version: + cmd += ["--python-version", python_version] + if implementation: + cmd += ["--implementation", implementation] + for tag in abi: + cmd += ["--abi", tag] + for tag in platform: + cmd += ["--platform", tag] + cmd += args + logger.info("pip dry-run command: %s", cmd) + process = run(cmd, capture_output=True, text=True) + if process.returncode != 0: + raise CondaError( + f"Failed to dry-run pip:\n" + f" command: {shlex.join(cmd)}\n" + f" exit code: {process.returncode}\n" + f" stderr:\n{process.stderr}\n" + f" stdout:\n{process.stdout}" + ) + logger.debug("pip dry-run stdout:\n%s", process.stdout) + logger.debug("pip dry-run stderr:\n%s", process.stderr) + with open(json_output.name, "rb") as f: + # We need binary mode because the JSON output might + # contain weird unicode stuff (as part of the project + # description or README). 
+ return json.loads(f.read()) + finally: + os.unlink(json_output.name) + + +class PyPIDistribution: + _line_prefix = "# pypi: " + _arg_parser = None + + def __init__( + self, + name: str, + version: str, + python_version: str | None = None, + python_implementation: str | None = None, + python_abi_tags: Iterable[str] = (), + python_platform_tags: Iterable[str] = (), + record_checksums: dict[str, str] | None = None, + editable: bool = False, + ): + self.name = name + self.version = version + self.python_version = python_version + self.python_implementation = python_implementation + self.python_abi_tags = python_abi_tags or () + self.python_platform_tags = python_platform_tags or () + self.record_checksums = record_checksums or {} + self.editable = editable + self.url = None # currently no way to know, use .find_wheel_url() + + @classmethod + def from_conda_record( + cls, + record: PackageRecord, + python_record: PackageRecord, + prefix: str | Path, + checksums: Iterable[Literal["md5", "sha256"]] | None = None, + ) -> PyPIDistribution: + # Locate anchor file + sitepackages = get_env_site_packages(prefix) + if record.fn.endswith(".dist-info"): + anchor = sitepackages / record.fn / "METADATA" + elif record.fn.endswith(".egg-info"): + anchor = sitepackages / record.fn + if anchor.is_dir(): + anchor = anchor / "PKG-INFO" + else: + raise ValueError("Unrecognized anchor file for Python metadata") + + # Estimate python implementation out of build strings + python_version = ".".join(python_record.version.split(".")[:3]) + if "pypy" in python_record.build: + python_impl = "pp" + elif "cpython" in python_record.build: + python_impl = "cp" + else: + python_impl = None + + # Find the hash for the RECORD file + python_dist = PythonDistribution.init(prefix, str(anchor), python_record.version) + if checksums: + manifest = python_dist.manifest_full_path + record_checksums = compute_record_sum(manifest, checksums) + else: + record_checksums = None + + # Scan files for editable markers 
and wheel metadata + files = python_dist.get_paths() + editable = cls._is_record_editable(files) + wheel_file = next((path for path, *_ in files if path.endswith(".dist-info/WHEEL")), None) + if wheel_file: + wheel_details = cls._parse_wheel_file(Path(prefix, wheel_file)) + abi_tags, platform_tags = cls._tags_from_wheel(wheel_details) + else: + abi_tags, platform_tags = (), () + + return cls( + name=record.name, + version=record.version, + python_version=python_version, + python_implementation=python_impl, + record_checksums=record_checksums, + python_abi_tags=abi_tags, + python_platform_tags=platform_tags, + editable=editable, + ) + + @classmethod + def from_lockfile_line(cls, line: str | Iterable[str]): + if isinstance(line, str): + if line.startswith(cls._line_prefix): + line = line[len(cls._line_prefix):] + line = shlex.split(line.strip()) + if cls._arg_parser is None: + cls._arg_parser = cls._build_arg_parser() + args = cls._arg_parser.parse_args(line) + requirement = Requirement(args.spec) + specifiers = list(requirement.specifier) + if len(specifiers) != 1 or specifiers[0].operator != "==": + raise InvalidVersionSpec( + f"{args.spec} is not a valid requirement. " + "PyPI requirements must be exact; i.e. 'name==version'." + ) + pkg_name = requirement.name + version = specifiers[0].version + return cls( + name=pkg_name, + version=version, + python_version=args.python_version, + python_implementation=args.implementation, + python_abi_tags=args.abi, + python_platform_tags=args.platform, + ) + + def to_lockfile_line(self) -> list[str]: + """ + Builds a pseudo command-line input for a pip-like interface, with the goal of providing + enough information to retrieve a single wheel or sdist providing the package. The format is: + + ``` + # pypi: [==] [--python-version str] [--implementation str] [--abi str ...] + [--platform str ...] [--record-checksum =] + ``` + + All fields above should be part of the same line. 
The CLI mimics what `pip` currently + accepts, with the exception of `--record-checksum`, which is a custom addition. + + The value of `--record-checksum` is given by `compute_record_sum()`. + """ + if self.url: + return f"{self._line_prefix}{self.url}" + + line = ( + f"{self._line_prefix}{self.name}=={self.version}" + f" --python-version {self.python_version}" + f" --implementation {self.python_implementation}" + ) + for abi in self.python_abi_tags: + line += f" --abi {abi}" + for platform in self.python_platform_tags: + line += f" --platform {platform}" + for algo, checksum in self.record_checksums.items(): + line += f" --record-checksum={algo}:{checksum}" + + # Here we could invoke self.find_wheel_url() to get the resolved URL but I'm not sure it's + # guaranteed we get the exact same source so for now we defer to install time, which at + # least will give something compatible with the target machine + + return line + + def find_wheel_url(self) -> list[str]: + report = dry_run_pip_json( + ["--no-deps", f"{self.name}=={self.version}"], + python_version=self.python_version, + implementation=self.python_implementation, + abi=self.python_abi_tags, + platform=self.python_platform_tags, + ) + return report["install"][0]["download_info"]["url"] + + @staticmethod + def _parse_wheel_file(path) -> dict[str, list[str]]: + path = Path(path) + if not path.is_file(): + return {} + with open(path) as f: + parsed = HeaderParser().parse(f) + data = {} + for key, value in parsed.items(): + data.setdefault(key, []).append(value) + return data + + @staticmethod + def _tags_from_wheel(data: dict[str, Any]) -> tuple[tuple[str], tuple[str]]: + abi_tags = set() + platform_tags = set() + for tag_str in data.get("Tag", ()): + for tag in parse_tag(tag_str): + if tag.abi != "none": + abi_tags.add(tag.abi) + if tag.platform != "any": + platform_tags.add(tag.platform) + return tuple(abi_tags), tuple(platform_tags) + + @staticmethod + def _is_record_editable(files: tuple[str, str, int]) -> 
bool: + for path, *_ in files: + path = Path(path) + if "__editable__" in path.stem: + return True + if path.name == "direct_url.json" and path.parent.suffix == ".dist-info": + if path.is_file(): + data = json.loads(path.read_text()) + if data.get("dir_info", {}).get("editable"): + return True + return False + + @staticmethod + def _build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument("spec") + parser.add_argument("--python-version") + parser.add_argument("--implementation") + parser.add_argument("--abi", action="append", default=[]) + parser.add_argument("--platform", action="append", default=[]) + parser.add_argument("--record-checksum", action="append", default=[]) + return parser + +def compute_record_sum(manifest: str, algos: Iterable[str] = ("sha256",)) -> dict[str, str]: + """ + Given a `RECORD` file, compute hashes out of a subset of its sorted contents. + + We skip `*.dist-info` files other than `METADATA` and `WHEEL`. + For non site-packages files, we only keep the path for those than fall in `bin`, `lib` + and `Scripts` because their hash and size might change with path relocation. + + The list of tuples `(path, hash, size)` is then sorted and written as JSON with no spaces or + indentation. This output is what gets hashed. + """ + contents = [] + try: + with open(manifest) as f: + reader = csv_reader(f, delimiter=",", quotechar='"') + for row in reader: + path, hash_, size = row + path = Path(path) + if size: + size = int(size) + if path.parts[0].endswith(".dist-info") and path.name not in ("METADATA", "WHEEL"): + # we only want to check the metadata and wheel parts of dist-info; everything else + # is not deterministic or useful + continue + if path.parts[0] == ".." 
and any( + part in path.parts for part in ("bin", "lib", "Scripts") + ): + # entry points are autogenerated and can have different hashes/size + # depending on prefix + hash_, size = "", 0 + contents.append((str(path), hash_, size)) + except OSError as exc: + logger.warning("Could not compute RECORD checksum for %s", manifest) + logger.debug("Could not open %s", manifest, exc_info=exc) + return + + try: + with NamedTemporaryFile("w", delete=False) as tmp: + tmp.write(json.dumps(contents, indent=0, separators=(",", ":"))) + return {algo: compute_sum(tmp.name, algo) for algo in algos} + finally: + os.unlink(tmp.name) diff --git a/conda_pypi/plugin.py b/conda_pypi/plugin.py index a95b6fb..38dc4c9 100644 --- a/conda_pypi/plugin.py +++ b/conda_pypi/plugin.py @@ -1,6 +1,6 @@ from conda import plugins -from .cli import configure_parser, execute +from . import cli from .main import ensure_target_env_has_externally_managed @@ -9,8 +9,8 @@ def conda_subcommands(): yield plugins.CondaSubcommand( name="pip", summary="Run pip commands within conda environments in a safer way", - action=execute, - configure_parser=configure_parser, + action=cli.pip.execute, + configure_parser=cli.pip.configure_parser, ) @@ -21,3 +21,13 @@ def conda_post_commands(): action=ensure_target_env_has_externally_managed, run_for={"install", "create", "update", "remove"}, ) + yield plugins.CondaPostCommand( + name="conda-pypi-post-list", + action=cli.list.post_command, + run_for={"list"}, + ) + yield plugins.CondaPostCommand( + name="conda-pypi-post-install-create", + action=cli.install.post_command, + run_for={"install", "create"}, + ) diff --git a/conda_pypi/utils.py b/conda_pypi/utils.py index 327bda3..ed025aa 100644 --- a/conda_pypi/utils.py +++ b/conda_pypi/utils.py @@ -31,22 +31,30 @@ def get_env_python(prefix: os.PathLike = None) -> Path: return prefix / "bin" / "python" -def get_env_stdlib(prefix: os.PathLike = None) -> Path: +def _get_env_sysconfig_path(key: str, prefix: os.PathLike = None) -> 
Path: prefix = Path(prefix or sys.prefix) if str(prefix) == sys.prefix: - return Path(sysconfig.get_path("stdlib")) + return Path(sysconfig.get_path(key)) return Path( check_output( [ get_env_python(prefix), "-c", - "import sysconfig; print(sysconfig.get_paths()['stdlib'])", + f"import sysconfig; print(sysconfig.get_paths()['{key}'])", ], text=True, ).strip() ) +def get_env_stdlib(prefix: os.PathLike = None) -> Path: + return _get_env_sysconfig_path("stdlib", prefix) + + +def get_env_site_packages(prefix: os.PathLike = None) -> Path: + return _get_env_sysconfig_path("purelib", prefix) + + def get_externally_managed_path(prefix: os.PathLike = None) -> Iterator[Path]: prefix = Path(prefix or sys.prefix) if os.name == "nt": diff --git a/docs/features.md b/docs/features.md new file mode 100644 index 0000000..ec3fa7c --- /dev/null +++ b/docs/features.md @@ -0,0 +1,65 @@ +# Features + +`conda-pypi` uses the `conda` plugin system to implement several features that make `conda` integrate better with the PyPI ecosystem: + +## The `conda pip` subcommand + +This new subcommand wraps `pip` (and/or other PyPI tools) so you can install PyPI packages (or their conda equivalents) in your conda environment in a safer way. + +The main logic currently works like this: + +1. Collect the PyPI requirements and execute `pip install --dry-run` to obtain a JSON report of "what would have happened". +2. The JSON report is parsed and the resolved dependencies are normalized and mapped to the configured conda channels via different sources (e.g. `cf-graph-countyfair`, `grayskull`, `parselmouth`). +3. The packages that were found on the configured conda channels are installed with `conda`. Those _not_ on conda are installed individually with `pip install --no-deps`. + +:::{admonition} Coming soon +:class: seealso + +Right now we are not disallowing compiled wheels, but we might add options in the future to only allow pure Python wheels via `whl2conda`. 
+::: + +(pypi-lines)= + +## `conda list` integrations + +`conda` has native support for listing PyPI dependencies as part of `conda list`. However, this is not enabled in all output modes. `conda list --explicit`, used sometimes as a lockfile replacement, does not include any information about the PyPI dependencies. + +We have added a post-command plugin to list PyPI dependencies via `# pypi:` comments. This is currently an experimental, non-standard extension of the file format subject to change. The syntax is: + +``` +# pypi: <name>[==<version>] [--python-version str] [--implementation str] [--abi str ...] [--platform str ...] [--record-checksum <algorithm>:<checksum>] +``` + +All fields above should be part of the same line. The CLI mimics what `pip` currently accepts (as +of v24.0), with the exception of `--record-checksum`, which is a custom addition. +`--record-checksum` is currently calculated like this: + +1. Given a `RECORD` file, we parse it as a list of 3-tuples: path, hash and size. +2. We skip `*.dist-info` files other than `METADATA` and `WHEEL`. +3. For non site-packages files, we only keep the path for those that fall in `bin`, `lib` + and `Scripts` because their hash and size might change with path relocation. +4. The list of tuples `(path, hash, size)` is then sorted and written as a JSON string with no + spaces or indentation. +5. This is written to a temporary file and then hashed with MD5 or SHA256. + +## `conda install` integrations + +Another post-command plugin is also available to process `@EXPLICIT` lockfiles and search for `# pypi:` lines as discussed above. Again, this is experimental and subject to change. + +## `conda env` integrations + +:::{admonition} Coming soon +:class: seealso + +`environment.yml` files famously allow a `pip` subsection in their `dependencies`. This is handled internally by `conda env` via a `pip` subprocess. We are adding new plugin hooks so `conda-pypi` can handle these in the same way we do with the `conda pip` subcommand. 
+::: + +(externally-managed)= + +## Environment marker files + +`conda-pypi` adds support for [PEP-668](https://peps.python.org/pep-0668/)'s [`EXTERNALLY-MANAGED`](https://packaging.python.org/en/latest/specifications/externally-managed-environments/) environment marker files. + +This file will tell `pip` and other PyPI installers to not install or remove any packages in that environment, guiding the user towards a safer way of achieving the same result. In our case, the message will let you know that a `conda pip` subcommand is available (see above). + +With this file we mostly want to avoid accidental overwrites that could break your environment. You can still use `pip` directly if you want, but you'll need to add the `--break-system-packages` flag. diff --git a/docs/index.md b/docs/index.md index 0038a52..e08d1b4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,8 +1,36 @@ # conda-pypi -Welcome to the conda-pypi documentation! +Welcome to the `conda-pypi` documentation! + +`conda-pypi` allows you to run `conda pip install ...` in a safe way, and many other things. + + +::::{grid} 2 + +:::{grid-item-card} 🏡 Getting started +:link: quickstart +:link-type: doc +New to `conda-pypi`? Start here to learn the essentials +::: + +:::{grid-item-card} 💡 Motivation and vision +:link: why +:link-type: doc +Read about why `conda-pypi` exists and when you should use it +::: + +:::{grid-item-card} 🍱 Features +:link: features +:link-type: doc +Overview of what `conda-pypi` can do for you +::: + +:::: ```{toctree} +:hidden: + quickstart why +features ``` diff --git a/docs/quickstart.md b/docs/quickstart.md index f895f9e..551f159 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -10,42 +10,29 @@ conda install -n base conda-pypi ## Basic usage -`conda-pypi` provides several functionalities: +`conda-pypi` provides several {doc}`features`. 
Some of them are discussed here: -- A `conda pip` subcommand -- A `post_command` hook that will place environment proctection markers +### Safer pip installations -Their usage is discussed below. - -### New environments - -You need to create a new environment with `python` _and_ `pip`, because we will rely on the target `pip` to process the potential PyPI dependencies: - -``` -conda create -n my-python-env python pip -``` - -### Existing environments - -Assuming you have an activated conda environment named `my-python-env` that includes `python` and `pip` installed: +Assuming you have an activated conda environment named `my-python-env` that includes `python` and `pip` installed, and `conda-forge` in your configured channels, you can run `conda pip` like this: ``` conda pip install requests ``` -This will install `requests` from conda, along with all its dependencies, because everything is available. The dependency tree translates one-to-one from PyPI to conda, so there are no issues. +This will install `requests` from conda-forge, along with all its dependencies, because everything is available there. The dependency tree translates one-to-one from PyPI to conda, so there are no issues. ``` conda pip install build ``` -This will install the `python-build` package from conda-forge. Note how `conda pip` knows how to map the different project names. This is done via semi-automated mappings provided by the `grayskull` and `cf-graph-countyfair` projects. +This will install the `python-build` package from conda-forge. Note how `conda pip` knows how to map the different project names. This is done via semi-automated mappings provided by the `grayskull`, `cf-graph-countyfair` and `parselmouth` projects. ``` conda pip install PyQt5 ``` -This will install `pyqt=5` from conda, which also brings `qt=5` separately. This is because `pyqt` on conda _depennds_ on the Qt libraries instead of bundling them in the same package. 
Again, the `PyQt5 -> pyqt` mapping is handled as expected. +This will install `pyqt=5` from conda, which also brings `qt=5` separately. This is because `pyqt` on conda _depends_ on the Qt libraries instead of bundling them in the same package. Again, the `PyQt5 -> pyqt` mapping is handled as expected. ``` conda pip install ib_insync @@ -61,6 +48,46 @@ conda pip install 5-exercise-upload-to-pypi This package is not available on conda-forge. We will analyze the dependency tree and install all the available ones with `conda`. The rest will be installed with `pip install --no-deps`. +### Lockfiles support + +`conda-pypi` integrates with `conda list --explicit` to add some custom comments so your `@EXPLICIT` lockfiles contain PyPI information. `conda-pypi` also integrates with `conda install` and `conda create` to process these special lines. See more at {ref}`pypi-lines`. + +You can generate these lockfiles with `conda list --explicit --md5`, and they will look like this: + +``` +# This file may be used to create an environment using: +# $ conda create --name <env> --file <this file> +# platform: osx-arm64 +@EXPLICIT +https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda#1bbc659ca658bfd49a481b5ef7a0f40f +https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda#fb416a1795f18dcc5a038bc2dc54edf9 +https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.6.2-hebf3989_0.conda#e3cde7cfa87f82f7cb13d482d5e0ad09 +https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2#086914b672be056eb70fd4285b6783b6 +https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-h53f4e23_5.conda#1a47f5236db2e06a320ffa0392f81bd8 +https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.4.20240210-h078ce10_0.conda#616ae8691e6608527d0071e6766dcb81 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 
+https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2#39c6b54e94014701dd157f4f576ed211 +https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.2-h091b4b1_0.conda#9d07427ee5bd9afd1e11ce14368a48d6 +https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.2.1-h0d3ecfb_1.conda#eb580fb888d93d5d550c557323ac5cee +https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda#8cbb776a2f641b943d413b3e19df71f4 +https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda#b50a57ba89c32b62428b71a875291c9b +https://conda.anaconda.org/conda-forge/osx-arm64/python-3.12.2-hdf0ec26_0_cpython.conda#85e91138ae921a2771f57a50120272bd +https://conda.anaconda.org/conda-forge/noarch/absl-py-2.1.0-pyhd8ed1ab_0.conda#035d1d58677c13ec93122d9eb6b8803b +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.2.0-pyhd8ed1ab_0.conda#da214ecd521a720a9d521c68047682dc +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +# The following lines were added by conda-pypi v0.1.0 +# This is an experimental feature subject to change. Do not use in production. 
+# pypi: charset-normalizer==3.3.2 --python-version 3.12.2 --implementation cp --abi cp312 --platform macosx_11_0_arm64 --record-checksum=md5:a88a07f3a23748b3d78b24ca3812e7d8 +# pypi: certifi==2024.2.2 --python-version 3.12.2 --implementation cp --record-checksum=md5:1c186605aa7d0c050cf4ef147fcf750d +# pypi: tf-slim==1.1.0 --python-version 3.12.2 --implementation cp --record-checksum=md5:96c65c0d90cd8c93f3bbe22ee34190c5 +# pypi: aaargh==0.7.1 --python-version 3.12.2 --implementation cp --record-checksum=md5:55f5aa1765064955792866812afdef6f +# pypi: requests==2.32.2 --python-version 3.12.2 --implementation cp --record-checksum=md5:d7e8849718b3ffb565fd3cbe2575ea97 +# pypi: 5-exercise-upload-to-pypi==1.2 --python-version 3.12.2 --implementation cp --record-checksum=md5:c96a1cd6037f6e3b659e2139b0839c97 +# pypi: idna==3.7 --python-version 3.12.2 --implementation cp --record-checksum=md5:5b2f9f2c52705a9b1e32818f1b387356 +# pypi: urllib3==2.2.1 --python-version 3.12.2 --implementation cp --record-checksum=md5:1bd9312a95c73a644f721ca96c9d8b45 +``` + ### Environment protection `conda-pypi` ships a special file, `EXTERNALLY-MANAGED`, that will be installed in: @@ -69,6 +96,4 @@ This package is not available on conda-forge. We will analyze the dependency tre - All new environments that include `pip`. - Existing environments that `pip`, but only after running a conda command on them. -This file is designed after [PEP668](https://peps.python.org/pep-0668/). You can read more about in [Externally Managed Environments at packaging.python.org](https://packaging.python.org/en/latest/specifications/externally-managed-environments/). - -Essentially, the presence of this file in a given environment will prevent users from using `pip` directly on them. An [informative error message](https://github.com/jaimergp/conda-pip/blob/main/conda_pypi/data/EXTERNALLY-MANAGED) is provided instead. +More details at {ref}`externally-managed`. 
diff --git a/docs/why.md b/docs/why.md index 65d1e22..c7be388 100644 --- a/docs/why.md +++ b/docs/why.md @@ -54,4 +54,4 @@ Are we expecting you to do all that manually? Of course not! This is what `conda ## Expected behavior -Refer to the [Quickstart guide](quickstart.md). +Refer to the [Quick start guide](quickstart.md). diff --git a/tests/test_install.py b/tests/test_install.py index 5f96093..f1d3cda 100644 --- a/tests/test_install.py +++ b/tests/test_install.py @@ -1,14 +1,17 @@ from __future__ import annotations import sys +from pathlib import Path +from subprocess import run +from typing import Iterable import pytest - from conda.core.prefix_data import PrefixData from conda.models.match_spec import MatchSpec from conda.testing import CondaCLIFixture, TmpEnvFixture from conda_pypi.dependencies import NAME_MAPPINGS, BACKENDS, _pypi_spec_to_conda_spec +from conda_pypi.utils import get_env_python @pytest.mark.parametrize("source", NAME_MAPPINGS.keys()) @@ -16,14 +19,15 @@ def test_mappings_one_by_one(source: str): assert _pypi_spec_to_conda_spec("build", sources=(source,)) == "python-build" -@pytest.mark.parametrize("pypi_spec,conda_spec", +@pytest.mark.parametrize( + "pypi_spec,conda_spec", [ ("numpy", "numpy"), ("build", "python-build"), ("ib_insync", "ib-insync"), ("pyqt5", "pyqt>=5.0.0,<6.0.0.0dev0"), ("PyQt5", "pyqt>=5.0.0,<6.0.0.0dev0"), - ] + ], ) def test_mappings_fallback(pypi_spec: str, conda_spec: str): assert MatchSpec(_pypi_spec_to_conda_spec(pypi_spec)) == MatchSpec(conda_spec) @@ -102,10 +106,11 @@ def test_spec_normalization( assert "All packages are already installed." 
in out + err -@pytest.mark.parametrize("pypi_spec,requested_conda_spec,installed_conda_specs", +@pytest.mark.parametrize( + "pypi_spec,requested_conda_spec,installed_conda_specs", [ ("PyQt5", "pyqt[version='>=5.0.0,<6.0.0.0dev0']", ("pyqt-5", "qt-main-5")), - ] + ], ) def test_pyqt( tmp_env: TmpEnvFixture, @@ -122,4 +127,70 @@ def test_pyqt( assert requested_conda_spec in out for conda_spec in installed_conda_specs: assert conda_spec in out - + + +@pytest.mark.parametrize("specs", (("requests",),)) +@pytest.mark.parametrize("pure_pip", (True, False)) +@pytest.mark.parametrize("with_md5", (True, False)) +def test_lockfile_roundtrip( + tmp_path: Path, + tmp_env: TmpEnvFixture, + conda_cli: CondaCLIFixture, + specs: Iterable[str], + pure_pip: bool, + with_md5: bool, +): + md5 = ("--md5",) if with_md5 else () + with tmp_env("python=3.9", "pip") as prefix: + if pure_pip: + p = run( + [get_env_python(prefix), "-mpip", "install", "--break-system-packages", *specs], + capture_output=True, + text=True, + check=False, + ) + print(p.stdout) + print(p.stderr, file=sys.stderr) + assert p.returncode == 0 + else: + out, err, rc = conda_cli("pip", "--prefix", prefix, "--yes", "install", *specs) + print(out) + print(err, file=sys.stderr) + assert rc == 0 + out, err, rc = conda_cli("list", "--explicit", "--prefix", prefix, *md5) + print(out) + print(err, file=sys.stderr) + assert rc == 0 + if pure_pip: + assert "# pypi: requests" in out + if md5: + assert "--record-checksum=md5:" in out + + (tmp_path / "lockfile.txt").write_text(out) + p = run( + [ + sys.executable, + "-mconda", + "create", + "--prefix", + tmp_path / "env", + "--file", + tmp_path / "lockfile.txt", + ], + capture_output=True, + text=True, + check=False, + ) + print(p.stdout) + print(p.stderr, file=sys.stderr) + assert p.returncode == 0 + if pure_pip: + assert "Preparing PyPI transaction" in p.stdout + assert "Executing PyPI transaction" in p.stdout + assert "Verifying PyPI transaction" in p.stdout + + out2, err2, rc2 
= conda_cli("list", "--explicit", *md5, "--prefix", tmp_path / "env") + print(out2) + print(err2, file=sys.stderr) + assert rc2 == 0 + assert sorted(out2.splitlines()) == sorted(out.splitlines())