From e7371978f60ebd5c8a28c8b4dfe7b8429da9f55d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Fri, 26 May 2023 18:08:49 +0200 Subject: [PATCH 01/15] Make `kedro micropkg package` accept `--verbose` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index ce7d28fb8b..e2e157af75 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -227,7 +227,7 @@ def _package_micropkgs_from_manifest(metadata: ProjectMetadata) -> None: click.secho("Micro-packages packaged!", fg="green") -@micropkg.command("package") +@command_with_verbosity(micropkg, "package") @env_option( help="Environment where the micro-package configuration lives. Defaults to `base`." ) @@ -254,8 +254,14 @@ def _package_micropkgs_from_manifest(metadata: ProjectMetadata) -> None: @click.argument("module_path", nargs=1, required=False, callback=_check_module_path) @click.pass_obj # this will pass the metadata as first argument def package_micropkg( - metadata: ProjectMetadata, module_path, env, alias, destination, all_flag -): + metadata: ProjectMetadata, + module_path, + env, + alias, + destination, + all_flag, + **kwargs, +): # pylint: disable=unused-argument """Package up a modular pipeline or micro-package as a Python source distribution.""" if not module_path and not all_flag: click.secho( From 726d6f2da571a64bc6056e821d6b1303234b5d27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 29 May 2023 16:05:10 +0200 Subject: [PATCH 02/15] Improve error when `micropkg pull` does not find sdist Fix gh-2542. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- .../nodes_and_pipelines/micro_packaging.md | 4 +-- kedro/framework/cli/micropkg.py | 20 +++++++++---- .../cli/micropkg/test_micropkg_pull.py | 28 ++++++++++++++++--- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/docs/source/nodes_and_pipelines/micro_packaging.md b/docs/source/nodes_and_pipelines/micro_packaging.md index f034b14204..d72a0c3b4f 100644 --- a/docs/source/nodes_and_pipelines/micro_packaging.md +++ b/docs/source/nodes_and_pipelines/micro_packaging.md @@ -6,7 +6,7 @@ Micro-packaging allows users to share Kedro micro-packages across codebases, org You can package a micro-package by executing: `kedro micropkg package ` -* This will generate a new [tar](https://docs.python.org/3/distutils/sourcedist.html) file for this micro-package. +* This will generate a new [source distribution](https://docs.python.org/3/distutils/sourcedist.html) for this micro-package. * By default, the tar file will be saved into `dist/` directory inside your project. * You can customise the target with the `--destination` (`-d`) option. @@ -64,7 +64,7 @@ The examples above apply to any generic Python package, modular pipelines fall u You can pull a micro-package from a tar file by executing `kedro micropkg pull `. -* The `` must either be a package name on PyPI or a path to the tar file. +* The `` must either be a package name on PyPI or a path to the source distribution file. * Kedro will unpack the tar file, and install the files in following locations in your Kedro project: * All the micro-package code in `src///` * Configuration files in `conf//parameters/.yml`, where `` defaults to `base`. diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index e2e157af75..bc662885e5 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -330,15 +330,25 @@ def _unpack_sdist(location: str, destination: Path, fs_args: str | None) -> None safe_extract(tar_file, destination) else: python_call( - "pip", ["download", "--no-deps", "--dest", str(destination), location] + "pip", + [ + "download", + "--no-deps", + "--no-binary", # the micropackaging expects an sdist, + ":all:", # wheels are not supported + "--dest", + str(destination), + location, + ], ) sdist_file = list(destination.glob("*.tar.gz")) - # `--no-deps` should fetch only one source distribution file, and CLI should fail if that's - # not the case. - if len(sdist_file) != 1: + # `--no-deps --no-binary :all:` should fetch only one source distribution file, + # and CLI should fail if that's not the case. + # No need to check for zero sdists, in that case the previous command would fail. + if len(sdist_file) > 1: file_names = [sf.name for sf in sdist_file] raise KedroCliError( - f"More than 1 or no sdist files found: {file_names}. " + f"More than 1 sdist files found: {file_names}. " f"There has to be exactly one source distribution file." ) with tarfile.open(sdist_file[0], "r:gz") as fs_file: diff --git a/tests/framework/cli/micropkg/test_micropkg_pull.py b/tests/framework/cli/micropkg/test_micropkg_pull.py index 05552fb74b..b2d9031f77 100644 --- a/tests/framework/cli/micropkg/test_micropkg_pull.py +++ b/tests/framework/cli/micropkg/test_micropkg_pull.py @@ -645,6 +645,8 @@ def test_pull_from_pypi( [ "download", "--no-deps", + "--no-binary", + ":all:", "--dest", str(tmp_path), package_name, @@ -695,7 +697,16 @@ def test_invalid_pull_from_pypi( assert result.exit_code python_call_mock.assert_called_once_with( - "pip", ["download", "--no-deps", "--dest", str(tmp_path), invalid_pypi_name] + "pip", + [ + "download", + "--no-deps", + "--no-binary", + ":all:", + "--dest", + str(tmp_path), + invalid_pypi_name, + ], ) assert pypi_error_message in result.stdout @@ -724,14 +735,14 @@ def test_pull_from_pypi_more_than_one_sdist_file( ) assert result.exit_code - assert "Error: More than 1 or no sdist files found:" in result.output + assert "Error: More than 1 sdist files found:" in result.output def test_pull_unsupported_protocol_by_fsspec( self, fake_project_cli, fake_metadata, tmp_path, mocker ): protocol = "unsupported" exception_message = f"Protocol not known: {protocol}" - error_message = "Error: More than 1 or no sdist files found:" + error_message = "Error: More than 1 sdist files found:" package_path = f"{protocol}://{PIPELINE_NAME}" python_call_mock = mocker.patch("kedro.framework.cli.micropkg.python_call") @@ -750,7 +761,16 @@ def test_pull_unsupported_protocol_by_fsspec( assert result.exit_code filesystem_mock.assert_called_once_with(protocol) python_call_mock.assert_called_once_with( - "pip", ["download", "--no-deps", "--dest", str(tmp_path), package_path] + "pip", + [ + "download", + "--no-deps", + "--no-binary", + ":all:", + "--dest", + str(tmp_path), + package_path, + ], ) assert exception_message in result.output assert "Trying to use 'pip download'..." in result.output From 66120c77e17e6a5393aa21580000db2c26ff805f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Fri, 26 May 2023 18:09:36 +0200 Subject: [PATCH 03/15] Stop using pkg_resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 42 +++++++++++++------ .../micropkg/test_micropkg_requirements.py | 2 +- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index bc662885e5..7c9946afc7 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -11,7 +11,7 @@ from typing import Iterable, List, Tuple, Union import click -import pkg_resources +from packaging.requirements import InvalidRequirement, Requirement from rope.base.project import Project from rope.contrib import generate from rope.refactor.move import MoveModule @@ -584,13 +584,23 @@ def _sync_path_list(source: list[tuple[Path, str]], target: Path) -> None: _sync_dirs(source_path, target_with_suffix) +def _drop_comment(line): + # https://github.com/pypa/setuptools/blob/b545fc7/\ + # pkg_resources/_vendor/jaraco/text/__init__.py#L554-L566 + return line.partition(" #")[0] + + def _make_install_requires(requirements_txt: Path) -> list[str]: """Parses each line of requirements.txt into a version specifier valid to put in - install_requires.""" + install_requires. + Matches pkg_resources.parse_requirements""" if not requirements_txt.exists(): return [] - requirements = pkg_resources.parse_requirements(requirements_txt.read_text()) - return [str(requirement) for requirement in requirements] + return [ + str(Requirement(_drop_comment(requirement_line))) + for requirement_line in requirements_txt.read_text().splitlines() + if requirement_line and not requirement_line.startswith("#") + ] def _create_nested_package(project: Project, package_path: Path) -> Path: @@ -872,17 +882,25 @@ def _append_package_reqs( def _safe_parse_requirements( requirements: str | Iterable[str], -) -> set[pkg_resources.Requirement]: - """Safely parse a requirement or set of requirements. This effectively replaces - pkg_resources.parse_requirements, which blows up with a ValueError as soon as it +) -> set[Requirement]: + """Safely parse a requirement or set of requirements. This avoids blowing up when it encounters a requirement it cannot parse (e.g. `-r requirements.txt`). This way we can still extract all the parseable requirements out of a set containing some unparseable requirements. """ parseable_requirements = set() - for requirement in pkg_resources.yield_lines(requirements): - try: - parseable_requirements.add(pkg_resources.Requirement.parse(requirement)) - except ValueError: - continue + if isinstance(requirements, str): + requirements = requirements.splitlines() + # TODO: Properly handle continuation lines, + # see https://github.com/pypa/setuptools/blob/v67.8.0/setuptools/_reqs.py + for requirement_line in requirements: + if ( + requirement_line + and not requirement_line.startswith("#") + and not requirement_line.startswith("-e") + ): + try: + parseable_requirements.add(Requirement(_drop_comment(requirement_line))) + except InvalidRequirement: + continue return parseable_requirements diff --git a/tests/framework/cli/micropkg/test_micropkg_requirements.py b/tests/framework/cli/micropkg/test_micropkg_requirements.py index a0dbf69ba6..b0070a1bee 100644 --- a/tests/framework/cli/micropkg/test_micropkg_requirements.py +++ b/tests/framework/cli/micropkg/test_micropkg_requirements.py @@ -27,7 +27,7 @@ """ # These requirements can be used in requirements.txt but not in METADATA Requires-Dist. -# They cannot be parsed by pkg_resources. +# They cannot be parsed by packaging. COMPLEX_REQUIREMENTS = """--extra-index-url https://this.wont.work -r other_requirements.txt ./path/to/package.whl From 1456b8e176b0eb7952f512d362d09443bc526c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 29 May 2023 20:02:59 +0200 Subject: [PATCH 04/15] Do not rely on setup.py to generate sdist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See gh-2414. Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index 7c9946afc7..e588bce4fb 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -775,9 +775,7 @@ def _generate_sdist_file( raise KedroCliError(f"{cls.__module__}.{cls.__qualname__}: {exc}") from exc _generate_manifest_file(temp_dir_path) - setup_file = _generate_setup_file( - package_name, version, install_requires, temp_dir_path - ) + _generate_setup_file(package_name, version, install_requires, temp_dir_path) package_file = destination / _get_sdist_name(name=package_name, version=version) @@ -786,14 +784,14 @@ def _generate_sdist_file( f"Package file {package_file} will be overwritten!", fg="yellow" ) - # python setup.py sdist --formats=gztar --dist-dir + # python -m build --outdir call( [ sys.executable, - str(setup_file.resolve()), - "sdist", - "--formats=gztar", - "--dist-dir", + "-m", + "build", + "--sdist", + "--outdir", str(destination), ], cwd=temp_dir, From 8dfe1038e170699acc6ac8905f80f693c5e00e38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 29 May 2023 17:12:25 +0200 Subject: [PATCH 05/15] Stop relying on .egg-info directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix gh-2567. Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 78 ++++++++++++++----- .../cli/micropkg/test_micropkg_pull.py | 27 ------- 2 files changed, 59 insertions(+), 46 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index e588bce4fb..7690f8430b 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -11,11 +11,13 @@ from typing import Iterable, List, Tuple, Union import click +from build.util import project_wheel_metadata from packaging.requirements import InvalidRequirement, Requirement from rope.base.project import Project from rope.contrib import generate from rope.refactor.move import MoveModule from rope.refactor.rename import Rename +from setuptools.discovery import FlatLayoutPackageFinder from kedro.framework.cli.pipeline import ( _assert_pkg_name_ok, @@ -131,7 +133,7 @@ def pull_package( # pylint:disable=unused-argument, too-many-arguments click.secho(message, fg="green") -# pylint: disable=too-many-arguments, too-many-locals +# pylint: disable=too-many-arguments def _pull_package( package_path: str, metadata: ProjectMetadata, @@ -144,25 +146,54 @@ def _pull_package( temp_dir_path = Path(temp_dir).resolve() _unpack_sdist(package_path, temp_dir_path, fs_args) - egg_info_files = list((temp_dir_path).rglob("*.egg-info")) - if len(egg_info_files) != 1: - raise KedroCliError( - f"More than 1 or no egg-info files found from {package_path}. " - f"There has to be exactly one egg-info directory." + # temp_dir_path is the parent directory of the project root dir + contents = [member for member in temp_dir_path.iterdir() if member.is_dir()] + if len(contents) != 1: + raise RuntimeError( + "Invalid sdist was extracted: exactly one directory was expected, " + f"got {contents}" ) - egg_info_file = egg_info_files[0] - package_name = egg_info_file.stem - package_requirements = egg_info_file.parent / "setup.py" - - # Finds a string representation of 'install_requires' list from setup.py - reqs_list_pattern = r"install_requires\=(.*?)\,\n" - list_reqs = re.findall( - reqs_list_pattern, package_requirements.read_text(encoding="utf-8") - ) + project_root_dir = contents[0] + + # This is much slower than parsing the requirements + # directly from the metadata files + # because it installs the package in an isolated environment, + # but it's the only reliable way of doing it + # without making assumptions on the project metadata. + library_meta = project_wheel_metadata(project_root_dir) + + # Project name will be `my-pipeline` even if `setup.py` says `my_pipeline` + # because standards mandate normalization of names for comparison, + # see https://packaging.python.org/en/latest/specifications/core-metadata/#name + # The proper way to get it would be + # project_name = library_meta.get("Name") + # However, the rest of the code somehow mixes package name with project name + # and expects the non-normalized version, so we have to find it. + # In a pre-pyproject.toml world we could just parse setup.py, + # but now things are more complicated. + # kedro stores the package name in pyproject.toml, + # but micropackages are not kedro projects. + # Since micropackaging has a runtime dependency on setuptools already, + # we will assume flat (non-src) layout, + # which is what `kedro micropkg package` produces, + # and use setuptools utilities to save some complexity. + packages = [ + package + for package in FlatLayoutPackageFinder().find(project_root_dir) + if "." not in package + ] + if len(packages) != 1: + # Should not happen if user is calling `micropkg pull` + # with the result of a `micropkg package`, + # and in general if the distribution only contains one package (most likely), + # but helps give a sensible error message otherwise + raise RuntimeError( + "Invalid package contents: exactly one package was expected, " + f"got {packages}" + ) + package_name = packages[0] - # Finds all elements from the above string representation of a list - reqs_element_pattern = r"\'(.*?)\'" - package_reqs = re.findall(reqs_element_pattern, list_reqs[0]) + package_reqs = _get_all_library_reqs(library_meta) if package_reqs: requirements_txt = metadata.source_dir / "requirements.txt" @@ -172,7 +203,7 @@ def _pull_package( _install_files( metadata, package_name, - egg_info_file.parent, + project_root_dir, env, alias, destination, @@ -878,6 +909,15 @@ def _append_package_reqs( ) +def _get_all_library_reqs(metadata): + # Inspired by https://discuss.python.org/t/\ + # programmatically-getting-non-optional-requirements-of-current-directory/26963/2 + # but leaving markers intact + return [ + str(Requirement(dep_str)) for dep_str in metadata.get_all("Requires-Dist", []) + ] + + def _safe_parse_requirements( requirements: str | Iterable[str], ) -> set[Requirement]: diff --git a/tests/framework/cli/micropkg/test_micropkg_pull.py b/tests/framework/cli/micropkg/test_micropkg_pull.py index b2d9031f77..fddee03ede 100644 --- a/tests/framework/cli/micropkg/test_micropkg_pull.py +++ b/tests/framework/cli/micropkg/test_micropkg_pull.py @@ -424,33 +424,6 @@ def test_pull_sdist_fs_args( "file", fs_arg_1=1, fs_arg_2={"fs_arg_2_nested_1": 2} ) - def test_pull_two_egg_info( - self, fake_project_cli, fake_repo_path, mocker, tmp_path, fake_metadata - ): - """Test for pulling an sdist file with more than one - dist-info directory. - """ - call_pipeline_create(fake_project_cli, fake_metadata) - call_micropkg_package(fake_project_cli, fake_metadata) - sdist_file = ( - fake_repo_path / "dist" / _get_sdist_name(name=PIPELINE_NAME, version="0.1") - ) - assert sdist_file.is_file() - - (tmp_path / f"{PIPELINE_NAME}-0.1" / "dummy.egg-info").mkdir(parents=True) - - mocker.patch( - "kedro.framework.cli.micropkg.tempfile.TemporaryDirectory", - return_value=tmp_path, - ) - result = CliRunner().invoke( - fake_project_cli, - ["micropkg", "pull", str(sdist_file)], - obj=fake_metadata, - ) - assert result.exit_code - assert "Error: More than 1 or no egg-info files found" in result.output - @pytest.mark.parametrize("env", [None, "local"]) @pytest.mark.parametrize("alias", [None, "alias_path"]) def test_pull_tests_missing( From f945f7ad9021a2380dd79aea80735a76e1477352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Tue, 30 May 2023 10:22:01 +0200 Subject: [PATCH 06/15] Note change from pkg_requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/pypa/packaging/issues/644#issuecomment-1567982812 Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index 7690f8430b..6693115306 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -876,6 +876,9 @@ def _append_package_reqs( requirements_txt: Path, package_reqs: list[str], package_name: str ) -> None: """Appends micro-package requirements to project level requirements.txt""" + # NOTE: packaging.requirements.Requirement equality check + # does not normalize names, and as such is not equivalent to pkg_resources.Requirement, + # see https://github.com/pypa/packaging/issues/644#issuecomment-1567982812 incoming_reqs = _safe_parse_requirements(package_reqs) if requirements_txt.is_file(): existing_reqs = _safe_parse_requirements(requirements_txt.read_text()) From 10e24c8c82d9eab19c33584c901566c07d75dd11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 5 Jun 2023 09:22:59 +0200 Subject: [PATCH 07/15] Improve code comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index 6693115306..debdab112f 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -167,16 +167,8 @@ def _pull_package( # see https://packaging.python.org/en/latest/specifications/core-metadata/#name # The proper way to get it would be # project_name = library_meta.get("Name") - # However, the rest of the code somehow mixes package name with project name - # and expects the non-normalized version, so we have to find it. - # In a pre-pyproject.toml world we could just parse setup.py, - # but now things are more complicated. - # kedro stores the package name in pyproject.toml, - # but micropackages are not kedro projects. - # Since micropackaging has a runtime dependency on setuptools already, - # we will assume flat (non-src) layout, - # which is what `kedro micropkg package` produces, - # and use setuptools utilities to save some complexity. + # However, the rest of the code expects the non-normalized package name, + # so we have to find it. packages = [ package for package in FlatLayoutPackageFinder().find(project_root_dir) @@ -913,9 +905,9 @@ def _append_package_reqs( def _get_all_library_reqs(metadata): - # Inspired by https://discuss.python.org/t/\ + """Get all library requirements from metadata, leaving markers intact.""" + # See https://discuss.python.org/t/\ # programmatically-getting-non-optional-requirements-of-current-directory/26963/2 - # but leaving markers intact return [ str(Requirement(dep_str)) for dep_str in metadata.get_all("Requires-Dist", []) ] From e2a64f74a7c23555a9f43fd913ef8ad87bf68bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 5 Jun 2023 09:23:38 +0200 Subject: [PATCH 08/15] Fix equality checks of equivalent requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 64 ++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index debdab112f..f2d40cccdb 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -8,11 +8,12 @@ import tempfile from importlib import import_module from pathlib import Path -from typing import Iterable, List, Tuple, Union +from typing import Any, Iterable, List, Tuple, Union import click from build.util import project_wheel_metadata from packaging.requirements import InvalidRequirement, Requirement +from packaging.utils import canonicalize_name from rope.base.project import Project from rope.contrib import generate from rope.refactor.move import MoveModule @@ -49,6 +50,53 @@ """ +class _EquivalentRequirement(Requirement): + # See https://github.com/pypa/packaging/issues/644#issuecomment-1567982812 + + @property + def canonical_name(self) -> str: + """Canonicalized name according to the rules of PEP 503.""" + return canonicalize_name(self.name) + + def _to_str(self, name: str) -> str: + parts: list[str] = [name] + + if self.extras: + formatted_extras = ",".join(sorted(self.extras)) + parts.append(f"[{formatted_extras}]") + + if self.specifier: + parts.append(str(self.specifier)) + + if self.url: + parts.append(f"@ {self.url}") + if self.marker: + parts.append(" ") + + if self.marker: + parts.append(f"; {self.marker}") + + return "".join(parts) + + def __str__(self) -> str: + return self._to_str(self.name) + + def __hash__(self) -> int: + return hash((self.__class__.__name__, self._to_str(self.canonical_name))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, (Requirement, _EquivalentRequirement)): + return NotImplemented + + return ( + self.canonical_name == other.canonical_name + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) + + def _check_module_path(ctx, param, value): # pylint: disable=unused-argument if value and not re.match(r"^[\w.]+$", value): message = ( @@ -620,7 +668,7 @@ def _make_install_requires(requirements_txt: Path) -> list[str]: if not requirements_txt.exists(): return [] return [ - str(Requirement(_drop_comment(requirement_line))) + str(_EquivalentRequirement(_drop_comment(requirement_line))) for requirement_line in requirements_txt.read_text().splitlines() if requirement_line and not requirement_line.startswith("#") ] @@ -868,9 +916,6 @@ def _append_package_reqs( requirements_txt: Path, package_reqs: list[str], package_name: str ) -> None: """Appends micro-package requirements to project level requirements.txt""" - # NOTE: packaging.requirements.Requirement equality check - # does not normalize names, and as such is not equivalent to pkg_resources.Requirement, - # see https://github.com/pypa/packaging/issues/644#issuecomment-1567982812 incoming_reqs = _safe_parse_requirements(package_reqs) if requirements_txt.is_file(): existing_reqs = _safe_parse_requirements(requirements_txt.read_text()) @@ -909,13 +954,14 @@ def _get_all_library_reqs(metadata): # See https://discuss.python.org/t/\ # programmatically-getting-non-optional-requirements-of-current-directory/26963/2 return [ - str(Requirement(dep_str)) for dep_str in metadata.get_all("Requires-Dist", []) + str(_EquivalentRequirement(dep_str)) + for dep_str in metadata.get_all("Requires-Dist", []) ] def _safe_parse_requirements( requirements: str | Iterable[str], -) -> set[Requirement]: +) -> set[_EquivalentRequirement]: """Safely parse a requirement or set of requirements. This avoids blowing up when it encounters a requirement it cannot parse (e.g. `-r requirements.txt`). This way we can still extract all the parseable requirements out of a set containing some @@ -933,7 +979,9 @@ def _safe_parse_requirements( and not requirement_line.startswith("-e") ): try: - parseable_requirements.add(Requirement(_drop_comment(requirement_line))) + parseable_requirements.add( + _EquivalentRequirement(_drop_comment(requirement_line)) + ) except InvalidRequirement: continue return parseable_requirements From 8625ec57384f0794db93c37f8baab9e5fc554796 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 5 Jun 2023 10:35:49 +0200 Subject: [PATCH 09/15] Add micropackaging improvements to release notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- RELEASE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 77453869dc..6a3bf5c0da 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -13,6 +13,8 @@ ## Major features and improvements ## Bug fixes and other changes +* Reworked micropackaging workflow to use standard Python packaging practices. +* Make `kedro micropkg package` accept `--verbose`. ## Breaking changes to the API From 7b74dacfff3a05f8ab29c7e1b84d24d515096d72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 5 Jun 2023 16:19:16 +0200 Subject: [PATCH 10/15] Revert sdist check to make it more testable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 5 ++--- tests/framework/cli/micropkg/test_micropkg_pull.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index f2d40cccdb..404b684d0b 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -415,11 +415,10 @@ def _unpack_sdist(location: str, destination: Path, fs_args: str | None) -> None sdist_file = list(destination.glob("*.tar.gz")) # `--no-deps --no-binary :all:` should fetch only one source distribution file, # and CLI should fail if that's not the case. - # No need to check for zero sdists, in that case the previous command would fail. - if len(sdist_file) > 1: + if len(sdist_file) != 1: file_names = [sf.name for sf in sdist_file] raise KedroCliError( - f"More than 1 sdist files found: {file_names}. " + f"More than 1 or no sdist files found: {file_names}. " f"There has to be exactly one source distribution file." ) with tarfile.open(sdist_file[0], "r:gz") as fs_file: diff --git a/tests/framework/cli/micropkg/test_micropkg_pull.py b/tests/framework/cli/micropkg/test_micropkg_pull.py index fddee03ede..15067c5d92 100644 --- a/tests/framework/cli/micropkg/test_micropkg_pull.py +++ b/tests/framework/cli/micropkg/test_micropkg_pull.py @@ -708,14 +708,14 @@ def test_pull_from_pypi_more_than_one_sdist_file( ) assert result.exit_code - assert "Error: More than 1 sdist files found:" in result.output + assert "Error: More than 1 or no sdist files found:" in result.output def test_pull_unsupported_protocol_by_fsspec( self, fake_project_cli, fake_metadata, tmp_path, mocker ): protocol = "unsupported" exception_message = f"Protocol not known: {protocol}" - error_message = "Error: More than 1 sdist files found:" + error_message = "Error: More than 1 or no sdist files found:" package_path = f"{protocol}://{PIPELINE_NAME}" python_call_mock = mocker.patch("kedro.framework.cli.micropkg.python_call") From d797acf77d43f749874de1355ad605424ebc52e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Tue, 13 Jun 2023 13:37:50 +0200 Subject: [PATCH 11/15] Fix micropkg pull error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index 404b684d0b..b64d6c9ce2 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -197,7 +197,7 @@ def _pull_package( # temp_dir_path is the parent directory of the project root dir contents = [member for member in temp_dir_path.iterdir() if member.is_dir()] if len(contents) != 1: - raise RuntimeError( + raise KedroCliError( "Invalid sdist was extracted: exactly one directory was expected, " f"got {contents}" ) @@ -227,7 +227,7 @@ def _pull_package( # with the result of a `micropkg package`, # and in general if the distribution only contains one package (most likely), # but helps give a sensible error message otherwise - raise RuntimeError( + raise KedroCliError( "Invalid package contents: exactly one package was expected, " f"got {packages}" ) From 23541bf1adb6a054746834977faa9024529bc89f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Tue, 13 Jun 2023 13:38:15 +0200 Subject: [PATCH 12/15] Add tests for new micropkg pull branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- .../cli/micropkg/test_micropkg_pull.py | 84 ++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/tests/framework/cli/micropkg/test_micropkg_pull.py b/tests/framework/cli/micropkg/test_micropkg_pull.py index 15067c5d92..120e2f70f4 100644 --- a/tests/framework/cli/micropkg/test_micropkg_pull.py +++ b/tests/framework/cli/micropkg/test_micropkg_pull.py @@ -1,8 +1,8 @@ import filecmp import shutil +import tarfile import textwrap from pathlib import Path -from tarfile import TarInfo from unittest.mock import Mock import pytest @@ -749,6 +749,86 @@ def test_pull_unsupported_protocol_by_fsspec( assert "Trying to use 'pip download'..." in result.output assert error_message in result.output + def test_micropkg_pull_invalid_sdist( + self, fake_project_cli, fake_repo_path, fake_metadata, tmp_path + ): + """ + Test for pulling an invalid sdist file locally with more than one package. + """ + error_message = ( + "Invalid sdist was extracted: exactly one directory was expected" + ) + + call_pipeline_create(fake_project_cli, fake_metadata) + call_micropkg_package(fake_project_cli, fake_metadata) + + sdist_file = ( + fake_repo_path / "dist" / _get_sdist_name(name=PIPELINE_NAME, version="0.1") + ) + assert sdist_file.is_file() + + with tarfile.open(sdist_file, "r:gz") as tar: + tar.extractall(tmp_path) + + # Create extra project + extra_project = tmp_path / f"{PIPELINE_NAME}-0.1_extra" + extra_project.mkdir() + (extra_project / "README.md").touch() + + # Recreate sdist + sdist_file.unlink() + with tarfile.open(sdist_file, "w:gz") as tar: + # Adapted from https://stackoverflow.com/a/65820259/554319 + for fn in tmp_path.iterdir(): + tar.add(fn, arcname=fn.relative_to(tmp_path)) + + result = CliRunner().invoke( + fake_project_cli, + ["micropkg", "pull", str(sdist_file)], + obj=fake_metadata, + ) + assert result.exit_code == 1 + assert error_message in result.stdout + + def test_micropkg_pull_invalid_package_contents( + self, fake_project_cli, fake_repo_path, fake_metadata, tmp_path + ): + """ + Test for pulling an invalid sdist file locally with more than one package. + """ + error_message = "Invalid package contents: exactly one package was expected" + + call_pipeline_create(fake_project_cli, fake_metadata) + call_micropkg_package(fake_project_cli, fake_metadata) + + sdist_file = ( + fake_repo_path / "dist" / _get_sdist_name(name=PIPELINE_NAME, version="0.1") + ) + assert sdist_file.is_file() + + with tarfile.open(sdist_file, "r:gz") as tar: + tar.extractall(tmp_path) + + # Create extra package + extra_package = tmp_path / f"{PIPELINE_NAME}-0.1" / f"{PIPELINE_NAME}_extra" + extra_package.mkdir() + (extra_package / "__init__.py").touch() + + # Recreate sdist + sdist_file.unlink() + with tarfile.open(sdist_file, "w:gz") as tar: + # Adapted from https://stackoverflow.com/a/65820259/554319 + for fn in tmp_path.iterdir(): + tar.add(fn, arcname=fn.relative_to(tmp_path)) + + result = CliRunner().invoke( + fake_project_cli, + ["micropkg", "pull", str(sdist_file)], + obj=fake_metadata, + ) + assert result.exit_code == 1 + assert error_message in result.stdout + @pytest.mark.parametrize( "tar_members,path_name", [ @@ -764,7 +844,7 @@ def test_path_traversal( """Test for checking path traversal attempt in tar file""" tar = Mock() tar.getmembers.return_value = [ - TarInfo(name=tar_name) for tar_name in tar_members + tarfile.TarInfo(name=tar_name) for tar_name in tar_members ] path = Path(path_name) with pytest.raises(Exception, match="Failed to safely extract tar file."): From f39a02408d792e8e718b05d4531f0f6e2a31cf40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Tue, 13 Jun 2023 13:38:53 +0200 Subject: [PATCH 13/15] Remove untested path of private code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index b64d6c9ce2..639d842cf8 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -85,9 +85,6 @@ def __hash__(self) -> int: return hash((self.__class__.__name__, self._to_str(self.canonical_name))) def __eq__(self, other: Any) -> bool: - if not isinstance(other, (Requirement, _EquivalentRequirement)): - return NotImplemented - return ( self.canonical_name == other.canonical_name and self.extras == other.extras From 5387918b36cc56d7d85ab4631668e56723a62f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Tue, 13 Jun 2023 14:28:18 +0200 Subject: [PATCH 14/15] Fix micropkg tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- tests/framework/cli/micropkg/test_micropkg_pull.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/framework/cli/micropkg/test_micropkg_pull.py b/tests/framework/cli/micropkg/test_micropkg_pull.py index 120e2f70f4..9cbad00a90 100644 --- a/tests/framework/cli/micropkg/test_micropkg_pull.py +++ b/tests/framework/cli/micropkg/test_micropkg_pull.py @@ -600,6 +600,17 @@ def test_pull_from_pypi( return_value=tmp_path, ) + # Mock needed to avoid an error when build.util.project_wheel_metadata + # calls tempfile.TemporaryDirectory, which is mocked + class _FakeWheelMetadata: + def get_all(self, name, failobj=None): # pylint: disable=unused-argument + return [] + + mocker.patch( + "kedro.framework.cli.micropkg.project_wheel_metadata", + return_value=_FakeWheelMetadata(), + ) + options = ["-e", env] if env else [] options += ["--alias", alias] if alias else [] From 254badc4c55ad5454d784d0411306eaaf1b333c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Tue, 13 Jun 2023 17:33:58 +0200 Subject: [PATCH 15/15] Add more detailed explanation of Requirement custom subclass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- kedro/framework/cli/micropkg.py | 41 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index 639d842cf8..c9bec3f255 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -8,7 +8,7 @@ import tempfile from importlib import import_module from pathlib import Path -from typing import Any, Iterable, List, Tuple, Union +from typing import Any, Iterable, Iterator, List, Tuple, Union import click from build.util import project_wheel_metadata @@ -51,42 +51,47 @@ class _EquivalentRequirement(Requirement): - # See https://github.com/pypa/packaging/issues/644#issuecomment-1567982812 + """Parse a requirement according to PEP 508. - @property - def canonical_name(self) -> str: - """Canonicalized name according to the rules of PEP 503.""" - return canonicalize_name(self.name) + This class overrides __eq__ to be backwards compatible with pkg_resources.Requirement + while making __str__ and __hash__ use the non-canonicalized name + as agreed in https://github.com/pypa/packaging/issues/644, - def _to_str(self, name: str) -> str: - parts: list[str] = [name] + Implementation taken from https://github.com/pypa/packaging/pull/696/ + """ + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name if self.extras: formatted_extras = ",".join(sorted(self.extras)) - parts.append(f"[{formatted_extras}]") + yield f"[{formatted_extras}]" if self.specifier: - parts.append(str(self.specifier)) + yield str(self.specifier) if self.url: - parts.append(f"@ {self.url}") + yield f"@ {self.url}" if self.marker: - parts.append(" ") + yield " " if self.marker: - parts.append(f"; {self.marker}") - - return "".join(parts) + yield f"; {self.marker}" def __str__(self) -> str: - return self._to_str(self.name) + return "".join(self._iter_parts(self.name)) def __hash__(self) -> int: - return hash((self.__class__.__name__, self._to_str(self.canonical_name))) + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) def __eq__(self, other: Any) -> bool: return ( - self.canonical_name == other.canonical_name + canonicalize_name(self.name) == canonicalize_name(other.name) and self.extras == other.extras and self.specifier == other.specifier and self.url == other.url