From 67209454c7da80242c6eb01cf8e1818212b319d7 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 8 Mar 2024 14:16:48 +0900 Subject: [PATCH 01/70] feat(bzlmod): introduce `pypi_index` for using bazel's downloader This is a variant of #1625 and was inspired by #1788. In #1625, we attempt to parse the simple API HTML files in the same `pip.parse` extension and it brings the following challenges: * The `pip.parse` cannot be easily used in `isolated` mode and it may be difficult to implement the isolation if bazelbuild/bazel#20186 moves forward. * Splitting the `pypi_index` out of the `pip.parse` allows us to accept the location of the parsed simple API artifacts encoded as a bazel label. * Separation of the logic allows us to very easily implement usage of the downloader for cross-platform wheels. * The `whl` `METADATA` might not be exposed through older versions of Artifactory, so having the complexity hidden in this single extension allows us to not increase the complexity and scope of `pip.parse` too much. * The repository structure can be reused for `pypi_install` extension from #1728. TODO: - [ ] Add unit tests for functions in `pypi_index.bzl` bzlmod extension if the design looks good. - [ ] Changelog. Out of scope of this PR: - Further usage of the downloaded artifacts to implement something similar to #1625 or #1744. This needs #1750 and #1764. - Making the lock file the same on all platforms - We would need to fully parse the requirements file. - Support for different dependency versions in the `pip.parse` hub repos based on each platform - we would need to be able to interpret platform markers in some way, but `pypi_index` should be good already. - Implementing the parsing of METADATA to detect dependency cycles. - Support for `requirements` files that are not created via `pip-compile`. - Support for other lock formats, though that would be reasonably trivial to add. 
Open questions: - Support for VCS dependencies in requirements files - We should probably handle them as `overrides` in the `pypi_index` extension and treat them in `pip.parse` just as an `sdist`, but I am not sure it would work without any issues. --- .bazelrc | 4 +- .bazelversion | 2 +- MODULE.bazel | 23 +- examples/bzlmod/MODULE.bazel | 24 ++ python/extensions/pypi_index.bzl | 19 ++ python/pip_install/pip_repository.bzl | 37 ++- python/private/auth.bzl | 9 +- python/private/bzlmod/pip.bzl | 51 ++++ python/private/bzlmod/pypi_index.bzl | 399 ++++++++++++++++++++++++++ 9 files changed, 550 insertions(+), 18 deletions(-) create mode 100644 python/extensions/pypi_index.bzl create mode 100644 python/private/bzlmod/pypi_index.bzl diff --git a/.bazelrc b/.bazelrc index 27e89faa97..3f16396659 100644 --- a/.bazelrc +++ b/.bazelrc @@ -4,8 +4,8 @@ # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, execute # `bazel run @rules_bazel_integration_test//tools:update_deleted_packages` -build 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered -query 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +build 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +query 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered test --test_output=errors diff --git a/.bazelversion b/.bazelversion index 66ce77b7ea..a8907c025d 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -7.0.0 +7.0.2 diff --git a/MODULE.bazel b/MODULE.bazel index e89b8ef679..1f1a25dd16 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -4,7 +4,7 @@ module( compatibility_level = 1, ) -bazel_dep(name = "bazel_features", version = "1.1.1") 
+bazel_dep(name = "bazel_features", version = "1.9.0") bazel_dep(name = "bazel_skylib", version = "1.3.0") bazel_dep(name = "platforms", version = "0.0.4") @@ -53,10 +53,31 @@ use_repo(python, "pythons_hub") # This call registers the Python toolchains. register_toolchains("@pythons_hub//:all") +# This call registers the `pypi_index` extension so that it can be used in the `pip` extension +pypi_index = use_extension("//python/extensions:pypi_index.bzl", "pypi_index") +use_repo(pypi_index, "pypi_index") + # ===== DEV ONLY DEPS AND SETUP BELOW HERE ===== bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True, repo_name = "io_bazel_stardoc") bazel_dep(name = "rules_bazel_integration_test", version = "0.20.0", dev_dependency = True) +# This call additionally only adds items to the `pypi_index` if we are +# not ignoring dev dependencies, making it no-op for the regular usage. +dev_pypi_index = use_extension( + "//python/extensions:pypi_index.bzl", + "pypi_index", + dev_dependency = True, +) +dev_pypi_index.add_requirements( + srcs = [ + # List all of the requirements files used by us + "//docs/sphinx:requirements.txt", + "//tools/publish:requirements_darwin.txt", + "//tools/publish:requirements.txt", + "//tools/publish:requirements_windows.txt", + ], +) + dev_pip = use_extension( "//python/extensions:pip.bzl", "pip", diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel index ceb0010bd4..2b7a870d57 100644 --- a/examples/bzlmod/MODULE.bazel +++ b/examples/bzlmod/MODULE.bazel @@ -43,6 +43,30 @@ python.toolchain( # rules based on the `python_version` arg values. use_repo(python, "python_3_10", "python_3_9", "python_versions") +# This extension allows rules_python to optimize downloading for packages by checking +# for available artifacts on PyPI Simple API compatible mirrors. 
+pypi_index = use_extension("@rules_python//python/extensions:pypi_index.bzl", "pypi_index") +pypi_index.add_requirements( + srcs = [ + "//:requirements_lock_3_10.txt", + "//:requirements_lock_3_9.txt", + "//:requirements_windows_3_10.txt", + "//:requirements_windows_3_9.txt", + ], +) + +# We can also initialize the extension in dev mode. +dev_pypi_index = use_extension( + "@rules_python//python/extensions:pypi_index.bzl", + "pypi_index", + dev_dependency = True, +) +dev_pypi_index.add_requirements( + srcs = [ + "//tests/dupe_requirements:requirements.txt", + ], +) + # This extension allows a user to create modifications to how rules_python # creates different wheel repositories. Different attributes allow the user # to modify the BUILD file, and copy files. diff --git a/python/extensions/pypi_index.bzl b/python/extensions/pypi_index.bzl new file mode 100644 index 0000000000..f8a48d6a99 --- /dev/null +++ b/python/extensions/pypi_index.bzl @@ -0,0 +1,19 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""See the doc in the implementation file.""" + +load("//python/private/bzlmod:pypi_index.bzl", _pypi_index = "pypi_index") + +pypi_index = _pypi_index diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 7b8160e956..bedb73ca6a 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -766,18 +766,27 @@ def _whl_library_impl(rctx): # Manually construct the PYTHONPATH since we cannot use the toolchain here environment = _create_repository_execution_environment(rctx, python_interpreter) - repo_utils.execute_checked( - rctx, - op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement), - arguments = args, - environment = environment, - quiet = rctx.attr.quiet, - timeout = rctx.attr.timeout, - ) + if rctx.attr.whl_file: + whl_path = rctx.path(rctx.attr.whl_file) + if not whl_path.exists: + fail("The given whl '{}' does not exist".format(rctx.attr.whl_file)) + + # Simulate the behaviour where the whl is present in the current directory. 
+ rctx.symlink(whl_path, whl_path.basename) + whl_path = rctx.path(whl_path.basename) + else: + repo_utils.execute_checked( + rctx, + op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement), + arguments = args, + environment = environment, + quiet = rctx.attr.quiet, + timeout = rctx.attr.timeout, + ) - whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"]) - if not rctx.delete("whl_file.json"): - fail("failed to delete the whl_file.json file") + whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"]) + if not rctx.delete("whl_file.json"): + fail("failed to delete the whl_file.json file") if rctx.attr.whl_patches: patches = {} @@ -911,6 +920,12 @@ whl_library_attrs = { mandatory = True, doc = "Python requirement string describing the package to make available", ), + "whl_file": attr.label( + doc = """\ +The wheel file label to be used for this installation. This will not use pip to download the +whl and instead use the supplied file. Note that the label needs to point to a single file. +""", + ), "whl_patches": attr.label_keyed_string_dict( doc = """a label-keyed-string dict that has json.encode(struct([whl_file], patch_strip]) as values. This diff --git a/python/private/auth.bzl b/python/private/auth.bzl index 39ada37cae..6020edd58c 100644 --- a/python/private/auth.bzl +++ b/python/private/auth.bzl @@ -33,10 +33,13 @@ def get_auth(rctx, urls): Returns: dict: A map of authentication parameters by URL. 
""" - if rctx.attr.netrc: - netrc = read_netrc(rctx, rctx.attr.netrc) + attr = getattr(rctx, "attr", None) + + if getattr(attr, "netrc", None): + netrc = read_netrc(rctx, getattr(attr, "netrc")) elif "NETRC" in rctx.os.environ: netrc = read_netrc(rctx, rctx.os.environ["NETRC"]) else: netrc = read_user_netrc(rctx) - return use_netrc(netrc, urls, rctx.attr.auth_patterns) + + return use_netrc(netrc, urls, getattr(attr, "auth_patterns", None)) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index a017089803..97ae1a6f2e 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -101,6 +101,8 @@ You cannot use both the additive_build_content and additive_build_content_file a def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): python_interpreter_target = pip_attr.python_interpreter_target + pypi_index_repo = module_ctx.path(pip_attr._pypi_index_repo).dirname + # if we do not have the python_interpreter set in the attributes # we programmatically find it. hub_name = pip_attr.hub_name @@ -180,10 +182,46 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): group_name = whl_group_mapping.get(whl_name) group_deps = requirement_cycles.get(group_name, []) + pkg_pypi_index = pypi_index_repo.get_child(whl_name, "index.json") + if not pkg_pypi_index.exists: + # The wheel index for a package does not exist, so not using bazel downloader... + whl_file = None + else: + # Ensure that we have a wheel for a particular version. + # FIXME @aignas 2024-03-10: Maybe the index structure should be: + # pypi_index//:index.json? + # + # We expect the `requirement_line to be of shape '== ...' + _, _, version_tail = requirement_line.partition("==") + version, _, _ = version_tail.partition(" ") + version_segment = "-{}-".format(version.strip("\" ")) + + index_json = [struct(**v) for v in json.decode(module_ctx.read(pkg_pypi_index))] + + # For now only use the whl_file if it is a cross-platform wheel. 
+ # This is very conservative and does that only thing that we have + # in the whl list is the cross-platform wheel. + whls = [ + dist + for dist in index_json + if dist.filename.endswith(".whl") and version_segment in dist.filename + ] + any_whls = [ + dist + for dist in whls + if dist.filename.endswith("-none-any.whl") or dist.filename.endswith("-abi3-any.whl") + ] + + if len(any_whls) == len(whls) and len(whls) == 1: + whl_file = any_whls[0].label + else: + whl_file = None + repo_name = "{}_{}".format(pip_name, whl_name) whl_library( name = repo_name, requirement = requirement_line, + whl_file = whl_file, repo = pip_name, repo_prefix = pip_name + "_", annotation = annotation, @@ -414,6 +452,19 @@ a corresponding `python.toolchain()` configured. doc = """\ A dict of labels to wheel names that is typically generated by the whl_modifications. The labels are JSON config files describing the modifications. +""", + ), + "_pypi_index_repo": attr.label( + default = "@pypi_index//:BUILD.bazel", + doc = """\ +The label to the root of the pypi_index repository to be used for this particular +call of the `pip.parse`. This ensures that we can work with isolated usage of the +pip.parse tag class, where the user may want to also have the `pypi_index` usage +isolated as well. + +This also makes the code cleaner and ensures there are no cyclic dependencies. + +NOTE: For now this is internal and will be exposed if needed. """, ), }, **pip_repository_attrs) diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl new file mode 100644 index 0000000000..81a471bcfb --- /dev/null +++ b/python/private/bzlmod/pypi_index.bzl @@ -0,0 +1,399 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +PyPI index reading extension. + +This allows us to translate the lock file to URLs and labels, that we can use to set up the +rest of the packages in the hub repos. This is created as a separate repository to allow +`pip.parse` to be used in an isolated mode. + +NOTE: for now the repos resulting from this extension are only supposed to be used in the +rules_python repository until this notice is removed. + +I want the usage to be: +```starlark +pypi_index = use_extension("@rules_python//python/extensions:pypi_index.bzl", "pypi_index") +pypi_index.from_requirements( + srcs = [ + "my_requirement", + ], +) +``` + +The main index URL can be overridden with an env var PIP_INDEX_URL by default. What is more, +the user should be able to specify specific package locations to be obtained from elsewhere. + +The most important thing to support would be to also support local wheel locations, where we +could read all of the wheels from a specific folder and construct the same repo. Like: +```starlark +pypi_index.from_dirs( + srcs = [ + "my_folder1", + "my_folder2", + ], +) +``` + +The implementation is left for a future PR. + +This can be later used by `pip` extension when constructing the `whl_library` hubs by passing +the right `whl_file` to the rule. + +This `pypi_index` extension provides labels for reading the `METADATA` from wheels and downloads +metadata only if the Simple API of the PyPI compatible mirror is exposing it. 
Otherwise, it +falls back to downloading the whl file and then extracting the `METADATA` file so that the users +of the artifacts created by the extension do not have to care about it being any different. +Whilst this may make the downloading of the whl METADATA somewhat slower, because it will be +in the repository cache, it may be a minor hit to the performance. + +The presence of this `METADATA` allows us to essentially get the full graph of the dependencies +within a `hub` repo and contract any dependency cycles in the future as is shown in the +`pypi_install` extension PR. + +Whilst this design has been crafted for `bzlmod`, we could in theory just port this back to +WORKSPACE without too many issues. + +If you do: +```console +$ bazel query @pypi_index//requests/... +@pypi_index//requests:requests-2.28.2-py3-none-any.whl +@pypi_index//requests:requests-2.28.2-py3-none-any.whl.METADATA +@pypi_index//requests:requests-2.28.2.tar.gz +@pypi_index//requests:requests-2.31.0-py3-none-any.whl +@pypi_index//requests:requests-2.31.0-py3-none-any.whl.METADATA +@pypi_index//requests:requests-2.31.0.tar.gz +``` +""" + +load("@bazel_features//:features.bzl", "bazel_features") +load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load("//python/private:auth.bzl", "get_auth") +load("//python/private:envsubst.bzl", "envsubst") +load("//python/private:normalize_name.bzl", "normalize_name") +load("//python/private:text_util.bzl", "render") + +def _impl(module_ctx): + want_packages = {} + for mod in module_ctx.modules: + for reqs in mod.tags.add_requirements: + env_vars = ["PIP_INDEX_URL"] + index_url = envsubst( + reqs.index_url, + env_vars, + module_ctx.os.environ.get, + ) + pkgs = _get_packages_from_requirements(module_ctx, reqs.srcs) + for pkg, want_shas in pkgs.items(): + pkg = normalize_name(pkg) + entry = want_packages.setdefault(pkg, {"urls": {}, "want_shas": {}}) + entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True + 
entry["want_shas"].update(want_shas) + + download_kwargs = {} + if bazel_features.external_deps.download_has_block_param: + download_kwargs["block"] = False + + downloads = {} + outs = {} + for pkg, args in want_packages.items(): + outs[pkg] = module_ctx.path("pypi_index/{}.html".format(pkg)) + all_urls = list(args["urls"].keys()) + + downloads[pkg] = module_ctx.download( + url = all_urls, + output = outs[pkg], + auth = get_auth( + struct( + os = module_ctx.os, + path = module_ctx.path, + read = module_ctx.read, + ), + all_urls, + ), + **download_kwargs + ) + + packages = {} + for pkg, args in want_packages.items(): + result = downloads[pkg] + if download_kwargs.get("block") == False: + result = result.wait() + + if not result.success: + fail(result) + + content = module_ctx.read(outs[pkg]) + + # TODO @aignas 2024-03-08: pass in the index urls, so that we can correctly work + packages[pkg] = _get_packages(args["urls"].keys()[0].rpartition("/")[0], content, args["want_shas"]) + + prefix = "pypi_index" + + repos = {} + for pkg, urls in packages.items(): + for url in urls: + pkg_name = "{}__{}_{}".format(prefix, pkg, url.sha256) + _archive_repo( + name = pkg_name, + urls = [url.url], + filename = url.filename, + sha256 = url.sha256, + ) + repos[pkg_name] = url.filename + + if url.metadata_sha256: + _archive_repo( + name = pkg_name + ".METADATA", + urls = [url.metadata_url], + filename = "METADATA", + sha256 = url.metadata_sha256, + ) + elif url.filename.endswith(".whl"): + _metadata_repo( + name = pkg_name + ".METADATA", + prefix = prefix, + whl = "@{}//{}:{}".format( + prefix, + pkg_name, + url.filename, + ), + ) + + _hub( + name = prefix, + repo = prefix, + repos = repos, + ) + +def _get_packages_from_requirements(module_ctx, requirements_files): + want_packages = {} + for file in requirements_files: + contents = module_ctx.read(module_ctx.path(file)) + parse_result = parse_requirements(contents) + for distribution, line in parse_result.requirements: + # NOTE 
@aignas 2024-03-08: this only supports Simple API, + # more complex cases may need to rely on the usual methods. + # + # if we don't have `sha256` values then we will not add this + # to our index. + want_packages.setdefault(distribution, {}).update({ + # TODO @aignas 2024-03-07: use sets + sha.strip(): True + for sha in line.split("--hash=sha256:")[1:] + }) + + return want_packages + +def _get_packages(index_url, content, want_shas): + packages = [] + for line in content.split("") + filename, _, tail = tail.partition("<") + + metadata_marker = "data-core-metadata=\"sha256=" + if metadata_marker in maybe_metadata: + # Implement https://peps.python.org/pep-0714/ + _, _, tail = maybe_metadata.partition(metadata_marker) + metadata_sha256, _, _ = tail.partition("\"") + metadata_url = url + ".metadata" + else: + metadata_sha256 = "" + metadata_url = "" + + packages.append( + struct( + filename = filename, + url = _absolute_urls(index_url, url), + sha256 = sha256, + metadata_sha256 = metadata_sha256, + metadata_url = metadata_url, + ), + ) + + if len(packages) != len(want_shas): + fail("Could not get all of the shas") + + return packages + +def _absolute_urls(index_url, candidate): + if not candidate.startswith(".."): + return candidate + + candidate_parts = candidate.split("..") + last = candidate_parts[-1] + for _ in range(len(candidate_parts) - 1): + index_url, _, _ = index_url.rstrip("/").rpartition("/") + + return "{}/{}".format(index_url, last.strip("/")) + +pypi_index = module_extension( + doc = "", + implementation = _impl, + tag_classes = { + "add_requirements": tag_class( + attrs = { + "index_url": attr.string( + doc = "We will substitute the env variable value PIP_INDEX_URL if present.", + default = "${PIP_INDEX_URL:-https://pypi.org/simple}", + ), + "srcs": attr.label_list(), + }, + ), + }, +) + +def _hub_impl(repository_ctx): + # This is so that calling the following in rules_python works: + # $ bazel query $pypi_index/... 
--ignore_dev_dependency + repository_ctx.file("BUILD.bazel", "") + + if not repository_ctx.attr.repos: + return + + packages = {} + for repo, filename in repository_ctx.attr.repos.items(): + head, _, sha256 = repo.rpartition("_") + _, _, pkg = head.rpartition("__") + + prefix = repository_ctx.name[:-len(repository_ctx.attr.repo)] + packages.setdefault(pkg, []).append( + struct( + sha256 = sha256, + filename = filename, + label = str(Label("@@{}{}//:{}".format(prefix, repo, filename))), + ), + ) + + for pkg, filenames in packages.items(): + # This contains the labels that should be used in the `pip` extension + # to get the labels that can be used by `whl_library`. + repository_ctx.file( + "{}/index.json".format(pkg), + json.encode(filenames), + ) + + # These labels should be used to be passed to `whl_library`. + repository_ctx.file( + "{}/BUILD.bazel".format(pkg), + "\n\n".join([ + """package(default_visibility = ["//visibility:public"])""", + """exports_files(["index.json"])""", + ] + [ + render.alias( + name = r.filename, + actual = repr(r.label), + visibility = ["//visibility:public"], + ) + for r in filenames + ] + [ + render.alias( + name = r.filename + ".METADATA", + actual = repr(r.label.split("//:")[0] + ".METADATA//:METADATA"), + visibility = ["//visibility:public"], + ) + for r in filenames + if r.filename.endswith(".whl") + ]), + ) + +_hub = repository_rule( + implementation = _hub_impl, + attrs = { + "repo": attr.string(mandatory = True), + "repos": attr.string_dict(mandatory = True), + }, +) + +def _archive_repo_impl(repository_ctx): + filename = repository_ctx.attr.filename + if repository_ctx.attr.file: + repository_ctx.symlink(repository_ctx.path(repository_ctx.attr.file), filename) + else: + # Download the wheel using the downloader + result = repository_ctx.download( + url = repository_ctx.attr.urls, + output = filename, + auth = get_auth( + repository_ctx, + repository_ctx.attr.urls, + ), + ) + + if not result.success: + fail(result) + + 
repository_ctx.file("BUILD.bazel", """\ +exports_files( + ["{}"], + visibility = ["//visibility:public"], +) +""".format(filename)) + +_archive_repo = repository_rule( + implementation = _archive_repo_impl, + attrs = { + "file": attr.label(mandatory = False), + "filename": attr.string(mandatory = True), + "sha256": attr.string(), + "urls": attr.string_list(), + }, +) + +# this allows to work with other implementations of Indexes that do not serve METADATA like PyPI +# or with patched METADATA in patched and re-zipped wheels. +def _metadata_repo_impl(repository_ctx): + whl_label = repository_ctx.attr.whl + prefix = repository_ctx.attr.prefix + if whl_label.repo_name.endswith(prefix): + # Here we have a hub repo label which we need to rewrite to the thing that the label + # is pointing to. We can do this because we own everything + # + # NOTE @aignas 2024-03-08: if we see restarts, then it could mean that we are not constructing + # the right label here. + whl_label = Label("@@{}//:{}".format(repository_ctx.name[:-len(".METADATA")], whl_label.name)) + + repository_ctx.symlink(repository_ctx.path(whl_label), "wheel.zip") + repository_ctx.extract("wheel.zip") + + content = None + for p in repository_ctx.path(".").readdir(): + if p.basename.endswith(".dist-info"): + content = repository_ctx.read(p.get_child("METADATA")) + repository_ctx.delete(p) + + if content == None: + fail("Could not find a METADATA file") + + repository_ctx.file("METADATA", content) + repository_ctx.file("BUILD.bazel", """\ +exports_files( + ["METADATA"], + visibility = ["//visibility:public"], +) +""") + +_metadata_repo = repository_rule( + implementation = _metadata_repo_impl, + attrs = { + "prefix": attr.string(), + "whl": attr.label(), + }, +) From 927310c7a5736c539247e6d9877c6e5b67bb0cc4 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Sun, 10 Mar 2024 19:54:32 +0900 Subject: [PATCH 02/70] revert: use bazel 7.0.2 in rules_python --- .bazelversion | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bazelversion b/.bazelversion index a8907c025d..66ce77b7ea 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -7.0.2 +7.0.0 From 677ed2d7b1338e36ab534d2ec3c66e1abfd8a427 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Sun, 10 Mar 2024 23:53:41 +0900 Subject: [PATCH 03/70] fix(bzlmod): add WORKSPACE.bzlmod and actually use bzlmod locally --- MODULE.bazel | 5 +++-- WORKSPACE.bzlmod | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 WORKSPACE.bzlmod diff --git a/MODULE.bazel b/MODULE.bazel index 1f1a25dd16..c6c2d638e3 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -12,7 +12,7 @@ bazel_dep(name = "platforms", version = "0.0.4") bazel_dep(name = "rules_proto", version = "5.3.0-21.7") bazel_dep(name = "protobuf", version = "21.7", repo_name = "com_google_protobuf") -internal_deps = use_extension("@rules_python//python/private/bzlmod:internal_deps.bzl", "internal_deps") +internal_deps = use_extension("//python/private/bzlmod:internal_deps.bzl", "internal_deps") internal_deps.install() use_repo( internal_deps, @@ -38,7 +38,7 @@ use_repo( # We need to do another use_extension call to expose the "pythons_hub" # repo. -python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python = use_extension("//python/extensions:python.bzl", "python") # The default toolchain to use if nobody configures a toolchain. # NOTE: This is not a stable version. 
It is provided for convenience, but will @@ -98,6 +98,7 @@ dev_pip.parse( python_version = "3.11", requirements_lock = "//docs/sphinx:requirements.txt", ) +use_repo(dev_pip, "dev_pip") bazel_binaries = use_extension( "@rules_bazel_integration_test//:extensions.bzl", diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod new file mode 100644 index 0000000000..1cfe5811a2 --- /dev/null +++ b/WORKSPACE.bzlmod @@ -0,0 +1 @@ +workspace(name = "rules_python") From fa1569e770680782d5b69513d061d4953d40b1f2 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Mon, 11 Mar 2024 08:16:03 +0900 Subject: [PATCH 04/70] remove WORKSPACE.bzlmod, will add in a separate PR --- WORKSPACE.bzlmod | 1 - 1 file changed, 1 deletion(-) delete mode 100644 WORKSPACE.bzlmod diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod deleted file mode 100644 index 1cfe5811a2..0000000000 --- a/WORKSPACE.bzlmod +++ /dev/null @@ -1 +0,0 @@ -workspace(name = "rules_python") From 8eb2ee5bd7dbb953d8a0b9f0b4c47eb0d8c0762e Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:27:17 +0900 Subject: [PATCH 05/70] start cleaning up pypi_index --- MODULE.bazel | 4 + WORKSPACE.bzlmod | 99 ++++++++++++++++ python/private/auth.bzl | 2 +- python/private/bzlmod/pip.bzl | 9 +- python/private/bzlmod/pypi_index.bzl | 164 +++++++++++++++++---------- 5 files changed, 209 insertions(+), 69 deletions(-) create mode 100644 WORKSPACE.bzlmod diff --git a/MODULE.bazel b/MODULE.bazel index c6c2d638e3..ca3f29e921 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -61,6 +61,10 @@ use_repo(pypi_index, "pypi_index") bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True, repo_name = "io_bazel_stardoc") bazel_dep(name = "rules_bazel_integration_test", version = "0.20.0", dev_dependency = True) +# Extra gazelle deps +bazel_dep(name = "rules_go", version = "0.41.0", dev_dependency = True, repo_name = "io_bazel_rules_go") +bazel_dep(name = "gazelle", 
version = "0.33.0", dev_dependency = True, repo_name = "bazel_gazelle") + # This call additionally only adds items to the `pypi_index` if we are # not ignoring dev dependencies, making it no-op for the regular usage. dev_pypi_index = use_extension( diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod new file mode 100644 index 0000000000..b2023607fd --- /dev/null +++ b/WORKSPACE.bzlmod @@ -0,0 +1,99 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains everything that is needed when using bzlmod +workspace(name = "rules_python") + +load("//python:repositories.bzl", "python_register_multi_toolchains") +load("//python:versions.bzl", "MINOR_MAPPING") + +python_register_multi_toolchains( + name = "python", + default_version = MINOR_MAPPING.values()[-2], + python_versions = MINOR_MAPPING.values(), +) + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") + +# Used for Bazel CI +http_archive( + name = "bazelci_rules", + sha256 = "eca21884e6f66a88c358e580fd67a6b148d30ab57b1680f62a96c00f9bc6a07e", + strip_prefix = "bazelci_rules-1.0.0", + url = "https://github.com/bazelbuild/continuous-integration/releases/download/rules-1.0.0/bazelci_rules-1.0.0.tar.gz", +) + +load("@bazelci_rules//:rbe_repo.bzl", "rbe_preconfig") + +# Creates a default toolchain config for RBE. +# Use this as is if you are using the rbe_ubuntu16_04 container, +# otherwise refer to RBE docs. 
+rbe_preconfig( + name = "buildkite_config", + toolchain = "ubuntu1804-bazel-java11", +) + +local_repository( + name = "rules_python_gazelle_plugin", + path = "gazelle", +) + +# The rules_python gazelle extension has some third-party go dependencies +# which we need to fetch in order to compile it. +load("@rules_python_gazelle_plugin//:deps.bzl", _py_gazelle_deps = "gazelle_deps") + +# See: https://github.com/bazelbuild/rules_python/blob/main/gazelle/README.md +# This rule loads and compiles various go dependencies that running gazelle +# for python requirements. +_py_gazelle_deps() + +# This interpreter is used for various rules_python dev-time tools +load("@python//3.11.8:defs.bzl", "interpreter") + +##################### +# Install twine for our own runfiles wheel publishing. +# Eventually we might want to install twine automatically for users too, see: +# https://github.com/bazelbuild/rules_python/issues/1016. +load("@rules_python//python:pip.bzl", "pip_parse") + +pip_parse( + name = "publish_deps", + python_interpreter_target = interpreter, + requirements_darwin = "//tools/publish:requirements_darwin.txt", + requirements_lock = "//tools/publish:requirements.txt", + requirements_windows = "//tools/publish:requirements_windows.txt", +) + +load("@publish_deps//:requirements.bzl", "install_deps") + +install_deps() + +##################### + +# This wheel is purely here to validate the wheel extraction code. It's not +# intended for anything else. +http_file( + name = "wheel_for_testing", + downloaded_file_path = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + sha256 = "0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2", + urls = [ + "https://files.pythonhosted.org/packages/50/67/3e966d99a07d60a21a21d7ec016e9e4c2642a86fea251ec68677daf71d4d/numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + ], +) + +# rules_proto expects //external:python_headers to point at the python headers. 
+bind( + name = "python_headers", + actual = "//python/cc:current_py_cc_headers", +) diff --git a/python/private/auth.bzl b/python/private/auth.bzl index 6020edd58c..2b067fd088 100644 --- a/python/private/auth.bzl +++ b/python/private/auth.bzl @@ -42,4 +42,4 @@ def get_auth(rctx, urls): else: netrc = read_user_netrc(rctx) - return use_netrc(netrc, urls, getattr(attr, "auth_patterns", None)) + return use_netrc(netrc, urls, getattr(attr, "auth_patterns", "")) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 97ae1a6f2e..71367fd049 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -206,14 +206,9 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): for dist in index_json if dist.filename.endswith(".whl") and version_segment in dist.filename ] - any_whls = [ - dist - for dist in whls - if dist.filename.endswith("-none-any.whl") or dist.filename.endswith("-abi3-any.whl") - ] - if len(any_whls) == len(whls) and len(whls) == 1: - whl_file = any_whls[0].label + if len(whls) == 1 and whls[0].filename.endswith("-any.whl"): + whl_file = whls[0].label else: whl_file = None diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl index 81a471bcfb..c1d9ea1c34 100644 --- a/python/private/bzlmod/pypi_index.bzl +++ b/python/private/bzlmod/pypi_index.bzl @@ -84,6 +84,12 @@ load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:text_util.bzl", "render") +_PYPI_INDEX = "pypi_index" +_BUILD_TEMPLATE = """\ +package(default_visibility = ["//visibility:public"]) +exports_files(["{}"]) +""" + def _impl(module_ctx): want_packages = {} for mod in module_ctx.modules: @@ -108,7 +114,7 @@ def _impl(module_ctx): downloads = {} outs = {} for pkg, args in want_packages.items(): - outs[pkg] = module_ctx.path("pypi_index/{}.html".format(pkg)) + outs[pkg] = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, 
pkg)) all_urls = list(args["urls"].keys()) downloads[pkg] = module_ctx.download( @@ -137,14 +143,17 @@ def _impl(module_ctx): content = module_ctx.read(outs[pkg]) # TODO @aignas 2024-03-08: pass in the index urls, so that we can correctly work - packages[pkg] = _get_packages(args["urls"].keys()[0].rpartition("/")[0], content, args["want_shas"]) - - prefix = "pypi_index" + packages[pkg] = _get_packages( + args["urls"].keys()[0].rpartition("/")[0], + pkg, + content, + args["want_shas"], + ) repos = {} for pkg, urls in packages.items(): for url in urls: - pkg_name = "{}__{}_{}".format(prefix, pkg, url.sha256) + pkg_name = "{}__{}_{}".format(_PYPI_INDEX, pkg, url.sha256) _archive_repo( name = pkg_name, urls = [url.url], @@ -161,19 +170,17 @@ def _impl(module_ctx): sha256 = url.metadata_sha256, ) elif url.filename.endswith(".whl"): - _metadata_repo( + _whl_metadata_repo( name = pkg_name + ".METADATA", - prefix = prefix, whl = "@{}//{}:{}".format( - prefix, + _PYPI_INDEX, pkg_name, url.filename, ), ) _hub( - name = prefix, - repo = prefix, + name = _PYPI_INDEX, repos = repos, ) @@ -196,14 +203,28 @@ def _get_packages_from_requirements(module_ctx, requirements_files): return want_packages -def _get_packages(index_url, content, want_shas): +def _get_packages(index_url, pkg, content, want_shas): + want_shas = {sha: True for sha in want_shas} packages = [] - for line in content.split("") filename, _, tail = tail.partition("<") @@ -228,8 +249,14 @@ def _get_packages(index_url, content, want_shas): ), ) - if len(packages) != len(want_shas): - fail("Could not get all of the shas") + if len(want_shas): + fail( + "Missing artifacts for '{}' with shas: {}\n{}".format( + pkg, + ", ".join(want_shas.keys()), + content, + ), + ) return packages @@ -250,8 +277,17 @@ pypi_index = module_extension( tag_classes = { "add_requirements": tag_class( attrs = { + "extra_index_urls": attr.string_list( + doc = """\ +Extra indexes to read for the given files. 
The indexes should support introspection via HTML simple API standard. + +See https://packaging.python.org/en/latest/specifications/simple-repository-api/ +""", + ), "index_url": attr.string( - doc = "We will substitute the env variable value PIP_INDEX_URL if present.", + doc = """\ +By default rules_python will use the env variable value of PIP_INDEX_URL if present. +""", default = "${PIP_INDEX_URL:-https://pypi.org/simple}", ), "srcs": attr.label_list(), @@ -273,7 +309,7 @@ def _hub_impl(repository_ctx): head, _, sha256 = repo.rpartition("_") _, _, pkg = head.rpartition("__") - prefix = repository_ctx.name[:-len(repository_ctx.attr.repo)] + prefix = repository_ctx.name[:-len(_PYPI_INDEX)] packages.setdefault(pkg, []).append( struct( sha256 = sha256, @@ -294,20 +330,19 @@ def _hub_impl(repository_ctx): repository_ctx.file( "{}/BUILD.bazel".format(pkg), "\n\n".join([ - """package(default_visibility = ["//visibility:public"])""", - """exports_files(["index.json"])""", + _BUILD_TEMPLATE.format("index.json"), ] + [ render.alias( name = r.filename, actual = repr(r.label), - visibility = ["//visibility:public"], + visibility = ["//visibility:private"], ) for r in filenames ] + [ render.alias( name = r.filename + ".METADATA", actual = repr(r.label.split("//:")[0] + ".METADATA//:METADATA"), - visibility = ["//visibility:public"], + visibility = ["//visibility:private"], ) for r in filenames if r.filename.endswith(".whl") @@ -315,60 +350,68 @@ def _hub_impl(repository_ctx): ) _hub = repository_rule( + doc = """\ +This hub repository allows for easy passing of wheel labels to the pip extension. + +The layout of this repo is similar to the simple API: +//:BUILD.bazel +// - normalized to rules_python scheme - lowercase snake-case) + :index.json - contains all labels in the bazel package + :BUILD.bazel - contains aliases to the repos created by the extension for easy + introspection using `bazel query`. Visibility is private for now. + Change it to `public` if needed. 
+""", implementation = _hub_impl, attrs = { - "repo": attr.string(mandatory = True), "repos": attr.string_dict(mandatory = True), }, ) def _archive_repo_impl(repository_ctx): filename = repository_ctx.attr.filename + repository_ctx.file("BUILD.bazel", _BUILD_TEMPLATE.format(filename)) + if repository_ctx.attr.file: repository_ctx.symlink(repository_ctx.path(repository_ctx.attr.file), filename) - else: - # Download the wheel using the downloader - result = repository_ctx.download( - url = repository_ctx.attr.urls, - output = filename, - auth = get_auth( - repository_ctx, - repository_ctx.attr.urls, - ), - ) + return - if not result.success: - fail(result) + result = repository_ctx.download( + url = repository_ctx.attr.urls, + output = filename, + auth = get_auth( + repository_ctx, + repository_ctx.attr.urls, + ), + ) - repository_ctx.file("BUILD.bazel", """\ -exports_files( - ["{}"], - visibility = ["//visibility:public"], -) -""".format(filename)) + if not result.success: + fail(result) _archive_repo = repository_rule( implementation = _archive_repo_impl, attrs = { - "file": attr.label(mandatory = False), + "file": attr.label( + doc = "Used for indexing wheels on the local filesystem", + allow_single_file = [".whl", ".tar.gz", ".zip"], + ), "filename": attr.string(mandatory = True), "sha256": attr.string(), "urls": attr.string_list(), }, ) -# this allows to work with other implementations of Indexes that do not serve METADATA like PyPI -# or with patched METADATA in patched and re-zipped wheels. -def _metadata_repo_impl(repository_ctx): +def _whl_metadata_repo_impl(repository_ctx): whl_label = repository_ctx.attr.whl - prefix = repository_ctx.attr.prefix - if whl_label.repo_name.endswith(prefix): - # Here we have a hub repo label which we need to rewrite to the thing that the label - # is pointing to. 
We can do this because we own everything - # - # NOTE @aignas 2024-03-08: if we see restarts, then it could mean that we are not constructing - # the right label here. - whl_label = Label("@@{}//:{}".format(repository_ctx.name[:-len(".METADATA")], whl_label.name)) + + if not whl_label.workspace_name.endswith(_PYPI_INDEX): + # Here we should have a hub repo label which we need to rewrite to the + # thing that the label is pointing to. We can do this because we own + # the construction of the labels. + fail("Expected the label to this rule to be from the '{}' hub repo".format(_PYPI_INDEX)) + + # NOTE @aignas 2024-03-08: if we see restarts, then it could mean that we are not constructing + # the right label as an input file. + whl_label = Label("@@{}//:{}".format(repository_ctx.name[:-len(".METADATA")], whl_label.name)) repository_ctx.symlink(repository_ctx.path(whl_label), "wheel.zip") repository_ctx.extract("wheel.zip") @@ -383,17 +426,16 @@ def _metadata_repo_impl(repository_ctx): fail("Could not find a METADATA file") repository_ctx.file("METADATA", content) - repository_ctx.file("BUILD.bazel", """\ -exports_files( - ["METADATA"], - visibility = ["//visibility:public"], -) -""") + repository_ctx.file("BUILD.bazel", _BUILD_TEMPLATE.format("METADATA")) + +_whl_metadata_repo = repository_rule( + doc = """Extract METADATA from a '.whl' file in repository context. -_metadata_repo = repository_rule( - implementation = _metadata_repo_impl, +This allows to work with other implementations of Indexes that do not serve +METADATA like PyPI or with patched METADATA in patched and re-zipped wheels. 
+""", + implementation = _whl_metadata_repo_impl, attrs = { - "prefix": attr.string(), - "whl": attr.label(), + "whl": attr.label(mandatory = True, allow_single_file = [".whl"]), }, ) From 11f94c79d69c17abd05c27525503ab62591dd846 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:27:48 +0900 Subject: [PATCH 06/70] add WORKSPACE.bzlmod to distribution --- BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/BUILD.bazel b/BUILD.bazel index cd4cbc544a..c97f41dee2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -35,6 +35,7 @@ filegroup( "BUILD.bazel", "MODULE.bazel", "WORKSPACE", + "WORKSPACE.bzlmod", "internal_deps.bzl", "internal_setup.bzl", "version.bzl", From 3efd55c00511b0686f2fff84bf14e3c60d32e10d Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:29:55 +0900 Subject: [PATCH 07/70] chore: add rules_testing to MODULE.bazel --- MODULE.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/MODULE.bazel b/MODULE.bazel index ca3f29e921..d414042561 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -60,6 +60,7 @@ use_repo(pypi_index, "pypi_index") # ===== DEV ONLY DEPS AND SETUP BELOW HERE ===== bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True, repo_name = "io_bazel_stardoc") bazel_dep(name = "rules_bazel_integration_test", version = "0.20.0", dev_dependency = True) +bazel_dep(name = "rules_testing", version = "0.5.0", dev_dependency = True) # Extra gazelle deps bazel_dep(name = "rules_go", version = "0.41.0", dev_dependency = True, repo_name = "io_bazel_rules_go") From 186a2c9ce8b675a57efe76621f74f02938e23eb3 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:35:31 +0900 Subject: [PATCH 08/70] add rules_cc dev dep --- MODULE.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/MODULE.bazel b/MODULE.bazel index d414042561..9eb058477c 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ 
-61,6 +61,7 @@ use_repo(pypi_index, "pypi_index") bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True, repo_name = "io_bazel_stardoc") bazel_dep(name = "rules_bazel_integration_test", version = "0.20.0", dev_dependency = True) bazel_dep(name = "rules_testing", version = "0.5.0", dev_dependency = True) +bazel_dep(name = "rules_cc", version = "0.0.9", dev_dependency = True) # Extra gazelle deps bazel_dep(name = "rules_go", version = "0.41.0", dev_dependency = True, repo_name = "io_bazel_rules_go") From 506d83823a0611bef802281f8a0f719dd38c7c39 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Thu, 14 Mar 2024 09:39:12 +0900 Subject: [PATCH 09/70] Add a function for getting shas for simple API packages --- python/private/bzlmod/pypi_index.bzl | 34 ++------- python/private/pypi_index.bzl | 73 +++++++++++++++++++ tests/private/pypi_index/BUILD.bazel | 3 + tests/private/pypi_index/pypi_index_tests.bzl | 60 +++++++++++++++ 4 files changed, 144 insertions(+), 26 deletions(-) create mode 100644 python/private/pypi_index.bzl create mode 100644 tests/private/pypi_index/BUILD.bazel create mode 100644 tests/private/pypi_index/pypi_index_tests.bzl diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl index c1d9ea1c34..d1d22b8e5a 100644 --- a/python/private/bzlmod/pypi_index.bzl +++ b/python/private/bzlmod/pypi_index.bzl @@ -78,10 +78,10 @@ $ bazel query @pypi_index//requests/... 
""" load("@bazel_features//:features.bzl", "bazel_features") -load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load("//python/private:auth.bzl", "get_auth") load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") +load("//python/private:pypi_index.bzl", "get_packages_from_requirements") load("//python/private:text_util.bzl", "render") _PYPI_INDEX = "pypi_index" @@ -91,7 +91,7 @@ exports_files(["{}"]) """ def _impl(module_ctx): - want_packages = {} + simpleapi_srcs = {} for mod in module_ctx.modules: for reqs in mod.tags.add_requirements: env_vars = ["PIP_INDEX_URL"] @@ -100,10 +100,11 @@ def _impl(module_ctx): env_vars, module_ctx.os.environ.get, ) - pkgs = _get_packages_from_requirements(module_ctx, reqs.srcs) - for pkg, want_shas in pkgs.items(): + requirements_files = [module_ctx.read(module_ctx.path(src)) for src in reqs.srcs] + sources = get_packages_from_requirements(requirements_files) + for pkg, want_shas in sources.simpleapi.items(): pkg = normalize_name(pkg) - entry = want_packages.setdefault(pkg, {"urls": {}, "want_shas": {}}) + entry = simpleapi_srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True entry["want_shas"].update(want_shas) @@ -113,7 +114,7 @@ def _impl(module_ctx): downloads = {} outs = {} - for pkg, args in want_packages.items(): + for pkg, args in simpleapi_srcs.items(): outs[pkg] = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, pkg)) all_urls = list(args["urls"].keys()) @@ -132,7 +133,7 @@ def _impl(module_ctx): ) packages = {} - for pkg, args in want_packages.items(): + for pkg, args in simpleapi_srcs.items(): result = downloads[pkg] if download_kwargs.get("block") == False: result = result.wait() @@ -184,25 +185,6 @@ def _impl(module_ctx): repos = repos, ) -def _get_packages_from_requirements(module_ctx, requirements_files): - want_packages = {} - for file in requirements_files: - 
contents = module_ctx.read(module_ctx.path(file)) - parse_result = parse_requirements(contents) - for distribution, line in parse_result.requirements: - # NOTE @aignas 2024-03-08: this only supports Simple API, - # more complex cases may need to rely on the usual methods. - # - # if we don't have `sha256` values then we will not add this - # to our index. - want_packages.setdefault(distribution, {}).update({ - # TODO @aignas 2024-03-07: use sets - sha.strip(): True - for sha in line.split("--hash=sha256:")[1:] - }) - - return want_packages - def _get_packages(index_url, pkg, content, want_shas): want_shas = {sha: True for sha in want_shas} packages = [] diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl new file mode 100644 index 0000000000..2593a0d418 --- /dev/null +++ b/python/private/pypi_index.bzl @@ -0,0 +1,73 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +A file that houses private functions used in the `bzlmod` extension with the same name. + +The functions here should not depend on the `module_ctx` for easy unit testing. +""" + +load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load("//python/private:normalize_name.bzl", "normalize_name") + +def get_packages_from_requirements(requirements_files): + """Get Simple API sources from a list of requirements files and merge them. 
+ + Args: + requirements_files(list[str]): A list of requirements files contents. + + Returns: + A struct with `simpleapi` attribute that contains a dict of normalized package + name to a list of shas that we should index. + """ + want_packages = {} + for contents in requirements_files: + parse_result = parse_requirements(contents) + for distribution, line in parse_result.requirements: + want_packages.setdefault(normalize_name(distribution), {}).update({ + # TODO @aignas 2024-03-07: use sets + sha: True + for sha in get_simpleapi_sources(line).shas + }) + + return struct( + simpleapi = want_packages, + ) + +def get_simpleapi_sources(line): + """Get PyPI sources from a requirements.txt line. + + We interpret the spec described in + https://pip.pypa.io/en/stable/reference/requirement-specifiers/#requirement-specifiers + + Args: + line(str): The requirements.txt entry. + + Returns: + A struct with shas attribute containing a list of shas to download from pypi_index. + """ + head, _, maybe_hashes = line.partition(";") + _, _, version = head.partition("==") + version = version.partition(" ")[0].strip() + + if "@" in head: + shas = [] + else: + maybe_hashes = maybe_hashes or line + shas = [ + sha.strip() + for sha in maybe_hashes.split("--hash=sha256:")[1:] + ] + + return struct(version = version, shas = sorted(shas)) diff --git a/tests/private/pypi_index/BUILD.bazel b/tests/private/pypi_index/BUILD.bazel new file mode 100644 index 0000000000..d365896cd3 --- /dev/null +++ b/tests/private/pypi_index/BUILD.bazel @@ -0,0 +1,3 @@ +load(":pypi_index_tests.bzl", "pypi_index_test_suite") + +pypi_index_test_suite(name = "pypi_index_tests") diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl new file mode 100644 index 0000000000..4320c7c2c5 --- /dev/null +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -0,0 +1,60 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"" + +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private:pypi_index.bzl", "get_simpleapi_sources") # buildifier: disable=bzl-visibility + +_tests = [] + +def _test_no_simple_api_sources(env): + inputs = [ + "foo==0.0.1", + "foo==0.0.1 @ https://someurl.org", + "foo==0.0.1 @ https://someurl.org --hash=sha256:deadbeef", + "foo==0.0.1 @ https://someurl.org; python_version < 2.7 --hash=sha256:deadbeef", + ] + for input in inputs: + got = get_simpleapi_sources(input) + env.expect.that_collection(got.shas).contains_exactly([]) + env.expect.that_str(got.version).equals("0.0.1") + +_tests.append(_test_no_simple_api_sources) + +def _test_simple_api_sources(env): + tests = { + "foo==0.0.2 --hash=sha256:deafbeef --hash=sha256:deadbeef": [ + "deadbeef", + "deafbeef", + ], + "foo[extra]==0.0.2; (python_version < 2.7 or something_else == \"@\") --hash=sha256:deafbeef --hash=sha256:deadbeef": [ + "deadbeef", + "deafbeef", + ], + } + for input, want_shas in tests.items(): + got = get_simpleapi_sources(input) + env.expect.that_collection(got.shas).contains_exactly(want_shas) + env.expect.that_str(got.version).equals("0.0.2") + +_tests.append(_test_simple_api_sources) + +def pypi_index_test_suite(name): + """Create the test suite. 
+ + Args: + name: the name of the test suite + """ + test_suite(name = name, basic_tests = _tests) From 956213a576203050541fafb8cd51c97014f8b3da Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Thu, 14 Mar 2024 10:03:56 +0900 Subject: [PATCH 10/70] use a better detection if we should use the index --- python/private/bzlmod/pip.bzl | 34 +++++++++++++--------------- python/private/bzlmod/pypi_index.bzl | 2 -- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 71367fd049..ff6ccdb081 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -27,6 +27,7 @@ load( load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") +load("//python/private:pypi_index.bzl", "get_simpleapi_sources") load("//python/private:render_pkg_aliases.bzl", "whl_alias") load("//python/private:version_label.bzl", "version_label") load(":pip_repository.bzl", "pip_repository") @@ -184,29 +185,26 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): pkg_pypi_index = pypi_index_repo.get_child(whl_name, "index.json") if not pkg_pypi_index.exists: - # The wheel index for a package does not exist, so not using bazel downloader... + # The index for a package does not exist, so not using bazel downloader... whl_file = None else: - # Ensure that we have a wheel for a particular version. - # FIXME @aignas 2024-03-10: Maybe the index structure should be: - # pypi_index//:index.json? - # - # We expect the `requirement_line to be of shape '== ...' 
- _, _, version_tail = requirement_line.partition("==") - version, _, _ = version_tail.partition(" ") - version_segment = "-{}-".format(version.strip("\" ")) - - index_json = [struct(**v) for v in json.decode(module_ctx.read(pkg_pypi_index))] - - # For now only use the whl_file if it is a cross-platform wheel. - # This is very conservative and does that only thing that we have - # in the whl list is the cross-platform wheel. + srcs = get_simpleapi_sources(requirement_line) + + index_json = { + v.sha256: v + for v in [ + struct(**encoded) + for encoded in json.decode(module_ctx.read(pkg_pypi_index)) + ] + } whls = [ - dist - for dist in index_json - if dist.filename.endswith(".whl") and version_segment in dist.filename + index_json[sha] + for sha in srcs.shas + if index_json[sha].filename.endswith(".whl") ] + # For now only use the bazel downloader only whl file is a + # cross-platform wheel. if len(whls) == 1 and whls[0].filename.endswith("-any.whl"): whl_file = whls[0].label else: diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl index d1d22b8e5a..0849213d90 100644 --- a/python/private/bzlmod/pypi_index.bzl +++ b/python/private/bzlmod/pypi_index.bzl @@ -80,7 +80,6 @@ $ bazel query @pypi_index//requests/... 
load("@bazel_features//:features.bzl", "bazel_features") load("//python/private:auth.bzl", "get_auth") load("//python/private:envsubst.bzl", "envsubst") -load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:pypi_index.bzl", "get_packages_from_requirements") load("//python/private:text_util.bzl", "render") @@ -103,7 +102,6 @@ def _impl(module_ctx): requirements_files = [module_ctx.read(module_ctx.path(src)) for src in reqs.srcs] sources = get_packages_from_requirements(requirements_files) for pkg, want_shas in sources.simpleapi.items(): - pkg = normalize_name(pkg) entry = simpleapi_srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True entry["want_shas"].update(want_shas) From cdf0510bb74d1e49c1948f44d06e0c73a8c61ae8 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:06:00 +0900 Subject: [PATCH 11/70] Use a better detection if we should use the index --- python/private/bzlmod/pypi_index.bzl | 311 ++++----------------------- python/private/pypi_index.bzl | 287 ++++++++++++++++++++++++ 2 files changed, 324 insertions(+), 274 deletions(-) diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl index 0849213d90..496ad171cc 100644 --- a/python/private/bzlmod/pypi_index.bzl +++ b/python/private/bzlmod/pypi_index.bzl @@ -80,14 +80,14 @@ $ bazel query @pypi_index//requests/... 
load("@bazel_features//:features.bzl", "bazel_features") load("//python/private:auth.bzl", "get_auth") load("//python/private:envsubst.bzl", "envsubst") -load("//python/private:pypi_index.bzl", "get_packages_from_requirements") -load("//python/private:text_util.bzl", "render") +load( + "//python/private:pypi_index.bzl", + "create_spoke_repos", + "get_packages_from_requirements", + "pypi_index_hub", +) _PYPI_INDEX = "pypi_index" -_BUILD_TEMPLATE = """\ -package(default_visibility = ["//visibility:public"]) -exports_files(["{}"]) -""" def _impl(module_ctx): simpleapi_srcs = {} @@ -97,7 +97,7 @@ def _impl(module_ctx): index_url = envsubst( reqs.index_url, env_vars, - module_ctx.os.environ.get, + module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, ) requirements_files = [module_ctx.read(module_ctx.path(src)) for src in reqs.srcs] sources = get_packages_from_requirements(requirements_files) @@ -111,146 +111,53 @@ def _impl(module_ctx): download_kwargs["block"] = False downloads = {} - outs = {} for pkg, args in simpleapi_srcs.items(): - outs[pkg] = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, pkg)) + output = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, pkg)) all_urls = list(args["urls"].keys()) - - downloads[pkg] = module_ctx.download( - url = all_urls, - output = outs[pkg], - auth = get_auth( - struct( - os = module_ctx.os, - path = module_ctx.path, - read = module_ctx.read, + downloads[pkg] = struct( + out = output, + urls = all_urls, + download = module_ctx.download( + url = all_urls, + output = output, + auth = get_auth( + # Simulate the repository_ctx so that `get_auth` works. 
+ struct( + os = module_ctx.os, + path = module_ctx.path, + read = module_ctx.read, + ), + all_urls, ), - all_urls, + **download_kwargs ), - **download_kwargs ) - packages = {} + repos = {} for pkg, args in simpleapi_srcs.items(): - result = downloads[pkg] + download = downloads[pkg] + result = download.download if download_kwargs.get("block") == False: result = result.wait() if not result.success: - fail(result) - - content = module_ctx.read(outs[pkg]) - - # TODO @aignas 2024-03-08: pass in the index urls, so that we can correctly work - packages[pkg] = _get_packages( - args["urls"].keys()[0].rpartition("/")[0], - pkg, - content, - args["want_shas"], + fail("Failed to download from {}: {}".format(download.urls, result)) + + repos.update( + create_spoke_repos( + simple_api_urls = download.urls, + pkg = pkg, + html_contents = module_ctx.read(download.out), + want_shas = args["want_shas"], + prefix = _PYPI_INDEX, + ), ) - repos = {} - for pkg, urls in packages.items(): - for url in urls: - pkg_name = "{}__{}_{}".format(_PYPI_INDEX, pkg, url.sha256) - _archive_repo( - name = pkg_name, - urls = [url.url], - filename = url.filename, - sha256 = url.sha256, - ) - repos[pkg_name] = url.filename - - if url.metadata_sha256: - _archive_repo( - name = pkg_name + ".METADATA", - urls = [url.metadata_url], - filename = "METADATA", - sha256 = url.metadata_sha256, - ) - elif url.filename.endswith(".whl"): - _whl_metadata_repo( - name = pkg_name + ".METADATA", - whl = "@{}//{}:{}".format( - _PYPI_INDEX, - pkg_name, - url.filename, - ), - ) - - _hub( + pypi_index_hub( name = _PYPI_INDEX, repos = repos, ) -def _get_packages(index_url, pkg, content, want_shas): - want_shas = {sha: True for sha in want_shas} - packages = [] - lines = content.split("") - filename, _, tail = tail.partition("<") - - metadata_marker = "data-core-metadata=\"sha256=" - if metadata_marker in maybe_metadata: - # Implement https://peps.python.org/pep-0714/ - _, _, tail = maybe_metadata.partition(metadata_marker) 
- metadata_sha256, _, _ = tail.partition("\"") - metadata_url = url + ".metadata" - else: - metadata_sha256 = "" - metadata_url = "" - - packages.append( - struct( - filename = filename, - url = _absolute_urls(index_url, url), - sha256 = sha256, - metadata_sha256 = metadata_sha256, - metadata_url = metadata_url, - ), - ) - - if len(want_shas): - fail( - "Missing artifacts for '{}' with shas: {}\n{}".format( - pkg, - ", ".join(want_shas.keys()), - content, - ), - ) - - return packages - -def _absolute_urls(index_url, candidate): - if not candidate.startswith(".."): - return candidate - - candidate_parts = candidate.split("..") - last = candidate_parts[-1] - for _ in range(len(candidate_parts) - 1): - index_url, _, _ = index_url.rstrip("/").rpartition("/") - - return "{}/{}".format(index_url, last.strip("/")) - pypi_index = module_extension( doc = "", implementation = _impl, @@ -275,147 +182,3 @@ By default rules_python will use the env variable value of PIP_INDEX_URL if pres ), }, ) - -def _hub_impl(repository_ctx): - # This is so that calling the following in rules_python works: - # $ bazel query $pypi_index/... --ignore_dev_dependency - repository_ctx.file("BUILD.bazel", "") - - if not repository_ctx.attr.repos: - return - - packages = {} - for repo, filename in repository_ctx.attr.repos.items(): - head, _, sha256 = repo.rpartition("_") - _, _, pkg = head.rpartition("__") - - prefix = repository_ctx.name[:-len(_PYPI_INDEX)] - packages.setdefault(pkg, []).append( - struct( - sha256 = sha256, - filename = filename, - label = str(Label("@@{}{}//:{}".format(prefix, repo, filename))), - ), - ) - - for pkg, filenames in packages.items(): - # This contains the labels that should be used in the `pip` extension - # to get the labels that can be used by `whl_library`. - repository_ctx.file( - "{}/index.json".format(pkg), - json.encode(filenames), - ) - - # These labels should be used to be passed to `whl_library`. 
- repository_ctx.file( - "{}/BUILD.bazel".format(pkg), - "\n\n".join([ - _BUILD_TEMPLATE.format("index.json"), - ] + [ - render.alias( - name = r.filename, - actual = repr(r.label), - visibility = ["//visibility:private"], - ) - for r in filenames - ] + [ - render.alias( - name = r.filename + ".METADATA", - actual = repr(r.label.split("//:")[0] + ".METADATA//:METADATA"), - visibility = ["//visibility:private"], - ) - for r in filenames - if r.filename.endswith(".whl") - ]), - ) - -_hub = repository_rule( - doc = """\ -This hub repository allows for easy passing of wheel labels to the pip extension. - -The layout of this repo is similar to the simple API: -//:BUILD.bazel -// - normalized to rules_python scheme - lowercase snake-case) - :index.json - contains all labels in the bazel package - :BUILD.bazel - contains aliases to the repos created by the extension for easy - introspection using `bazel query`. Visibility is private for now. - Change it to `public` if needed. -""", - implementation = _hub_impl, - attrs = { - "repos": attr.string_dict(mandatory = True), - }, -) - -def _archive_repo_impl(repository_ctx): - filename = repository_ctx.attr.filename - repository_ctx.file("BUILD.bazel", _BUILD_TEMPLATE.format(filename)) - - if repository_ctx.attr.file: - repository_ctx.symlink(repository_ctx.path(repository_ctx.attr.file), filename) - return - - result = repository_ctx.download( - url = repository_ctx.attr.urls, - output = filename, - auth = get_auth( - repository_ctx, - repository_ctx.attr.urls, - ), - ) - - if not result.success: - fail(result) - -_archive_repo = repository_rule( - implementation = _archive_repo_impl, - attrs = { - "file": attr.label( - doc = "Used for indexing wheels on the local filesystem", - allow_single_file = [".whl", ".tar.gz", ".zip"], - ), - "filename": attr.string(mandatory = True), - "sha256": attr.string(), - "urls": attr.string_list(), - }, -) - -def _whl_metadata_repo_impl(repository_ctx): - whl_label = repository_ctx.attr.whl - 
- if not whl_label.workspace_name.endswith(_PYPI_INDEX): - # Here we should have a hub repo label which we need to rewrite to the - # thing that the label is pointing to. We can do this because we own - # the construction of the labels. - fail("Expected the label to this rule to be from the '{}' hub repo".format(_PYPI_INDEX)) - - # NOTE @aignas 2024-03-08: if we see restarts, then it could mean that we are not constructing - # the right label as an input file. - whl_label = Label("@@{}//:{}".format(repository_ctx.name[:-len(".METADATA")], whl_label.name)) - - repository_ctx.symlink(repository_ctx.path(whl_label), "wheel.zip") - repository_ctx.extract("wheel.zip") - - content = None - for p in repository_ctx.path(".").readdir(): - if p.basename.endswith(".dist-info"): - content = repository_ctx.read(p.get_child("METADATA")) - repository_ctx.delete(p) - - if content == None: - fail("Could not find a METADATA file") - - repository_ctx.file("METADATA", content) - repository_ctx.file("BUILD.bazel", _BUILD_TEMPLATE.format("METADATA")) - -_whl_metadata_repo = repository_rule( - doc = """Extract METADATA from a '.whl' file in repository context. - -This allows to work with other implementations of Indexes that do not serve -METADATA like PyPI or with patched METADATA in patched and re-zipped wheels. -""", - implementation = _whl_metadata_repo_impl, - attrs = { - "whl": attr.label(mandatory = True, allow_single_file = [".whl"]), - }, -) diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 2593a0d418..d39c68a019 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -19,7 +19,16 @@ The functions here should not depend on the `module_ctx` for easy unit testing. 
""" load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load("//python/private:auth.bzl", "get_auth") load("//python/private:normalize_name.bzl", "normalize_name") +load("//python/private:text_util.bzl", "render") + +_BUILD_TEMPLATE = """\ +# generated by @rules_python//python/private:pypi_index.bzl + +package(default_visibility = ["//visibility:public"]) +exports_files(["{}"]) +""" def get_packages_from_requirements(requirements_files): """Get Simple API sources from a list of requirements files and merge them. @@ -71,3 +80,281 @@ def get_simpleapi_sources(line): ] return struct(version = version, shas = sorted(shas)) + +def create_spoke_repos(simple_api_urls, pkg, html_contents, want_shas, prefix): + """Create spoke repos for the hub repo. + + Args: + simple_api_urls(list[str]): The URLs that were used to download the + HTML contents. + pkg(str): The name of the package. + html_contents(str): The contents of the simple API index. + want_shas(list[str]): The shas that we expect to find in the simple API metadata. + prefix(str): The prefix of all spoke repos. + + Returns: + A dict with the created repository names and the whl filenames that + they download. Note, that extra `.METADATA` repos for each whl + are also created, but they will not be in the returned dictionary. 
+ """ + repos = {} + urls = _get_packages( + simple_api_urls, + html_contents, + want_shas, + ) + + for url in urls: + pkg_name = "{}__{}_{}".format(prefix, pkg, url.sha256) + _pypi_archive( + name = pkg_name, + urls = [url.url], + filename = url.filename, + sha256 = url.sha256, + prefix = prefix, + ) + repos[pkg_name[len(prefix) + 2:]] = url.filename + + if url.metadata_sha256: + _pypi_archive( + name = pkg_name + ".METADATA", + urls = [url.metadata_url], + filename = "METADATA", + sha256 = url.metadata_sha256, + prefix = prefix, + ) + elif url.filename.endswith(".whl"): + _pypi_archive_metadata( + name = pkg_name + ".METADATA", + prefix = prefix, + whl = "@{}//{}:{}".format( + prefix, + pkg_name, + url.filename, + ), + ) + + return repos + +def _get_packages(index_urls, content, want_shas): + """Get the package URLs for given shas by parsing the Simple API HTML.""" + want_shas = {sha: True for sha in want_shas} + packages = [] + lines = content.split("= (2, 0): + # We don't expect to have version 2.0 here, but have this check in place just in case. + # https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api + fail("Unsupported API version: {}".format(api_version)) + + for line in lines[1:]: + url, _, tail = line.partition("#sha256=") + sha256, _, tail = tail.partition("\"") + + if sha256 not in want_shas: + continue + elif "data-yanked" in line: + # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api + # + # For now we just fail and inform the user to relock the requirements with a + # different version. 
+ fail("The package with '--hash=sha256:{}' was yanked, relock your requirements".format(sha256)) + else: + want_shas.pop(sha256) + + maybe_metadata, _, tail = tail.partition(">") + filename, _, tail = tail.partition("<") + + metadata_marker = "data-core-metadata=\"sha256=" + if metadata_marker in maybe_metadata: + # Implement https://peps.python.org/pep-0714/ + _, _, tail = maybe_metadata.partition(metadata_marker) + metadata_sha256, _, _ = tail.partition("\"") + metadata_url = url + ".metadata" + else: + metadata_sha256 = "" + metadata_url = "" + + packages.append( + struct( + filename = filename, + url = _absolute_urls(index_urls[0], url), + sha256 = sha256, + metadata_sha256 = metadata_sha256, + metadata_url = metadata_url, + ), + ) + + if len(want_shas): + fail( + "Indexes {} did not provide packages with all shas: {}".format( + index_urls, + ", ".join(want_shas.keys()), + ), + ) + + return packages + +def _absolute_urls(index_url, candidate): + if not candidate.startswith(".."): + return candidate + + candidate_parts = candidate.split("..") + last = candidate_parts[-1] + for _ in range(len(candidate_parts) - 1): + index_url, _, _ = index_url.rstrip("/").rpartition("/") + + return "{}/{}".format(index_url, last.strip("/")) + +def _hub_impl(rctx): + # This is so that calling the following in rules_python works: + # $ bazel query $pypi_index/... --ignore_dev_dependency + rctx.file("BUILD.bazel", "") + + if not rctx.attr.repos: + return + + packages = {} + for repo, filename in rctx.attr.repos.items(): + pkg, _, sha256 = repo.rpartition("_") + + packages.setdefault(pkg, []).append( + struct( + sha256 = sha256, + filename = filename, + label = str(Label("@@{}__{}//:{}".format(rctx.attr.name, repo, filename))), + ), + ) + + for pkg, filenames in packages.items(): + # This contains the labels that should be used in the `pip` extension + # to get the labels that can be used by `whl_library`. 
+ rctx.file( + "{}/index.json".format(pkg), + json.encode(filenames), + ) + + # These labels should be used to be passed to `whl_library`. + rctx.file( + "{}/BUILD.bazel".format(pkg), + "\n\n".join([ + _BUILD_TEMPLATE.format("index.json"), + ] + [ + render.alias( + name = r.filename, + actual = repr(r.label), + visibility = ["//visibility:private"], + ) + for r in filenames + ] + [ + render.alias( + name = r.filename + ".METADATA", + actual = repr(r.label.split("//:")[0] + ".METADATA//:METADATA"), + visibility = ["//visibility:private"], + ) + for r in filenames + if r.filename.endswith(".whl") + ]), + ) + +pypi_index_hub = repository_rule( + doc = """\ +This hub repository allows for easy passing of wheel labels to the pip extension. + +The layout of this repo is similar to the simple API: +//:BUILD.bazel +// - normalized to rules_python scheme - lowercase snake-case) + :index.json - contains all labels in the bazel package + :BUILD.bazel - contains aliases to the repos created by the extension for easy + introspection using `bazel query`. Visibility is private for now. + Change it to `public` if needed. 
+""", + implementation = _hub_impl, + attrs = { + "repos": attr.string_dict(mandatory = True), + }, +) + +def _impl_archive(rctx): + filename = rctx.attr.filename + rctx.file("BUILD.bazel", _BUILD_TEMPLATE.format(filename)) + + if rctx.attr.file: + rctx.symlink(rctx.path(rctx.attr.file), filename) + return + + result = rctx.download( + url = rctx.attr.urls, + output = filename, + auth = get_auth( + rctx, + rctx.attr.urls, + ), + ) + + if not result.success: + fail(result) + +_pypi_archive = repository_rule( + implementation = _impl_archive, + attrs = { + "file": attr.label( + doc = "Used for indexing wheels on the local filesystem", + allow_single_file = [".whl", ".tar.gz", ".zip"], + ), + "filename": attr.string(mandatory = True), + "prefix": attr.string(mandatory = True), + "sha256": attr.string(), + "urls": attr.string_list(), + }, +) + +def _impl_metadata(rctx): + whl_label = rctx.attr.whl + prefix = rctx.attr.prefix + + if not whl_label.workspace_name.endswith(prefix): + # Here we should have a hub repo label which we need to rewrite to the + # thing that the label is pointing to. We can do this because we own + # the construction of the labels. + fail("Expected the label to this rule to be from the '{}' hub repo".format(prefix)) + + # NOTE @aignas 2024-03-08: if we see restarts, then it could mean that we are not constructing + # the right label as an input file. + whl_label = Label("@@{}//:{}".format(rctx.name[:-len(".METADATA")], whl_label.name)) + + rctx.symlink(rctx.path(whl_label), "wheel.zip") + rctx.extract("wheel.zip") + + content = None + for p in rctx.path(".").readdir(): + if p.basename.endswith(".dist-info"): + content = rctx.read(p.get_child("METADATA")) + rctx.delete(p) + + if content == None: + fail("Could not find a METADATA file") + + rctx.file("METADATA", content) + rctx.file("BUILD.bazel", _BUILD_TEMPLATE.format("METADATA")) + +_pypi_archive_metadata = repository_rule( + doc = """Extract METADATA from a '.whl' file in repository context. 
+ +This allows to work with other implementations of Indexes that do not serve +METADATA like PyPI or with patched METADATA in patched and re-zipped wheels. +""", + implementation = _impl_metadata, + attrs = { + "prefix": attr.string(mandatory = True), + "whl": attr.label(mandatory = True, allow_single_file = [".whl"]), + }, +) From 6651bc928e9e58593e0fdbd83412feef148dc5c7 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Mon, 18 Mar 2024 22:28:10 +0900 Subject: [PATCH 12/70] docs: add pypi_index docs --- docs/sphinx/BUILD.bazel | 1 + python/extensions/BUILD.bazel | 7 +++++++ python/private/BUILD.bazel | 11 +++++++++++ python/private/bzlmod/BUILD.bazel | 12 ++++++++++++ python/private/pypi_index.bzl | 6 +++--- 5 files changed, 34 insertions(+), 3 deletions(-) diff --git a/docs/sphinx/BUILD.bazel b/docs/sphinx/BUILD.bazel index 8912f2cfb6..76ba21ea03 100644 --- a/docs/sphinx/BUILD.bazel +++ b/docs/sphinx/BUILD.bazel @@ -91,6 +91,7 @@ sphinx_stardocs( } if IS_BAZEL_7_OR_HIGHER else {}) | ({ # This depends on @pythons_hub, which is only created under bzlmod, "api/extensions/pip.md": "//python/extensions:pip_bzl", + "api/extensions/pypi_index.md": "//python/extensions:pypi_index_bzl", } if IS_BAZEL_7_OR_HIGHER and BZLMOD_ENABLED else {}), footer = "_stardoc_footer.md", tags = ["docs"], diff --git a/python/extensions/BUILD.bazel b/python/extensions/BUILD.bazel index a9dede44ec..b0dcae0f9c 100644 --- a/python/extensions/BUILD.bazel +++ b/python/extensions/BUILD.bazel @@ -31,6 +31,13 @@ bzl_library( deps = ["//python/private/bzlmod:pip_bzl"], ) +bzl_library( + name = "pypi_index_bzl", + srcs = ["pypi_index.bzl"], + visibility = ["//:__subpackages__"], + deps = ["//python/private/bzlmod:pypi_index_bzl"], +) + bzl_library( name = "python_bzl", srcs = ["python.bzl"], diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index 221c3b7a65..234d642f6a 100644 --- a/python/private/BUILD.bazel +++ 
b/python/private/BUILD.bazel @@ -110,6 +110,17 @@ bzl_library( srcs = ["parse_whl_name.bzl"], ) +bzl_library( + name = "pypi_index_bzl", + srcs = ["pypi_index.bzl"], + deps = [ + ":auth_bzl", + ":normalize_name_bzl", + ":text_util_bzl", + "//python/pip_install:requirements_parser_bzl", + ], +) + bzl_library( name = "py_cc_toolchain_bzl", srcs = [ diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index b636cca1a2..f5df9cebcb 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -32,6 +32,7 @@ bzl_library( ":pip_repository_bzl", "//python/pip_install:pip_repository_bzl", "//python/pip_install:requirements_parser_bzl", + "//python/private:pypi_index_bzl", "//python/private:full_version_bzl", "//python/private:normalize_name_bzl", "//python/private:parse_whl_name_bzl", @@ -57,6 +58,17 @@ bzl_library( ], ) +bzl_library( + name = "pypi_index_bzl", + srcs = ["pypi_index.bzl"], + deps = [ + ":bazel_features_bzl", + "//python/private:auth_bzl", + "//python/private:envsubst_bzl", + "//python/private:pypi_index_bzl", + ], +) + bzl_library( name = "python_bzl", srcs = ["python.bzl"], diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index d39c68a019..64c3589d7d 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -19,9 +19,9 @@ The functions here should not depend on the `module_ctx` for easy unit testing. 
""" load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") -load("//python/private:auth.bzl", "get_auth") -load("//python/private:normalize_name.bzl", "normalize_name") -load("//python/private:text_util.bzl", "render") +load(":auth.bzl", "get_auth") +load(":normalize_name.bzl", "normalize_name") +load(":text_util.bzl", "render") _BUILD_TEMPLATE = """\ # generated by @rules_python//python/private:pypi_index.bzl From 7d7abd1e6395de1ebdb2e665afa227cff5d44381 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Mon, 18 Mar 2024 22:32:16 +0900 Subject: [PATCH 13/70] Add CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e29f84d85..a3ecd50258 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,10 @@ A brief description of the categories of changes: * New Python versions available: `3.11.8`, `3.12.2` using https://github.com/indygreg/python-build-standalone/releases/tag/20240224. +* (bzlmod) New **experimental** `pypi_index` extension that can be used to + instruct the `pip.parse` tag class to use the bazel downloader to fetch + wheels. Note, the API is very unstable and may be changed at any time. 
+ [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 ## [0.31.0] - 2024-02-12 From ec8dbc469230f3d7ec33b3063103ca6ce6982636 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Mon, 18 Mar 2024 22:33:35 +0900 Subject: [PATCH 14/70] fixup changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aacb32d12..dd12bd6b77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ A brief description of the categories of changes: ## Unreleased +[0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 + ### Changed ### Fixed @@ -32,12 +34,10 @@ A brief description of the categories of changes: * (gazelle) Added a new `python_default_visibility` directive to control the _default_ visibility of generated targets. See the [docs][python_default_visibility] for details. - * (bzlmod) New **experimental** `pypi_index` extension that can be used to instruct the `pip.parse` tag class to use the bazel downloader to fetch wheels. Note, the API is very unstable and may be changed at any time. 
-[0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 [python_default_visibility]: gazelle/README.md#directive-python_default_visibility ### Changed From 6f7a42c49d8c07691a03489f271528cffb1199b8 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Thu, 21 Mar 2024 11:59:03 +0900 Subject: [PATCH 15/70] refactor to try a different pip.parse usage of the simple api --- python/private/bzlmod/pypi_index.bzl | 89 ++++++++++++++++------------ python/private/pypi_index.bzl | 21 ++++++- 2 files changed, 70 insertions(+), 40 deletions(-) diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl index 496ad171cc..ea9001b36e 100644 --- a/python/private/bzlmod/pypi_index.bzl +++ b/python/private/bzlmod/pypi_index.bzl @@ -106,49 +106,14 @@ def _impl(module_ctx): entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True entry["want_shas"].update(want_shas) - download_kwargs = {} - if bazel_features.external_deps.download_has_block_param: - download_kwargs["block"] = False - - downloads = {} - for pkg, args in simpleapi_srcs.items(): - output = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, pkg)) - all_urls = list(args["urls"].keys()) - downloads[pkg] = struct( - out = output, - urls = all_urls, - download = module_ctx.download( - url = all_urls, - output = output, - auth = get_auth( - # Simulate the repository_ctx so that `get_auth` works. 
- struct( - os = module_ctx.os, - path = module_ctx.path, - read = module_ctx.read, - ), - all_urls, - ), - **download_kwargs - ), - ) - repos = {} - for pkg, args in simpleapi_srcs.items(): - download = downloads[pkg] - result = download.download - if download_kwargs.get("block") == False: - result = result.wait() - - if not result.success: - fail("Failed to download from {}: {}".format(download.urls, result)) - + for pkg, download in simpleapi_download(module_ctx, simpleapi_srcs).items(): repos.update( create_spoke_repos( simple_api_urls = download.urls, pkg = pkg, html_contents = module_ctx.read(download.out), - want_shas = args["want_shas"], + want_shas = simpleapi_srcs[pkg]["want_shas"], prefix = _PYPI_INDEX, ), ) @@ -182,3 +147,53 @@ By default rules_python will use the env variable value of PIP_INDEX_URL if pres ), }, ) + +def simpleapi_download(module_ctx, srcs): + """Download Simple API HTML. + + Args: + module_ctx: The bzlmod module_ctx. + srcs: The sources to download things for. + + Returns: + dict of pkg name to struct with download information containing + * urls - the URLs used for downloading the file. + * out - the output file to which HTML has been written. + """ + download_kwargs = {} + if bazel_features.external_deps.download_has_block_param: + download_kwargs["block"] = False + + downloads = {} + for pkg, args in srcs.items(): + output = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, pkg)) + all_urls = list(args["urls"].keys()) + downloads[pkg] = struct( + out = output, + urls = all_urls, + download = module_ctx.download( + url = all_urls, + output = output, + auth = get_auth( + # Simulate the repository_ctx so that `get_auth` works. 
+ struct( + os = module_ctx.os, + path = module_ctx.path, + read = module_ctx.read, + ), + all_urls, + ), + **download_kwargs + ), + ) + + for pkg, download in downloads.items(): + if download_kwargs.get("block") == False: + result = download.download.wait() + else: + result = download.download + + if not result.success: + fail("Failed to download from {}: {}".format(download.urls, result)) + + return downloads diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 64c3589d7d..6704ac18ba 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -98,7 +98,7 @@ def create_spoke_repos(simple_api_urls, pkg, html_contents, want_shas, prefix): are also created, but they will not be in the returned dictionary. """ repos = {} - urls = _get_packages( + urls = get_packages( simple_api_urls, html_contents, want_shas, @@ -136,8 +136,23 @@ def create_spoke_repos(simple_api_urls, pkg, html_contents, want_shas, prefix): return repos -def _get_packages(index_urls, content, want_shas): - """Get the package URLs for given shas by parsing the Simple API HTML.""" +def get_packages(index_urls, content, want_shas): + """Get the package URLs for given shas by parsing the Simple API HTML. + + Args: + index_urls(list[str]): The URLs that the HTML content can be downloaded from. + content(str): The Simple API HTML content. + want_shas(list[str]): The list of shas that we need to get. + + Returns: + A list of structs with: + * filename: The filename of the artifact. + * url: The URL to download the artifact. + * sha256: The sha256 of the artifact. + * metadata_sha256: The whl METADATA sha256 if we can download it. If this is + present, then the 'metadata_url' is also present. Defaults to "". + * metadata_url: The URL for the METADATA if we can download it. Defaults to "". 
+ """ want_shas = {sha: True for sha in want_shas} packages = [] lines = content.split(" Date: Thu, 21 Mar 2024 14:59:10 +0900 Subject: [PATCH 16/70] use downloader without an extra extension --- MODULE.bazel | 2 + examples/bzlmod/MODULE.bazel | 33 +++------- python/pip_install/pip_repository.bzl | 34 +++++++---- python/private/bzlmod/pip.bzl | 87 +++++++++++++++------------ 4 files changed, 83 insertions(+), 73 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index 98dc5cd702..0d96fead76 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -90,6 +90,8 @@ dev_pip = use_extension( dev_dependency = True, ) dev_pip.parse( + envsubst = ["PIP_INDEX_URL"], + experimental_index_url = "${PIP_INDEX_URL:-https://pypi.org/simple}", experimental_requirement_cycles = { "sphinx": [ "sphinx", diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel index 2b7a870d57..e53e01d29b 100644 --- a/examples/bzlmod/MODULE.bazel +++ b/examples/bzlmod/MODULE.bazel @@ -43,30 +43,6 @@ python.toolchain( # rules based on the `python_version` arg values. use_repo(python, "python_3_10", "python_3_9", "python_versions") -# This extension allows rules_python to optimize downloading for packages by checking -# for available artifacts on PyPI Simple API compatible mirrors. -pypi_index = use_extension("@rules_python//python/extensions:pypi_index.bzl", "pypi_index") -pypi_index.add_requirements( - srcs = [ - "//:requirements_lock_3_10.txt", - "//:requirements_lock_3_9.txt", - "//:requirements_windows_3_10.txt", - "//:requirements_windows_3_9.txt", - ], -) - -# We can also initialize the extension in dev mode. -dev_pypi_index = use_extension( - "@rules_python//python/extensions:pypi_index.bzl", - "pypi_index", - dev_dependency = True, -) -dev_pypi_index.add_requirements( - srcs = [ - "//tests/dupe_requirements:requirements.txt", - ], -) - # This extension allows a user to create modifications to how rules_python # creates different wheel repositories. 
Different attributes allow the user # to modify the BUILD file, and copy files. @@ -118,6 +94,11 @@ use_repo(pip, "whl_mods_hub") # Alternatively, `python_interpreter_target` can be used to directly specify # the Python interpreter to run to resolve dependencies. pip.parse( + # We can use `envsubst in the above + envsubst = ["PIP_INDEX_URL"], + # Use the bazel downloader to query the simple API for downloading the sources + # Note, that we can use envsubst for this value. + experimental_index_url = "${PIP_INDEX_URL:-https://pypi.org/simple}", experimental_requirement_cycles = { "sphinx": [ "sphinx", @@ -136,6 +117,10 @@ pip.parse( "cp39_linux_*", "cp39_*", ], + # TODO @aignas 2024-03-21: how do we handle overrides for the index urls per package? + # index_url_per_package = { + # foo: bar + # }, hub_name = "pip", python_version = "3.9", requirements_lock = "//:requirements_lock_3_9.txt", diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index a91311c822..24069cd52d 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -22,6 +22,7 @@ load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse load("//python/pip_install/private:generate_group_library_build_bazel.bzl", "generate_group_library_build_bazel") load("//python/pip_install/private:generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel") load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS") +load("//python/private:auth.bzl", "get_auth") load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") @@ -765,14 +766,20 @@ def _whl_library_impl(rctx): # Manually construct the PYTHONPATH since we cannot use the toolchain here environment = _create_repository_execution_environment(rctx, python_interpreter) - if rctx.attr.whl_file: - whl_path = 
rctx.path(rctx.attr.whl_file) - if not whl_path.exists: - fail("The given whl '{}' does not exist".format(rctx.attr.whl_file)) + if rctx.attr.urls: + result = rctx.download( + url = rctx.attr.urls, + output = rctx.attr.filename, + auth = get_auth( + rctx, + rctx.attr.urls, + ), + ) + + if not result.success: + fail(result) - # Simulate the behaviour where the whl is present in the current directory. - rctx.symlink(whl_path, whl_path.basename) - whl_path = rctx.path(whl_path.basename) + whl_path = rctx.path(rctx.attr.filename) else: repo_utils.execute_checked( rctx, @@ -904,6 +911,9 @@ whl_library_attrs = { ), allow_files = True, ), + "filename": attr.string( + doc = "Download the whl file to this filename.", + ), "group_deps": attr.string_list( doc = "List of dependencies to skip in order to break the cycles within a dependency group.", default = [], @@ -919,11 +929,11 @@ whl_library_attrs = { mandatory = True, doc = "Python requirement string describing the package to make available", ), - "whl_file": attr.label( - doc = """\ -The wheel file label to be used for this installation. This will not use pip to download the -whl and instead use the supplied file. Note that the label needs to point to a single file. 
-""", + "sha256": attr.string( + doc = "The sha256 of the downloaded whl", + ), + "urls": attr.string_list( + doc = "The url of the whl to be downloaded using bazel downloader", ), "whl_patches": attr.label_keyed_string_dict( doc = """a label-keyed-string dict that has diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index ff6ccdb081..9b22b409f0 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -25,12 +25,14 @@ load( "whl_library", ) load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") -load("//python/private:pypi_index.bzl", "get_simpleapi_sources") +load("//python/private:pypi_index.bzl", "get_packages", "get_packages_from_requirements", "get_simpleapi_sources") load("//python/private:render_pkg_aliases.bzl", "whl_alias") load("//python/private:version_label.bzl", "version_label") load(":pip_repository.bzl", "pip_repository") +load(":pypi_index.bzl", "simpleapi_download") def _parse_version(version): major, _, version = version.partition(".") @@ -102,8 +104,6 @@ You cannot use both the additive_build_content and additive_build_content_file a def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): python_interpreter_target = pip_attr.python_interpreter_target - pypi_index_repo = module_ctx.path(pip_attr._pypi_index_repo).dirname - # if we do not have the python_interpreter set in the attributes # we programmatically find it. 
hub_name = pip_attr.hub_name @@ -126,11 +126,11 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): hub_name, version_label(pip_attr.python_version), ) - requrements_lock = locked_requirements_label(module_ctx, pip_attr) + requirements_lock = locked_requirements_label(module_ctx, pip_attr) # Parse the requirements file directly in starlark to get the information # needed for the whl_libary declarations below. - requirements_lock_content = module_ctx.read(requrements_lock) + requirements_lock_content = module_ctx.read(requirements_lock) parse_result = parse_requirements(requirements_lock_content) # Replicate a surprising behavior that WORKSPACE builds allowed: @@ -173,6 +173,32 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): groups = pip_attr.experimental_requirement_cycles, ) + # TODO @aignas 2024-03-21: do this outside this function so that we can + # decrease the number of times we call the simple API. + index_urls = {} + if pip_attr.experimental_index_url: + index_url = envsubst( + pip_attr.experimental_index_url, + pip_attr.envsubst, + module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, + ) + sources = get_packages_from_requirements([requirements_lock_content]) + simpleapi_srcs = {} + for pkg, want_shas in sources.simpleapi.items(): + entry = simpleapi_srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) + + # ensure that we have a trailing slash, because we will otherwise get redirects + # which may not work on private indexes with netrc authentication. 
+ entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True + entry["want_shas"].update(want_shas) + + for pkg, download in simpleapi_download(module_ctx, simpleapi_srcs).items(): + index_urls[pkg] = get_packages( + download.urls, + module_ctx.read(download.out), + want_shas = simpleapi_srcs[pkg]["want_shas"], + ) + # Create a new wheel library for each of the different whls for whl_name, requirement_line in requirements: # We are not using the "sanitized name" because the user @@ -183,38 +209,32 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): group_name = whl_group_mapping.get(whl_name) group_deps = requirement_cycles.get(group_name, []) - pkg_pypi_index = pypi_index_repo.get_child(whl_name, "index.json") - if not pkg_pypi_index.exists: - # The index for a package does not exist, so not using bazel downloader... - whl_file = None - else: + urls = [] + sha256 = None + filename = None + if index_urls: srcs = get_simpleapi_sources(requirement_line) - index_json = { - v.sha256: v - for v in [ - struct(**encoded) - for encoded in json.decode(module_ctx.read(pkg_pypi_index)) - ] - } whls = [ - index_json[sha] - for sha in srcs.shas - if index_json[sha].filename.endswith(".whl") + src + for src in index_urls[whl_name] + if src.sha256 in srcs.shas and src.filename.endswith(".whl") ] # For now only use the bazel downloader only whl file is a # cross-platform wheel. 
if len(whls) == 1 and whls[0].filename.endswith("-any.whl"): - whl_file = whls[0].label - else: - whl_file = None + urls.append(whls[0].url) + sha256 = whls[0].sha256 + filename = whls[0].filename repo_name = "{}_{}".format(pip_name, whl_name) whl_library( name = repo_name, requirement = requirement_line, - whl_file = whl_file, + filename = filename, + urls = urls, + sha256 = sha256, repo = pip_name, repo_prefix = pip_name + "_", annotation = annotation, @@ -405,6 +425,12 @@ def _pip_impl(module_ctx): def _pip_parse_ext_attrs(): attrs = dict({ + "experimental_index_url": attr.string( + doc = """\ +The index URL to use for downloading wheels using bazel downloader. This value is going +to be subject to `envsubst` substitutions if necessary. +""", + ), "hub_name": attr.string( mandatory = True, doc = """ @@ -445,19 +471,6 @@ a corresponding `python.toolchain()` configured. doc = """\ A dict of labels to wheel names that is typically generated by the whl_modifications. The labels are JSON config files describing the modifications. -""", - ), - "_pypi_index_repo": attr.label( - default = "@pypi_index//:BUILD.bazel", - doc = """\ -The label to the root of the pypi_index repository to be used for this particular -call of the `pip.parse`. This ensures that we can work with isolated usage of the -pip.parse tag class, where the user may want to also have the `pypi_index` usage -isolated as well. - -This also makes the code cleaner and ensures there are no cyclic dependencies. - -NOTE: For now this is internal and will be exposed if needed. 
""", ), }, **pip_repository_attrs) From 0313f75c2f2c1b4f104959f7782c243f7782d233 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 22 Mar 2024 18:01:46 +0900 Subject: [PATCH 17/70] refactor and cleanup --- python/pip_install/pip_repository.bzl | 32 +++-- python/private/bzlmod/pip.bzl | 32 +++-- python/private/bzlmod/pypi_index.bzl | 173 ++++---------------------- 3 files changed, 72 insertions(+), 165 deletions(-) diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 24069cd52d..a92c8e5752 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -766,14 +766,24 @@ def _whl_library_impl(rctx): # Manually construct the PYTHONPATH since we cannot use the toolchain here environment = _create_repository_execution_environment(rctx, python_interpreter) - if rctx.attr.urls: + if rctx.attr.whl_file: + whl_path = rctx.path(rctx.attr.whl_file) + + # Simulate the behaviour where the whl is present in the current directory. 
+ rctx.symlink(whl_path, whl_path.basename) + whl_path = rctx.path(whl_path.basename) + elif rctx.attr.urls: + filename = rctx.attr.filename + if not filename: + _, _, filename = rctx.attr.urls[0].rpartition("/") + if not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith("zip")): + fail("'filename' needs to be provided when passing if it is not possible to auto-detect filename extension from URL:" + rctx.attr.urls[0]) + result = rctx.download( url = rctx.attr.urls, output = rctx.attr.filename, - auth = get_auth( - rctx, - rctx.attr.urls, - ), + sha256 = rctx.attr.sha256, + auth = get_auth(rctx, rctx.attr.urls), ) if not result.success: @@ -903,7 +913,8 @@ if __name__ == "__main__": ) return contents -whl_library_attrs = { +# NOTE @aignas 2024-03-21: The usage of dict({}, **common) ensures that all args to `dict` are unique +whl_library_attrs = dict({ "annotation": attr.label( doc = ( "Optional json encoded file containing annotation to apply to the extracted wheel. " + @@ -927,7 +938,7 @@ whl_library_attrs = { ), "requirement": attr.string( mandatory = True, - doc = "Python requirement string describing the package to make available", + doc = "Python requirement string describing the package to make available, if 'urls' or 'whl_file' is given, then this only needs to include foo[any_extras] as a bare minimum.", ), "sha256": attr.string( doc = "The sha256 of the downloaded whl", @@ -935,6 +946,9 @@ whl_library_attrs = { "urls": attr.string_list( doc = "The url of the whl to be downloaded using bazel downloader", ), + "whl_file": attr.label( + doc = "The whl file that should be used instead of downloading", + ), "whl_patches": attr.label_keyed_string_dict( doc = """a label-keyed-string dict that has json.encode(struct([whl_file], patch_strip]) as values. 
This @@ -955,9 +969,7 @@ whl_library_attrs = { for repo in all_requirements ], ), -} - -whl_library_attrs.update(**common_attrs) +}, **common_attrs) whl_library = repository_rule( attrs = whl_library_attrs, diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 9b22b409f0..4ddb9f5cfd 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -101,7 +101,7 @@ You cannot use both the additive_build_content and additive_build_content_file a whl_mods = whl_mods, ) -def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): +def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_cache): python_interpreter_target = pip_attr.python_interpreter_target # if we do not have the python_interpreter set in the attributes @@ -126,12 +126,21 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): hub_name, version_label(pip_attr.python_version), ) - requirements_lock = locked_requirements_label(module_ctx, pip_attr) # Parse the requirements file directly in starlark to get the information # needed for the whl_libary declarations below. 
- requirements_lock_content = module_ctx.read(requirements_lock) - parse_result = parse_requirements(requirements_lock_content) + requirements_locks = { + key: module_ctx.read(file) + for key, file in { + "default": pip_attr.requirements_lock, + "host": locked_requirements_label(module_ctx, pip_attr), + "linux": pip_attr.requirements_linux, + "osx": pip_attr.requirements_darwin, + "windows": pip_attr.requirements_windows, + }.items() + if file + } + parse_result = parse_requirements(requirements_locks["host"]) # Replicate a surprising behavior that WORKSPACE builds allowed: # Defining a repo with the same name multiple times, but only the last @@ -182,7 +191,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): pip_attr.envsubst, module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, ) - sources = get_packages_from_requirements([requirements_lock_content]) + sources = get_packages_from_requirements(requirements_locks.values()) simpleapi_srcs = {} for pkg, want_shas in sources.simpleapi.items(): entry = simpleapi_srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) @@ -192,10 +201,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True entry["want_shas"].update(want_shas) - for pkg, download in simpleapi_download(module_ctx, simpleapi_srcs).items(): + for pkg, download in simpleapi_download(module_ctx, simpleapi_srcs, simpleapi_cache).items(): index_urls[pkg] = get_packages( download.urls, - module_ctx.read(download.out), + download.html, want_shas = simpleapi_srcs[pkg]["want_shas"], ) @@ -227,6 +236,9 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): urls.append(whls[0].url) sha256 = whls[0].sha256 filename = whls[0].filename + else: + pass + #print("Would use the following for {}: {}".format(whl_name, whls)) repo_name = "{}_{}".format(pip_name, whl_name) whl_library( @@ -375,6 +387,10 @@ def 
_pip_impl(module_ctx): # Where hub, whl, and pip are the repo names hub_whl_map = {} + # We don't use the `module_ctx.download` mechanisms because we don't want to persist + # this across the evaluations of the extension. + simpleapi_cache = {} + for mod in module_ctx.modules: for pip_attr in mod.tags.parse: hub_name = pip_attr.hub_name @@ -410,7 +426,7 @@ def _pip_impl(module_ctx): else: pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version) - _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides) + _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides, simpleapi_cache) for hub_name, whl_map in hub_whl_map.items(): pip_repository( diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl index ea9001b36e..4b170205df 100644 --- a/python/private/bzlmod/pypi_index.bzl +++ b/python/private/bzlmod/pypi_index.bzl @@ -12,177 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -PyPI index reading extension. - -This allows us to translate the lock file to URLs and labels, that we can use to set up the -rest of the packages in the hub repos. This is created as a separate repository to allow -`pip.parse` to be used in an isolated mode. - -NOTE: for now the repos resulting from this extension are only supposed to be used in the -rules_python repository until this notice is removed. - -I want the usage to be: -```starlark -pypi_index = use_extension("@rules_python//python/extensions:pypi_index.bzl", "pypi_index") -pypi_index.from_requirements( - srcs = [ - "my_requirement", - ], -) -``` - -The main index URL can be overriden with an env var PIP_INDEX_URL by default. What is more, -the user should be able to specify specific package locations to be obtained from elsewhere. 
- -The most important thing to support would be to also support local wheel locations, where we -could read all of the wheels from a specific folder and construct the same repo. Like: -```starlark -pypi_index.from_dirs( - srcs = [ - "my_folder1", - "my_folder2", - ], -) -``` - -The implementation is left for a future PR. - -This can be later used by `pip` extension when constructing the `whl_library` hubs by passing -the right `whl_file` to the rule. - -This `pypi_index` extension provides labels for reading the `METADATA` from wheels and downloads -metadata only if the Simple API of the PyPI compatible mirror is exposing it. Otherwise, it -falls back to downloading the whl file and then extracting the `METADATA` file so that the users -of the artifacts created by the extension do not have to care about it being any different. -Whilst this may make the downloading of the whl METADATA somewhat slower, because it will be -in the repository cache, it may be a minor hit to the performance. - -The presence of this `METADATA` allows us to essentially get the full graph of the dependencies -within a `hub` repo and contract any dependency cycles in the future as is shown in the -`pypi_install` extension PR. - -Whilst this design has been crafted for `bzlmod`, we could in theory just port this back to -WORKSPACE without too many issues. - -If you do: -```console -$ bazel query @pypi_index//requests/... 
-@pypi_index//requests:requests-2.28.2-py3-none-any.whl -@pypi_index//requests:requests-2.28.2-py3-none-any.whl.METADATA -@pypi_index//requests:requests-2.28.2.tar.gz -@pypi_index//requests:requests-2.31.0-py3-none-any.whl -@pypi_index//requests:requests-2.31.0-py3-none-any.whl.METADATA -@pypi_index//requests:requests-2.31.0.tar.gz -``` -""" +"""TODO""" load("@bazel_features//:features.bzl", "bazel_features") load("//python/private:auth.bzl", "get_auth") -load("//python/private:envsubst.bzl", "envsubst") -load( - "//python/private:pypi_index.bzl", - "create_spoke_repos", - "get_packages_from_requirements", - "pypi_index_hub", -) - -_PYPI_INDEX = "pypi_index" -def _impl(module_ctx): - simpleapi_srcs = {} - for mod in module_ctx.modules: - for reqs in mod.tags.add_requirements: - env_vars = ["PIP_INDEX_URL"] - index_url = envsubst( - reqs.index_url, - env_vars, - module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, - ) - requirements_files = [module_ctx.read(module_ctx.path(src)) for src in reqs.srcs] - sources = get_packages_from_requirements(requirements_files) - for pkg, want_shas in sources.simpleapi.items(): - entry = simpleapi_srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) - entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True - entry["want_shas"].update(want_shas) - - repos = {} - for pkg, download in simpleapi_download(module_ctx, simpleapi_srcs).items(): - repos.update( - create_spoke_repos( - simple_api_urls = download.urls, - pkg = pkg, - html_contents = module_ctx.read(download.out), - want_shas = simpleapi_srcs[pkg]["want_shas"], - prefix = _PYPI_INDEX, - ), - ) - - pypi_index_hub( - name = _PYPI_INDEX, - repos = repos, - ) - -pypi_index = module_extension( - doc = "", - implementation = _impl, - tag_classes = { - "add_requirements": tag_class( - attrs = { - "extra_index_urls": attr.string_list( - doc = """\ -Extra indexes to read for the given files. 
The indexes should support introspection via HTML simple API standard. - -See https://packaging.python.org/en/latest/specifications/simple-repository-api/ -""", - ), - "index_url": attr.string( - doc = """\ -By default rules_python will use the env variable value of PIP_INDEX_URL if present. -""", - default = "${PIP_INDEX_URL:-https://pypi.org/simple}", - ), - "srcs": attr.label_list(), - }, - ), - }, -) - -def simpleapi_download(module_ctx, srcs): +def simpleapi_download(module_ctx, srcs, cache = None): """Download Simple API HTML. Args: module_ctx: The bzlmod module_ctx. srcs: The sources to download things for. + cache: A dictionary that can be used as a cache between calls during a + single evaluation of the extension. Returns: - dict of pkg name to struct with download information containing - * urls - the URLs used for downloading the file. - * out - the output file to which HTML has been written. + dict of pkg name to the HTML contents. """ download_kwargs = {} if bazel_features.external_deps.download_has_block_param: download_kwargs["block"] = False downloads = {} + contents = {} for pkg, args in srcs.items(): - output = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, pkg)) + output = module_ctx.path("{}/{}.html".format("pypi_index", pkg)) all_urls = list(args["urls"].keys()) + cache_key = "" + if cache != None: + cache_key = ",".join(all_urls) + if cache_key in cache: + contents[pkg] = cache[cache_key] + continue + downloads[pkg] = struct( out = output, urls = all_urls, + cache_key = cache_key, download = module_ctx.download( url = all_urls, output = output, - auth = get_auth( - # Simulate the repository_ctx so that `get_auth` works. 
- struct( - os = module_ctx.os, - path = module_ctx.path, - read = module_ctx.read, - ), - all_urls, - ), + auth = get_auth(module_ctx, all_urls), **download_kwargs ), ) @@ -196,4 +66,13 @@ def simpleapi_download(module_ctx, srcs): if not result.success: fail("Failed to download from {}: {}".format(download.urls, result)) - return downloads + content = module_ctx.read(download.out) + contents[pkg] = struct( + html = content, + urls = download.all_urls, + ) + + if cache != None and download.cache_key: + cache[download.cache_key] = contents[pkg] + + return contents From 307819ea1dae745a817fbfcf275dd86edaf6396d Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 22 Mar 2024 18:05:10 +0900 Subject: [PATCH 18/70] wip --- .bazelrc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.bazelrc b/.bazelrc index 3f16396659..61fd0e7601 100644 --- a/.bazelrc +++ b/.bazelrc @@ -30,5 +30,4 @@ build:rtd --stamp # Some bzl files contain repos only available under bzlmod build:rtd --enable_bzlmod -# Disabled due to https://github.com/bazelbuild/bazel/issues/20942 -build --lockfile_mode=off +build --lockfile_mode=update From 6ca8a4bdc9d650a25ef2e82b21f8e0cc49ccd523 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 22 Mar 2024 18:14:10 +0900 Subject: [PATCH 19/70] fixup the cleanup --- python/private/bzlmod/pypi_index.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl index 4b170205df..ff213be16d 100644 --- a/python/private/bzlmod/pypi_index.bzl +++ b/python/private/bzlmod/pypi_index.bzl @@ -43,7 +43,7 @@ def simpleapi_download(module_ctx, srcs, cache = None): cache_key = ",".join(all_urls) if cache_key in cache: contents[pkg] = cache[cache_key] - continue + continue downloads[pkg] = struct( out = output, @@ -69,7 +69,7 @@ def simpleapi_download(module_ctx, srcs, cache = None): content = 
module_ctx.read(download.out) contents[pkg] = struct( html = content, - urls = download.all_urls, + urls = download.urls, ) if cache != None and download.cache_key: From d21a29e6ceedc405e3dc094be602b8498a3fb544 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Tue, 26 Mar 2024 13:01:24 +0900 Subject: [PATCH 20/70] cleanup --- python/extensions/pypi_index.bzl | 19 -- python/private/bzlmod/pip.bzl | 4 + python/private/bzlmod/pypi_index.bzl | 78 -------- python/private/pypi_index.bzl | 264 ++++++--------------------- 4 files changed, 62 insertions(+), 303 deletions(-) delete mode 100644 python/extensions/pypi_index.bzl delete mode 100644 python/private/bzlmod/pypi_index.bzl diff --git a/python/extensions/pypi_index.bzl b/python/extensions/pypi_index.bzl deleted file mode 100644 index f8a48d6a99..0000000000 --- a/python/extensions/pypi_index.bzl +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2024 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""See the doc in the implementation file.""" - -load("//python/private/bzlmod:pypi_index.bzl", _pypi_index = "pypi_index") - -pypi_index = _pypi_index diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 4ddb9f5cfd..0fa307e96f 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -221,6 +221,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca urls = [] sha256 = None filename = None + python = python_interpreter + if not python: + python = module_ctx.path(python_interpreter_target) + if index_urls: srcs = get_simpleapi_sources(requirement_line) diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl deleted file mode 100644 index ff213be16d..0000000000 --- a/python/private/bzlmod/pypi_index.bzl +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2024 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""TODO""" - -load("@bazel_features//:features.bzl", "bazel_features") -load("//python/private:auth.bzl", "get_auth") - -def simpleapi_download(module_ctx, srcs, cache = None): - """Download Simple API HTML. - - Args: - module_ctx: The bzlmod module_ctx. - srcs: The sources to download things for. - cache: A dictionary that can be used as a cache between calls during a - single evaluation of the extension. - - Returns: - dict of pkg name to the HTML contents. 
- """ - download_kwargs = {} - if bazel_features.external_deps.download_has_block_param: - download_kwargs["block"] = False - - downloads = {} - contents = {} - for pkg, args in srcs.items(): - output = module_ctx.path("{}/{}.html".format("pypi_index", pkg)) - all_urls = list(args["urls"].keys()) - cache_key = "" - if cache != None: - cache_key = ",".join(all_urls) - if cache_key in cache: - contents[pkg] = cache[cache_key] - continue - - downloads[pkg] = struct( - out = output, - urls = all_urls, - cache_key = cache_key, - download = module_ctx.download( - url = all_urls, - output = output, - auth = get_auth(module_ctx, all_urls), - **download_kwargs - ), - ) - - for pkg, download in downloads.items(): - if download_kwargs.get("block") == False: - result = download.download.wait() - else: - result = download.download - - if not result.success: - fail("Failed to download from {}: {}".format(download.urls, result)) - - content = module_ctx.read(download.out) - contents[pkg] = struct( - html = content, - urls = download.urls, - ) - - if cache != None and download.cache_key: - cache[download.cache_key] = contents[pkg] - - return contents diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 6704ac18ba..58f17b4a53 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -21,14 +21,66 @@ The functions here should not depend on the `module_ctx` for easy unit testing. load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load(":auth.bzl", "get_auth") load(":normalize_name.bzl", "normalize_name") -load(":text_util.bzl", "render") -_BUILD_TEMPLATE = """\ -# generated by @rules_python//python/private:pypi_index.bzl +def simpleapi_download(module_ctx, srcs, cache = None): + """Download Simple API HTML. -package(default_visibility = ["//visibility:public"]) -exports_files(["{}"]) -""" + Args: + module_ctx: The bzlmod module_ctx. + srcs: The sources to download things for. 
+ cache: A dictionary that can be used as a cache between calls during a + single evaluation of the extension. + + Returns: + dict of pkg name to the HTML contents. + """ + download_kwargs = {} + if bazel_features.external_deps.download_has_block_param: + download_kwargs["block"] = False + + downloads = {} + contents = {} + for pkg, args in srcs.items(): + output = module_ctx.path("{}/{}.html".format("pypi_index", pkg)) + all_urls = list(args["urls"].keys()) + cache_key = "" + if cache != None: + cache_key = ",".join(all_urls) + if cache_key in cache: + contents[pkg] = cache[cache_key] + continue + + downloads[pkg] = struct( + out = output, + urls = all_urls, + cache_key = cache_key, + download = module_ctx.download( + url = all_urls, + output = output, + auth = get_auth(module_ctx, all_urls), + **download_kwargs + ), + ) + + for pkg, download in downloads.items(): + if download_kwargs.get("block") == False: + result = download.download.wait() + else: + result = download.download + + if not result.success: + fail("Failed to download from {}: {}".format(download.urls, result)) + + content = module_ctx.read(download.out) + contents[pkg] = struct( + html = content, + urls = download.urls, + ) + + if cache != None and download.cache_key: + cache[download.cache_key] = contents[pkg] + + return contents def get_packages_from_requirements(requirements_files): """Get Simple API sources from a list of requirements files and merge them. @@ -81,61 +133,6 @@ def get_simpleapi_sources(line): return struct(version = version, shas = sorted(shas)) -def create_spoke_repos(simple_api_urls, pkg, html_contents, want_shas, prefix): - """Create spoke repos for the hub repo. - - Args: - simple_api_urls(list[str]): The URLs that were used to download the - HTML contents. - pkg(str): The name of the package. - html_contents(str): The contents of the simple API index. - want_shas(list[str]): The shas that we expect to find in the simple API metadata. 
- prefix(str): The prefix of all spoke repos. - - Returns: - A dict with the created repository names and the whl filenames that - they download. Note, that extra `.METADATA` repos for each whl - are also created, but they will not be in the returned dictionary. - """ - repos = {} - urls = get_packages( - simple_api_urls, - html_contents, - want_shas, - ) - - for url in urls: - pkg_name = "{}__{}_{}".format(prefix, pkg, url.sha256) - _pypi_archive( - name = pkg_name, - urls = [url.url], - filename = url.filename, - sha256 = url.sha256, - prefix = prefix, - ) - repos[pkg_name[len(prefix) + 2:]] = url.filename - - if url.metadata_sha256: - _pypi_archive( - name = pkg_name + ".METADATA", - urls = [url.metadata_url], - filename = "METADATA", - sha256 = url.metadata_sha256, - prefix = prefix, - ) - elif url.filename.endswith(".whl"): - _pypi_archive_metadata( - name = pkg_name + ".METADATA", - prefix = prefix, - whl = "@{}//{}:{}".format( - prefix, - pkg_name, - url.filename, - ), - ) - - return repos - def get_packages(index_urls, content, want_shas): """Get the package URLs for given shas by parsing the Simple API HTML. @@ -228,148 +225,3 @@ def _absolute_urls(index_url, candidate): index_url, _, _ = index_url.rstrip("/").rpartition("/") return "{}/{}".format(index_url, last.strip("/")) - -def _hub_impl(rctx): - # This is so that calling the following in rules_python works: - # $ bazel query $pypi_index/... --ignore_dev_dependency - rctx.file("BUILD.bazel", "") - - if not rctx.attr.repos: - return - - packages = {} - for repo, filename in rctx.attr.repos.items(): - pkg, _, sha256 = repo.rpartition("_") - - packages.setdefault(pkg, []).append( - struct( - sha256 = sha256, - filename = filename, - label = str(Label("@@{}__{}//:{}".format(rctx.attr.name, repo, filename))), - ), - ) - - for pkg, filenames in packages.items(): - # This contains the labels that should be used in the `pip` extension - # to get the labels that can be used by `whl_library`. 
- rctx.file( - "{}/index.json".format(pkg), - json.encode(filenames), - ) - - # These labels should be used to be passed to `whl_library`. - rctx.file( - "{}/BUILD.bazel".format(pkg), - "\n\n".join([ - _BUILD_TEMPLATE.format("index.json"), - ] + [ - render.alias( - name = r.filename, - actual = repr(r.label), - visibility = ["//visibility:private"], - ) - for r in filenames - ] + [ - render.alias( - name = r.filename + ".METADATA", - actual = repr(r.label.split("//:")[0] + ".METADATA//:METADATA"), - visibility = ["//visibility:private"], - ) - for r in filenames - if r.filename.endswith(".whl") - ]), - ) - -pypi_index_hub = repository_rule( - doc = """\ -This hub repository allows for easy passing of wheel labels to the pip extension. - -The layout of this repo is similar to the simple API: -//:BUILD.bazel -// - normalized to rules_python scheme - lowercase snake-case) - :index.json - contains all labels in the bazel package - :BUILD.bazel - contains aliases to the repos created by the extension for easy - introspection using `bazel query`. Visibility is private for now. - Change it to `public` if needed. 
-""", - implementation = _hub_impl, - attrs = { - "repos": attr.string_dict(mandatory = True), - }, -) - -def _impl_archive(rctx): - filename = rctx.attr.filename - rctx.file("BUILD.bazel", _BUILD_TEMPLATE.format(filename)) - - if rctx.attr.file: - rctx.symlink(rctx.path(rctx.attr.file), filename) - return - - result = rctx.download( - url = rctx.attr.urls, - output = filename, - auth = get_auth( - rctx, - rctx.attr.urls, - ), - ) - - if not result.success: - fail(result) - -_pypi_archive = repository_rule( - implementation = _impl_archive, - attrs = { - "file": attr.label( - doc = "Used for indexing wheels on the local filesystem", - allow_single_file = [".whl", ".tar.gz", ".zip"], - ), - "filename": attr.string(mandatory = True), - "prefix": attr.string(mandatory = True), - "sha256": attr.string(), - "urls": attr.string_list(), - }, -) - -def _impl_metadata(rctx): - whl_label = rctx.attr.whl - prefix = rctx.attr.prefix - - if not whl_label.workspace_name.endswith(prefix): - # Here we should have a hub repo label which we need to rewrite to the - # thing that the label is pointing to. We can do this because we own - # the construction of the labels. - fail("Expected the label to this rule to be from the '{}' hub repo".format(prefix)) - - # NOTE @aignas 2024-03-08: if we see restarts, then it could mean that we are not constructing - # the right label as an input file. - whl_label = Label("@@{}//:{}".format(rctx.name[:-len(".METADATA")], whl_label.name)) - - rctx.symlink(rctx.path(whl_label), "wheel.zip") - rctx.extract("wheel.zip") - - content = None - for p in rctx.path(".").readdir(): - if p.basename.endswith(".dist-info"): - content = rctx.read(p.get_child("METADATA")) - rctx.delete(p) - - if content == None: - fail("Could not find a METADATA file") - - rctx.file("METADATA", content) - rctx.file("BUILD.bazel", _BUILD_TEMPLATE.format("METADATA")) - -_pypi_archive_metadata = repository_rule( - doc = """Extract METADATA from a '.whl' file in repository context. 
- -This allows to work with other implementations of Indexes that do not serve -METADATA like PyPI or with patched METADATA in patched and re-zipped wheels. -""", - implementation = _impl_metadata, - attrs = { - "prefix": attr.string(mandatory = True), - "whl": attr.label(mandatory = True, allow_single_file = [".whl"]), - }, -) From affbb32c1c2f59a26e7f1162cd7902ca01c05000 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Tue, 26 Mar 2024 13:03:32 +0900 Subject: [PATCH 21/70] fixup! cleanup --- python/private/bzlmod/pip.bzl | 7 +------ python/private/pypi_index.bzl | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 0fa307e96f..858a55d656 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -28,11 +28,10 @@ load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") -load("//python/private:pypi_index.bzl", "get_packages", "get_packages_from_requirements", "get_simpleapi_sources") +load("//python/private:pypi_index.bzl", "get_packages", "get_packages_from_requirements", "get_simpleapi_sources", "simpleapi_download") load("//python/private:render_pkg_aliases.bzl", "whl_alias") load("//python/private:version_label.bzl", "version_label") load(":pip_repository.bzl", "pip_repository") -load(":pypi_index.bzl", "simpleapi_download") def _parse_version(version): major, _, version = version.partition(".") @@ -221,10 +220,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca urls = [] sha256 = None filename = None - python = python_interpreter - if not python: - python = module_ctx.path(python_interpreter_target) - if index_urls: srcs = get_simpleapi_sources(requirement_line) diff --git 
a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 58f17b4a53..a8cb52cd39 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -18,6 +18,7 @@ A file that houses private functions used in the `bzlmod` extension with the sam The functions here should not depend on the `module_ctx` for easy unit testing. """ +load("@bazel_features//:features.bzl", "bazel_features") load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load(":auth.bzl", "get_auth") load(":normalize_name.bzl", "normalize_name") From 2da0b2906e4e8ecf2fa3df2cc976e8a9b05e0303 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Tue, 26 Mar 2024 13:09:47 +0900 Subject: [PATCH 22/70] fixup! fixup! cleanup --- docs/sphinx/BUILD.bazel | 1 - python/extensions/BUILD.bazel | 7 ------- python/private/BUILD.bazel | 1 + python/private/bzlmod/BUILD.bazel | 11 ----------- 4 files changed, 1 insertion(+), 19 deletions(-) diff --git a/docs/sphinx/BUILD.bazel b/docs/sphinx/BUILD.bazel index 76ba21ea03..8912f2cfb6 100644 --- a/docs/sphinx/BUILD.bazel +++ b/docs/sphinx/BUILD.bazel @@ -91,7 +91,6 @@ sphinx_stardocs( } if IS_BAZEL_7_OR_HIGHER else {}) | ({ # This depends on @pythons_hub, which is only created under bzlmod, "api/extensions/pip.md": "//python/extensions:pip_bzl", - "api/extensions/pypi_index.md": "//python/extensions:pypi_index_bzl", } if IS_BAZEL_7_OR_HIGHER and BZLMOD_ENABLED else {}), footer = "_stardoc_footer.md", tags = ["docs"], diff --git a/python/extensions/BUILD.bazel b/python/extensions/BUILD.bazel index b0dcae0f9c..a9dede44ec 100644 --- a/python/extensions/BUILD.bazel +++ b/python/extensions/BUILD.bazel @@ -31,13 +31,6 @@ bzl_library( deps = ["//python/private/bzlmod:pip_bzl"], ) -bzl_library( - name = "pypi_index_bzl", - srcs = ["pypi_index.bzl"], - visibility = ["//:__subpackages__"], - deps = ["//python/private/bzlmod:pypi_index_bzl"], -) - bzl_library( name = "python_bzl", srcs = 
["python.bzl"], diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index b048a6ea83..1bcdc6f314 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -127,6 +127,7 @@ bzl_library( ":normalize_name_bzl", ":text_util_bzl", "//python/pip_install:requirements_parser_bzl", + "//python/private/bzlmod:bazel_features_bzl", ], ) diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index f5df9cebcb..8a4bf1a724 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -58,17 +58,6 @@ bzl_library( ], ) -bzl_library( - name = "pypi_index_bzl", - srcs = ["pypi_index.bzl"], - deps = [ - ":bazel_features_bzl", - "//python/private:auth_bzl", - "//python/private:envsubst_bzl", - "//python/private:pypi_index_bzl", - ], -) - bzl_library( name = "python_bzl", srcs = ["python.bzl"], From 5ba7881e8b00d42c01d2cfb9c6aff5457143f6a2 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Thu, 28 Mar 2024 13:04:05 +0900 Subject: [PATCH 23/70] wip --- .bazelversion | 2 +- MODULE.bazel | 4 +- python/private/bzlmod/pip.bzl | 89 +++++++++------ python/private/pypi_index.bzl | 145 ++++++++++++++++++------ python/private/whl_target_platforms.bzl | 62 ++++++++++ 5 files changed, 227 insertions(+), 75 deletions(-) diff --git a/.bazelversion b/.bazelversion index 66ce77b7ea..21c8c7b46b 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -7.0.0 +7.1.1 diff --git a/MODULE.bazel b/MODULE.bazel index 8a8a5dd925..f8d1b30330 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -6,7 +6,7 @@ module( bazel_dep(name = "bazel_features", version = "1.9.0") bazel_dep(name = "bazel_skylib", version = "1.3.0") -bazel_dep(name = "platforms", version = "0.0.4") +bazel_dep(name = "platforms", version = "0.0.9") # Those are loaded only when using py_proto_library bazel_dep(name = "rules_proto", version = "5.3.0-21.7") @@ -99,6 +99,8 @@ dev_pip.parse( dev_pip.parse( hub_name = 
"publish_deps", python_version = "3.11", + experimental_index_url = "${PIP_INDEX_URL:-https://pypi.org/simple}", + envsubst = ["PIP_INDEX_URL"], requirements_darwin = "//tools/publish:requirements_darwin.txt", requirements_lock = "//tools/publish:requirements.txt", requirements_windows = "//tools/publish:requirements_windows.txt", diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 858a55d656..8cea83348b 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -15,6 +15,7 @@ "pip module extension for use with bzlmod" load("@bazel_features//:features.bzl", "bazel_features") +load("@platforms//host:constraints.bzl", "HOST_CONSTRAINTS") load("@pythons_hub//:interpreters.bzl", "DEFAULT_PYTHON_VERSION", "INTERPRETER_LABELS") load( "//python/pip_install:pip_repository.bzl", @@ -26,6 +27,7 @@ load( ) load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load("//python/private:envsubst.bzl", "envsubst") +load("//python/private:whl_target_platforms.bzl", "select_whl") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") load("//python/private:pypi_index.bzl", "get_packages", "get_packages_from_requirements", "get_simpleapi_sources", "simpleapi_download") @@ -185,27 +187,29 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca # decrease the number of times we call the simple API. index_urls = {} if pip_attr.experimental_index_url: - index_url = envsubst( - pip_attr.experimental_index_url, - pip_attr.envsubst, - module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, + index_urls = simpleapi_download( + module_ctx, + index_url = pip_attr.experimental_index_url, + # TODO @aignas 2024-03-28: support index overrides for specific packages + # We should never attempt to join index contents ourselves. 
+ index_url_overrides = pip_attr.experimental_index_url_overrides, + sources = requirements_locks.values(), + envsubst = pip_attr.envsubst, + cache =simpleapi_cache, ) - sources = get_packages_from_requirements(requirements_locks.values()) - simpleapi_srcs = {} - for pkg, want_shas in sources.simpleapi.items(): - entry = simpleapi_srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) - - # ensure that we have a trailing slash, because we will otherwise get redirects - # which may not work on private indexes with netrc authentication. - entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True - entry["want_shas"].update(want_shas) - - for pkg, download in simpleapi_download(module_ctx, simpleapi_srcs, simpleapi_cache).items(): - index_urls[pkg] = get_packages( - download.urls, - download.html, - want_shas = simpleapi_srcs[pkg]["want_shas"], - ) + + major_minor = _major_minor_version(pip_attr.python_version) + + host_cpu, host_os = None, None + for constraint in HOST_CONSTRAINTS: + if "@platforms//cpu:" in constraint: + _, _, host_cpu = constraint.partition(":") + elif "@platforms//os:" in constraint: + _, _, host_os = constraint.partition(":") + + if not (host_os and host_cpu): + fail("Don't have host information") + # Create a new wheel library for each of the different whls for whl_name, requirement_line in requirements: @@ -220,24 +224,26 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca urls = [] sha256 = None filename = None + extra_whl_pip_args = extra_pip_args if index_urls: srcs = get_simpleapi_sources(requirement_line) - whls = [ - src - for src in index_urls[whl_name] - if src.sha256 in srcs.shas and src.filename.endswith(".whl") - ] - - # For now only use the bazel downloader only whl file is a - # cross-platform wheel. 
- if len(whls) == 1 and whls[0].filename.endswith("-any.whl"): - urls.append(whls[0].url) - sha256 = whls[0].sha256 - filename = whls[0].filename - else: - pass - #print("Would use the following for {}: {}".format(whl_name, whls)) + whl = select_whl( + whls=[ + src + for src in index_urls[whl_name] + if src.sha256 in srcs.shas and src.filename.endswith(".whl") + ], + want_abis=["none", "abi3", "cp" + major_minor.replace(".", "")], + want_platform="{}_{}".format(host_os, host_cpu), + ) + + if whl: + requirement_line = srcs.wo_shas + urls.append(whl.url) + sha256 = whl.sha256 + filename = whl.filename + extra_whl_pip_args = None repo_name = "{}_{}".format(pip_name, whl_name) whl_library( @@ -259,7 +265,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca quiet = pip_attr.quiet, timeout = pip_attr.timeout, isolated = use_isolated(module_ctx, pip_attr), - extra_pip_args = extra_pip_args, + extra_pip_args = extra_whl_pip_args, download_only = pip_attr.download_only, pip_data_exclude = pip_attr.pip_data_exclude, enable_implicit_namespace_pkgs = pip_attr.enable_implicit_namespace_pkgs, @@ -269,7 +275,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca group_deps = group_deps, ) - major_minor = _major_minor_version(pip_attr.python_version) whl_map[hub_name].setdefault(whl_name, []).append( whl_alias( repo = repo_name, @@ -444,6 +449,16 @@ def _pip_parse_ext_attrs(): doc = """\ The index URL to use for downloading wheels using bazel downloader. This value is going to be subject to `envsubst` substitutions if necessary. +""", + ), + "experimental_index_url_overrides": attr.string_dict( + doc = """\ +The index URL overrides for each package to use for downloading wheels using +bazel downloader. This value is going to be subject to `envsubst` substitutions +if necessary. + +The key is the package name (will be normalized before usage) and the value is the +index URL. 
""", ), "hub_name": attr.string( diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index a8cb52cd39..35783c24fe 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -21,68 +21,131 @@ The functions here should not depend on the `module_ctx` for easy unit testing. load("@bazel_features//:features.bzl", "bazel_features") load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load(":auth.bzl", "get_auth") +load(":envsubst.bzl", "envsubst") load(":normalize_name.bzl", "normalize_name") -def simpleapi_download(module_ctx, srcs, cache = None): +def simpleapi_download(module_ctx, *, index_url, index_url_overrides, sources, envsubst, cache = None): """Download Simple API HTML. Args: module_ctx: The bzlmod module_ctx. - srcs: The sources to download things for. + index_url: The index. + index_url_overrides: The index overrides for separate packages. + sources: The sources to download things for. + envsubst: The envsubst vars. cache: A dictionary that can be used as a cache between calls during a single evaluation of the extension. Returns: dict of pkg name to the HTML contents. """ + sources = get_packages_from_requirements(sources) + index_url_overrides = { + normalize_name(p): i + for p, i in (index_url_overrides or {}).items() + } + + srcs = {} + for pkg, want_shas in sources.simpleapi.items(): + entry = srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) + + # ensure that we have a trailing slash, because we will otherwise get redirects + # which may not work on private indexes with netrc authentication. 
+ entry["urls"]["{}/{}/".format(index_url_overrides.get(pkg, index_url).rstrip("/"), pkg)] = True + entry["want_shas"].update(want_shas) + download_kwargs = {} if bazel_features.external_deps.download_has_block_param: download_kwargs["block"] = False + # Download in parallel if possible downloads = {} contents = {} for pkg, args in srcs.items(): - output = module_ctx.path("{}/{}.html".format("pypi_index", pkg)) all_urls = list(args["urls"].keys()) cache_key = "" if cache != None: + # FIXME @aignas 2024-03-28: should I envsub this? cache_key = ",".join(all_urls) if cache_key in cache: contents[pkg] = cache[cache_key] continue downloads[pkg] = struct( - out = output, - urls = all_urls, cache_key = cache_key, - download = module_ctx.download( + urls = all_urls, + packages = read_simple_api( + module_ctx = module_ctx, url = all_urls, - output = output, - auth = get_auth(module_ctx, all_urls), - **download_kwargs + pkg = pkg, + envsubst_vars = envsubst, + **download_kwargs, ), ) for pkg, download in downloads.items(): - if download_kwargs.get("block") == False: - result = download.download.wait() - else: - result = download.download - - if not result.success: - fail("Failed to download from {}: {}".format(download.urls, result)) - - content = module_ctx.read(download.out) - contents[pkg] = struct( - html = content, - urls = download.urls, - ) + contents[pkg] = download.packages.contents() if cache != None and download.cache_key: cache[download.cache_key] = contents[pkg] return contents +def read_simple_api(module_ctx, url, pkg, envsubst_vars, **download_kwargs): + """Read SimpleAPI. + + Args: + module_ctx: TODO + url: The url parameter that can be passed to module_ctx.download. + pkg: The pkg to fetch the data for. + envsubst_vars: The env vars to do env sub before downloading. + **download_kwargs: Any extra params to module_ctx.download. + Note that output and auth will be passed for you. 
+ + Returns: + A similar object to what `download` would return except that in result.out + will be the parsed simple api contents. + """ + # TODO @aignas 2024-03-26: use a unique path to avoid clashes + output = module_ctx.path("{}/{}.html".format("pypi_index", pkg)) + + # TODO: Add a test that env subbed index urls do not leak into the lock file. + if type(url) == type(""): + fail("TODO") + else: + real_url = [ + envsubst( + u, + envsubst_vars, + module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, + ) + for u in url + ] + + download = module_ctx.download( + url = real_url, + output = output, + auth = get_auth(module_ctx, real_url), + **download_kwargs + ) + + return struct( + contents=lambda: _read_contents( + module_ctx, + download.wait() if download_kwargs.get("block") == False else download, + output, + url, + ), + ) + + +def _read_contents(module_ctx, result, output, url): + if not result.success: + fail("Failed to download from {}: {}".format(url, result)) + + html = module_ctx.read(output) + return get_packages(url, html) + def get_packages_from_requirements(requirements_files): """Get Simple API sources from a list of requirements files and merge them. @@ -132,15 +195,24 @@ def get_simpleapi_sources(line): for sha in maybe_hashes.split("--hash=sha256:")[1:] ] - return struct(version = version, shas = sorted(shas)) + if head == line: + head = line.partition("--hash=")[0].strip() + else: + head = head + ";" + maybe_hashes.partition("--hash=")[0].strip() + + return struct( + wo_shas = line if not shas else head, + version = version, + shas = sorted(shas), + ) -def get_packages(index_urls, content, want_shas): +def get_packages(index_urls, content, want_shas = None): """Get the package URLs for given shas by parsing the Simple API HTML. Args: index_urls(list[str]): The URLs that the HTML content can be downloaded from. content(str): The Simple API HTML content. - want_shas(list[str]): The list of shas that we need to get. 
+ want_shas(list[str], optional): The list of shas that we need to get, otherwise we'll get all. Returns: A list of structs with: @@ -151,7 +223,7 @@ def get_packages(index_urls, content, want_shas): present, then the 'metadata_url' is also present. Defaults to "". * metadata_url: The URL for the METADATA if we can download it. Defaults to "". """ - want_shas = {sha: True for sha in want_shas} + want_shas = {sha: True for sha in want_shas} if want_shas else {} packages = [] lines = content.split("") filename, _, tail = tail.partition("<") diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index 30e4dd4c7a..e5f278afff 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -16,6 +16,22 @@ A starlark implementation of the wheel platform tag parsing to get the target platform. """ +load(":parse_whl_name.bzl", "parse_whl_name") + +_LEGACY_ALIASES = { + "manylinux1_x86_64": "manylinux_2_5_x86_64", + "manylinux1_i686": "manylinux_2_5_i686", + "manylinux2010_x86_64": "manylinux_2_12_x86_64", + "manylinux2010_i686": "manylinux_2_12_i686", + "manylinux2014_x86_64": "manylinux_2_17_x86_64", + "manylinux2014_i686": "manylinux_2_17_i686", + "manylinux2014_aarch64": "manylinux_2_17_aarch64", + "manylinux2014_armv7l": "manylinux_2_17_armv7l", + "manylinux2014_ppc64": "manylinux_2_17_ppc64", + "manylinux2014_ppc64le": "manylinux_2_17_ppc64le", + "manylinux2014_s390x": "manylinux_2_17_s390x", +} + # The order of the dictionaries is to keep definitions with their aliases next to each # other _CPU_ALIASES = { @@ -40,6 +56,52 @@ _OS_PREFIXES = { "win": "windows", } # buildifier: disable=unsorted-dict-items +def select_whl(*, whls, want_abis, want_platforms): + if not whls: + return None + + candidates = {} + for whl in whls: + parsed = parse_whl_name(whl.filename) + if parsed.abi_tag not in want_abis: + # Filter out incompatible ABIs + continue + + candidates[parsed.platform_tag] = whl + + # For 
now only use the bazel downloader only whl file is a + # cross-platform wheel. + if len(candidates) == 1 and "any" in candidates: + return struct( + url = candidates["any"].url, + sha256 = candidates["any"].sha256, + filename = candidates["any"].filename, + ) + + target_plats = {} + has_any = "any" in candidates + for platform_tag, whl in candidates.items(): + if platform_tag == "any": + continue + + platform_tag = ".".join({_LEGACY_ALIASES.get(p, p): True for p in platform_tag.split(".")}) + platforms = whl_target_platforms(platform_tag) + for p in platforms: + target_plats.setdefault("{}_{}".format(p.os, p.cpu), []).append(platform_tag) + + for p in platform_tag.split("."): + target_plats.setdefault(p, []).append(p) + + want = target_plats.get(want_platform) or (["any"] if has_any else []) + if len(want) == 1: + return candidates[want[0]] + + fail("\n".join([want])) + + # todo what todo here? + print("Multiple matches found: {}".format(want)) + return candidates[sorted(want[0])] + def whl_target_platforms(platform_tag, abi_tag = ""): """Parse the wheel abi and platform tags and return (os, cpu) tuples. From bb30b41137a64b08a72ce6ad543c57d11aff7316 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 11:39:00 +0900 Subject: [PATCH 24/70] wip --- CHANGELOG.md | 7 +- python/private/bzlmod/pip.bzl | 46 +++-- python/private/whl_target_platforms.bzl | 102 ++++++++++-- .../private/whl_target_platforms/BUILD.bazel | 3 + .../whl_target_platforms/select_whl_tests.bzl | 157 ++++++++++++++++++ 5 files changed, 278 insertions(+), 37 deletions(-) create mode 100644 tests/private/whl_target_platforms/select_whl_tests.bzl diff --git a/CHANGELOG.md b/CHANGELOG.md index 348d8d38bd..f90d8bc035 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,11 +41,12 @@ A brief description of the categories of changes: * (gazelle) Added a new `python_default_visibility` directive to control the _default_ visibility of generated targets. 
See the [docs][python_default_visibility] for details. -* (bzlmod) New **experimental** `pypi_index` extension that can be used to - instruct the `pip.parse` tag class to use the bazel downloader to fetch - wheels. Note, the API is very unstable and may be changed at any time. * (wheel) Add support for `data_files` attributes in py_wheel rule ([#1777](https://github.com/bazelbuild/rules_python/issues/1777)) +* (bzlmod) New `experimental_index_url` and `experimental_index_url_overrides` to + `pip.parse` for using the bazel downloader. This is currently only working for + `whl-only` setups and may contain bugs. If you see any issues, report in + [#1357](https://github.com/bazelbuild/rules_python/issues/1357). [python_default_visibility]: gazelle/README.md#directive-python_default_visibility diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 8cea83348b..a8ea075571 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -26,13 +26,12 @@ load( "whl_library", ) load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") -load("//python/private:envsubst.bzl", "envsubst") -load("//python/private:whl_target_platforms.bzl", "select_whl") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") -load("//python/private:pypi_index.bzl", "get_packages", "get_packages_from_requirements", "get_simpleapi_sources", "simpleapi_download") +load("//python/private:pypi_index.bzl", "get_simpleapi_sources", "simpleapi_download") load("//python/private:render_pkg_aliases.bzl", "whl_alias") load("//python/private:version_label.bzl", "version_label") +load("//python/private:whl_target_platforms.bzl", "select_whl") load(":pip_repository.bzl", "pip_repository") def _parse_version(version): @@ -186,7 +185,11 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca # TODO @aignas 2024-03-21: do this outside this function so 
that we can # decrease the number of times we call the simple API. index_urls = {} + host_cpu, host_os = None, None if pip_attr.experimental_index_url: + if pip_attr.download_only: + fail("Currently unsupported") + index_urls = simpleapi_download( module_ctx, index_url = pip_attr.experimental_index_url, @@ -195,21 +198,19 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca index_url_overrides = pip_attr.experimental_index_url_overrides, sources = requirements_locks.values(), envsubst = pip_attr.envsubst, - cache =simpleapi_cache, + cache = simpleapi_cache, ) - major_minor = _major_minor_version(pip_attr.python_version) - - host_cpu, host_os = None, None - for constraint in HOST_CONSTRAINTS: - if "@platforms//cpu:" in constraint: - _, _, host_cpu = constraint.partition(":") - elif "@platforms//os:" in constraint: - _, _, host_os = constraint.partition(":") + for constraint in HOST_CONSTRAINTS: + if "@platforms//cpu:" in constraint: + _, _, host_cpu = constraint.partition(":") + elif "@platforms//os:" in constraint: + _, _, host_os = constraint.partition(":") - if not (host_os and host_cpu): - fail("Don't have host information") + if not (host_os and host_cpu): + fail("Don't have host information") + major_minor = _major_minor_version(pip_attr.python_version) # Create a new wheel library for each of the different whls for whl_name, requirement_line in requirements: @@ -229,13 +230,19 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca srcs = get_simpleapi_sources(requirement_line) whl = select_whl( - whls=[ + whls = [ src for src in index_urls[whl_name] if src.sha256 in srcs.shas and src.filename.endswith(".whl") ], - want_abis=["none", "abi3", "cp" + major_minor.replace(".", "")], - want_platform="{}_{}".format(host_os, host_cpu), + want_abis = [ + "none", + "abi3", + "cp" + major_minor.replace(".", ""), + # Older python versions have wheels for the `*m` ABI. 
+ "cp" + major_minor.replace(".", "") + "m", + ], + want_platform = "{}_{}".format(host_os, host_cpu), ) if whl: @@ -244,6 +251,11 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca sha256 = whl.sha256 filename = whl.filename extra_whl_pip_args = None + else: + # TODO @aignas 2024-03-29: in the future we should probably just + # use an `sdist` but having this makes it easy to debug issues + # in early development stages. + fail("Could not find whl for: {}".format(requirement_line)) repo_name = "{}_{}".format(pip_name, whl_name) whl_library( diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index e5f278afff..daba9f2044 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -19,17 +19,17 @@ A starlark implementation of the wheel platform tag parsing to get the target pl load(":parse_whl_name.bzl", "parse_whl_name") _LEGACY_ALIASES = { - "manylinux1_x86_64": "manylinux_2_5_x86_64", "manylinux1_i686": "manylinux_2_5_i686", - "manylinux2010_x86_64": "manylinux_2_12_x86_64", + "manylinux1_x86_64": "manylinux_2_5_x86_64", "manylinux2010_i686": "manylinux_2_12_i686", - "manylinux2014_x86_64": "manylinux_2_17_x86_64", - "manylinux2014_i686": "manylinux_2_17_i686", + "manylinux2010_x86_64": "manylinux_2_12_x86_64", "manylinux2014_aarch64": "manylinux_2_17_aarch64", "manylinux2014_armv7l": "manylinux_2_17_armv7l", + "manylinux2014_i686": "manylinux_2_17_i686", "manylinux2014_ppc64": "manylinux_2_17_ppc64", "manylinux2014_ppc64le": "manylinux_2_17_ppc64le", "manylinux2014_s390x": "manylinux_2_17_s390x", + "manylinux2014_x86_64": "manylinux_2_17_x86_64", } # The order of the dictionaries is to keep definitions with their aliases next to each @@ -56,7 +56,64 @@ _OS_PREFIXES = { "win": "windows", } # buildifier: disable=unsorted-dict-items -def select_whl(*, whls, want_abis, want_platforms): +def _whl_priority(value): + """Return a number for sorting whl 
lists, larger number means lower priority. + + TODO @aignas 2024-03-29: In the future we should create a repo for each + repo that matches the abi and then we could have config flags for the + preference of `any` wheels or `sdist` or `manylinux` vs `musllinux` or + `universal2`. Ideally we use `select` statements in the hub repo to do + the selection based on the config, but for now this is the best way to + get this working for the host platform. + """ + if "." in value: + value, _, _ = value.partition(".") + + if "any" == value: + # This is just a big value that should be larger than any other value returned by this function + return 100000 + + # The offset is for ensuring that the universal wheels are less + # preferred. + offset = (len(whl_target_platforms(value)) - 1) * 10000 + + if "linux" in value: + os, _, tail = value.partition("_") + if os == "linux": + # If the platform tag starts with 'linux', then return something less than what 'any' returns + version = 99 + else: + _major, _, tail = tail.partition("_") # We don't need to use that because it's the same for all candidates now + version, _, _ = tail.partition("_") + + return int(version) + offset + + if "mac" in value or "osx" in value: + _, _, tail = value.partition("_") + major, _, tail = tail.partition("_") + minor, _, _ = tail.partition("_") + + # the major is >= 10, so let's just multiply by 10 + version = int(major) * 100 + int(minor) + return version + offset + + if not "win" in value: + fail("BUG") + + # Windows does not have multiple wheels for the same target platform + return offset + +def select_whl(*, whls, want_abis, want_platform): + """Select a suitable wheel from a list. + + Args: + whls(list[struct]): A list of candidates. + want_abis(list[str]): A list of ABIs that are supported. + want_platform(str): A string platform that can be derived from `{os}_{cpu}` values. + + Returns: + A struct with `url`, `sha256` and `filename` attributes for the selected whl. 
+ """ if not whls: return None @@ -65,12 +122,20 @@ def select_whl(*, whls, want_abis, want_platforms): parsed = parse_whl_name(whl.filename) if parsed.abi_tag not in want_abis: # Filter out incompatible ABIs + # print("Skipping {} because {} is not in {}".format( + # whl.filename, + # parsed.abi_tag, + # want_abis, + # )) continue - candidates[parsed.platform_tag] = whl + platform_tags = list({_LEGACY_ALIASES.get(p, p): True for p in parsed.platform_tag.split(".")}) - # For now only use the bazel downloader only whl file is a - # cross-platform wheel. + for tag in platform_tags: + candidates[tag] = whl + + # For most packages - if they supply 'any' wheel and there are no other + # compatible wheels with the selected abis, we can just return the value. if len(candidates) == 1 and "any" in candidates: return struct( url = candidates["any"].url, @@ -84,23 +149,26 @@ def select_whl(*, whls, want_abis, want_platforms): if platform_tag == "any": continue + if "musl" in platform_tag: + # Ignore musl wheels for now + continue + platform_tag = ".".join({_LEGACY_ALIASES.get(p, p): True for p in platform_tag.split(".")}) platforms = whl_target_platforms(platform_tag) for p in platforms: target_plats.setdefault("{}_{}".format(p.os, p.cpu), []).append(platform_tag) - for p in platform_tag.split("."): - target_plats.setdefault(p, []).append(p) + for p, platform_tags in target_plats.items(): + if has_any: + platform_tags.append("any") - want = target_plats.get(want_platform) or (["any"] if has_any else []) - if len(want) == 1: - return candidates[want[0]] + target_plats[p] = sorted(platform_tags, key = _whl_priority) - fail("\n".join([want])) + want = target_plats.get(want_platform) + if not want: + return want - # todo what todo here? - print("Multiple matches found: {}".format(want)) - return candidates[sorted(want[0])] + return candidates[want[0]] def whl_target_platforms(platform_tag, abi_tag = ""): """Parse the wheel abi and platform tags and return (os, cpu) tuples. 
diff --git a/tests/private/whl_target_platforms/BUILD.bazel b/tests/private/whl_target_platforms/BUILD.bazel index fec25af033..6c35b08d32 100644 --- a/tests/private/whl_target_platforms/BUILD.bazel +++ b/tests/private/whl_target_platforms/BUILD.bazel @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +load(":select_whl_tests.bzl", "select_whl_test_suite") load(":whl_target_platforms_tests.bzl", "whl_target_platforms_test_suite") +select_whl_test_suite(name = "select_whl_tests") + whl_target_platforms_test_suite(name = "whl_target_platforms_tests") diff --git a/tests/private/whl_target_platforms/select_whl_tests.bzl b/tests/private/whl_target_platforms/select_whl_tests.bzl new file mode 100644 index 0000000000..69d4b3500e --- /dev/null +++ b/tests/private/whl_target_platforms/select_whl_tests.bzl @@ -0,0 +1,157 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"" + +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private:whl_target_platforms.bzl", "select_whl") # buildifier: disable=bzl-visibility + +WHL_LIST = [ + struct( + filename = f, + url = "https://" + f, + sha256 = "sha256://" + f, + ) + for f in [ + "pkg-0.0.1-cp310-cp310-macosx_10_9_universal2.whl", + "pkg-0.0.1-cp310-cp310-macosx_10_9_x86_64.whl", + "pkg-0.0.1-cp310-cp310-macosx_11_0_arm64.whl", + "pkg-0.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + "pkg-0.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", + "pkg-0.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", + "pkg-0.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "pkg-0.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", + "pkg-0.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", + "pkg-0.0.1-cp310-cp310-musllinux_1_1_i686.whl", + "pkg-0.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", + "pkg-0.0.1-cp310-cp310-musllinux_1_1_s390x.whl", + "pkg-0.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", + "pkg-0.0.1-cp310-cp310-win32.whl", + "pkg-0.0.1-cp310-cp310-win_amd64.whl", + "pkg-0.0.1-cp311-cp311-macosx_10_9_universal2.whl", + "pkg-0.0.1-cp311-cp311-macosx_10_9_x86_64.whl", + "pkg-0.0.1-cp311-cp311-macosx_11_0_arm64.whl", + "pkg-0.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + "pkg-0.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", + "pkg-0.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", + "pkg-0.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "pkg-0.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", + "pkg-0.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", + "pkg-0.0.1-cp311-cp311-musllinux_1_1_i686.whl", + "pkg-0.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", + "pkg-0.0.1-cp311-cp311-musllinux_1_1_s390x.whl", + "pkg-0.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", + 
"pkg-0.0.1-cp311-cp311-win32.whl", + "pkg-0.0.1-cp311-cp311-win_amd64.whl", + "pkg-0.0.1-cp312-cp312-macosx_10_9_universal2.whl", + "pkg-0.0.1-cp312-cp312-macosx_10_9_x86_64.whl", + "pkg-0.0.1-cp312-cp312-macosx_11_0_arm64.whl", + "pkg-0.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + "pkg-0.0.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", + "pkg-0.0.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", + "pkg-0.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "pkg-0.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", + "pkg-0.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", + "pkg-0.0.1-cp312-cp312-musllinux_1_1_i686.whl", + "pkg-0.0.1-cp312-cp312-musllinux_1_1_ppc64le.whl", + "pkg-0.0.1-cp312-cp312-musllinux_1_1_s390x.whl", + "pkg-0.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", + "pkg-0.0.1-cp312-cp312-win32.whl", + "pkg-0.0.1-cp312-cp312-win_amd64.whl", + "pkg-0.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", + "pkg-0.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + "pkg-0.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", + "pkg-0.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", + "pkg-0.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "pkg-0.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", + "pkg-0.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", + "pkg-0.0.1-cp37-cp37m-musllinux_1_1_i686.whl", + "pkg-0.0.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", + "pkg-0.0.1-cp37-cp37m-musllinux_1_1_s390x.whl", + "pkg-0.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", + "pkg-0.0.1-cp37-cp37m-win32.whl", + "pkg-0.0.1-cp37-cp37m-win_amd64.whl", + "pkg-0.0.1-cp39-cp39-macosx_10_9_universal2.whl", + "pkg-0.0.1-cp39-cp39-macosx_10_9_x86_64.whl", + "pkg-0.0.1-cp39-cp39-macosx_11_0_arm64.whl", + "pkg-0.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + 
"pkg-0.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", + "pkg-0.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", + "pkg-0.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "pkg-0.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", + "pkg-0.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", + "pkg-0.0.1-cp39-cp39-musllinux_1_1_i686.whl", + "pkg-0.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", + "pkg-0.0.1-cp39-cp39-musllinux_1_1_s390x.whl", + "pkg-0.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", + "pkg-0.0.1-cp39-cp39-win32.whl", + "pkg-0.0.1-cp39-cp39-win_amd64.whl", + "pkg-0.0.1-py3-abi3-any.whl", + "pkg-0.0.1-py3-none-any.whl", + ] +] + +def _match(env, got, want_filename): + if want_filename: + env.expect.that_str(got.filename).equals(want_filename) + env.expect.that_str(got.sha256).equals("sha256://" + want_filename) + env.expect.that_str(got.url).equals("https://" + want_filename) + else: + env.expect.that_int(got).equals(None) + +_tests = [] + +def _test_selecting(env): + got = select_whl(whls = WHL_LIST, want_abis = ["none"], want_platform = "ignored") + _match(env, got, "pkg-0.0.1-py3-none-any.whl") + + got = select_whl(whls = WHL_LIST, want_abis = ["abi3"], want_platform = "ignored") + _match(env, got, "pkg-0.0.1-py3-abi3-any.whl") + + # Check the selection failure + got = select_whl(whls = WHL_LIST, want_abis = ["cp39"], want_platform = "does-not-exist") + _match(env, got, None) + + # Check we match the ABI and not the py version + got = select_whl(whls = WHL_LIST, want_abis = ["cp37m"], want_platform = "linux_x86_64") + _match(env, got, "pkg-0.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl") + + # Check we can select a filename with many platform tags + got = select_whl(whls = WHL_LIST, want_abis = ["cp39"], want_platform = "linux_x86_32") + _match(env, got, "pkg-0.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl") + + # Check that 
we prefer the specific wheel + got = select_whl(whls = WHL_LIST, want_abis = ["cp311"], want_platform = "osx_x86_64") + _match(env, got, "pkg-0.0.1-cp311-cp311-macosx_10_9_x86_64.whl") + + got = select_whl(whls = WHL_LIST, want_abis = ["cp311"], want_platform = "osx_aarch64") + _match(env, got, "pkg-0.0.1-cp311-cp311-macosx_11_0_arm64.whl") + + # Check that we can use the universal2 if the arm wheel is not available + got = select_whl(whls = [w for w in WHL_LIST if "arm64" not in w.filename], want_abis = ["cp311"], want_platform = "osx_aarch64") + _match(env, got, "pkg-0.0.1-cp311-cp311-macosx_10_9_universal2.whl") + + # Check we prefer platform specific wheels + got = select_whl(whls = WHL_LIST, want_abis = ["none", "abi3", "cp39"], want_platform = "linux_x86_64") + _match(env, got, "pkg-0.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl") + +_tests.append(_test_selecting) + +def select_whl_test_suite(name): + """Create the test suite. + + Args: + name: the name of the test suite + """ + test_suite(name = name, basic_tests = _tests) From 186af97675124de1692a6ce40954f85be166d663 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 12:13:56 +0900 Subject: [PATCH 25/70] more cleanup for the lock file --- examples/bzlmod/MODULE.bazel | 7 ++- python/private/BUILD.bazel | 3 + python/private/bzlmod/pip.bzl | 110 +++++++++++++++++----------------- 3 files changed, 63 insertions(+), 57 deletions(-) diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel index e53e01d29b..c1edaca17a 100644 --- a/examples/bzlmod/MODULE.bazel +++ b/examples/bzlmod/MODULE.bazel @@ -117,9 +117,10 @@ pip.parse( "cp39_linux_*", "cp39_*", ], - # TODO @aignas 2024-03-21: how do we handle overrides for the index urls per package? - # index_url_per_package = { - # foo: bar + # One can also select a particular index for a particular package. + # This ensures that the setup is resistant against confusion attacks. 
+ # experimental_index_url_overrides = { + # "my_package": "https://different-index-url.com", + # }, hub_name = "pip", python_version = "3.9", diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index 1bcdc6f314..b105c470aa 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -272,6 +272,9 @@ bzl_library( name = "whl_target_platforms_bzl", srcs = ["whl_target_platforms.bzl"], visibility = ["//:__subpackages__"], + deps = [ + "parse_whl_name_bzl", + ], ) bzl_library( diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 24d4a26819..afb4ee3864 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -127,20 +127,12 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca version_label(pip_attr.python_version), ) + requirements_lock = locked_requirements_label(module_ctx, pip_attr) + # Parse the requirements file directly in starlark to get the information # needed for the whl_libary declarations below. 
- requirements_locks = { - key: module_ctx.read(file) - for key, file in { - "default": pip_attr.requirements_lock, - "host": locked_requirements_label(module_ctx, pip_attr), - "linux": pip_attr.requirements_linux, - "osx": pip_attr.requirements_darwin, - "windows": pip_attr.requirements_windows, - }.items() - if file - } - parse_result = parse_requirements(requirements_locks["host"]) + requirements_lock_content = module_ctx.read(requirements_lock) + parse_result = parse_requirements(requirements_lock_content) # Replicate a surprising behavior that WORKSPACE builds allowed: # Defining a repo with the same name multiple times, but only the last @@ -192,15 +184,15 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca host_cpu, host_os = None, None if pip_attr.experimental_index_url: if pip_attr.download_only: - fail("Currently unsupported") + fail("Currently unsupported to use `download_only` and `experimental_index_url`") index_urls = simpleapi_download( module_ctx, index_url = pip_attr.experimental_index_url, # TODO @aignas 2024-03-28: support index overrides for specific packages # We should never attempt to join index contents ourselves. - index_url_overrides = pip_attr.experimental_index_url_overrides, - sources = requirements_locks.values(), + index_url_overrides = pip_attr.experimental_index_url_overrides or {}, + sources = [requirements_lock_content], envsubst = pip_attr.envsubst, cache = simpleapi_cache, ) @@ -227,10 +219,40 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca group_name = whl_group_mapping.get(whl_name) group_deps = requirement_cycles.get(group_name, []) - urls = [] - sha256 = None - filename = None - extra_whl_pip_args = extra_pip_args + # Construct args separately so that the lock file can be smaller and does not include unused + # attrs. 
+ repo_name = "{}_{}".format(pip_name, whl_name) + whl_library_args = dict( + repo = pip_name, + repo_prefix = pip_name + "_", + requirement = requirement_line, + isolated = use_isolated(module_ctx, pip_attr), + quiet = pip_attr.quiet, + timeout = pip_attr.timeout, + ) | { + key: value + for key, value in dict( + # The following values are safe to omit if they have false like values + annotation = annotation, + download_only = pip_attr.download_only, + enable_implicit_namespace_pkgs = pip_attr.enable_implicit_namespace_pkgs, + environment = pip_attr.environment, + envsubst = pip_attr.envsubst, + experimental_target_platforms = pip_attr.experimental_target_platforms, + extra_pip_args = extra_pip_args, + group_deps = group_deps, + group_name = group_name, + pip_data_exclude = pip_attr.pip_data_exclude, + python_interpreter = pip_attr.python_interpreter, + python_interpreter_target = python_interpreter_target, + whl_patches = { + p: json.encode(args) + for p, args in whl_overrides.get(whl_name, {}).items() + }, + ).items() + if value + } + if index_urls: srcs = get_simpleapi_sources(requirement_line) @@ -251,47 +273,27 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca ) if whl: - requirement_line = srcs.wo_shas - urls.append(whl.url) - sha256 = whl.sha256 - filename = whl.filename - extra_whl_pip_args = None + for unused_attr in [ + # pip is not used to download wheels and the python `whl_library` helpers are only extracting things + "extra_pip_args", + # This is no-op because pip is not used to download the wheel. + "download_only", + ]: + whl_library_args.pop(unused_attr, None) + + whl_library_args.update(dict( + requirement = srcs.wo_shas, + urls = [whl.url], + sha256 = whl.sha256, + filename = whl.filename, + )) else: # TODO @aignas 2024-03-29: in the future we should probably just # use an `sdist` but having this makes it easy to debug issues # in early development stages. 
fail("Could not find whl for: {}".format(requirement_line)) - repo_name = "{}_{}".format(pip_name, whl_name) - whl_library( - name = repo_name, - requirement = requirement_line, - filename = filename, - urls = urls, - sha256 = sha256, - repo = pip_name, - repo_prefix = pip_name + "_", - annotation = annotation, - whl_patches = { - p: json.encode(args) - for p, args in whl_overrides.get(whl_name, {}).items() - }, - experimental_target_platforms = pip_attr.experimental_target_platforms, - python_interpreter = pip_attr.python_interpreter, - python_interpreter_target = python_interpreter_target, - quiet = pip_attr.quiet, - timeout = pip_attr.timeout, - isolated = use_isolated(module_ctx, pip_attr), - extra_pip_args = extra_whl_pip_args, - download_only = pip_attr.download_only, - pip_data_exclude = pip_attr.pip_data_exclude, - enable_implicit_namespace_pkgs = pip_attr.enable_implicit_namespace_pkgs, - environment = pip_attr.environment, - envsubst = pip_attr.envsubst, - group_name = group_name, - group_deps = group_deps, - ) - + whl_library(name = repo_name, **dict(sorted(whl_library_args.items()))) whl_map[hub_name].setdefault(whl_name, []).append( whl_alias( repo = repo_name, From 88ce003626a5ddca14b0c71da33358eb397bda50 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 12:51:34 +0900 Subject: [PATCH 26/70] further cleanup --- CHANGELOG.md | 7 +- python/private/bzlmod/pip.bzl | 10 +- python/private/pypi_index.bzl | 95 ++++++++++--------- python/private/whl_target_platforms.bzl | 11 +++ .../whl_target_platforms/select_whl_tests.bzl | 30 ------ 5 files changed, 71 insertions(+), 82 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 512444d748..d3fa5f1fef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,10 @@ A brief description of the categories of changes: ### Changed +* (bzlmod): The `MODULE.bazel.lock` `whl_library` rule attributes are now + sorted in the attributes section. 
We are also removing values that are not + default in order to reduce the noise. + ### Fixed * (whl_library): Fix the experimental_target_platforms overriding for platform @@ -49,7 +53,8 @@ A brief description of the categories of changes: * (bzlmod) New `experimental_index_url` and `experimental_index_url_overrides` to `pip.parse` for using the bazel downloader. This is currently only working for `whl-only` setups and may contain bugs. If you see any issues, report in - [#1357](https://github.com/bazelbuild/rules_python/issues/1357). + [#1357](https://github.com/bazelbuild/rules_python/issues/1357). The URLs for + the whl files will be written to the lock file. [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 [python_default_visibility]: gazelle/README.md#directive-python_default_visibility diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index afb4ee3864..de6db2e049 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -178,8 +178,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca whl_group_mapping = {} requirement_cycles = {} - # TODO @aignas 2024-03-21: do this outside this function so that we can - # decrease the number of times we call the simple API. index_urls = {} host_cpu, host_os = None, None if pip_attr.experimental_index_url: @@ -410,8 +408,12 @@ def _pip_impl(module_ctx): # Where hub, whl, and pip are the repo names hub_whl_map = {} - # We don't use the `module_ctx.download` mechanisms because we don't want to persist - # this across the evaluations of the extension. + # We use a dictionary as a cache so that we can reuse calls to the simple + # API when evaluating the extension. 
Using the canonical_id parameter of + # the module_ctx would deposit the simple API responses to the bazel cache + # and that is undesirable because additions to the PyPI index would not be + # reflected when re-evaluating the extension unless we do + # `bazel clean --expunge`. simpleapi_cache = {} for mod in module_ctx.modules: diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 35783c24fe..5aa95b6d80 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -14,21 +14,20 @@ """ A file that houses private functions used in the `bzlmod` extension with the same name. - -The functions here should not depend on the `module_ctx` for easy unit testing. """ load("@bazel_features//:features.bzl", "bazel_features") +load("@bazel_skylib//lib:sets.bzl", "sets") load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load(":auth.bzl", "get_auth") load(":envsubst.bzl", "envsubst") load(":normalize_name.bzl", "normalize_name") -def simpleapi_download(module_ctx, *, index_url, index_url_overrides, sources, envsubst, cache = None): +def simpleapi_download(ctx, *, index_url, index_url_overrides, sources, envsubst, cache = None): """Download Simple API HTML. Args: - module_ctx: The bzlmod module_ctx. + ctx: The module_ctx or repository_ctx. index_url: The index. index_url_overrides: The index overrides for separate packages. sources: The sources to download things for. @@ -47,12 +46,12 @@ def simpleapi_download(module_ctx, *, index_url, index_url_overrides, sources, e srcs = {} for pkg, want_shas in sources.simpleapi.items(): - entry = srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) + entry = srcs.setdefault(pkg, {"urls": {}, "want_shas": sets.make()}) # ensure that we have a trailing slash, because we will otherwise get redirects # which may not work on private indexes with netrc authentication. 
entry["urls"]["{}/{}/".format(index_url_overrides.get(pkg, index_url).rstrip("/"), pkg)] = True - entry["want_shas"].update(want_shas) + entry["want_shas"] = sets.union(entry["want_shas"], want_shas) download_kwargs = {} if bazel_features.external_deps.download_has_block_param: @@ -75,11 +74,10 @@ def simpleapi_download(module_ctx, *, index_url, index_url_overrides, sources, e cache_key = cache_key, urls = all_urls, packages = read_simple_api( - module_ctx = module_ctx, + ctx = ctx, url = all_urls, - pkg = pkg, envsubst_vars = envsubst, - **download_kwargs, + **download_kwargs ), ) @@ -91,59 +89,60 @@ def simpleapi_download(module_ctx, *, index_url, index_url_overrides, sources, e return contents -def read_simple_api(module_ctx, url, pkg, envsubst_vars, **download_kwargs): +def read_simple_api(ctx, url, envsubst_vars, **download_kwargs): """Read SimpleAPI. Args: - module_ctx: TODO - url: The url parameter that can be passed to module_ctx.download. - pkg: The pkg to fetch the data for. + ctx: The module_ctx or repository_ctx. + url: The url parameter that can be passed to ctx.download. envsubst_vars: The env vars to do env sub before downloading. - **download_kwargs: Any extra params to module_ctx.download. + **download_kwargs: Any extra params to ctx.download. Note that output and auth will be passed for you. Returns: A similar object to what `download` would return except that in result.out will be the parsed simple api contents. """ - # TODO @aignas 2024-03-26: use a unique path to avoid clashes - output = module_ctx.path("{}/{}.html".format("pypi_index", pkg)) - # TODO: Add a test that env subbed index urls do not leak into the lock file. 
- if type(url) == type(""): - fail("TODO") - else: - real_url = [ - envsubst( - u, - envsubst_vars, - module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, - ) - for u in url - ] - download = module_ctx.download( - url = real_url, + if type(url) == type([]) and len(url) > 1: + fail("Only a single url is supported") + + url = url if type(url) == type("") else url[0] + + output_str = url + for char in [".", ":", "/", "\\", "$", "[", "]", "{", "}", "'", "\"", "-"]: + output_str = output_str.replace(char, "_") + + output = ctx.path(output_str.strip("_").lower() + ".html") + + real_url = envsubst( + url, + envsubst_vars, + ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get, + ) + + download = ctx.download( + url = [real_url], output = output, - auth = get_auth(module_ctx, real_url), + auth = get_auth(ctx, [real_url]), **download_kwargs ) return struct( - contents=lambda: _read_contents( - module_ctx, + contents = lambda: _read_contents( + ctx, download.wait() if download_kwargs.get("block") == False else download, output, url, ), ) - -def _read_contents(module_ctx, result, output, url): +def _read_contents(ctx, result, output, url): if not result.success: - fail("Failed to download from {}: {}".format(url, result)) + fail("Failed to download from {}: {}".format(url, result)) - html = module_ctx.read(output) + html = ctx.read(output) return get_packages(url, html) def get_packages_from_requirements(requirements_files): @@ -160,11 +159,14 @@ def get_packages_from_requirements(requirements_files): for contents in requirements_files: parse_result = parse_requirements(contents) for distribution, line in parse_result.requirements: - want_packages.setdefault(normalize_name(distribution), {}).update({ - # TODO @aignas 2024-03-07: use sets - sha: True - for sha in get_simpleapi_sources(line).shas - }) + distribution = normalize_name(distribution) + shas = want_packages.get(distribution) + if not shas: + shas = sets.make() + 
want_packages[distribution] = shas + + for sha in get_simpleapi_sources(line).shas: + sets.insert(shas, sha) return struct( simpleapi = want_packages, @@ -212,7 +214,7 @@ def get_packages(index_urls, content, want_shas = None): Args: index_urls(list[str]): The URLs that the HTML content can be downloaded from. content(str): The Simple API HTML content. - want_shas(list[str], optional): The list of shas that we need to get, otherwise we'll get all. + want_shas(set[str], optional): The list of shas that we need to get, otherwise we'll get all. Returns: A list of structs with: @@ -223,7 +225,6 @@ def get_packages(index_urls, content, want_shas = None): present, then the 'metadata_url' is also present. Defaults to "". * metadata_url: The URL for the METADATA if we can download it. Defaults to "". """ - want_shas = {sha: True for sha in want_shas} if want_shas else {} packages = [] lines = content.split("") filename, _, tail = tail.partition("<") @@ -279,7 +280,7 @@ def get_packages(index_urls, content, want_shas = None): ), ) - if len(want_shas): + if want_shas: fail( "Indexes {} did not provide packages with all shas: {}".format( index_urls, diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index daba9f2044..6208b7851e 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -18,6 +18,7 @@ A starlark implementation of the wheel platform tag parsing to get the target pl load(":parse_whl_name.bzl", "parse_whl_name") +# Taken from https://peps.python.org/pep-0600/ _LEGACY_ALIASES = { "manylinux1_i686": "manylinux_2_5_i686", "manylinux1_x86_64": "manylinux_2_5_x86_64", @@ -65,6 +66,16 @@ def _whl_priority(value): `universal2`. Ideally we use `select` statements in the hub repo to do the selection based on the config, but for now this is the best way to get this working for the host platform. 
+ + In the future the right thing would be to have `bool_flag` or something + similar to be able to have select statements that do the right thing: + * select whls vs sdists. + * select manylinux vs musllinux + * select universal2 vs arch-specific whls + + All of these can be expressed as configuration settings and included in the + select statements in the `whl` repo. This means that the user can configure + for a particular target what they need. """ if "." in value: value, _, _ = value.partition(".") diff --git a/tests/private/whl_target_platforms/select_whl_tests.bzl b/tests/private/whl_target_platforms/select_whl_tests.bzl index 69d4b3500e..bea5e037fe 100644 --- a/tests/private/whl_target_platforms/select_whl_tests.bzl +++ b/tests/private/whl_target_platforms/select_whl_tests.bzl @@ -24,21 +24,6 @@ WHL_LIST = [ sha256 = "sha256://" + f, ) for f in [ - "pkg-0.0.1-cp310-cp310-macosx_10_9_universal2.whl", - "pkg-0.0.1-cp310-cp310-macosx_10_9_x86_64.whl", - "pkg-0.0.1-cp310-cp310-macosx_11_0_arm64.whl", - "pkg-0.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", - "pkg-0.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", - "pkg-0.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", - "pkg-0.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", - "pkg-0.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", - "pkg-0.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", - "pkg-0.0.1-cp310-cp310-musllinux_1_1_i686.whl", - "pkg-0.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", - "pkg-0.0.1-cp310-cp310-musllinux_1_1_s390x.whl", - "pkg-0.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", - "pkg-0.0.1-cp310-cp310-win32.whl", - "pkg-0.0.1-cp310-cp310-win_amd64.whl", "pkg-0.0.1-cp311-cp311-macosx_10_9_universal2.whl", "pkg-0.0.1-cp311-cp311-macosx_10_9_x86_64.whl", "pkg-0.0.1-cp311-cp311-macosx_11_0_arm64.whl", @@ -54,21 +39,6 @@ WHL_LIST = [ "pkg-0.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", 
"pkg-0.0.1-cp311-cp311-win32.whl", "pkg-0.0.1-cp311-cp311-win_amd64.whl", - "pkg-0.0.1-cp312-cp312-macosx_10_9_universal2.whl", - "pkg-0.0.1-cp312-cp312-macosx_10_9_x86_64.whl", - "pkg-0.0.1-cp312-cp312-macosx_11_0_arm64.whl", - "pkg-0.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", - "pkg-0.0.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", - "pkg-0.0.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", - "pkg-0.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", - "pkg-0.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", - "pkg-0.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", - "pkg-0.0.1-cp312-cp312-musllinux_1_1_i686.whl", - "pkg-0.0.1-cp312-cp312-musllinux_1_1_ppc64le.whl", - "pkg-0.0.1-cp312-cp312-musllinux_1_1_s390x.whl", - "pkg-0.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", - "pkg-0.0.1-cp312-cp312-win32.whl", - "pkg-0.0.1-cp312-cp312-win_amd64.whl", "pkg-0.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", "pkg-0.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", "pkg-0.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", From 41b23af96a9f8956014040e130325d28e1a797e2 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 13:56:41 +0900 Subject: [PATCH 27/70] fixup: add platforms_bzl for pip_bzl --- python/private/bzlmod/BUILD.bazel | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index 8a4bf1a724..a9917b4765 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -38,6 +38,7 @@ bzl_library( "//python/private:parse_whl_name_bzl", "//python/private:version_label_bzl", ":bazel_features_bzl", + ":platforms_bzl", ] + [ "@pythons_hub//:interpreters_bzl", ] if BZLMOD_ENABLED else [], @@ -48,6 +49,11 @@ bzl_library( srcs = ["@bazel_features//:bzl_files"] if BZLMOD_ENABLED else [], ) +bzl_library( + 
name = "platforms_bzl", + srcs = ["@platforms//:constraints.bzl"], +) + bzl_library( name = "pip_repository_bzl", srcs = ["pip_repository.bzl"], From fcf270013ffb84fce9780fc42c2984d06fd61d95 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:33:07 +0900 Subject: [PATCH 28/70] chore: use rules-python-docs as the docs project name --- docs/sphinx/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/pyproject.toml b/docs/sphinx/pyproject.toml index d36c9f269c..947caf969b 100644 --- a/docs/sphinx/pyproject.toml +++ b/docs/sphinx/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "rules_python_docs" +name = "rules-python-docs" version = "0.0.0" dependencies = [ From b6740f53475f609be9fb4851e1573f4dafd702b8 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:37:29 +0900 Subject: [PATCH 29/70] Revert "chore: use rules-python-docs as the docs project name" This reverts commit fcf270013ffb84fce9780fc42c2984d06fd61d95. 
--- docs/sphinx/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/pyproject.toml b/docs/sphinx/pyproject.toml index 947caf969b..d36c9f269c 100644 --- a/docs/sphinx/pyproject.toml +++ b/docs/sphinx/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "rules-python-docs" +name = "rules_python_docs" version = "0.0.0" dependencies = [ From 3c905ae835abf86a8e14a0bdcd2b1594da552974 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:38:25 +0900 Subject: [PATCH 30/70] add bazel_features to WORKSPACE setup --- MODULE.bazel | 4 ++-- internal_deps.bzl | 16 +++++++--------- internal_setup.bzl | 2 ++ python/private/bzlmod/BUILD.bazel | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index 939f416a20..0ccaed0429 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -4,8 +4,8 @@ module( compatibility_level = 1, ) -bazel_dep(name = "bazel_features", version = "1.9.0") -bazel_dep(name = "bazel_skylib", version = "1.3.0") +bazel_dep(name = "bazel_features", version = "1.9.1") +bazel_dep(name = "bazel_skylib", version = "1.5.0") bazel_dep(name = "platforms", version = "0.0.9") # Those are loaded only when using py_proto_library diff --git a/internal_deps.bzl b/internal_deps.bzl index 9931933396..f3be3247e5 100644 --- a/internal_deps.bzl +++ b/internal_deps.bzl @@ -62,15 +62,6 @@ def rules_python_internal_deps(): url = "https://github.com/bazelbuild/rules_testing/releases/download/v0.5.0/rules_testing-v0.5.0.tar.gz", ) - http_archive( - name = "rules_license", - urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_license/releases/download/0.0.7/rules_license-0.0.7.tar.gz", - "https://github.com/bazelbuild/rules_license/releases/download/0.0.7/rules_license-0.0.7.tar.gz", - ], - sha256 = "4531deccb913639c30e5c7512a054d5d875698daeb75d8cf90f284375fe7c360", - ) - http_archive( name = "io_bazel_stardoc", sha256 = 
"62bd2e60216b7a6fec3ac79341aa201e0956477e7c8f6ccc286f279ad1d96432", @@ -221,3 +212,10 @@ def rules_python_internal_deps(): ], sha256 = "4531deccb913639c30e5c7512a054d5d875698daeb75d8cf90f284375fe7c360", ) + + http_archive( + name = "bazel_features", + sha256 = "d7787da289a7fb497352211ad200ec9f698822a9e0757a4976fd9f713ff372b3", + strip_prefix = "bazel_features-1.9.1", + url = "https://github.com/bazel-contrib/bazel_features/releases/download/v1.9.1/bazel_features-v1.9.1.tar.gz", + ) diff --git a/internal_setup.bzl b/internal_setup.bzl index a80099f5f5..bb62611213 100644 --- a/internal_setup.bzl +++ b/internal_setup.bzl @@ -14,6 +14,7 @@ """Setup for rules_python tests and tools.""" +load("@bazel_features//:deps.bzl", "bazel_features_deps") load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace") load("@cgrindel_bazel_starlib//:deps.bzl", "bazel_starlib_dependencies") load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") @@ -42,3 +43,4 @@ def rules_python_internal_setup(): bazel_integration_test_rules_dependencies() bazel_starlib_dependencies() bazel_binaries(versions = SUPPORTED_BAZEL_VERSIONS) + bazel_features_deps() diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index a9917b4765..01afd02903 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -46,7 +46,7 @@ bzl_library( bzl_library( name = "bazel_features_bzl", - srcs = ["@bazel_features//:bzl_files"] if BZLMOD_ENABLED else [], + srcs = ["@bazel_features//:bzl_files"], ) bzl_library( From 459b1ba7eae1e898db904b7ad985dd57b0d764c6 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:54:14 +0900 Subject: [PATCH 31/70] correct the host platform import --- python/private/bzlmod/BUILD.bazel | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index 01afd02903..4458f21aeb 100644 --- 
a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -51,7 +51,9 @@ bzl_library( bzl_library( name = "platforms_bzl", - srcs = ["@platforms//:constraints.bzl"], + srcs = [ + "@platforms//host:constraints.bzl", + ], ) bzl_library( From 095222ebc53af09bb22cf8be07a3481c9fa95078 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 15:11:12 +0900 Subject: [PATCH 32/70] Add an override for testing bazelbuild/platforms#89 --- MODULE.bazel | 8 ++++++++ python/private/bzlmod/BUILD.bazel | 8 +++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index 0ccaed0429..b7c06345f4 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -8,6 +8,14 @@ bazel_dep(name = "bazel_features", version = "1.9.1") bazel_dep(name = "bazel_skylib", version = "1.5.0") bazel_dep(name = "platforms", version = "0.0.9") +# TODO @aignas 2024-03-29: remove once the PR is merged +# https://github.com/bazelbuild/platforms/pull/89 +archive_override( + module_name = "platforms", + strip_prefix = "platforms-647e63090b1455274e6af2900cf7830cd13ce6bb", + urls = ["https://github.com/aignas/platforms/archive/647e63090b1455274e6af2900cf7830cd13ce6bb.zip"], +) + # Those are loaded only when using py_proto_library bazel_dep(name = "rules_proto", version = "5.3.0-21.7") bazel_dep(name = "protobuf", version = "21.7", repo_name = "com_google_protobuf") diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index 4458f21aeb..07c455f942 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -38,7 +38,7 @@ bzl_library( "//python/private:parse_whl_name_bzl", "//python/private:version_label_bzl", ":bazel_features_bzl", - ":platforms_bzl", + ":platforms_host_bzl", ] + [ "@pythons_hub//:interpreters_bzl", ] if BZLMOD_ENABLED else [], @@ -50,10 +50,8 @@ bzl_library( ) bzl_library( - name = "platforms_bzl", - srcs = [ - "@platforms//host:constraints.bzl", 
- ], + name = "platforms_host_bzl", + srcs = ["@platforms//host:srcs"], ) bzl_library( From 134a8fc18f4ca94d688c4834ce21455fb61ab507 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 15:21:31 +0900 Subject: [PATCH 33/70] bump bazel version --- version.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.bzl b/version.bzl index 2e8fc0b0f5..762f0279d8 100644 --- a/version.bzl +++ b/version.bzl @@ -17,7 +17,7 @@ # against. # This version should be updated together with the version of Bazel # in .bazelversion. -BAZEL_VERSION = "7.0.0" +BAZEL_VERSION = "7.1.1" # NOTE: Keep in sync with .bazelci/presubmit.yml # This is the minimum supported bazel version, that we have some tests for. From a7b92f55dd7787e3207be4ef5a878939f411eef1 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Fri, 29 Mar 2024 15:22:20 +0900 Subject: [PATCH 34/70] fixup: add the platforms host only under bzlmod --- python/private/bzlmod/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index 07c455f942..68e3aeb839 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -51,7 +51,7 @@ bzl_library( bzl_library( name = "platforms_host_bzl", - srcs = ["@platforms//host:srcs"], + srcs = ["@platforms//host:srcs"] if BZLMOD_ENABLED else [], ) bzl_library( From 27307f0bfe16dd4356f67473e43176d395132227 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:39:44 +0900 Subject: [PATCH 35/70] fix(platforms): depend on module_ctx.os for getting host platform Here we copy the translation dictionaries from the host extension on platforms 0.0.9 as this code will be temporary anyway and in the future we will not be selecting the host whl in the extension/repository rule context --- MODULE.bazel | 10 +----- 
python/private/bzlmod/BUILD.bazel | 6 ---- python/private/bzlmod/pip.bzl | 14 ++------ python/private/whl_target_platforms.bzl | 43 +++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 30 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index b7c06345f4..ac4c12cfad 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -6,15 +6,7 @@ module( bazel_dep(name = "bazel_features", version = "1.9.1") bazel_dep(name = "bazel_skylib", version = "1.5.0") -bazel_dep(name = "platforms", version = "0.0.9") - -# TODO @aignas 2024-03-29: remove once the PR is merged -# https://github.com/bazelbuild/platforms/pull/89 -archive_override( - module_name = "platforms", - strip_prefix = "platforms-647e63090b1455274e6af2900cf7830cd13ce6bb", - urls = ["https://github.com/aignas/platforms/archive/647e63090b1455274e6af2900cf7830cd13ce6bb.zip"], -) +bazel_dep(name = "platforms", version = "0.0.4") # Those are loaded only when using py_proto_library bazel_dep(name = "rules_proto", version = "5.3.0-21.7") diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index 68e3aeb839..0ec95e4bed 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -38,7 +38,6 @@ bzl_library( "//python/private:parse_whl_name_bzl", "//python/private:version_label_bzl", ":bazel_features_bzl", - ":platforms_host_bzl", ] + [ "@pythons_hub//:interpreters_bzl", ] if BZLMOD_ENABLED else [], @@ -49,11 +48,6 @@ bzl_library( srcs = ["@bazel_features//:bzl_files"], ) -bzl_library( - name = "platforms_host_bzl", - srcs = ["@platforms//host:srcs"] if BZLMOD_ENABLED else [], -) - bzl_library( name = "pip_repository_bzl", srcs = ["pip_repository.bzl"], diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index de6db2e049..ebeb534bcb 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -15,7 +15,6 @@ "pip module extension for use with bzlmod" load("@bazel_features//:features.bzl", "bazel_features") 
-load("@platforms//host:constraints.bzl", "HOST_CONSTRAINTS") load("@pythons_hub//:interpreters.bzl", "DEFAULT_PYTHON_VERSION", "INTERPRETER_LABELS") load( "//python/pip_install:pip_repository.bzl", @@ -179,7 +178,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca requirement_cycles = {} index_urls = {} - host_cpu, host_os = None, None if pip_attr.experimental_index_url: if pip_attr.download_only: fail("Currently unsupported to use `download_only` and `experimental_index_url`") @@ -195,15 +193,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca cache = simpleapi_cache, ) - for constraint in HOST_CONSTRAINTS: - if "@platforms//cpu:" in constraint: - _, _, host_cpu = constraint.partition(":") - elif "@platforms//os:" in constraint: - _, _, host_os = constraint.partition(":") - - if not (host_os and host_cpu): - fail("Don't have host information") - major_minor = _major_minor_version(pip_attr.python_version) # Create a new wheel library for each of the different whls @@ -267,7 +256,8 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca # Older python versions have wheels for the `*m` ABI. 
"cp" + major_minor.replace(".", "") + "m", ], - want_platform = "{}_{}".format(host_os, host_cpu), + want_os = module_ctx.os.name, + want_cpu = module_ctx.os.arch, ) if whl: diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index 6208b7851e..ef6030efb7 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -33,6 +33,39 @@ _LEGACY_ALIASES = { "manylinux2014_x86_64": "manylinux_2_17_x86_64", } +# _translate_cpu and _translate_os from @platforms//host:extension.bzl +def _translate_cpu(arch): + if arch in ["i386", "i486", "i586", "i686", "i786", "x86"]: + return "x86_32" + if arch in ["amd64", "x86_64", "x64"]: + return "x86_64" + if arch in ["ppc", "ppc64", "ppc64le"]: + return "ppc" + if arch in ["arm", "armv7l"]: + return "arm" + if arch in ["aarch64"]: + return "aarch64" + if arch in ["s390x", "s390"]: + return "s390x" + if arch in ["mips64el", "mips64"]: + return "mips64" + if arch in ["riscv64"]: + return "riscv64" + return None + +def _translate_os(os): + if os.startswith("mac os"): + return "osx" + if os.startswith("freebsd"): + return "freebsd" + if os.startswith("openbsd"): + return "openbsd" + if os.startswith("linux"): + return "linux" + if os.startswith("windows"): + return "windows" + return None + # The order of the dictionaries is to keep definitions with their aliases next to each # other _CPU_ALIASES = { @@ -114,13 +147,14 @@ def _whl_priority(value): # Windows does not have multiple wheels for the same target platform return offset -def select_whl(*, whls, want_abis, want_platform): +def select_whl(*, whls, want_abis, want_os, want_cpu): """Select a suitable wheel from a list. Args: whls(list[struct]): A list of candidates. want_abis(list[str]): A list of ABIs that are supported. - want_platform(str): A string platform that can be derived from `{os}_{cpu}` values. + want_os(str): The module_ctx.os.name. + want_cpu(str): The module_ctx.os.arch. 
Returns: A struct with `url`, `sha256` and `filename` attributes for the selected whl. @@ -175,7 +209,10 @@ def select_whl(*, whls, want_abis, want_platform): target_plats[p] = sorted(platform_tags, key = _whl_priority) - want = target_plats.get(want_platform) + want = target_plats.get("{}_{}".format( + _translate_os(want_os), + _translate_cpu(want_cpu), + )) if not want: return want From 8fe82bae7e6670441e90b168c8f58fac12ee46e7 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:42:36 +0900 Subject: [PATCH 36/70] comment: revert bazel to 7.0.0 It is not mandatory to have 7.1.1, I was just testing the new async APIs --- .bazelversion | 2 +- version.bzl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bazelversion b/.bazelversion index 21c8c7b46b..66ce77b7ea 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -7.1.1 +7.0.0 diff --git a/version.bzl b/version.bzl index 762f0279d8..2e8fc0b0f5 100644 --- a/version.bzl +++ b/version.bzl @@ -17,7 +17,7 @@ # against. # This version should be updated together with the version of Bazel # in .bazelversion. -BAZEL_VERSION = "7.1.1" +BAZEL_VERSION = "7.0.0" # NOTE: Keep in sync with .bazelci/presubmit.yml # This is the minimum supported bazel version, that we have some tests for. From 1d7a0632f5fb16febca4aaa374a3c01c0827f641 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:16:09 +0900 Subject: [PATCH 37/70] comment/improvement: pass the auth attrs around This also cleans up the get_auth code so that it is clearer that we can handle module_ctx and repository_ctx. This makes it possible for the user to pass/configure auth_patterns to pip.parse and setup their own authentication mechanisms.
--- .bazelrc | 4 +- examples/bzlmod/MODULE.bazel | 2 +- python/pip_install/pip_repository.bzl | 121 ++++++++++++++------------ python/private/auth.bzl | 83 +++++++++++++++--- python/private/bzlmod/pip.bzl | 24 +++-- python/private/pypi_index.bzl | 36 +++++--- 6 files changed, 180 insertions(+), 90 deletions(-) diff --git a/.bazelrc b/.bazelrc index 61fd0e7601..94cfb93350 100644 --- a/.bazelrc +++ b/.bazelrc @@ -4,8 +4,8 @@ # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, execute # `bazel run @rules_bazel_integration_test//tools:update_deleted_packages` -build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,te
sts/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered -query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +build 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +query 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered test --test_output=errors diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel index c1edaca17a..1c9524143e 100644 --- a/examples/bzlmod/MODULE.bazel +++ b/examples/bzlmod/MODULE.bazel @@ -120,7 +120,7 @@ pip.parse( # One can also select a particular index for a particular package. # This ensures that the setup is resistant against confusion attacks. 
# experimental_index_url_overrides = { - # 'my_package": "https://different-index-url.com", + # "my_package": "https://different-index-url.com", # }, hub_name = "pip", python_version = "3.9", diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index ecbab7fb30..3b7bf06290 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -22,7 +22,8 @@ load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse load("//python/pip_install/private:generate_group_library_build_bazel.bzl", "generate_group_library_build_bazel") load("//python/pip_install/private:generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel") load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS") -load("//python/private:auth.bzl", "get_auth") +load("//python/private:auth.bzl", "AUTH_ATTRS", "get_auth") +load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED") load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") @@ -917,65 +918,71 @@ if __name__ == "__main__": return contents # NOTE @aignas 2024-03-21: The usage of dict({}, **common) ensures that all args to `dict` are unique -whl_library_attrs = dict({ - "annotation": attr.label( - doc = ( - "Optional json encoded file containing annotation to apply to the extracted wheel. " + - "See `package_annotation`" +def whl_library_attrs(): + attrs = dict({ + "annotation": attr.label( + doc = ( + "Optional json encoded file containing annotation to apply to the extracted wheel. 
" + + "See `package_annotation`" + ), + allow_files = True, ), - allow_files = True, - ), - "filename": attr.string( - doc = "Download the whl file to this filename.", - ), - "group_deps": attr.string_list( - doc = "List of dependencies to skip in order to break the cycles within a dependency group.", - default = [], - ), - "group_name": attr.string( - doc = "Name of the group, if any.", - ), - "repo": attr.string( - mandatory = True, - doc = "Pointer to parent repo name. Used to make these rules rerun if the parent repo changes.", - ), - "requirement": attr.string( - mandatory = True, - doc = "Python requirement string describing the package to make available, if 'urls' or 'whl_file' is given, then this only needs to include foo[any_extras] as a bare minimum.", - ), - "sha256": attr.string( - doc = "The sha256 of the downloaded whl", - ), - "urls": attr.string_list( - doc = "The url of the whl to be downloaded using bazel downloader", - ), - "whl_file": attr.label( - doc = "The whl file that should be used instead of downloading", - ), - "whl_patches": attr.label_keyed_string_dict( - doc = """a label-keyed-string dict that has - json.encode(struct([whl_file], patch_strip]) as values. This - is to maintain flexibility and correct bzlmod extension interface - until we have a better way to define whl_library and move whl - patching to a separate place. INTERNAL USE ONLY.""", - ), - "_python_path_entries": attr.label_list( - # Get the root directory of these rules and keep them as a default attribute - # in order to avoid unnecessary repository fetching restarts. 
- # - # This is very similar to what was done in https://github.com/bazelbuild/rules_go/pull/3478 - default = [ - Label("//:BUILD.bazel"), - ] + [ - # Includes all the external dependencies from repositories.bzl - Label("@" + repo + "//:BUILD.bazel") - for repo in all_requirements - ], - ), -}, **common_attrs) + "filename": attr.string( + doc = "Download the whl file to this filename.", + ), + "group_deps": attr.string_list( + doc = "List of dependencies to skip in order to break the cycles within a dependency group.", + default = [], + ), + "group_name": attr.string( + doc = "Name of the group, if any.", + ), + "repo": attr.string( + mandatory = True, + doc = "Pointer to parent repo name. Used to make these rules rerun if the parent repo changes.", + ), + "requirement": attr.string( + mandatory = True, + doc = "Python requirement string describing the package to make available, if 'urls' or 'whl_file' is given, then this only needs to include foo[any_extras] as a bare minimum.", + ), + "sha256": attr.string( + doc = "The sha256 of the downloaded whl", + ), + "urls": attr.string_list( + doc = "The url of the whl to be downloaded using bazel downloader", + ), + "whl_file": attr.label( + doc = "The whl file that should be used instead of downloading", + ), + "whl_patches": attr.label_keyed_string_dict( + doc = """a label-keyed-string dict that has + json.encode(struct([whl_file], patch_strip]) as values. This + is to maintain flexibility and correct bzlmod extension interface + until we have a better way to define whl_library and move whl + patching to a separate place. INTERNAL USE ONLY.""", + ), + "_python_path_entries": attr.label_list( + # Get the root directory of these rules and keep them as a default attribute + # in order to avoid unnecessary repository fetching restarts. 
+ # + # This is very similar to what was done in https://github.com/bazelbuild/rules_go/pull/3478 + default = [ + Label("//:BUILD.bazel"), + ] + [ + # Includes all the external dependencies from repositories.bzl + Label("@" + repo + "//:BUILD.bazel") + for repo in all_requirements + ], + ), + }, **common_attrs) + + if BZLMOD_ENABLED: + attrs.update(AUTH_ATTRS) + + return attrs whl_library = repository_rule( - attrs = whl_library_attrs, + attrs = whl_library_attrs(), doc = """ Download and extracts a single wheel based into a bazel repo based on the requirement string passed in. Instantiated from pip_repository and inherits config options from there.""", diff --git a/python/private/auth.bzl b/python/private/auth.bzl index 2b067fd088..6b612678c8 100644 --- a/python/private/auth.bzl +++ b/python/private/auth.bzl @@ -17,29 +17,90 @@ The implementation below is copied directly from Bazel's implementation of `http_archive`. Accordingly, the return value of this function should be used identically as the `auth` parameter of `http_archive`. Reference: https://github.com/bazelbuild/bazel/blob/6.3.2/tools/build_defs/repo/http.bzl#L109 + +The helpers were further modified to support module_ctx. """ -# TODO @aignas 2023-12-18: use the following instead when available. -# load("@bazel_tools//tools/build_defs/repo:utils.bzl", "get_auth") load("@bazel_tools//tools/build_defs/repo:utils.bzl", "read_netrc", "read_user_netrc", "use_netrc") -def get_auth(rctx, urls): +# Copied from https://sourcegraph.com/github.com/bazelbuild/bazel@26c6add3f9809611ad3795bce1e5c0fb37902902/-/blob/tools/build_defs/repo/http.bzl +_AUTH_PATTERN_DOC = """An optional dict mapping host names to custom authorization patterns. + +If a URL's host name is present in this dict the value will be used as a pattern when +generating the authorization header for the http request. This enables the use of custom +authorization schemes used in a lot of common cloud storage providers. 
+ +The pattern currently supports 2 tokens: <login> and +<password>, which are replaced with their equivalent value +in the netrc file for the same host name. After formatting, the result is set +as the value for the Authorization field of the HTTP request. + +Example attribute and netrc for a http download to an oauth2 enabled API using a bearer token: + +
+auth_patterns = {
+    "storage.cloudprovider.com": "Bearer <password>"
+}
+
+ +netrc: + +
+machine storage.cloudprovider.com
+        password RANDOM-TOKEN
+
+ +The final HTTP request would have the following header: + +
+Authorization: Bearer RANDOM-TOKEN
+
+""" + +# AUTH_ATTRS are used within whl_library and pip bzlmod extension. +AUTH_ATTRS = { + "auth_patterns": attr.string_dict( + doc = _AUTH_PATTERN_DOC, + ), + "netrc": attr.string( + doc = "Location of the .netrc file to use for authentication", + ), +} + +def get_auth(ctx, urls, ctx_attr = None): """Utility for retrieving netrc-based authentication parameters for repository download rules used in python_repository. Args: - rctx (repository_ctx): The repository rule's context object. + ctx(repository_ctx or module_ctx): The extension module_ctx or + repository rule's repository_ctx object. urls: A list of URLs from which assets will be downloaded. + ctx_attr(struct): The attributes to get the netrc from. When ctx is + repository_ctx, then we will attempt to use repository_ctx.attr + if this is not specified, otherwise we will use the specified + field. The module_ctx attributes are located in the tag classes + so it cannot be retrieved from the context. Returns: dict: A map of authentication parameters by URL. """ - attr = getattr(rctx, "attr", None) - if getattr(attr, "netrc", None): - netrc = read_netrc(rctx, getattr(attr, "netrc")) - elif "NETRC" in rctx.os.environ: - netrc = read_netrc(rctx, rctx.os.environ["NETRC"]) + # module_ctx does not have attributes, as they are stored in tag classes. 
Whilst + # the correct behaviour should be to pass the `attr` to the function explicitly via `ctx_attr`, we fall back to `ctx.attr` when it is not given. + ctx_attr = ctx_attr or getattr(ctx, "attr", None) + ctx_attr = struct( + netrc = getattr(ctx_attr, "netrc", None), + auth_patterns = getattr(ctx_attr, "auth_patterns", ""), + ) + + if ctx_attr.netrc: + netrc = read_netrc(ctx, ctx_attr.netrc) + elif "NETRC" in ctx.os.environ: + # This can be used on newer bazel versions + if hasattr(ctx, "getenv"): + netrc = read_netrc(ctx, ctx.getenv("NETRC")) + else: + netrc = read_netrc(ctx, ctx.os.environ["NETRC"]) else: - netrc = read_user_netrc(rctx) + netrc = read_user_netrc(ctx) - return use_netrc(netrc, urls, getattr(attr, "auth_patterns", "")) + return use_netrc(netrc, urls, ctx_attr.auth_patterns) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index ebeb534bcb..190b5c34d5 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -25,6 +25,7 @@ load( "whl_library", ) load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load("//python/private:auth.bzl", "AUTH_ATTRS") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") load("//python/private:pypi_index.bzl", "get_simpleapi_sources", "simpleapi_download") @@ -184,12 +185,17 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca index_urls = simpleapi_download( module_ctx, - index_url = pip_attr.experimental_index_url, - # TODO @aignas 2024-03-28: support index overrides for specific packages - # We should never attempt to join index contents ourselves. - index_url_overrides = pip_attr.experimental_index_url_overrides or {}, - sources = [requirements_lock_content], - envsubst = pip_attr.envsubst, + attr = struct( + index_url = pip_attr.experimental_index_url, + # TODO @aignas 2024-03-28: support index overrides for specific packages + # We should never attempt to join index contents ourselves.
+ index_url_overrides = pip_attr.experimental_index_url_overrides or {}, + sources = [requirements_lock_content], + envsubst = pip_attr.envsubst, + # Auth related info + netrc = pip_attr.netrc, + auth_patterns = pip_attr.auth_patterns, + ), cache = simpleapi_cache, ) @@ -275,6 +281,11 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca sha256 = whl.sha256, filename = whl.filename, )) + if pip_attr.netrc: + whl_library_args["netrc"] = pip_attr.netrc + if pip_attr.auth_patterns: + whl_library_args["auth_patterns"] = pip_attr.auth_patterns + else: # TODO @aignas 2024-03-29: in the future we should probably just # use an `sdist` but having this makes it easy to debug issues @@ -515,6 +526,7 @@ The labels are JSON config files describing the modifications. """, ), }, **pip_repository_attrs) + attrs.update(AUTH_ATTRS) # Like the pip_repository rule, we end up setting this manually so # don't allow users to override it. diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 5aa95b6d80..3080df1b08 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -23,25 +23,30 @@ load(":auth.bzl", "get_auth") load(":envsubst.bzl", "envsubst") load(":normalize_name.bzl", "normalize_name") -def simpleapi_download(ctx, *, index_url, index_url_overrides, sources, envsubst, cache = None): +def simpleapi_download(ctx, *, attr, cache = None): """Download Simple API HTML. Args: ctx: The module_ctx or repository_ctx. - index_url: The index. - index_url_overrides: The index overrides for separate packages. - sources: The sources to download things for. - envsubst: The envsubst vars. + attr: Contains the parameters for the download. They are grouped into a + struct for better clarity. It must have attributes: + * index_url: The index. + * index_url_overrides: The index overrides for separate packages. + * sources: The sources to download things for. + * envsubst: The envsubst vars. 
+ * netrc: The netrc parameter for ctx.download, see http_file for docs. + * auth_patterns: The auth_patterns parameter for ctx.download, see + http_file for docs. cache: A dictionary that can be used as a cache between calls during a single evaluation of the extension. Returns: dict of pkg name to the HTML contents. """ - sources = get_packages_from_requirements(sources) + sources = get_packages_from_requirements(attr.sources) index_url_overrides = { normalize_name(p): i - for p, i in (index_url_overrides or {}).items() + for p, i in (attr.index_url_overrides or {}).items() } srcs = {} @@ -50,7 +55,7 @@ def simpleapi_download(ctx, *, index_url, index_url_overrides, sources, envsubst # ensure that we have a trailing slash, because we will otherwise get redirects # which may not work on private indexes with netrc authentication. - entry["urls"]["{}/{}/".format(index_url_overrides.get(pkg, index_url).rstrip("/"), pkg)] = True + entry["urls"]["{}/{}/".format(index_url_overrides.get(pkg, attr.index_url).rstrip("/"), pkg)] = True entry["want_shas"] = sets.union(entry["want_shas"], want_shas) download_kwargs = {} @@ -76,7 +81,7 @@ def simpleapi_download(ctx, *, index_url, index_url_overrides, sources, envsubst packages = read_simple_api( ctx = ctx, url = all_urls, - envsubst_vars = envsubst, + attr = attr, **download_kwargs ), ) @@ -89,13 +94,18 @@ def simpleapi_download(ctx, *, index_url, index_url_overrides, sources, envsubst return contents -def read_simple_api(ctx, url, envsubst_vars, **download_kwargs): +def read_simple_api(ctx, url, attr, **download_kwargs): """Read SimpleAPI. Args: ctx: The module_ctx or repository_ctx. url: The url parameter that can be passed to ctx.download. - envsubst_vars: The env vars to do env sub before downloading. + attr: The attribute that contains necessary info for downloading. The + following attributes must be present: + * envsubst: The env vars to do env sub before downloading. 
+ * netrc: The netrc parameter for ctx.download, see http_file for docs. + * auth_patterns: The auth_patterns parameter for ctx.download, see + http_file for docs. **download_kwargs: Any extra params to ctx.download. Note that output and auth will be passed for you. @@ -118,14 +128,14 @@ def read_simple_api(ctx, url, envsubst_vars, **download_kwargs): real_url = envsubst( url, - envsubst_vars, + attr.envsubst, ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get, ) download = ctx.download( url = [real_url], output = output, - auth = get_auth(ctx, [real_url]), + auth = get_auth(ctx, [real_url], ctx_attr = attr), **download_kwargs ) From 0a6b8507d57c05f17c1ccede1678e50a0c53da56 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:18:45 +0900 Subject: [PATCH 38/70] comment: rewrite maybe_args loop --- python/private/bzlmod/pip.bzl | 44 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 190b5c34d5..5b1b1f0fb7 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -222,29 +222,27 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca isolated = use_isolated(module_ctx, pip_attr), quiet = pip_attr.quiet, timeout = pip_attr.timeout, - ) | { - key: value - for key, value in dict( - # The following values are safe to omit if they have false like values - annotation = annotation, - download_only = pip_attr.download_only, - enable_implicit_namespace_pkgs = pip_attr.enable_implicit_namespace_pkgs, - environment = pip_attr.environment, - envsubst = pip_attr.envsubst, - experimental_target_platforms = pip_attr.experimental_target_platforms, - extra_pip_args = extra_pip_args, - group_deps = group_deps, - group_name = group_name, - pip_data_exclude = pip_attr.pip_data_exclude, - python_interpreter = pip_attr.python_interpreter, - 
python_interpreter_target = python_interpreter_target, - whl_patches = { - p: json.encode(args) - for p, args in whl_overrides.get(whl_name, {}).items() - }, - ).items() - if value - } + ) + maybe_args = dict( + # The following values are safe to omit if they have false like values + annotation = annotation, + download_only = pip_attr.download_only, + enable_implicit_namespace_pkgs = pip_attr.enable_implicit_namespace_pkgs, + environment = pip_attr.environment, + envsubst = pip_attr.envsubst, + experimental_target_platforms = pip_attr.experimental_target_platforms, + extra_pip_args = extra_pip_args, + group_deps = group_deps, + group_name = group_name, + pip_data_exclude = pip_attr.pip_data_exclude, + python_interpreter = pip_attr.python_interpreter, + python_interpreter_target = python_interpreter_target, + whl_patches = { + p: json.encode(args) + for p, args in whl_overrides.get(whl_name, {}).items() + }, + ) + whl_library_args.update({k: v for k, v in maybe_args.items() if v}) if index_urls: srcs = get_simpleapi_sources(requirement_line) From f0eff9c511cc1a70c21902c5f559789f55409c61 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:19:40 +0900 Subject: [PATCH 39/70] comment: unroll the loop which is popping args --- python/private/bzlmod/pip.bzl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 5b1b1f0fb7..8e36802bcb 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -265,13 +265,11 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca ) if whl: - for unused_attr in [ - # pip is not used to download wheels and the python `whl_library` helpers are only extracting things - "extra_pip_args", - # This is no-op because pip is not used to download the wheel. 
- "download_only", - ]: - whl_library_args.pop(unused_attr, None) + # pip is not used to download wheels and the python `whl_library` helpers are only extracting things + whl_library_args.pop("extra_pip_args", None) + + # This is no-op because pip is not used to download the wheel. + whl_library_args.pop("download_only", None) whl_library_args.update(dict( requirement = srcs.wo_shas, From 097009abcbfcc58f24b1590ac26e4514dc448659 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:22:23 +0900 Subject: [PATCH 40/70] comment: use dict accessors instead of dict.update --- python/private/bzlmod/pip.bzl | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 8e36802bcb..19cf60a5e2 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -265,23 +265,20 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca ) if whl: - # pip is not used to download wheels and the python `whl_library` helpers are only extracting things - whl_library_args.pop("extra_pip_args", None) - - # This is no-op because pip is not used to download the wheel. - whl_library_args.pop("download_only", None) - - whl_library_args.update(dict( - requirement = srcs.wo_shas, - urls = [whl.url], - sha256 = whl.sha256, - filename = whl.filename, - )) + whl_library_args["requirement"] = srcs.wo_shas + whl_library_args["urls"] = [whl.url] + whl_library_args["sha256"] = whl.sha256 + whl_library_args["filename"] = whl.filename if pip_attr.netrc: whl_library_args["netrc"] = pip_attr.netrc if pip_attr.auth_patterns: whl_library_args["auth_patterns"] = pip_attr.auth_patterns + # pip is not used to download wheels and the python `whl_library` helpers are only extracting things + whl_library_args.pop("extra_pip_args", None) + + # This is no-op because pip is not used to download the wheel.
+ whl_library_args.pop("download_only", None) else: # TODO @aignas 2024-03-29: in the future we should probably just # use an `sdist` but having this makes it easy to debug issues From ff3983ac473ceed8d59f250d631f8b1a0094ec8a Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:22:44 +0900 Subject: [PATCH 41/70] comment: add a note on why we are sorting attrs --- python/private/bzlmod/pip.bzl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 19cf60a5e2..69838f4694 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -268,7 +268,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca whl_library_args["requirement"] = srcs.wo_shas whl_library_args["urls"] = [whl.url] whl_library_args["sha256"] = whl.sha256 - whl_library_args["filename"] = whl.filename + whl_library_args["filename"] = whl.filenam2 if pip_attr.netrc: whl_library_args["netrc"] = pip_attr.netrc if pip_attr.auth_patterns: @@ -285,6 +285,9 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca # in early development stages. fail("Could not find whl for: {}".format(requirement_line)) + # We sort so that the lock-file remains the same no matter the order of how the + # args are manipulated in the code going before. Maybe this will not be needed + # in the future. 
whl_library(name = repo_name, **dict(sorted(whl_library_args.items()))) whl_map[hub_name].setdefault(whl_name, []).append( whl_alias( From 37c3ede94fdcff0d821022d8497a76c0baf21a32 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:24:21 +0900 Subject: [PATCH 42/70] comment: move a comment --- python/private/bzlmod/pip.bzl | 6 ------ python/private/pypi_index.bzl | 8 +++++++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 69838f4694..4469831d32 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -405,12 +405,6 @@ def _pip_impl(module_ctx): # Where hub, whl, and pip are the repo names hub_whl_map = {} - # We use a dictionary as a cache so that we can reuse calls to the simple - # API when evaluating the extension. Using the canonical_id parameter of - # the module_ctx would deposit the simple API responses to the bazel cache - # and that is undesirable because additions to the PyPI index would not be - # reflected when re-evaluating the extension unless we do - # `bazel clean --expunge`. simpleapi_cache = {} for mod in module_ctx.modules: diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 3080df1b08..1889029604 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -38,7 +38,13 @@ def simpleapi_download(ctx, *, attr, cache = None): * auth_patterns: The auth_patterns parameter for ctx.download, see http_file for docs. cache: A dictionary that can be used as a cache between calls during a - single evaluation of the extension. + single evaluation of the extension. We use a dictionary as a cache + so that we can reuse calls to the simple API when evaluating the + extension. 
Using the canonical_id parameter of the module_ctx would + deposit the simple API responses to the bazel cache and that is + undesirable because additions to the PyPI index would not be + reflected when re-evaluating the extension unless we do + `bazel clean --expunge`. Returns: dict of pkg name to the HTML contents. From 09c724d2ba01276da23e4daf646367354dc63aab Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:35:48 +0900 Subject: [PATCH 43/70] comment: address comments in pypi_index code 1/n --- python/private/pypi_index.bzl | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 1889029604..95f0d8cc35 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -23,7 +23,7 @@ load(":auth.bzl", "get_auth") load(":envsubst.bzl", "envsubst") load(":normalize_name.bzl", "normalize_name") -def simpleapi_download(ctx, *, attr, cache = None): +def simpleapi_download(ctx, *, attr, cache): """Download Simple API HTML. Args: @@ -73,13 +73,13 @@ def simpleapi_download(ctx, *, attr, cache = None): contents = {} for pkg, args in srcs.items(): all_urls = list(args["urls"].keys()) - cache_key = "" - if cache != None: - # FIXME @aignas 2024-03-28: should I envsub this? - cache_key = ",".join(all_urls) - if cache_key in cache: - contents[pkg] = cache[cache_key] - continue + + # FIXME @aignas 2024-03-28: should I envsub this? + # Sort for a stable cache key + cache_key = ",".join(sorted(all_urls)) + if cache_key in cache: + contents[pkg] = cache[cache_key] + continue downloads[pkg] = struct( cache_key = cache_key, @@ -92,11 +92,11 @@ def simpleapi_download(ctx, *, attr, cache = None): ), ) + # If we use `block` == False, then we need to have a second loop that is + # collecting all of the results as they were being downloaded in parallel.
for pkg, download in downloads.items(): contents[pkg] = download.packages.contents() - - if cache != None and download.cache_key: - cache[download.cache_key] = contents[pkg] + cache.setdefault(download.cache_key, contents[pkg]) return contents @@ -138,6 +138,7 @@ def read_simple_api(ctx, url, attr, **download_kwargs): ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get, ) + # NOTE: this may have block = True or block = False in the download_kwargs download = ctx.download( url = [real_url], output = output, @@ -146,7 +147,7 @@ def read_simple_api(ctx, url, attr, **download_kwargs): ) return struct( - contents = lambda: _read_contents( + contents = lambda: _read_index_result( ctx, download.wait() if download_kwargs.get("block") == False else download, output, @@ -154,7 +155,7 @@ def read_simple_api(ctx, url, attr, **download_kwargs): ), ) -def _read_contents(ctx, result, output, url): +def _read_index_result(ctx, result, output, url): if not result.success: fail("Failed to download from {}: {}".format(url, result)) From 133119393a601f4325c815dd5f650a759256c965 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:36:52 +0900 Subject: [PATCH 44/70] comment: improve naming --- python/private/bzlmod/pip.bzl | 2 +- python/private/pypi_index.bzl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 4469831d32..c2a7948e6a 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -265,7 +265,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca ) if whl: - whl_library_args["requirement"] = srcs.wo_shas + whl_library_args["requirement"] = srcs.requirement whl_library_args["urls"] = [whl.url] whl_library_args["sha256"] = whl.sha256 whl_library_args["filename"] = whl.filenam2 diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 
95f0d8cc35..2fde8801b7 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -220,7 +220,7 @@ def get_simpleapi_sources(line): head = head + ";" + maybe_hashes.partition("--hash=")[0].strip() return struct( - wo_shas = line if not shas else head, + requirement = line if not shas else head, version = version, shas = sorted(shas), ) From 02f7e8948491482b2e24d0d6391c043f919eb17f Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:37:24 +0900 Subject: [PATCH 45/70] fixup! comment: improve naming --- python/private/bzlmod/pip.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index c2a7948e6a..bc9cbbe05c 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -268,7 +268,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca whl_library_args["requirement"] = srcs.requirement whl_library_args["urls"] = [whl.url] whl_library_args["sha256"] = whl.sha256 - whl_library_args["filename"] = whl.filenam2 + whl_library_args["filename"] = whl.filename if pip_attr.netrc: whl_library_args["netrc"] = pip_attr.netrc if pip_attr.auth_patterns: From b05094b6245ec16c9d95e59c348435590ea1409d Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:56:00 +0900 Subject: [PATCH 46/70] simplify yanking behaviour and remove want_shas --- python/private/bzlmod/pip.bzl | 5 ++- python/private/pypi_index.bzl | 65 ++++++++--------------------------- 2 files changed, 19 insertions(+), 51 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index bc9cbbe05c..8971091343 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -251,7 +251,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca whls = [ src for src in 
index_urls[whl_name] - if src.sha256 in srcs.shas and src.filename.endswith(".whl") + # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api + # + # For now we just exclude such artifacts. + if src.sha256 in srcs.shas and src.filename.endswith(".whl") and not src.yanked ], want_abis = [ "none", diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 2fde8801b7..5207d9f8b0 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -17,7 +17,6 @@ A file that houses private functions used in the `bzlmod` extension with the sam """ load("@bazel_features//:features.bzl", "bazel_features") -load("@bazel_skylib//lib:sets.bzl", "sets") load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load(":auth.bzl", "get_auth") load(":envsubst.bzl", "envsubst") @@ -49,21 +48,11 @@ def simpleapi_download(ctx, *, attr, cache): Returns: dict of pkg name to the HTML contents. """ - sources = get_packages_from_requirements(attr.sources) index_url_overrides = { normalize_name(p): i for p, i in (attr.index_url_overrides or {}).items() } - srcs = {} - for pkg, want_shas in sources.simpleapi.items(): - entry = srcs.setdefault(pkg, {"urls": {}, "want_shas": sets.make()}) - - # ensure that we have a trailing slash, because we will otherwise get redirects - # which may not work on private indexes with netrc authentication. 
- entry["urls"]["{}/{}/".format(index_url_overrides.get(pkg, attr.index_url).rstrip("/"), pkg)] = True - entry["want_shas"] = sets.union(entry["want_shas"], want_shas) - download_kwargs = {} if bazel_features.external_deps.download_has_block_param: download_kwargs["block"] = False @@ -71,22 +60,23 @@ def simpleapi_download(ctx, *, attr, cache): # Download in parallel if possible downloads = {} contents = {} - for pkg, args in srcs.items(): - all_urls = list(args["urls"].keys()) + for pkg in get_packages_from_requirements(attr.sources): + url = "{}/{}/".format( + index_url_overrides.get(pkg, attr.index_url).rstrip("/"), + pkg, + ) - # FIXME @aignas 2024-03-28: should I envsub this? - # Sort for a stable cache key - cache_key = ",".join(sorted(all_urls)) + # FIXME @aignas 2024-03-28: should I envsubt this? + cache_key = url if cache_key in cache: contents[pkg] = cache[cache_key] continue downloads[pkg] = struct( cache_key = cache_key, - urls = all_urls, packages = read_simple_api( ctx = ctx, - url = all_urls, + url = [url], attr = attr, **download_kwargs ), @@ -175,19 +165,11 @@ def get_packages_from_requirements(requirements_files): want_packages = {} for contents in requirements_files: parse_result = parse_requirements(contents) - for distribution, line in parse_result.requirements: + for distribution, _ in parse_result.requirements: distribution = normalize_name(distribution) - shas = want_packages.get(distribution) - if not shas: - shas = sets.make() - want_packages[distribution] = shas + want_packages[distribution] = None - for sha in get_simpleapi_sources(line).shas: - sets.insert(shas, sha) - - return struct( - simpleapi = want_packages, - ) + return want_packages def get_simpleapi_sources(line): """Get PyPI sources from a requirements.txt line. 
@@ -225,13 +207,12 @@ def get_simpleapi_sources(line): shas = sorted(shas), ) -def get_packages(index_urls, content, want_shas = None): +def get_packages(index_urls, content): """Get the package URLs for given shas by parsing the Simple API HTML. Args: index_urls(list[str]): The URLs that the HTML content can be downloaded from. content(str): The Simple API HTML content. - want_shas(set[str], optional): The list of shas that we need to get, otherwise we'll get all. Returns: A list of structs with: @@ -262,17 +243,8 @@ def get_packages(index_urls, content, want_shas = None): url, _, tail = line.partition("#sha256=") sha256, _, tail = tail.partition("\"") - if want_shas: - if sets.contains(want_shas, sha256): - continue - elif "data-yanked" in line: - # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api - # - # For now we just fail and inform the user to relock the requirements with a - # different version. - fail("The package with '--hash=sha256:{}' was yanked, relock your requirements".format(sha256)) - else: - sets.remove(want_shas, sha256) + # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api + yanked = "data-yanked" in line maybe_metadata, _, tail = tail.partition(">") filename, _, tail = tail.partition("<") @@ -294,14 +266,7 @@ def get_packages(index_urls, content, want_shas = None): sha256 = sha256, metadata_sha256 = metadata_sha256, metadata_url = metadata_url, - ), - ) - - if want_shas: - fail( - "Indexes {} did not provide packages with all shas: {}".format( - index_urls, - ", ".join(want_shas.keys()), + yanked = yanked, ), ) From 881211a4526750dda8d8abcc88a55e2cb0457700 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:57:12 +0900 Subject: [PATCH 47/70] comment: better error msg --- python/private/whl_target_platforms.bzl | 7 +------ 1 file changed, 1 
insertion(+), 6 deletions(-) diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index ef6030efb7..9b0e771c82 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -142,7 +142,7 @@ def _whl_priority(value): return version + offset if not "win" in value: - fail("BUG") + fail("BUG: only windows, linux and mac platforms are supported, but got: {}".format(value)) # Windows does not have multiple wheels for the same target platform return offset @@ -167,11 +167,6 @@ def select_whl(*, whls, want_abis, want_os, want_cpu): parsed = parse_whl_name(whl.filename) if parsed.abi_tag not in want_abis: # Filter out incompatible ABIs - # print("Skipping {} because {} is not in {}".format( - # whl.filename, - # parsed.abi_tag, - # want_abis, - # )) continue platform_tags = list({_LEGACY_ALIASES.get(p, p): True for p in parsed.platform_tag.split(".")}) From 8fc212c93a9973b5de4a57c05ffdf87488678d8f Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:57:56 +0900 Subject: [PATCH 48/70] comment: docs --- python/private/whl_target_platforms.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index 9b0e771c82..b8e4a9e3f0 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -157,7 +157,8 @@ def select_whl(*, whls, want_abis, want_os, want_cpu): want_cpu(str): The module_ctx.os.arch. Returns: - A struct with `url`, `sha256` and `filename` attributes for the selected whl. + None or a struct with `url`, `sha256` and `filename` attributes for the + selected whl. If no match is found, None is returned. 
""" if not whls: return None From b76bd05cd7c626ab8bddf78c41d92d3982c6ec8b Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:09:26 +0900 Subject: [PATCH 49/70] comment: use a tuple as a sorting key --- python/private/whl_target_platforms.bzl | 29 ++++++++++--------- .../whl_target_platforms/select_whl_tests.bzl | 18 ++++++------ 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index b8e4a9e3f0..c7693649a6 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -91,7 +91,7 @@ _OS_PREFIXES = { } # buildifier: disable=unsorted-dict-items def _whl_priority(value): - """Return a number for sorting whl lists, larger number means lower priority. + """Return a value for sorting whl lists. TODO @aignas 2024-03-29: In the future we should create a repo for each repo that matches the abi and then we could have config flags for the @@ -109,43 +109,44 @@ def _whl_priority(value): All of these can be expressed as configuration settings and included in the select statements in the `whl` repo. This means that the user can configure for a particular target what they need. + + Returns a 4-tuple where the items are: + * bool - is it an 'any' wheel? True if it is. + * bool - is it an 'universal' wheel? True if it is. (e.g. macos universal2 wheels) + * int - the minor plaform version (e.g. osx os version, libc version) + * int - the major plaform version (e.g. osx os version, libc version) """ if "." in value: value, _, _ = value.partition(".") if "any" == value: # This is just a big value that should be larger than any other value returned by this function - return 100000 - - # The offset is for ensuring that the universal wheels are less - # preferred. 
- offset = (len(whl_target_platforms(value)) - 1) * 10000 + return (True, False, 0, 0) if "linux" in value: os, _, tail = value.partition("_") if os == "linux": # If the platform tag starts with 'linux', then return something less than what 'any' returns - version = 99 + minor = 0 + major = 0 else: - _major, _, tail = tail.partition("_") # We don't need to use that because it's the same for all candidates now - version, _, _ = tail.partition("_") + major, _, tail = tail.partition("_") # We don't need to use that because it's the same for all candidates now + minor, _, _ = tail.partition("_") - return int(version) + offset + return (False, os == "linux", int(minor), int(major)) if "mac" in value or "osx" in value: _, _, tail = value.partition("_") major, _, tail = tail.partition("_") minor, _, _ = tail.partition("_") - # the major is >= 10, so let's just multiply by 10 - version = int(major) * 100 + int(minor) - return version + offset + return (False, "universal2" in value, int(minor), int(major)) if not "win" in value: fail("BUG: only windows, linux and mac platforms are supported, but got: {}".format(value)) # Windows does not have multiple wheels for the same target platform - return offset + return (False, False, 0, 0) def select_whl(*, whls, want_abis, want_os, want_cpu): """Select a suitable wheel from a list. 
diff --git a/tests/private/whl_target_platforms/select_whl_tests.bzl b/tests/private/whl_target_platforms/select_whl_tests.bzl index bea5e037fe..45d9708c32 100644 --- a/tests/private/whl_target_platforms/select_whl_tests.bzl +++ b/tests/private/whl_target_platforms/select_whl_tests.bzl @@ -83,37 +83,37 @@ def _match(env, got, want_filename): _tests = [] def _test_selecting(env): - got = select_whl(whls = WHL_LIST, want_abis = ["none"], want_platform = "ignored") + got = select_whl(whls = WHL_LIST, want_abis = ["none"], want_os = "ignored", want_cpu = "ignored") _match(env, got, "pkg-0.0.1-py3-none-any.whl") - got = select_whl(whls = WHL_LIST, want_abis = ["abi3"], want_platform = "ignored") + got = select_whl(whls = WHL_LIST, want_abis = ["abi3"], want_os = "ignored", want_cpu = "ignored") _match(env, got, "pkg-0.0.1-py3-abi3-any.whl") # Check the selection failure - got = select_whl(whls = WHL_LIST, want_abis = ["cp39"], want_platform = "does-not-exist") + got = select_whl(whls = WHL_LIST, want_abis = ["cp39"], want_os = "fancy", want_cpu = "exotic") _match(env, got, None) # Check we match the ABI and not the py version - got = select_whl(whls = WHL_LIST, want_abis = ["cp37m"], want_platform = "linux_x86_64") + got = select_whl(whls = WHL_LIST, want_abis = ["cp37m"], want_os = "linux", want_cpu = "amd64") _match(env, got, "pkg-0.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl") # Check we can select a filename with many platform tags - got = select_whl(whls = WHL_LIST, want_abis = ["cp39"], want_platform = "linux_x86_32") + got = select_whl(whls = WHL_LIST, want_abis = ["cp39"], want_os = "linux", want_cpu = "i686") _match(env, got, "pkg-0.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl") # Check that we prefer the specific wheel - got = select_whl(whls = WHL_LIST, want_abis = ["cp311"], want_platform = "osx_x86_64") + got = select_whl(whls = WHL_LIST, want_abis = ["cp311"], want_os = "mac os", want_cpu = 
"x86_64") _match(env, got, "pkg-0.0.1-cp311-cp311-macosx_10_9_x86_64.whl") - got = select_whl(whls = WHL_LIST, want_abis = ["cp311"], want_platform = "osx_aarch64") + got = select_whl(whls = WHL_LIST, want_abis = ["cp311"], want_os = "mac os", want_cpu = "aarch64") _match(env, got, "pkg-0.0.1-cp311-cp311-macosx_11_0_arm64.whl") # Check that we can use the universal2 if the arm wheel is not available - got = select_whl(whls = [w for w in WHL_LIST if "arm64" not in w.filename], want_abis = ["cp311"], want_platform = "osx_aarch64") + got = select_whl(whls = [w for w in WHL_LIST if "arm64" not in w.filename], want_abis = ["cp311"], want_os = "mac os", want_cpu = "aarch64") _match(env, got, "pkg-0.0.1-cp311-cp311-macosx_10_9_universal2.whl") # Check we prefer platform specific wheels - got = select_whl(whls = WHL_LIST, want_abis = ["none", "abi3", "cp39"], want_platform = "linux_x86_64") + got = select_whl(whls = WHL_LIST, want_abis = ["none", "abi3", "cp39"], want_os = "linux", want_cpu = "x86_64") _match(env, got, "pkg-0.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl") _tests.append(_test_selecting) From 9acf7d3b0dcd39a97bd2651c8a061402e93838a3 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:12:38 +0900 Subject: [PATCH 50/70] comment: date --- tests/private/whl_target_platforms/select_whl_tests.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/private/whl_target_platforms/select_whl_tests.bzl b/tests/private/whl_target_platforms/select_whl_tests.bzl index 45d9708c32..0d6f97d7a5 100644 --- a/tests/private/whl_target_platforms/select_whl_tests.bzl +++ b/tests/private/whl_target_platforms/select_whl_tests.bzl @@ -1,4 +1,4 @@ -# Copyright 2023 The Bazel Authors. All rights reserved. +# Copyright 2024 The Bazel Authors. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 01eccf59b3172bcf0f1355847ad2b22114ddc9d7 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:14:14 +0900 Subject: [PATCH 51/70] comment: clarify docs --- python/private/pypi_index.bzl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 5207d9f8b0..98deadf23a 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -46,7 +46,7 @@ def simpleapi_download(ctx, *, attr, cache): `bazel clean --expunge`. Returns: - dict of pkg name to the HTML contents. + dict of pkg name to the parsed HTML contents - a list of structs. """ index_url_overrides = { normalize_name(p): i @@ -117,6 +117,8 @@ def read_simple_api(ctx, url, attr, **download_kwargs): url = url if type(url) == type("") else url[0] output_str = url + + # Transform the URL into a valid filename for char in [".", ":", "/", "\\", "$", "[", "]", "{", "}", "'", "\"", "-"]: output_str = output_str.replace(char, "_") From e80d37c6b4b132e640d6733f4d49fb91334dd965 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:16:05 +0900 Subject: [PATCH 52/70] comment: improve the doc --- python/private/pypi_index.bzl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 98deadf23a..1906c5da0f 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -29,10 +29,12 @@ def simpleapi_download(ctx, *, attr, cache): ctx: The module_ctx or repository_ctx. attr: Contains the parameters for the download. They are grouped into a struct for better clarity. It must have attributes: - * index_url: The index. 
- * index_url_overrides: The index overrides for separate packages. - * sources: The sources to download things for. - * envsubst: The envsubst vars. + * index_url: str, the index. + * index_url_overrides: dict[str, str], the index overrides for + separate packages. + * sources: list[str], the sources to download things for. Each value is + the contents of requirements files. + * envsubst: list[str], the envsubst vars for performing substitution in index url. * netrc: The netrc parameter for ctx.download, see http_file for docs. * auth_patterns: The auth_patterns parameter for ctx.download, see http_file for docs. From 28d40e37674982e009e63d8c35fa21bf75ec3596 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:17:58 +0900 Subject: [PATCH 53/70] comment: remove todo --- python/private/bzlmod/pip.bzl | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 8971091343..71dbc17d37 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -187,8 +187,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca module_ctx, attr = struct( index_url = pip_attr.experimental_index_url, - # TODO @aignas 2024-03-28: support index overrides for specific packages - # We should never attempt to join index contents ourselves. 
index_url_overrides = pip_attr.experimental_index_url_overrides or {}, sources = [requirements_lock_content], envsubst = pip_attr.envsubst, From bdfef56444b7516f571e693c36b644eb224205f1 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:24:27 +0900 Subject: [PATCH 54/70] simplify whl library attrs --- python/pip_install/pip_repository.bzl | 120 ++++++++++++-------------- 1 file changed, 57 insertions(+), 63 deletions(-) diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 3b7bf06290..71c49f47f8 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -23,7 +23,6 @@ load("//python/pip_install/private:generate_group_library_build_bazel.bzl", "gen load("//python/pip_install/private:generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel") load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS") load("//python/private:auth.bzl", "AUTH_ATTRS", "get_auth") -load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED") load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") @@ -918,71 +917,66 @@ if __name__ == "__main__": return contents # NOTE @aignas 2024-03-21: The usage of dict({}, **common) ensures that all args to `dict` are unique -def whl_library_attrs(): - attrs = dict({ - "annotation": attr.label( - doc = ( - "Optional json encoded file containing annotation to apply to the extracted wheel. " + - "See `package_annotation`" - ), - allow_files = True, +whl_library_attrs = dict({ + "annotation": attr.label( + doc = ( + "Optional json encoded file containing annotation to apply to the extracted wheel. 
" + + "See `package_annotation`" ), - "filename": attr.string( - doc = "Download the whl file to this filename.", - ), - "group_deps": attr.string_list( - doc = "List of dependencies to skip in order to break the cycles within a dependency group.", - default = [], - ), - "group_name": attr.string( - doc = "Name of the group, if any.", - ), - "repo": attr.string( - mandatory = True, - doc = "Pointer to parent repo name. Used to make these rules rerun if the parent repo changes.", - ), - "requirement": attr.string( - mandatory = True, - doc = "Python requirement string describing the package to make available, if 'urls' or 'whl_file' is given, then this only needs to include foo[any_extras] as a bare minimum.", - ), - "sha256": attr.string( - doc = "The sha256 of the downloaded whl", - ), - "urls": attr.string_list( - doc = "The url of the whl to be downloaded using bazel downloader", - ), - "whl_file": attr.label( - doc = "The whl file that should be used instead of downloading", - ), - "whl_patches": attr.label_keyed_string_dict( - doc = """a label-keyed-string dict that has - json.encode(struct([whl_file], patch_strip]) as values. This - is to maintain flexibility and correct bzlmod extension interface - until we have a better way to define whl_library and move whl - patching to a separate place. INTERNAL USE ONLY.""", - ), - "_python_path_entries": attr.label_list( - # Get the root directory of these rules and keep them as a default attribute - # in order to avoid unnecessary repository fetching restarts. 
- # - # This is very similar to what was done in https://github.com/bazelbuild/rules_go/pull/3478 - default = [ - Label("//:BUILD.bazel"), - ] + [ - # Includes all the external dependencies from repositories.bzl - Label("@" + repo + "//:BUILD.bazel") - for repo in all_requirements - ], - ), - }, **common_attrs) - - if BZLMOD_ENABLED: - attrs.update(AUTH_ATTRS) - - return attrs + allow_files = True, + ), + "filename": attr.string( + doc = "Download the whl file to this filename.", + ), + "group_deps": attr.string_list( + doc = "List of dependencies to skip in order to break the cycles within a dependency group.", + default = [], + ), + "group_name": attr.string( + doc = "Name of the group, if any.", + ), + "repo": attr.string( + mandatory = True, + doc = "Pointer to parent repo name. Used to make these rules rerun if the parent repo changes.", + ), + "requirement": attr.string( + mandatory = True, + doc = "Python requirement string describing the package to make available, if 'urls' or 'whl_file' is given, then this only needs to include foo[any_extras] as a bare minimum.", + ), + "sha256": attr.string( + doc = "The sha256 of the downloaded whl", + ), + "urls": attr.string_list( + doc = "The url of the whl to be downloaded using bazel downloader", + ), + "whl_file": attr.label( + doc = "The whl file that should be used instead of downloading", + ), + "whl_patches": attr.label_keyed_string_dict( + doc = """a label-keyed-string dict that has + json.encode(struct([whl_file], patch_strip]) as values. This + is to maintain flexibility and correct bzlmod extension interface + until we have a better way to define whl_library and move whl + patching to a separate place. INTERNAL USE ONLY.""", + ), + "_python_path_entries": attr.label_list( + # Get the root directory of these rules and keep them as a default attribute + # in order to avoid unnecessary repository fetching restarts. 
+ # + # This is very similar to what was done in https://github.com/bazelbuild/rules_go/pull/3478 + default = [ + Label("//:BUILD.bazel"), + ] + [ + # Includes all the external dependencies from repositories.bzl + Label("@" + repo + "//:BUILD.bazel") + for repo in all_requirements + ], + ), +}, **common_attrs) +whl_library_attrs.update(AUTH_ATTRS) whl_library = repository_rule( - attrs = whl_library_attrs(), + attrs = whl_library_attrs, doc = """ Download and extracts a single wheel based into a bazel repo based on the requirement string passed in. Instantiated from pip_repository and inherits config options from there.""", From 0b1de6ede32c28378733f108924d2ddce5d09df6 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:28:58 +0900 Subject: [PATCH 55/70] comment: improve error message --- python/pip_install/pip_repository.bzl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 71c49f47f8..177fa0c530 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -776,8 +776,9 @@ def _whl_library_impl(rctx): filename = rctx.attr.filename if not filename: _, _, filename = rctx.attr.urls[0].rpartition("/") - if not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith("zip")): - fail("'filename' needs to be provided when passing if it is not possible to auto-detect filename extension from URL:" + rctx.attr.urls[0]) + fail("Only '.whl', '.tar.gz' or '.zip' files are supported, detected '{}' from url: {}".format(filename, rctx.attr.urls[0])) + elif not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith(".zip")): + fail("Only '.whl', '.tar.gz' or '.zip' files are supported, got: {}".format(filename)) result = rctx.download( url = rctx.attr.urls, From 9b0be62b897a0ba080fec677bb0b31b431bf3351 Mon Sep 17 00:00:00 2001 From: 
Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:31:45 +0900 Subject: [PATCH 56/70] comment: improve error message --- python/pip_install/pip_repository.bzl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 177fa0c530..fbba529f57 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -776,9 +776,13 @@ def _whl_library_impl(rctx): filename = rctx.attr.filename if not filename: _, _, filename = rctx.attr.urls[0].rpartition("/") - fail("Only '.whl', '.tar.gz' or '.zip' files are supported, detected '{}' from url: {}".format(filename, rctx.attr.urls[0])) - elif not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith(".zip")): - fail("Only '.whl', '.tar.gz' or '.zip' files are supported, got: {}".format(filename)) + + if not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith(".zip")): + if rctx.attr.filename: + msg = "got '{}'".format(filename) + else: + msg = "detected '{}' from url:\n{}".format(filename, rctx.attr.urls[0]) + fail("Only '.whl', '.tar.gz' or '.zip' files are supported, {}".format(msg)) result = rctx.download( url = rctx.attr.urls, From 8dc3bdcf21c7af644f6e505327ed47282a166831 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:34:14 +0900 Subject: [PATCH 57/70] comment: improve error msg --- python/pip_install/pip_repository.bzl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index fbba529f57..8142537d9d 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -774,25 +774,26 @@ def _whl_library_impl(rctx): whl_path = rctx.path(whl_path.basename) elif rctx.attr.urls: filename = rctx.attr.filename + urls 
= rctx.attr.urls if not filename: - _, _, filename = rctx.attr.urls[0].rpartition("/") + _, _, filename = urls[0].rpartition("/") if not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith(".zip")): if rctx.attr.filename: msg = "got '{}'".format(filename) else: - msg = "detected '{}' from url:\n{}".format(filename, rctx.attr.urls[0]) + msg = "detected '{}' from url:\n{}".format(filename, urls[0]) fail("Only '.whl', '.tar.gz' or '.zip' files are supported, {}".format(msg)) result = rctx.download( - url = rctx.attr.urls, + url = urls, output = rctx.attr.filename, sha256 = rctx.attr.sha256, - auth = get_auth(rctx, rctx.attr.urls), + auth = get_auth(rctx, urls), ) if not result.success: - fail(result) + fail("could not download the '{}' from {}:\n{}".format(filename, urls, result)) whl_path = rctx.path(rctx.attr.filename) else: From 5b6a444dae392183b97a731f655eff47920b25b7 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:40:38 +0900 Subject: [PATCH 58/70] extra docs --- CHANGELOG.md | 2 ++ MODULE.bazel | 2 +- python/pip_install/pip_repository.bzl | 10 ++++++---- python/private/bzlmod/pip.bzl | 6 ++++++ 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3fa5f1fef..f2b6b00295 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ A brief description of the categories of changes: * (bzlmod): The `MODULE.bazel.lock` `whl_library` rule attributes are now sorted in the attributes section. We are also removing values that are not default in order to reduce the noise. +* (deps): Bumped bazel_features to 1.9.1 to detect optional support + non-blocking downloads. 
### Fixed diff --git a/MODULE.bazel b/MODULE.bazel index ac4c12cfad..f9ebb076b3 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -5,7 +5,7 @@ module( ) bazel_dep(name = "bazel_features", version = "1.9.1") -bazel_dep(name = "bazel_skylib", version = "1.5.0") +bazel_dep(name = "bazel_skylib", version = "1.3.0") bazel_dep(name = "platforms", version = "0.0.4") # Those are loaded only when using py_proto_library diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 8142537d9d..9df1b33770 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -932,7 +932,7 @@ whl_library_attrs = dict({ allow_files = True, ), "filename": attr.string( - doc = "Download the whl file to this filename.", + doc = "Download the whl file to this filename. Only used when the `urls` is passed. If not specified, will be auto-detected from the `urls`.", ), "group_deps": attr.string_list( doc = "List of dependencies to skip in order to break the cycles within a dependency group.", @@ -950,13 +950,15 @@ whl_library_attrs = dict({ doc = "Python requirement string describing the package to make available, if 'urls' or 'whl_file' is given, then this only needs to include foo[any_extras] as a bare minimum.", ), "sha256": attr.string( - doc = "The sha256 of the downloaded whl", + doc = "The sha256 of the downloaded whl. Only used when the `urls` is passed.", ), "urls": attr.string_list( - doc = "The url of the whl to be downloaded using bazel downloader", + doc = """\ +The list of urls of the whl to be downloaded using bazel downloader. 
Using this +attr makes `extra_pip_args` and `download_only` ignored.""", ), "whl_file": attr.label( - doc = "The whl file that should be used instead of downloading", + doc = "The whl file that should be used instead of downloading or building the whl.", ), "whl_patches": attr.label_keyed_string_dict( doc = """a label-keyed-string dict that has diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 71dbc17d37..6521d306d8 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -462,6 +462,9 @@ def _pip_parse_ext_attrs(): doc = """\ The index URL to use for downloading wheels using bazel downloader. This value is going to be subject to `envsubst` substitutions if necessary. + +The indexes must support Simple API as described here: +https://packaging.python.org/en/latest/specifications/simple-repository-api/ """, ), "experimental_index_url_overrides": attr.string_dict( @@ -472,6 +475,9 @@ if necessary. The key is the package name (will be normalized before usage) and the value is the index URL. + +The indexes must support Simple API as described here: +https://packaging.python.org/en/latest/specifications/simple-repository-api/ """, ), "hub_name": attr.string( From f438e6e3535fb0b53297ee5fab2c5acdc1d188a0 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 31 Mar 2024 23:06:47 +0900 Subject: [PATCH 59/70] feat: support extra index URLs --- examples/bzlmod/MODULE.bazel | 14 ++-- python/private/bzlmod/pip.bzl | 22 +++++ python/private/pypi_index.bzl | 147 +++++++++++++++++++++++----------- 3 files changed, 131 insertions(+), 52 deletions(-) diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel index 1c9524143e..1134487145 100644 --- a/examples/bzlmod/MODULE.bazel +++ b/examples/bzlmod/MODULE.bazel @@ -99,6 +99,15 @@ pip.parse( # Use the bazel downloader to query the simple API for downloading the sources # Note, that we can use envsubst for this value. 
experimental_index_url = "${PIP_INDEX_URL:-https://pypi.org/simple}", + # One can also select a particular index for a particular package. + # This ensures that the setup is resistant against confusion attacks. + # experimental_index_url_overrides = { + # "my_package": "https://different-index-url.com", + # }, + # Or you can specify extra indexes like with `pip`: + # experimental_extra_index_urls = [ + # "https://different-index-url.com", + # ], experimental_requirement_cycles = { "sphinx": [ "sphinx", @@ -117,11 +126,6 @@ pip.parse( "cp39_linux_*", "cp39_*", ], - # One can also select a particular index for a particular package. - # This ensures that the setup is resistant against confusion attacks. - # experimental_index_url_overrides = { - # "my_package": "https://different-index-url.com", - # }, hub_name = "pip", python_version = "3.9", requirements_lock = "//:requirements_lock_3_9.txt", diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 6521d306d8..d30ffd3af5 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -187,6 +187,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca module_ctx, attr = struct( index_url = pip_attr.experimental_index_url, + extra_index_urls = pip_attr.experimental_extra_index_urls or [], index_url_overrides = pip_attr.experimental_index_url_overrides or {}, sources = [requirements_lock_content], envsubst = pip_attr.envsubst, @@ -458,6 +459,18 @@ def _pip_impl(module_ctx): def _pip_parse_ext_attrs(): attrs = dict({ + "experimental_extra_index_urls": attr.string_list( + doc = """\ +The extra index URLs to use for downloading wheels using bazel downloader. +Each value is going to be subject to `envsubst` substitutions if necessary. + +The indexes must support Simple API as described here: +https://packaging.python.org/en/latest/specifications/simple-repository-api/ + +This is equivalent to `--extra-index-urls` `pip` option. 
+""", + default = [], + ), "experimental_index_url": attr.string( doc = """\ The index URL to use for downloading wheels using bazel downloader. This value is going @@ -465,6 +478,11 @@ to be subject to `envsubst` substitutions if necessary. The indexes must support Simple API as described here: https://packaging.python.org/en/latest/specifications/simple-repository-api/ + +In the future this could be defaulted to `https://pypi.org` when this feature becomes +stable. + +This is equivalent to `--index-url` `pip` option. """, ), "experimental_index_url_overrides": attr.string_dict( @@ -476,6 +494,10 @@ if necessary. The key is the package name (will be normalized before usage) and the value is the index URL. +This design pattern has been chosen in order to be fully deterministic about which +packages come from which source. We want to avoid issues similar to what happened in +https://pytorch.org/blog/compromised-nightly-dependency/. + The indexes must support Simple API as described here: https://packaging.python.org/en/latest/specifications/simple-repository-api/ """, diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 1906c5da0f..bbbed39faa 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -32,6 +32,8 @@ def simpleapi_download(ctx, *, attr, cache): * index_url: str, the index. * index_url_overrides: dict[str, str], the index overrides for separate packages. + * extra_index_urls: Extra index URLs that will be looked up after + the main is looked up. * sources: list[str], the sources to download things for. Each value is the contents of requirements files. * envsubst: list[str], the envsubst vars for performing substitution in index url. @@ -59,51 +61,86 @@ def simpleapi_download(ctx, *, attr, cache): if bazel_features.external_deps.download_has_block_param: download_kwargs["block"] = False - # Download in parallel if possible - downloads = {} + # Download in parallel if possible. 
This will download (potentially + # duplicate) data for multiple packages if there is more than one index + # available, but that is the price of convenience. However, that price + # should be mostly negligible because the simple API calls are very cheap + # and the user should not notice any extra overhead. + # + # If we are in synchronous mode, then we will use the first result that we + # find. + # + # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes + # to replicate how `pip` would handle this case. + async_downloads = {} contents = {} + index_urls = [attr.index_url] + attr.extra_index_urls for pkg in get_packages_from_requirements(attr.sources): - url = "{}/{}/".format( - index_url_overrides.get(pkg, attr.index_url).rstrip("/"), - pkg, - ) - - # FIXME @aignas 2024-03-28: should I envsubt this? - cache_key = url - if cache_key in cache: - contents[pkg] = cache[cache_key] - continue - - downloads[pkg] = struct( - cache_key = cache_key, - packages = read_simple_api( + success = False + for index_url in index_urls: + url = "{}/{}/".format( + index_url_overrides.get(pkg, index_url).rstrip("/"), + pkg, + ) + + result = read_simple_api( ctx = ctx, - url = [url], + url = url, attr = attr, + cache = cache, **download_kwargs - ), - ) + ) + if download_kwargs.get("block") == False: + # We will process it in a separate loop: + async_downloads.setdefault(pkg, []).append(struct(wait = result.wait)) + continue + + if result.success: + contents[pkg] = result.output + success = True + break + + if not async_downloads and not success: + fail("Failed to download metadata about '{}' from urls: {}".format( + pkg, + ", ".join(index_urls), + )) + + if not async_downloads: + return contents # If we use `block` == False, then we need to have a second loop that is # collecting all of the results as they were being downloaded in parallel. 
- for pkg, download in downloads.items(): - contents[pkg] = download.packages.contents() - cache.setdefault(download.cache_key, contents[pkg]) + for pkg, downloads in async_downloads.items(): + success = False + for download in downloads: + result = download.wait() + + if result.success: + contents[pkg] = result.output + success = True + break + + if not success: + fail("Failed to download metadata about '{}' from urls: {}".format( + pkg, + ", ".join(index_urls), + )) return contents -def read_simple_api(ctx, url, attr, **download_kwargs): +def read_simple_api(ctx, url, attr, cache, **download_kwargs): """Read SimpleAPI. Args: ctx: The module_ctx or repository_ctx. - url: The url parameter that can be passed to ctx.download. + url: str, the url parameter that can be passed to ctx.download. attr: The attribute that contains necessary info for downloading. The following attributes must be present: - * envsubst: The env vars to do env sub before downloading. * netrc: The netrc parameter for ctx.download, see http_file for docs. * auth_patterns: The auth_patterns parameter for ctx.download, see http_file for docs. + cache: A dict for storing the results. **download_kwargs: Any extra params to ctx.download. Note that output and auth will be passed for you. @@ -113,48 +150,64 @@ def read_simple_api(ctx, url, attr, **download_kwargs): """ # TODO: Add a test that env subbed index urls do not leak into the lock file. - if type(url) == type([]) and len(url) > 1: - fail("Only a single url is supported") + # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for + # the whl location and we cannot handle multiple URLs at once by passing + # them to ctx.download if we want to correctly handle the relative URLs. 
- url = url if type(url) == type("") else url[0] + real_url = envsubst( + url, + attr.envsubst, + ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get, + ) - output_str = url + cache_key = real_url + if cache_key in cache: + return struct(success = True, output = cache[cache_key]) + + output_str = envsubst( + url, + attr.envsubst, + # Use env names in the subst values - this will be unique over + # the lifetime of the execution of this function and we also use + # `~` as the separator to ensure that we don't get clashes. + {e: "~{}~".format(e) for e in attr.envsubst}.get, + ) # Transform the URL into a valid filename - for char in [".", ":", "/", "\\", "$", "[", "]", "{", "}", "'", "\"", "-"]: + for char in [".", ":", "/", "\\", "-"]: output_str = output_str.replace(char, "_") output = ctx.path(output_str.strip("_").lower() + ".html") - real_url = envsubst( - url, - attr.envsubst, - ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get, - ) - # NOTE: this may have block = True or block = False in the download_kwargs download = ctx.download( url = [real_url], output = output, auth = get_auth(ctx, [real_url], ctx_attr = attr), + allow_fail = True, **download_kwargs ) - return struct( - contents = lambda: _read_index_result( - ctx, - download.wait() if download_kwargs.get("block") == False else download, - output, - url, - ), - ) + if download_kwargs.get("block") == False: + # Simulate the same API as ctx.download has + return struct( + wait = lambda: _read_index_result(ctx, download.wait(), output, url, cache, cache_key), + ) -def _read_index_result(ctx, result, output, url): + return _read_index_result(ctx, download, output, url, cache, cache_key) + +def _read_index_result(ctx, result, output, url, cache, cache_key): if not result.success: - fail("Failed to download from {}: {}".format(url, result)) + return struct(success = False) html = ctx.read(output) - return get_packages(url, html) + + output = get_packages(url, html) + if output: + 
cache.setdefault(cache_key, output) + return struct(success = True, output = output, cache_key = cache_key) + else: + return struct(success = False) def get_packages_from_requirements(requirements_files): """Get Simple API sources from a list of requirements files and merge them. From 038e45eac4592928dbbcd9a3220e9c15f7847819 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 2 Apr 2024 09:52:19 +0900 Subject: [PATCH 60/70] test: add a test for HTML parsing --- MODULE.bazel | 2 +- python/private/pypi_index.bzl | 64 +++++++------- tests/private/pypi_index/pypi_index_tests.bzl | 85 ++++++++++++++++++- 3 files changed, 119 insertions(+), 32 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index f9ebb076b3..ae4686140c 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -69,7 +69,7 @@ use_repo(pip, "rules_python_publish_deps") # ===== DEV ONLY DEPS AND SETUP BELOW HERE ===== bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True, repo_name = "io_bazel_stardoc") bazel_dep(name = "rules_bazel_integration_test", version = "0.20.0", dev_dependency = True) -bazel_dep(name = "rules_testing", version = "0.5.0", dev_dependency = True) +bazel_dep(name = "rules_testing", version = "0.6.0", dev_dependency = True) bazel_dep(name = "rules_cc", version = "0.0.9", dev_dependency = True) # Extra gazelle plugin deps so that WORKSPACE.bzlmod can continue including it for e2e tests. 
diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index bbbed39faa..ea9a0974f8 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -17,6 +17,7 @@ A file that houses private functions used in the `bzlmod` extension with the sam """ load("@bazel_features//:features.bzl", "bazel_features") +load("@bazel_skylib//lib:sets.bzl", "sets") load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load(":auth.bzl", "get_auth") load(":envsubst.bzl", "envsubst") @@ -76,33 +77,37 @@ def simpleapi_download(ctx, *, attr, cache): contents = {} index_urls = [attr.index_url] + attr.extra_index_urls for pkg in get_packages_from_requirements(attr.sources): + pkg_normalized = normalize_name(pkg) + success = False for index_url in index_urls: - url = "{}/{}/".format( - index_url_overrides.get(pkg, index_url).rstrip("/"), - pkg, - ) - result = read_simple_api( ctx = ctx, - url = url, + url = "{}/{}/".format( + index_url_overrides.get(pkg_normalized, index_url).rstrip("/"), + pkg, + ), attr = attr, cache = cache, **download_kwargs ) if download_kwargs.get("block") == False: # We will process it in a separate loop: - async_downloads.setdefault(pkg, []).append(struct(wait = result.wait)) + async_downloads.setdefault(pkg_normalized, []).append( + struct( + pkg_normalized = pkg_normalized, + wait = result.wait, + ), + ) continue if result.success: - contents[pkg] = result.output + contents[pkg_normalized] = result.output success = True break if not async_downloads and not success: - fail("Failed to download metadata about '{}' from urls: {}".format( - pkg, + fail("Failed to download metadata from urls: {}".format( ", ".join(index_urls), )) @@ -116,14 +121,12 @@ def simpleapi_download(ctx, *, attr, cache): for download in downloads: result = download.wait() - if result.success: - contents[pkg] = result.output + if result.success and download.pkg_normalized not in contents: + contents[download.pkg_normalized] = 
result.output success = True - break if not success: - fail("Failed to download metadata about '{}' from urls: {}".format( - pkg, + fail("Failed to download metadata from urls: {}".format( ", ".join(index_urls), )) @@ -137,6 +140,7 @@ def read_simple_api(ctx, url, attr, cache, **download_kwargs): url: str, the url parameter that can be passed to ctx.download. attr: The attribute that contains necessary info for downloading. The following attributes must be present: + * envsubst: The envsubst values for performing substitutions in the URL. * netrc: The netrc parameter for ctx.download, see http_file for docs. * auth_patterns: The auth_patterns parameter for ctx.download, see http_file for docs. @@ -148,11 +152,10 @@ def read_simple_api(ctx, url, attr, cache, **download_kwargs): A similar object to what `download` would return except that in result.out will be the parsed simple api contents. """ - # TODO: Add a test that env subbed index urls do not leak into the lock file. - # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for # the whl location and we cannot handle multiple URLs at once by passing # them to ctx.download if we want to correctly handle the relative URLs. + # TODO: Add a test that env subbed index urls do not leak into the lock file. real_url = envsubst( url, @@ -200,9 +203,9 @@ def _read_index_result(ctx, result, output, url, cache, cache_key): if not result.success: return struct(success = False) - html = ctx.read(output) + content = ctx.read(output) - output = get_packages(url, html) + output = parse_simple_api_html(url = url, content = content) if output: cache.setdefault(cache_key, output) return struct(success = True, output = output, cache_key = cache_key) @@ -216,17 +219,18 @@ def get_packages_from_requirements(requirements_files): requirements_files(list[str]): A list of requirements files contents. 
Returns: - A struct with `simpleapi` attribute that contains a dict of normalized package - name to a list of shas that we should index. + A list. """ - want_packages = {} + want_packages = sets.make() for contents in requirements_files: parse_result = parse_requirements(contents) for distribution, _ in parse_result.requirements: - distribution = normalize_name(distribution) - want_packages[distribution] = None + # NOTE: we'll be querying the PyPI servers multiple times if the + # requirements contains non-normalized names, but this is what user + # is specifying to us. + sets.insert(want_packages, distribution) - return want_packages + return sets.to_list(want_packages) def get_simpleapi_sources(line): """Get PyPI sources from a requirements.txt line. @@ -264,11 +268,11 @@ def get_simpleapi_sources(line): shas = sorted(shas), ) -def get_packages(index_urls, content): +def parse_simple_api_html(*, url, content): """Get the package URLs for given shas by parsing the Simple API HTML. Args: - index_urls(list[str]): The URLs that the HTML content can be downloaded from. + url(str): The URL that the HTML content can be downloaded from. content(str): The Simple API HTML content. 
Returns: @@ -297,7 +301,7 @@ def get_packages(index_urls, content): fail("Unsupported API version: {}".format(api_version)) for line in lines[1:]: - url, _, tail = line.partition("#sha256=") + dist_url, _, tail = line.partition("#sha256=") sha256, _, tail = tail.partition("\"") # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api @@ -311,7 +315,7 @@ def get_packages(index_urls, content): # Implement https://peps.python.org/pep-0714/ _, _, tail = maybe_metadata.partition(metadata_marker) metadata_sha256, _, _ = tail.partition("\"") - metadata_url = url + ".metadata" + metadata_url = dist_url + ".metadata" else: metadata_sha256 = "" metadata_url = "" @@ -319,7 +323,7 @@ def get_packages(index_urls, content): packages.append( struct( filename = filename, - url = _absolute_urls(index_urls[0], url), + url = _absolute_urls(url, dist_url), sha256 = sha256, metadata_sha256 = metadata_sha256, metadata_url = metadata_url, diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl index 4320c7c2c5..27320577e7 100644 --- a/tests/private/pypi_index/pypi_index_tests.bzl +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -15,7 +15,8 @@ "" load("@rules_testing//lib:test_suite.bzl", "test_suite") -load("//python/private:pypi_index.bzl", "get_simpleapi_sources") # buildifier: disable=bzl-visibility +load("@rules_testing//lib:truth.bzl", "subjects") +load("//python/private:pypi_index.bzl", "get_simpleapi_sources", "parse_simple_api_html") # buildifier: disable=bzl-visibility _tests = [] @@ -51,6 +52,88 @@ def _test_simple_api_sources(env): _tests.append(_test_simple_api_sources) +def _generate_html(*items): + return """\ + + + + Links for foo + + +

Links for cengal

+{} + + +""".format( + "\n".join([ + "
{}
".format( + " ".join(["{}=\"{}\"".format(key, value) for key, value in item.attrs.items()]), + item.filename, + ) + for item in items + ]), + ) + +def _test_parse_simple_api_html(env): + tests = [ + ( + struct( + attrs = { + "data-requires-python": ">=3.7", + "href": "https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource", + }, + filename = "foo-0.0.1.tar.gz", + ), + struct( + filename = "foo-0.0.1.tar.gz", + metadata_sha256 = "", + metadata_url = "", + sha256 = "deadbeefasource", + url = "https://example.org/full-url/foo-0.0.1.tar.gz", + yanked = False, + ), + ), + ] + + for (input, want) in tests: + html = _generate_html(input) + got = parse_simple_api_html(url = "ignored", content = html) + env.expect.that_collection(got).has_size(1) + actual = env.expect.that_struct( + got[0], + attrs = dict( + filename = subjects.str, + metadata_sha256 = subjects.str, + metadata_url = subjects.str, + sha256 = subjects.str, + url = subjects.str, + yanked = subjects.bool, + ), + ) + actual.filename().equals(want.filename) + actual.metadata_sha256().equals(want.metadata_sha256) + actual.metadata_url().equals(want.metadata_url) + actual.sha256().equals(want.sha256) + actual.url().equals(want.url) + actual.yanked().equals(want.yanked) + +_tests.append(_test_parse_simple_api_html) + +# foo-0.0.1.tar.gz
+# cengal-3.2.5.tar.gz
+# cengal-3.2.5.tar.gz
+# cengal-3.2.6.tar.gz
+# cengal-3.3.0.tar.gz
+# cengal-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+# cengal-3.4.0-cp310-cp310-musllinux_1_1_x86_64.whl
+# cengal-3.4.0-cp310-cp310-win_amd64.whl
+# cengal-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+# cengal-3.4.0-cp311-cp311-win_amd64.whl
+# cengal-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+# cengal-3.4.0-cp312-cp312-win_amd64.whl
+# cengal-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+# cengal-3.4.0-cp38-cp38-musllinux_1_1_x86_64.whl
+ def pypi_index_test_suite(name): """Create the test suite. From 04953d4212117b7270e10648db59f6f447b90371 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 2 Apr 2024 09:57:40 +0900 Subject: [PATCH 61/70] test: add a test for HTML parsing --- tests/private/pypi_index/pypi_index_tests.bzl | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl index 27320577e7..becdecd7e6 100644 --- a/tests/private/pypi_index/pypi_index_tests.bzl +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -75,6 +75,7 @@ def _generate_html(*items): ) def _test_parse_simple_api_html(env): + # buildifier: disable=unsorted-dict-items tests = [ ( struct( @@ -93,6 +94,25 @@ def _test_parse_simple_api_html(env): yanked = False, ), ), + ( + struct( + attrs = { + "href": "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef", + "data-requires-python": ">=3.7", + "data-dist-info-metadata": "sha256=deadb00f", + "data-core-metadata": "sha256=deadb00f", + }, + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + ), + struct( + filename = "foo-0.0.1.tar.gz", + metadata_sha256 = "", + metadata_url = "", + sha256 = "deadbeefasource", + url = "https://example.org/full-url/foo-0.0.1.tar.gz", + yanked = False, + ), + ), ] for (input, want) in tests: @@ -119,7 +139,6 @@ def _test_parse_simple_api_html(env): _tests.append(_test_parse_simple_api_html) -# foo-0.0.1.tar.gz
# cengal-3.2.5.tar.gz
# cengal-3.2.5.tar.gz
# cengal-3.2.6.tar.gz
From 02ccc55b9cb93eb5957fb3e3c077fa6ea8d93012 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 2 Apr 2024 12:40:19 +0900 Subject: [PATCH 62/70] finish adding the first test --- tests/private/pypi_index/pypi_index_tests.bzl | 49 +++++++------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl index becdecd7e6..4a99eea6b1 100644 --- a/tests/private/pypi_index/pypi_index_tests.bzl +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -67,7 +67,7 @@ def _generate_html(*items): """.format( "\n".join([ "{}
".format( - " ".join(["{}=\"{}\"".format(key, value) for key, value in item.attrs.items()]), + " ".join(item.attrs), item.filename, ) for item in items @@ -79,10 +79,10 @@ def _test_parse_simple_api_html(env): tests = [ ( struct( - attrs = { - "data-requires-python": ">=3.7", - "href": "https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource", - }, + attrs = [ + 'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"', + 'data-requires-python=">=3.7"', + ], filename = "foo-0.0.1.tar.gz", ), struct( @@ -96,20 +96,20 @@ def _test_parse_simple_api_html(env): ), ( struct( - attrs = { - "href": "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef", - "data-requires-python": ">=3.7", - "data-dist-info-metadata": "sha256=deadb00f", - "data-core-metadata": "sha256=deadb00f", - }, + attrs = [ + 'href="https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef"', + 'data-requires-python=">=3.7"', + 'data-dist-info-metadata="sha256=deadb00f"', + 'data-core-metadata="sha256=deadb00f"', + ], filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", ), struct( - filename = "foo-0.0.1.tar.gz", - metadata_sha256 = "", - metadata_url = "", - sha256 = "deadbeefasource", - url = "https://example.org/full-url/foo-0.0.1.tar.gz", + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + metadata_sha256 = "deadb00f", + metadata_url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata", + sha256 = "deadbeef", + url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", yanked = False, ), ), @@ -119,6 +119,9 @@ def _test_parse_simple_api_html(env): html = _generate_html(input) got = parse_simple_api_html(url = "ignored", content = html) env.expect.that_collection(got).has_size(1) + if 
not got: + fail("expected at least one element, but did not get anything from:\n{}".format(html)) + actual = env.expect.that_struct( got[0], attrs = dict( @@ -139,20 +142,6 @@ def _test_parse_simple_api_html(env): _tests.append(_test_parse_simple_api_html) -# cengal-3.2.5.tar.gz
-# cengal-3.2.5.tar.gz
-# cengal-3.2.6.tar.gz
-# cengal-3.3.0.tar.gz
-# cengal-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-# cengal-3.4.0-cp310-cp310-musllinux_1_1_x86_64.whl
-# cengal-3.4.0-cp310-cp310-win_amd64.whl
-# cengal-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-# cengal-3.4.0-cp311-cp311-win_amd64.whl
-# cengal-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-# cengal-3.4.0-cp312-cp312-win_amd64.whl
-# cengal-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-# cengal-3.4.0-cp38-cp38-musllinux_1_1_x86_64.whl
- def pypi_index_test_suite(name): """Create the test suite. From b4a9aa8809dd766afcc51eeb66a4b3f616ab5f04 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:47:10 +0900 Subject: [PATCH 63/70] test: more tests for the pypi html parsing --- python/private/pypi_index.bzl | 25 +++--- tests/private/pypi_index/pypi_index_tests.bzl | 79 ++++++++++++++++++- 2 files changed, 91 insertions(+), 13 deletions(-) diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index ea9a0974f8..8d6fa7d70c 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -310,30 +310,31 @@ def parse_simple_api_html(*, url, content): maybe_metadata, _, tail = tail.partition(">") filename, _, tail = tail.partition("<") - metadata_marker = "data-core-metadata=\"sha256=" - if metadata_marker in maybe_metadata: - # Implement https://peps.python.org/pep-0714/ - _, _, tail = maybe_metadata.partition(metadata_marker) - metadata_sha256, _, _ = tail.partition("\"") - metadata_url = dist_url + ".metadata" - else: - metadata_sha256 = "" - metadata_url = "" + metadata_sha256 = "" + metadata_url = "" + for metadata_marker in ["data-core-metadata", "data-dist-info-metadata"]: + metadata_marker = metadata_marker + "=\"sha256=" + if metadata_marker in maybe_metadata: + # Implement https://peps.python.org/pep-0714/ + _, _, tail = maybe_metadata.partition(metadata_marker) + metadata_sha256, _, _ = tail.partition("\"") + metadata_url = dist_url + ".metadata" + break packages.append( struct( filename = filename, - url = _absolute_urls(url, dist_url), + url = _absolute_url(url, dist_url), sha256 = sha256, metadata_sha256 = metadata_sha256, - metadata_url = metadata_url, + metadata_url = _absolute_url(url, metadata_url), yanked = yanked, ), ) return packages -def _absolute_urls(index_url, candidate): +def _absolute_url(index_url, candidate): if not candidate.startswith(".."): return candidate diff --git 
a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl index 4a99eea6b1..4f9a7e4ffb 100644 --- a/tests/private/pypi_index/pypi_index_tests.bzl +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -84,6 +84,7 @@ def _test_parse_simple_api_html(env): 'data-requires-python=">=3.7"', ], filename = "foo-0.0.1.tar.gz", + url = "ignored", ), struct( filename = "foo-0.0.1.tar.gz", @@ -103,6 +104,7 @@ def _test_parse_simple_api_html(env): 'data-core-metadata="sha256=deadb00f"', ], filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + url = "ignored", ), struct( filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", @@ -113,11 +115,86 @@ def _test_parse_simple_api_html(env): yanked = False, ), ), + ( + struct( + attrs = [ + 'href="https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef"', + 'data-requires-python=">=3.7"', + 'data-core-metadata="sha256=deadb00f"', + ], + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + url = "ignored", + ), + struct( + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + metadata_sha256 = "deadb00f", + metadata_url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata", + sha256 = "deadbeef", + url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + yanked = False, + ), + ), + ( + struct( + attrs = [ + 'href="https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef"', + 'data-requires-python=">=3.7"', + 'data-dist-info-metadata="sha256=deadb00f"', + ], + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + url = "ignored", + ), + struct( + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + 
metadata_sha256 = "deadb00f", + metadata_url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata", + sha256 = "deadbeef", + url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + yanked = False, + ), + ), + ( + struct( + attrs = [ + 'href="https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef"', + 'data-requires-python=">=3.7"', + ], + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + url = "ignored", + ), + struct( + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + metadata_sha256 = "", + metadata_url = "", + sha256 = "deadbeef", + url = "https://example.org/full-url/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + yanked = False, + ), + ), + ( + struct( + attrs = [ + 'href="../../foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=deadbeef"', + 'data-requires-python=">=3.7"', + 'data-dist-info-metadata="sha256=deadb00f"', + ], + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + url = "https://example.org/python-wheels/bar/foo/", + ), + struct( + filename = "foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + metadata_sha256 = "deadb00f", + metadata_url = "https://example.org/python-wheels/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata", + sha256 = "deadbeef", + url = "https://example.org/python-wheels/foo-0.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + yanked = False, + ), + ), ] for (input, want) in tests: html = _generate_html(input) - got = parse_simple_api_html(url = "ignored", content = html) + got = parse_simple_api_html(url = input.url, content = html) env.expect.that_collection(got).has_size(1) if not got: fail("expected at least one element, but did not get 
anything from:\n{}".format(html)) From d62003759187709617811778f76d1940dbffea62 Mon Sep 17 00:00:00 2001 From: aignas <240938+aignas@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:47:37 +0900 Subject: [PATCH 64/70] doc: changelog --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2b6b00295..50f535de2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,9 +52,10 @@ A brief description of the categories of changes: ([#1777](https://github.com/bazelbuild/rules_python/issues/1777)) * (py_wheel) `bzlmod` installations now provide a `twine` setup for the default Python toolchain in `rules_python` for version 3.11. -* (bzlmod) New `experimental_index_url` and `experimental_index_url_overrides` to - `pip.parse` for using the bazel downloader. This is currently only working for - `whl-only` setups and may contain bugs. If you see any issues, report in +* (bzlmod) New `experimental_index_url`, `experimental_extra_index_urls` and + `experimental_index_url_overrides` to `pip.parse` for using the bazel + downloader. This is currently only working for `whl-only` setups and may + contain bugs. If you see any issues, report in [#1357](https://github.com/bazelbuild/rules_python/issues/1357). The URLs for the whl files will be written to the lock file. 
From f30a5dfd313abdb91f5186d11e0737867c3e86ba Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 5 Apr 2024 09:00:07 +0900 Subject: [PATCH 65/70] feat: support sdists using bazel downloader --- CHANGELOG.md | 7 ++-- MODULE.bazel | 1 + python/pip_install/pip_repository.bzl | 22 +++++++---- python/private/bzlmod/pip.bzl | 55 +++++++++++++++++---------- python/private/pypi_index.bzl | 26 +++++++++---- 5 files changed, 72 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50f535de2f..11728a1ef2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ A brief description of the categories of changes: * (bzlmod): The `MODULE.bazel.lock` `whl_library` rule attributes are now sorted in the attributes section. We are also removing values that are not - default in order to reduce the noise. + default in order to reduce the size of the lock file. * (deps): Bumped bazel_features to 1.9.1 to detect optional support non-blocking downloads. @@ -54,10 +54,9 @@ A brief description of the categories of changes: Python toolchain in `rules_python` for version 3.11. * (bzlmod) New `experimental_index_url`, `experimental_extra_index_urls` and `experimental_index_url_overrides` to `pip.parse` for using the bazel - downloader. This is currently only working for `whl-only` setups and may - contain bugs. If you see any issues, report in + downloader. If you see any issues, report in [#1357](https://github.com/bazelbuild/rules_python/issues/1357). The URLs for - the whl files will be written to the lock file. + the whl and sdist files will be written to the lock file. 
[0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 [python_default_visibility]: gazelle/README.md#directive-python_default_visibility diff --git a/MODULE.bazel b/MODULE.bazel index ae4686140c..fc32a3e51f 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -58,6 +58,7 @@ register_toolchains("@pythons_hub//:all") pip = use_extension("//python/extensions:pip.bzl", "pip") pip.parse( + experimental_index_url = "https://pypi.org/simple", hub_name = "rules_python_publish_deps", python_version = "3.11", requirements_darwin = "//tools/publish:requirements_darwin.txt", diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 9df1b33770..8ddaffa2b6 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -188,7 +188,7 @@ def use_isolated(ctx, attr): return use_isolated -def _parse_optional_attrs(rctx, args): +def _parse_optional_attrs(rctx, args, extra_pip_args = None): """Helper function to parse common attributes of pip_repository and whl_library repository rules. This function also serializes the structured arguments as JSON @@ -197,6 +197,7 @@ def _parse_optional_attrs(rctx, args): Args: rctx: Handle to the rule repository context. args: A list of parsed args for the rule. + extra_pip_args: The pip args to pass. Returns: Augmented args list. """ @@ -213,7 +214,7 @@ def _parse_optional_attrs(rctx, args): # Check for None so we use empty default types from our attrs. # Some args want to be list, and some want to be dict. 
- if rctx.attr.extra_pip_args != None: + if extra_pip_args != None: args += [ "--extra_pip_args", json.encode(struct(arg = [ @@ -760,12 +761,13 @@ def _whl_library_impl(rctx): "--requirement", rctx.attr.requirement, ] - - args = _parse_optional_attrs(rctx, args) + extra_pip_args = [] + extra_pip_args.extend(rctx.attr.extra_pip_args) # Manually construct the PYTHONPATH since we cannot use the toolchain here environment = _create_repository_execution_environment(rctx, python_interpreter) + whl_path = None if rctx.attr.whl_file: whl_path = rctx.path(rctx.attr.whl_file) @@ -787,7 +789,7 @@ def _whl_library_impl(rctx): result = rctx.download( url = urls, - output = rctx.attr.filename, + output = filename, sha256 = rctx.attr.sha256, auth = get_auth(rctx, urls), ) @@ -795,8 +797,14 @@ def _whl_library_impl(rctx): if not result.success: fail("could not download the '{}' from {}:\n{}".format(filename, urls, result)) - whl_path = rctx.path(rctx.attr.filename) - else: + if filename.endswith(".whl"): + whl_path = rctx.path(rctx.attr.filename) + else: + extra_pip_args.extend(["--no-index", "--find-links", "."]) + + args = _parse_optional_attrs(rctx, args, extra_pip_args) + + if not whl_path: repo_utils.execute_checked( rctx, op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement), diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index d30ffd3af5..ab7d7fe6cd 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -218,9 +218,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca repo = pip_name, repo_prefix = pip_name + "_", requirement = requirement_line, - isolated = use_isolated(module_ctx, pip_attr), - quiet = pip_attr.quiet, - timeout = pip_attr.timeout, ) maybe_args = dict( # The following values are safe to omit if they have false like values @@ -242,19 +239,38 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca }, ) 
whl_library_args.update({k: v for k, v in maybe_args.items() if v}) + maybe_args_with_default = dict( + # The following values have defaults next to them + isolated = (use_isolated(module_ctx, pip_attr), True), + quiet = (pip_attr.quiet, True), + timeout = (pip_attr.timeout, 600), + ) + whl_library_args.update({k: v for k, (v, default) in maybe_args_with_default.items() if v == default}) if index_urls: srcs = get_simpleapi_sources(requirement_line) - whl = select_whl( - whls = [ - src - for src in index_urls[whl_name] - # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api - # - # For now we just exclude such artifacts. - if src.sha256 in srcs.shas and src.filename.endswith(".whl") and not src.yanked - ], + whls = [] + sdist = None + for sha256 in srcs.shas: + # For now if the artifact is marked as yanked we just ignore it. + # + # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api + + maybe_whl = index_urls[whl_name].whls.get(sha256) + if maybe_whl and not maybe_whl.yanked: + whls.append(maybe_whl) + continue + + maybe_sdist = index_urls[whl_name].sdists.get(sha256) + if maybe_sdist and not maybe_sdist.yanked: + sdist = maybe_sdist + continue + + print("WARNING: Could not find a whl or an sdist with sha256={}".format(sha256)) # buildifier: disable=print + + distribution = select_whl( + whls = whls, want_abis = [ "none", "abi3", @@ -264,13 +280,13 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca ], want_os = module_ctx.os.name, want_cpu = module_ctx.os.arch, - ) + ) or sdist - if whl: + if distribution: whl_library_args["requirement"] = srcs.requirement - whl_library_args["urls"] = [whl.url] - whl_library_args["sha256"] = whl.sha256 - whl_library_args["filename"] = whl.filename + whl_library_args["urls"] = [distribution.url] + whl_library_args["sha256"] = distribution.sha256 + 
whl_library_args["filename"] = distribution.filename if pip_attr.netrc: whl_library_args["netrc"] = pip_attr.netrc if pip_attr.auth_patterns: @@ -282,10 +298,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca # This is no-op because pip is not used to download the wheel. whl_library_args.pop("download_only", None) else: - # TODO @aignas 2024-03-29: in the future we should probably just - # use an `sdist` but having this makes it easy to debug issues - # in early development stages. - fail("Could not find whl for: {}".format(requirement_line)) + print("WARNING: falling back to pip for installing the right file for {}".format(requirement_line)) # buildifier: disable=print # We sort so that the lock-file remains the same no matter the order of how the # args are manipulated in the code going before. Maybe this will not be needed diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index 8d6fa7d70c..e716831d5a 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -91,7 +91,7 @@ def simpleapi_download(ctx, *, attr, cache): cache = cache, **download_kwargs ) - if download_kwargs.get("block") == False: + if hasattr(result, "wait"): # We will process it in a separate loop: async_downloads.setdefault(pkg_normalized, []).append( struct( @@ -284,7 +284,8 @@ def parse_simple_api_html(*, url, content): present, then the 'metadata_url' is also present. Defaults to "". * metadata_url: The URL for the METADATA if we can download it. Defaults to "". 
""" - packages = [] + sdists = {} + whls = {} lines = content.split(" Date: Fri, 5 Apr 2024 09:04:57 +0900 Subject: [PATCH 66/70] test: fix tests --- tests/private/pypi_index/pypi_index_tests.bzl | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl index 4f9a7e4ffb..d62295c226 100644 --- a/tests/private/pypi_index/pypi_index_tests.bzl +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -88,13 +88,39 @@ def _test_parse_simple_api_html(env): ), struct( filename = "foo-0.0.1.tar.gz", - metadata_sha256 = "", - metadata_url = "", sha256 = "deadbeefasource", url = "https://example.org/full-url/foo-0.0.1.tar.gz", yanked = False, ), ), + ] + + for (input, want) in tests: + html = _generate_html(input) + got = parse_simple_api_html(url = input.url, content = html) + env.expect.that_collection(got.sdists).has_size(1) + if not got: + fail("expected at least one element, but did not get anything from:\n{}".format(html)) + + actual = env.expect.that_struct( + got.sdists[want.sha256], + attrs = dict( + filename = subjects.str, + sha256 = subjects.str, + url = subjects.str, + yanked = subjects.bool, + ), + ) + actual.filename().equals(want.filename) + actual.sha256().equals(want.sha256) + actual.url().equals(want.url) + actual.yanked().equals(want.yanked) + +_tests.append(_test_parse_simple_api_html) + +def _test_parse_simple_api_html_whls(env): + # buildifier: disable=unsorted-dict-items + tests = [ ( struct( attrs = [ @@ -195,12 +221,12 @@ def _test_parse_simple_api_html(env): for (input, want) in tests: html = _generate_html(input) got = parse_simple_api_html(url = input.url, content = html) - env.expect.that_collection(got).has_size(1) + env.expect.that_collection(got.whls).has_size(1) if not got: fail("expected at least one element, but did not get anything from:\n{}".format(html)) actual = env.expect.that_struct( - got[0], + 
got.whls[want.sha256], attrs = dict( filename = subjects.str, metadata_sha256 = subjects.str, @@ -217,7 +243,7 @@ def _test_parse_simple_api_html(env): actual.url().equals(want.url) actual.yanked().equals(want.yanked) -_tests.append(_test_parse_simple_api_html) +_tests.append(_test_parse_simple_api_html_whls) def pypi_index_test_suite(name): """Create the test suite. From ed4e8ab3597dc02f20e661557ff615f754d8ccbf Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 5 Apr 2024 09:05:52 +0900 Subject: [PATCH 67/70] further fixups --- tests/private/pypi_index/pypi_index_tests.bzl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl index d62295c226..e2122b5eeb 100644 --- a/tests/private/pypi_index/pypi_index_tests.bzl +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -99,6 +99,7 @@ def _test_parse_simple_api_html(env): html = _generate_html(input) got = parse_simple_api_html(url = input.url, content = html) env.expect.that_collection(got.sdists).has_size(1) + env.expect.that_collection(got.whls).has_size(0) if not got: fail("expected at least one element, but did not get anything from:\n{}".format(html)) @@ -221,6 +222,7 @@ def _test_parse_simple_api_html_whls(env): for (input, want) in tests: html = _generate_html(input) got = parse_simple_api_html(url = input.url, content = html) + env.expect.that_collection(got.sdists).has_size(0) env.expect.that_collection(got.whls).has_size(1) if not got: fail("expected at least one element, but did not get anything from:\n{}".format(html)) From 56438668ed03bd8ba74fa9ee969255f9f93772a4 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 5 Apr 2024 11:51:34 +0900 Subject: [PATCH 68/70] Do not fail on unknown platform tags and just print a warning --- python/private/whl_target_platforms.bzl | 21 +++++-- .../whl_target_platforms_tests.bzl | 
56 ++++++++++++++++++- 2 files changed, 70 insertions(+), 7 deletions(-) diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl index c7693649a6..4e17f2b4c7 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -78,8 +78,11 @@ _CPU_ALIASES = { "aarch64": "aarch64", "arm64": "aarch64", "ppc": "ppc", + "ppc64": "ppc", "ppc64le": "ppc", "s390x": "s390x", + "armv6l": "arm", + "armv7l": "arm", } # buildifier: disable=unsorted-dict-items _OS_PREFIXES = { @@ -249,7 +252,8 @@ def whl_target_platforms(platform_tag, abi_tag = ""): for cpu in cpus ] - fail("unknown platform_tag os: {}".format(platform_tag)) + print("WARNING: ignoring unknown platform_tag os: {}".format(platform_tag)) # buildifier: disable=print + return [] def _cpu_from_tag(tag): candidate = [ @@ -262,7 +266,14 @@ def _cpu_from_tag(tag): if tag == "win32": return ["x86_32"] - elif tag.endswith("universal2") and tag.startswith("macosx"): - return ["x86_64", "aarch64"] - else: - fail("Unrecognized tag: '{}': cannot determine CPU".format(tag)) + elif tag == "win_ia64": + return [] + elif tag.startswith("macosx"): + if tag.endswith("universal2"): + return ["x86_64", "aarch64"] + elif tag.endswith("universal"): + return ["x86_64", "aarch64"] + elif tag.endswith("intel"): + return ["x86_32"] + + return [] diff --git a/tests/private/whl_target_platforms/whl_target_platforms_tests.bzl b/tests/private/whl_target_platforms/whl_target_platforms_tests.bzl index f52437fd3c..a06147b946 100644 --- a/tests/private/whl_target_platforms/whl_target_platforms_tests.bzl +++ b/tests/private/whl_target_platforms/whl_target_platforms_tests.bzl @@ -72,10 +72,62 @@ def _test_with_abi(env): _tests.append(_test_with_abi) +def _can_parse_existing_tags(env): + examples = { + "linux_armv6l": 1, + "linux_armv7l": 1, + "macosx_11_12_arm64": 1, + "macosx_11_12_i386": 1, + "macosx_11_12_intel": 1, + "macosx_11_12_universal": 2, + "macosx_11_12_universal2": 
2, + "macosx_11_12_x86_64": 1, + "manylinux1_i686": 1, + "manylinux1_x86_64": 1, + "manylinux2010_i686": 1, + "manylinux2010_x86_64": 1, + "manylinux2014_aarch64": 1, + "manylinux2014_armv7l": 1, + "manylinux2014_i686": 1, + "manylinux2014_ppc64": 1, + "manylinux2014_ppc64le": 1, + "manylinux2014_s390x": 1, + "manylinux2014_x86_64": 1, + "manylinux_11_12_aarch64": 1, + "manylinux_11_12_armv7l": 1, + "manylinux_11_12_i686": 1, + "manylinux_11_12_ppc64": 1, + "manylinux_11_12_ppc64le": 1, + "manylinux_11_12_s390x": 1, + "manylinux_11_12_x86_64": 1, + "manylinux_1_2_aarch64": 1, + "manylinux_1_2_x86_64": 1, + "musllinux_11_12_aarch64": 1, + "musllinux_11_12_armv7l": 1, + "musllinux_11_12_i686": 1, + "musllinux_11_12_ppc64le": 1, + "musllinux_11_12_s390x": 1, + "musllinux_11_12_x86_64": 1, + "win32": 1, + "win_amd64": 1, + "win_arm64": 1, + "win_ia64": 0, + } + + for major_version in [2, 10, 13]: + for minor_version in [0, 1, 2, 10, 45]: + for give, want_size in examples.items(): + give = give.replace("_11_", "_{}_".format(major_version)) + give = give.replace("_12_", "_{}_".format(minor_version)) + got = whl_target_platforms(give) + env.expect.that_str("{}: {}".format(give, len(got))).equals("{}: {}".format(give, want_size)) + +_tests.append(_can_parse_existing_tags) + def whl_target_platforms_test_suite(name): - """Create the test suite. + """create the test suite. 
- Args: + args: name: the name of the test suite """ test_suite(name = name, basic_tests = _tests) From eb67002baad77af31278f1dbc2665f4cf4f9f328 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 5 Apr 2024 16:57:56 +0900 Subject: [PATCH 69/70] Add comments --- python/pip_install/pip_repository.bzl | 2 ++ python/private/bzlmod/pip.bzl | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 8ddaffa2b6..55d61fcea0 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -800,6 +800,8 @@ def _whl_library_impl(rctx): if filename.endswith(".whl"): whl_path = rctx.path(rctx.attr.filename) else: + # It is an sdist and we need to tell pip to use a file in this directory + # and not use any indexes. extra_pip_args.extend(["--no-index", "--find-links", "."]) args = _parse_optional_attrs(rctx, args, extra_pip_args) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index ab7d7fe6cd..e87679d794 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -301,8 +301,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca print("WARNING: falling back to pip for installing the right file for {}".format(requirement_line)) # buildifier: disable=print # We sort so that the lock-file remains the same no matter the order of how the - # args are manipulated in the code going before. Maybe this will not be needed - # in the future. + # args are manipulated in the code going before.
whl_library(name = repo_name, **dict(sorted(whl_library_args.items()))) whl_map[hub_name].setdefault(whl_name, []).append( whl_alias( From d984559756d1a2a111861692769b10452fff9bc4 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 5 Apr 2024 17:00:42 +0900 Subject: [PATCH 70/70] bump rules_testing in non-bzlmod --- internal_deps.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal_deps.bzl b/internal_deps.bzl index f3be3247e5..2ef0dc5751 100644 --- a/internal_deps.bzl +++ b/internal_deps.bzl @@ -57,9 +57,9 @@ def rules_python_internal_deps(): http_archive( name = "rules_testing", - sha256 = "b84ed8546f1969d700ead4546de9f7637e0f058d835e47e865dcbb13c4210aed", - strip_prefix = "rules_testing-0.5.0", - url = "https://github.com/bazelbuild/rules_testing/releases/download/v0.5.0/rules_testing-v0.5.0.tar.gz", + sha256 = "02c62574631876a4e3b02a1820cb51167bb9cdcdea2381b2fa9d9b8b11c407c4", + strip_prefix = "rules_testing-0.6.0", + url = "https://github.com/bazelbuild/rules_testing/releases/download/v0.6.0/rules_testing-v0.6.0.tar.gz", ) http_archive(