From 9a638ea7751aef414cec4a12b56862799b359ece Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 23 Apr 2024 08:29:02 +0900 Subject: [PATCH] feat(bzlmod): add a flag to control if the parallel downloading is used (#1854) I have found this to be useful when debugging auth issues when using a private repo and I thought that having it configurable from the user's MODULE.bazel is a better user experience. Ammending #1827. --- CHANGELOG.md | 4 +++- python/private/bzlmod/pip.bzl | 18 ++++++++++++++++++ python/private/pypi_index.bzl | 16 ++++------------ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4c059ec5f..2c2b388f1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,7 +70,9 @@ A brief description of the categories of changes: `experimental_index_url_overrides` to `pip.parse` for using the bazel downloader. If you see any issues, report in [#1357](https://github.com/bazelbuild/rules_python/issues/1357). The URLs for - the whl and sdist files will be written to the lock file. + the whl and sdist files will be written to the lock file. Controlling whether + the downloading of metadata is done in parallel can be done using + `parallel_download` attribute. [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 [python_default_visibility]: gazelle/README.md#directive-python_default_visibility diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 2ba275bb0e..3d5c0f5a86 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -199,6 +199,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_ca auth_patterns = pip_attr.auth_patterns, ), cache = simpleapi_cache, + parallel_download = pip_attr.parallel_download, ) major_minor = _major_minor_version(pip_attr.python_version) @@ -539,6 +540,23 @@ hubs can be created, and each program can use its respective hub's targets. Targets from different hubs should not be used together. """, ), + "parallel_download": attr.bool( + doc = """\ +The flag allows to make use of parallel downloading feature in bazel 7.1 and above +when the bazel downloader is used. This is by default enabled as it improves the +performance by a lot, but in case the queries to the simple API are very expensive +or when debugging authentication issues one may want to disable this feature. + +NOTE, This will download (potentially duplicate) data for multiple packages if +there is more than one index available, but in general this should be negligible +because the simple API calls are very cheap and the user should not notice any +extra overhead. + +If we are in synchronous mode, then we will use the first result that we +find in case extra indexes are specified. +""", + default = True, + ), "python_version": attr.string( mandatory = True, doc = """ diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl index e716831d5a..28f1007b48 100644 --- a/python/private/pypi_index.bzl +++ b/python/private/pypi_index.bzl @@ -23,7 +23,7 @@ load(":auth.bzl", "get_auth") load(":envsubst.bzl", "envsubst") load(":normalize_name.bzl", "normalize_name") -def simpleapi_download(ctx, *, attr, cache): +def simpleapi_download(ctx, *, attr, cache, parallel_download = True): """Download Simple API HTML. Args: @@ -49,6 +49,7 @@ def simpleapi_download(ctx, *, attr, cache): undesirable because additions to the PyPI index would not be reflected when re-evaluating the extension unless we do `bazel clean --expunge`. + parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads. Returns: dict of pkg name to the parsed HTML contents - a list of structs. @@ -60,17 +61,8 @@ def simpleapi_download(ctx, *, attr, cache): download_kwargs = {} if bazel_features.external_deps.download_has_block_param: - download_kwargs["block"] = False - - # Download in parallel if possible. This will download (potentially - # duplicate) data for multiple packages if there is more than one index - # available, but that is the price of convenience. However, that price - # should be mostly negligible because the simple API calls are very cheap - # and the user should not notice any extra overhead. - # - # If we are in synchronous mode, then we will use the first result that we - # find. - # + download_kwargs["block"] = not parallel_download + # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes # to replicate how `pip` would handle this case. async_downloads = {}